In [1]:
import numpy as np
import pandas as pd

from collections import Counter
from scipy.stats import kendalltau
from sklearn.cluster import KMeans
from ast import literal_eval
import torch.nn as nn
from sklearn.neural_network import MLPClassifier
from scipy.stats import kurtosis
import pickle
from joblib import dump, load

In [2]:
query_embedding_df = pd.read_csv('../../Embeddings/CNN_Image_Retrieval/caltech101_700-query-features.csv')
db_embedding_df    = pd.read_csv('../../Embeddings/CNN_Image_Retrieval/caltech101_700-dataset-features.csv')

In [3]:
def get_embeddings(query_embeddings, dataset_embeddings):
     
    num_images   =  len(dataset_embeddings)
    num_queries  =  len(query_embeddings)
    
    print('Generating query embedding array')
    
    q_embeddings = []
    
    for query_id in range(num_queries):
        image_name = query_embeddings.iloc[query_id]['image_name']
        query_embedding = np.array(literal_eval(query_embeddings.iloc[query_id]['embedding']))
        q_embeddings.append(query_embedding)

    q_embeddings = np.array(q_embeddings)
    
    
    print('Generating dataset embedding_array')
    
    db_embeddings = []
    
    for item_id in range(num_images):
        if(item_id%1000 == 0):
            print(item_id)
        image_embedding = literal_eval(dataset_embeddings.iloc[item_id]['embedding'])
        db_embeddings.append(image_embedding)
    
    db_embeddings = np.array(db_embeddings)    
    
    return q_embeddings,db_embeddings


In [4]:
query_emb, db_emb = get_embeddings(query_embedding_df,db_embedding_df)

Generating query embedding array
Generating dataset embedding_array
0
1000
2000
3000
4000
5000
6000
7000
8000


In [5]:
clustering = KMeans(150)
clustering.fit(db_emb)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=150, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [6]:
x_train = db_emb
y_train = clustering.predict(db_emb)

In [7]:
clf = MLPClassifier(hidden_layer_sizes= (50,50),verbose=True, early_stopping=True,random_state=1).fit(x_train, y_train)

Iteration 1, loss = 4.96347620
Validation score: 0.015385
Iteration 2, loss = 4.66563335
Validation score: 0.080473
Iteration 3, loss = 4.06338962
Validation score: 0.157396
Iteration 4, loss = 3.36545771
Validation score: 0.252071
Iteration 5, loss = 2.78977111
Validation score: 0.349112
Iteration 6, loss = 2.34652025
Validation score: 0.410651
Iteration 7, loss = 2.01374255
Validation score: 0.471006
Iteration 8, loss = 1.75768572
Validation score: 0.525444
Iteration 9, loss = 1.55180047
Validation score: 0.557396
Iteration 10, loss = 1.39414542
Validation score: 0.596450
Iteration 11, loss = 1.26194248
Validation score: 0.624852
Iteration 12, loss = 1.15519062
Validation score: 0.623669
Iteration 13, loss = 1.05980249
Validation score: 0.660355
Iteration 14, loss = 0.98346797
Validation score: 0.675740
Iteration 15, loss = 0.91701052
Validation score: 0.691124
Iteration 16, loss = 0.85912351
Validation score: 0.698225
Iteration 17, loss = 0.80321332
Validation score: 0.710059
Iterat

In [10]:
scores = []
for i in range(preds.shape[0]):
    score = kurtosis(preds[i])
    scores.append(score)
    
scores = np.array(scores)
result_df = pd.DataFrame({'path': np.array(query_embedding_df['image_name']), 'score': scores})
result_df.to_csv('../../Results/kurtosis-cnnimageretrieval-caltech101_700.csv',index=False)

In [11]:
scores = []
for i in range(preds.shape[0]):
    score = np.std(preds[i])
    scores.append(score)
    
scores = np.array(scores)
result_df = pd.DataFrame({'path': np.array(query_embedding_df['image_name']), 'score': scores})
result_df.to_csv('../../Results/head-deviation-cnnimageretrieval-caltech101_700.csv',index=False)
