# Deep features for image retrieval

In [1]:
import pandas as pd
import numpy as np

In [62]:
train_data = pd.read_csv('./image_train_data.csv')
test_data = pd.read_csv('./image_test_data.csv')

In [63]:
train_data.head()

Unnamed: 0,id,image,label,deep_features,image_array
0,24,Height: 32 Width: 32,bird,[0.242872 1.09545 0 0.39363 0 0 11.8949 0 0 0 ...,[73 77 58 71 68 50 77 69 44 120 116 83 125 120...
1,33,Height: 32 Width: 32,cat,[0.525088 0 0 0 0 0 9.94829 0 0 0 0 0 1.01264 ...,[7 5 8 7 5 8 5 4 6 7 4 7 11 5 9 11 5 9 17 11 1...
2,36,Height: 32 Width: 32,cat,[0.566016 0 0 0 0 0 9.9972 0 0 0 1.38345 0 0.7...,[169 122 65 131 108 75 193 196 192 218 221 222...
3,70,Height: 32 Width: 32,dog,[1.1298 0 0 0.778194 0 0.758051 9.83053 0 0 0....,[154 179 152 159 183 157 165 189 162 174 199 1...
4,90,Height: 32 Width: 32,bird,[1.71787 0 0 0 0 0 9.33936 0 0 0 0 0 0.412137 ...,[216 195 180 201 178 160 210 184 164 212 188 1...


# Train a classifier on the raw image pixels

In [64]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import LabelEncoder

In [65]:
raw_pixel_model = LogisticRegressionCV()

In [66]:
label_encode = LabelEncoder()

In [67]:
train_data['image_array'] = train_data['image_array'].apply(lambda x :[int(i) for i in x[1:-1].split(' ')])
test_data['image_array'] = test_data['image_array'].apply(lambda x :[int(i) for i in x[1:-1].split(' ')])

In [68]:
y_train = label_encode.fit_transform(train_data['label'])

In [69]:
X_train = [i for i in train_data['image_array'].values]

In [70]:
raw_pixel_model.fit(X_train, y_train)

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
           refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0)

# Evaluating raw pixel model on test data

In [71]:
from sklearn.metrics import accuracy_score

In [72]:
y_test = label_encode.transform(test_data['label'])

In [73]:
X_test = [i for i in test_data['image_array'].values]

In [74]:
y_pred = raw_pixel_model.predict(X_test)

In [75]:
accuracy_score(y_test, y_pred)

0.4385

# Using deep features model

In [78]:
train_data['deep_features'] = train_data['deep_features'].apply(lambda x :[float(i) for i in x[1:-1].split(' ')])
test_data['deep_features'] = test_data['deep_features'].apply(lambda x :[float(i) for i in x[1:-1].split(' ')])

In [79]:
X_train = [i for i in train_data['deep_features'].values]
X_test = [i for i in test_data['deep_features'].values]

In [81]:
y_train = label_encode.transform(train_data['label'])
y_test = label_encode.transform(test_data['label'])

In [82]:
deep_features_model = LogisticRegressionCV()

In [83]:
deep_features_model.fit(X_train, y_train)

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
           refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0)

In [84]:
y_pred = deep_features_model.predict(X_test)

In [85]:
accuracy_score(y_test, y_pred)

0.8075

# image retrieval

In [86]:
from sklearn.neighbors import KNeighborsClassifier

In [87]:
knn_model = KNeighborsClassifier()

In [88]:
knn_model.fit(X_train, train_data['id'].values)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [120]:
cat = X_train[18:19]

In [121]:
dist, ind = knn_model.kneighbors(cat)
label = train_data['label'][ind[0]]

In [126]:
pd.DataFrame({'id': ind[0], 'dist': dist[0], 'label': label})

Unnamed: 0,id,dist,label
18,18,0.0,cat
288,288,36.940312,cat
1565,1565,38.463489,cat
1468,1468,39.755967,cat
1633,1633,39.786597,cat
