M3. Research Fisher Vectors

In [2]:
import numpy as np
import pickle
import os

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import normalize

from utils.CodeTimer import CodeTimer
from utils.DatasetManager import DatasetManager
from descriptors.SIFT import DenseSIFT as DenseSIFT_
from descriptors.VisualWords import VisualWords as VisualWords_
from utils.Kernels import histogram_intersection_kernel
from fishervector import FisherVectorGMM

In [3]:
dataset = DatasetManager('../Databases/MIT_split')
train_img_paths, train_labels, test_img_paths, test_labels = dataset.load_dataset()
SAVE_PATH = '../SavePath/'

In [None]:
train_desc_path = SAVE_PATH + 'desc_fisher' + os.sep + 'train.dat'
test_desc_path = SAVE_PATH  + 'desc_fisher' + os.sep + 'test.dat'
train_data_path = SAVE_PATH + 'data_fisher' + os.sep + 'train.dat'
test_data_path = SAVE_PATH  + 'data_fisher' + os.sep + 'test.dat'

train_desc = DenseSIFT.compute(train_img_paths, STEP_SIZE, DESC_SIZE)
test_desc = DenseSIFT.compute(test_img_paths, STEP_SIZE, DESC_SIZE)
# Save computed data
pickle.dump(train_desc, open(train_desc_path, 'wb'))
pickle.dump(test_desc, open(test_desc_path, 'wb'))

In [14]:
N_CLUSTERS = 896
STEP_SIZE = 16
DESC_SIZE = [8,16]
K_FOLDS = 5
PARAMETERS = {
    'C': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1],
    'gamma': ["scale"],
    'kernel': ["rbf", "sigmoid", histogram_intersection_kernel]}

Example given in the github of fishervector library

In [26]:
import numpy as np
shape = [300, 20, 32] # e.g. SIFT image features
image_data = np.random.rand(300,256,64)
print(image_data.shape)
#image_data = np.concatenate([np.random.normal(np.ones(30), size=shape), np.random.normal(np.ones(30), size=shape)], axis=0)
from fishervector import FisherVectorGMM
fv_gmm_test = FisherVectorGMM(n_kernels=15).fit(image_data)
image_data_test = image_data[:10] # use a fraction of the data to compute the fisher vectors
fv = fv_gmm_test.predict(image_data_test)
print(fv.shape)

(300, 256, 64)
fitted GMM with 15 kernels
(10, 30, 64)


In [13]:
STEP_SIZE = 16
DESC_SIZE = [8,16]
N_CLUSTERS = 256
N_KERNELS = 15
K_FOLDS = 5
PARAMETERS = {
    'C': [1e-1],
    'gamma': ["scale"],
    'kernel': ["rbf", "sigmoid", histogram_intersection_kernel]}

In [14]:
VisualWords = VisualWords_(N_CLUSTERS)
VisualWords.fit(train_desc)
vw_train_data = VisualWords.get_visual_words(train_desc)
print(np.array(vw_train_data).shape)
vw_test_data = VisualWords.get_visual_words(test_desc)

(1881, 256)


In [6]:
train_desc_path = SAVE_PATH + 'desc_fisher' + os.sep + 'train.dat'
test_desc_path = SAVE_PATH  + 'desc_fisher' + os.sep + 'test.dat'
train_data_path = SAVE_PATH + 'data_fisher' + os.sep + 'train.dat'
test_data_path = SAVE_PATH  + 'data_fisher' + os.sep + 'test.dat'
# Check for existing data files already computed
train_desc = pickle.load(open(train_desc_path, 'rb'))
test_desc = pickle.load(open(test_desc_path, 'rb'))
# Obtain Fisher Vectors for train and test sets
print("Compute FisherVectors Gaussian Mixture Model")
fv_gmm = FisherVectorGMM(n_kernels=N_KERNELS).fit(np.array(train_desc))

Compute FisherVectors Gaussian Mixture Model
fitted GMM with 15 kernels


In [33]:
train_data_500 = fv_gmm.predict(np.array(train_desc)[0:500])
train_data_500_1000 = fv_gmm.predict(np.array(train_desc)[500:1000])
train_data_1000_1500 = fv_gmm.predict(np.array(train_desc)[1000:1500])
train_data_1500_end = fv_gmm.predict(np.array(train_desc)[1500:np.array(train_desc).shape[0]])
train_data = np.concatenate((train_data_500, train_data_500_1000, train_data_1000_1500, train_data_1500_end), axis=0)
pickle.dump(train_data, open(train_data_path, 'wb'))

test_data_500  = fv_gmm.predict(np.array(test_desc)[0:500])
test_data_500_end  = fv_gmm.predict(np.array(test_desc)[500:np.array(test_desc).shape[0]])
test_data = np.concatenate((test_data_500, test_data_500_end), axis=0)
pickle.dump(test_data, open(test_data_path, 'wb'))

In [36]:
train_data = pickle.load(open(train_data_path, 'rb'))
test_data = pickle.load(open(test_data_path, 'rb'))
        
with CodeTimer("Train SVM"):
    print("Train SVM")
    cvNotNormalized = GridSearchCV(SVC(), param_grid=PARAMETERS, cv=K_FOLDS, n_jobs=-1, verbose=5)
    cvNotNormalized.fit(train_data, train_labels)

# Test SVM
with CodeTimer("Test SVM"):
    trainNotNormalized_score = cvNotNormalized.score(train_data, train_labels)
    testNotNormalized_score = cvNotNormalized.score(test_data, test_labels)
        
print("Normalized Train accuracy score: {}\nTest accuracy score: {}\nBest params: {}\n".format(trainNormalized_score, testNormalized_score, cvNormalized.best_params_))
print("All results: {}".format(cvNormalized.cv_results_))
print("Not Normalized Train accuracy score: {}\nTest accuracy score: {}\nBest params: {}\n".format(trainNotNormalized_score, testNotNormalized_score, cvNotNormalized.best_params_))
print("All results: {}".format(cvNotNormalized.cv_results_))

Train SVM
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:    1.8s remaining:    5.0s
[Parallel(n_jobs=-1)]: Done   8 out of  15 | elapsed:    1.8s remaining:    1.6s
[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:    1.8s remaining:    0.5s


Train SVM: 2.2763378620147705 s


ValueError: Found array with dim 3. Estimator expected <= 2.