In [1]:
import os
import numpy as np
from yael.yael import ynumpy
from utils.DatasetManager import DatasetManager
from utils.Kernels import histogram_intersection_kernel
import pickle

In [2]:
# Locations of the pickled descriptor files for the train and test splits.
desc_dir = "/home/joan/workspace/MCV_M3/M3_project/SavePath/desc_fisher"
train_desc_path = desc_dir + "/train.dat"
test_desc_path = desc_dir + "/test.dat"

In [3]:
# Load the pickled descriptor collections for both splits. The files are
# opened in `with` blocks so the handles are closed as soon as loading ends.
# NOTE: pickle can execute arbitrary code while loading -- only point these
# paths at files produced by this project.
with open(train_desc_path, 'rb') as train_desc_file:
    train_desc = pickle.load(train_desc_file)
with open(test_desc_path, 'rb') as test_desc_file:
    test_desc = pickle.load(test_desc_file)

# Stack the training descriptor arrays vertically into one big matrix.
all_train_desc = np.vstack(train_desc)

In [4]:
k = 64                # number of Gaussians in the mixture
n_sample = k * 5000   # number of descriptors used to fit the mixture

# Dedicated, seeded generator: the drawn sample is the same on every run and
# NumPy's global random state is left untouched.
sample_rng = np.random.RandomState(0)

# choose n_sample descriptors at random. Draw without replacement whenever the
# pool is large enough, so the training sample holds no duplicated rows; fall
# back to sampling with replacement only when fewer rows are available.
n_available = all_train_desc.shape[0]
sample_indices = sample_rng.choice(n_available, n_sample,
                                   replace=n_sample > n_available)
sample = all_train_desc[sample_indices]

# Convert the sample to float32 before fitting
# (original note: descriptors were stored as uint8 -- TODO confirm).
sample = sample.astype('float32')

# learn a k-component GMM on the sampled descriptors
gmm = ynumpy.gmm_learn(sample, k)

In [5]:
# One Fisher vector per entry of train_desc, computed against the learned GMM.
# include='mu' is passed through to Yael unchanged (selects the mean-derivative
# part of the Fisher vector according to Yael's documentation).
train_fvs = [
    ynumpy.fisher(gmm, image_desc, include='mu')
    for image_desc in train_desc
]

In [6]:
# One Fisher vector per entry of test_desc, using the same GMM and the same
# include='mu' setting as for the training split.
test_fvs = [
    ynumpy.fisher(gmm, image_desc, include='mu')
    for image_desc in test_desc
]

In [7]:
train_data_path = "/home/joan/workspace/MCV_M3/M3_project/SavePath/data_fisher/train.dat"
test_data_path = "/home/joan/workspace/MCV_M3/M3_project/SavePath/data_fisher/test.dat"

# Save computed data. The `with` blocks guarantee each file is flushed and
# closed once its dump finishes, instead of relying on garbage collection.
with open(train_data_path, 'wb') as train_data_file:
    pickle.dump(train_fvs, train_data_file, protocol=2)
with open(test_data_path, 'wb') as test_data_file:
    pickle.dump(test_fvs, test_data_file, protocol=2)

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import normalize

In [9]:
train_data_path = "/home/joan/workspace/MCV_M3/M3_project/SavePath/data_fisher/train.dat"
test_data_path = "/home/joan/workspace/MCV_M3/M3_project/SavePath/data_fisher/test.dat"

# Only the 2nd and 4th values returned by load_dataset() are kept; they are
# used below as the train / test labels.
dataset = DatasetManager("/home/joan/workspace/MCV_M3/M3_project/Databases/MIT_split")
_, train_labels, _, test_labels = dataset.load_dataset()

# Reload the saved Fisher vectors, closing each file right after reading.
# NOTE: pickle can execute arbitrary code while loading -- only load files
# written by this project.
with open(train_data_path, 'rb') as train_data_file:
    train_fvs = pickle.load(train_data_file)
with open(test_data_path, 'rb') as test_data_file:
    test_fvs = pickle.load(test_data_file)

# Turn the loaded lists into arrays for the classifier.
train_data = np.array(train_fvs)
test_data = np.array(test_fvs)

In [10]:
# Cross-validated SVM with a custom (callable) kernel. The grid holds a single
# candidate, so the search amounts to one K_FOLDS-fold evaluation plus a refit.
K_FOLDS = 5
# NOTE(review): gamma presumably has no effect with a callable kernel -- confirm.
PARAMETERS = dict(
    C=[0.00134],
    gamma=["scale"],
    kernel=[histogram_intersection_kernel],
)

cv = GridSearchCV(
    estimator=SVC(),
    param_grid=PARAMETERS,
    cv=K_FOLDS,
    n_jobs=-1,   # use every available core
    verbose=5,
)
cv.fit(train_data, train_labels)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:  9.1min remaining: 13.7min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  9.3min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'kernel': [<function histogram_intersection_kernel at 0x7ff82c0d83d0>], 'C': [0.00134], 'gamma': ['scale']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=5)

In [11]:
# Score the fitted search object on both splits.
train_score = cv.score(train_data, train_labels)
test_score = cv.score(test_data, test_labels)

# Report the two scores, the selected parameters, then the full CV table.
summary_template = "Train accuracy score: {}\nTest accuracy score: {}\nBest params: {}\n"
print(summary_template.format(train_score, test_score, cv.best_params_))
print("All results: {}".format(cv.cv_results_))

Train accuracy score: 0.999468367889
Test accuracy score: 0.877323420074
Best params: {'kernel': <function histogram_intersection_kernel at 0x7ff82c0d83d0>, 'C': 0.00134, 'gamma': 'scale'}

All results: {'std_train_score': array([0.00026587]), 'split4_test_score': array([0.86021505]), 'split0_train_score': array([0.99933422]), 'rank_test_score': array([1], dtype=int32), 'param_gamma': masked_array(data=['scale'],
             mask=[False],
       fill_value='?',
            dtype=object), 'split2_train_score': array([0.99933555]), 'std_score_time': array([1.92198117]), 'split4_train_score': array([0.99933731]), 'split2_test_score': array([0.88297872]), 'mean_score_time': array([52.24375176]), 'mean_fit_time': array([222.23916001]), 'param_kernel': masked_array(data=[<function histogram_intersection_kernel at 0x7ff82c0d83d0>],
             mask=[False],
       fill_value='?',
            dtype=object), 'param_C': masked_array(data=[0.00134],
             mask=[False],
       fill_value=