In [11]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC, LinearSVC

In [12]:
import imagedataset as imgdat
import fishervector as fv

In [13]:
# Load the dataset annotations through the project-local `imagedataset` helper.
# The result is reused further down: it is passed to the descriptor generator
# (on a cache miss) and to the Fisher-vector feature extraction.
# NOTE(review): the structure of the returned object is defined inside
# `imagedataset`, not in this notebook -- confirm there before relying on it.
annotations = imgdat.GetAnnotations()

In [14]:
# Load the previously generated descriptors if they are available; otherwise
# regenerate them from the annotations.
# `except Exception` (instead of a bare `except:`) keeps the fallback for any
# loader failure while letting KeyboardInterrupt / SystemExit propagate, so
# interrupting the kernel during the load does not silently start a full
# regeneration run.
try:
    all_descriptors = imgdat.LoadAllDescriptors()
except Exception:
    all_descriptors = imgdat.GenerateAllDescriptors(annotations)

Loading all sift descriptors in dataset from file...
All descriptors loaded ! 


In [15]:
# Standardise every SIFT descriptor before any representation algorithm
# processes it. The fitted scaler stays available as `sc_dscs` because the
# Fisher-vector feature extraction receives it later on.
# NOTE(review): `all_descriptors` is overwritten with its scaled version, so
# re-running only this cell would fit `sc_dscs` on already-scaled data --
# re-run from the descriptor-loading cell instead.
sc_dscs = StandardScaler().fit(all_descriptors)
all_descriptors = sc_dscs.transform(all_descriptors)

In [16]:
# Obtain the Gaussian mixture model used for the Fisher-vector encoding:
# reuse a saved model when one can be loaded, otherwise train a GMM with
# `n_clusters` components on the (already standardised) descriptors.
# NOTE(review): the loader is named `LoadKmeans` while the fallback trains a
# GMM -- confirm in `fishervector` that both return the same kind of model.
# `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt /
# SystemExit propagate rather than triggering a lengthy retraining.
try:
    gmm_fv = fv.LoadKmeans()
except Exception:
    n_clusters = 10  # number of mixture components for the fallback training
    gmm_fv = fv.GenerateGmm(all_descriptors, n_clusters)

Training GMM of size 10
Initialization 0
  Iteration 0	 time lapse 13.75431s	 ll change inf
  Iteration 10	 time lapse 11.42145s	 ll change 0.80572
  Iteration 20	 time lapse 12.66631s	 ll change 0.15752
  Iteration 30	 time lapse 10.83891s	 ll change 0.05631
  Iteration 40	 time lapse 11.65419s	 ll change 0.06779
  Iteration 50	 time lapse 11.05441s	 ll change 0.03430
  Iteration 60	 time lapse 10.84238s	 ll change 0.03212
  Iteration 70	 time lapse 11.98666s	 ll change 0.04236
  Iteration 80	 time lapse 13.22512s	 ll change 0.00114
Initialization converged: True	 time lapse 108.48788s	 ll -105.92980
GaussianMixture(covariance_type='diag', init_params='kmeans', max_iter=100,
        means_init=None, n_components=10, n_init=1, precisions_init=None,
        random_state=None, reg_covar=1e-06, tol=0.001, verbose=2,
        verbose_interval=10, warm_start=False, weights_init=None)
Saving gmm at: means.gmm.npy, covs.gmm.npy, weights.gmm.npy


In [17]:
# Image representation: Fisher-vector features and their labels.
# The arrays are cached on disk as "fv_X.npy" / "fv_y.npy" (np.save appends
# the ".npy" suffix to the names given below). If either file cannot be
# loaded, both arrays are recomputed and the cache is rewritten.
# `except Exception` (instead of a bare `except:`) keeps that fallback for any
# load failure while letting KeyboardInterrupt / SystemExit propagate, so
# interrupting the kernel does not start the feature extraction.
try:
    X_fv = np.load("fv_X.npy")
    y_fv = np.load("fv_y.npy")
except Exception:
    X_fv, y_fv = fv.FisherFeatures(annotations, gmm_fv, sc_dscs)
    np.save("fv_X", X_fv)
    np.save("fv_y", y_fv)

In [18]:
# Classification
# Hold out 20% of the samples as a test set; the fixed `random_state` makes
# the split reproducible between runs.
X_train_fv, X_test_fv, y_train_fv, y_test_fv = train_test_split(
    X_fv,
    y_fv,
    test_size=0.2,
    random_state=0,
)

In [19]:
# Standardise the Fisher-vector features. The scaler is fitted on the
# training split only and then applied unchanged to both splits, so no
# statistics from the test set influence the scaling.
# NOTE(review): both splits are overwritten in place; re-running only this
# cell would scale them a second time.
sc_repr_fv = StandardScaler().fit(X_train_fv)
X_train_fv = sc_repr_fv.transform(X_train_fv)
X_test_fv = sc_repr_fv.transform(X_test_fv)

In [24]:
# One-vs-the-rest (OvR) multiclass/multilabel strategy wrapping a
# C-Support Vector Classifier with a sigmoid kernel.
clf_ovr = OneVsRestClassifier(SVC(kernel='sigmoid', C=1.0))
clf_ovr.fit(X_train_fv, y_train_fv)
print('==========')
# `score` reports the mean accuracy on the given split.
print('train score:' + str(clf_ovr.score(X_train_fv,y_train_fv)))
print('test score:' + str(clf_ovr.score(X_test_fv,y_test_fv)))
# `predict` already returns discrete class labels, so they are passed to the
# confusion matrix as-is. The former `y_pred > 0.5` thresholding was a no-op
# for 0/1 labels and would silently collapse classes for any other label
# encoding.
y_pred = clf_ovr.predict(X_test_fv)
cm = confusion_matrix(y_test_fv, y_pred)
print(cm)

train score:0.984732824427
test score:0.919191919192
[[31  7]
 [ 1 60]]
