In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [2]:
import imagedataset as imgdat
import fishervector as fv

In [3]:
# Fetch the dataset annotations from the project-local `imagedataset` module.
# They are consumed further down by descriptor generation and by the
# Fisher-vector feature extraction step.
# NOTE(review): the structure of the returned object is not visible here —
# see imagedataset.GetAnnotations for details.
annotations = imgdat.GetAnnotations()

In [4]:
# Reuse the descriptors generated by a previous run when they can be loaded;
# otherwise compute them from the annotations.
try:
    all_descriptors = imgdat.LoadAllDescriptors()
except Exception as err:
    # `except Exception` (rather than a bare `except:`) keeps the best-effort
    # fallback but lets KeyboardInterrupt / SystemExit propagate, so a long
    # load can still be interrupted without silently triggering a full
    # regeneration. The reason for the fallback is reported instead of being
    # swallowed.
    print("Could not load cached descriptors (%r); regenerating them." % (err,))
    all_descriptors = imgdat.GenerateAllDescriptors(annotations)

Loading all sift descriptors in dataset from file...
All descriptors loaded ! 


In [5]:
# Standardize every SIFT descriptor before any representation algorithm sees it.
# The fitted scaler is kept around because the Fisher-vector feature
# extraction below receives it as an argument.
sc_dscs = StandardScaler()
sc_dscs.fit(all_descriptors)
all_descriptors = sc_dscs.transform(all_descriptors)

In [6]:
#Clustering descriptors using KMeans in 120 classes 
try:
    gmm_fv = fv.LoadKmeans()
except:
    n_clusters = 10
    gmm_fv = fv.GenerateGmm(all_descriptors, n_clusters)

Training GMM of size 10
Initialization 0
  Iteration 0	 time lapse 7.98771s	 ll change inf
  Iteration 10	 time lapse 11.21843s	 ll change 0.81235
  Iteration 20	 time lapse 13.04296s	 ll change 0.31420
  Iteration 30	 time lapse 11.96479s	 ll change 0.28584
  Iteration 40	 time lapse 13.09017s	 ll change 0.10359
  Iteration 50	 time lapse 12.60143s	 ll change 0.03044
  Iteration 60	 time lapse 13.20818s	 ll change 0.02088
  Iteration 70	 time lapse 11.27446s	 ll change 0.00337
  Iteration 80	 time lapse 11.28994s	 ll change 0.00448
  Iteration 90	 time lapse 11.37298s	 ll change 0.00148
Initialization converged: False	 time lapse 127.14686s	 ll -106.82201
GaussianMixture(covariance_type='diag', init_params='kmeans', max_iter=100,
        means_init=None, n_components=10, n_init=1, precisions_init=None,
        random_state=None, reg_covar=1e-06, tol=0.001, verbose=2,
        verbose_interval=10, warm_start=False, weights_init=None)
Saving gmm at: means.gmm.npy, covs.gmm.npy, weights.g



In [7]:
# Image representation: Fisher-vector features (X_fv) and labels (y_fv).
# Cached arrays are reused when both files can be read; otherwise the features
# are recomputed and written back to disk.
# NOTE: the cache is loaded without checking that it was produced with the
# current `gmm_fv` / `sc_dscs`. Delete fv_X.npy and fv_y.npy after refitting
# the GMM or the scaler to force a recompute.
try:
    X_fv = np.load("fv_X.npy")
    y_fv = np.load("fv_y.npy")
except Exception as err:
    # `except Exception` preserves the regenerate-on-any-load-failure
    # behaviour but lets KeyboardInterrupt / SystemExit through, and the
    # failure reason is printed instead of being swallowed.
    print("Could not load cached Fisher features (%r); recomputing them." % (err,))
    X_fv, y_fv = fv.FisherFeatures(annotations, gmm_fv, sc_dscs)
    # np.save appends the ".npy" extension, matching the names loaded above.
    np.save("fv_X", X_fv)
    np.save("fv_y", y_fv)

In [8]:
# Classification
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split reproducible between runs.
(X_train_fv, X_test_fv,
 y_train_fv, y_test_fv) = train_test_split(
    X_fv,
    y_fv,
    test_size=0.2,
    random_state=0,
)

In [9]:
# Standardize the representation features. The scaler is fitted on the
# training split only and then applied to both splits, so no statistics from
# the test set leak into training.
sc_repr_fv = StandardScaler().fit(X_train_fv)
X_train_fv = sc_repr_fv.transform(X_train_fv)
X_test_fv = sc_repr_fv.transform(X_test_fv)

In [10]:
# Train a shallow random forest on the scaled Fisher-vector features, then
# report train/test accuracy and the confusion matrix on the test split.
clf_rf = RandomForestClassifier(max_depth=2, random_state=0)
clf_rf.fit(X_train_fv, y_train_fv)

print('==========')
print('train score:' + str(clf_rf.score(X_train_fv,y_train_fv)))
print('test score:' + str(clf_rf.score(X_test_fv,y_test_fv)))

# predict() already returns class labels, so they are passed to
# confusion_matrix unchanged. The former `y_pred > 0.5` thresholding only made
# sense for probability outputs; on labels it merely cast 0/1 to booleans and
# would have produced a wrong matrix for any other label encoding.
y_pred = clf_rf.predict(X_test_fv)
cm = confusion_matrix(y_test_fv, y_pred)
print(cm)

train score:0.839694656489
test score:0.747474747475
[[15 23]
 [ 2 59]]
