In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [2]:
import imagedataset as imgdat
import bagofvisualwords as bovw

In [3]:
# Fetch the dataset annotations from the project's imagedataset module.
# The result is passed to descriptor generation and to the BoVW feature
# extraction in later cells.
annotations = imgdat.GetAnnotations()

In [4]:
# Reuse the SIFT descriptors saved by a previous run when they can be loaded;
# otherwise regenerate them from the annotations.
try:
    all_descriptors = imgdat.LoadAllDescriptors()
except Exception:
    # `except Exception` (instead of a bare `except:`) keeps the fallback for
    # any loading failure while letting KeyboardInterrupt / SystemExit
    # propagate, so interrupting the kernel during the load no longer kicks
    # off a full descriptor regeneration.
    all_descriptors = imgdat.GenerateAllDescriptors(annotations)

Loading all sift descriptors in dataset from file...
All descriptors loaded ! 


In [5]:
# Standardize every SIFT descriptor before any representation algorithm sees it.
# The fitted scaler is kept in `sc_dscs` because it is handed to
# bovw.BovwFeatures in a later cell.
# Gotcha: `all_descriptors` is overwritten here, so re-running this cell without
# reloading the descriptors refits the scaler on already-scaled data.
sc_dscs = StandardScaler().fit(all_descriptors)
all_descriptors = sc_dscs.transform(all_descriptors)

In [6]:
# Visual vocabulary: reuse a previously saved KMeans model when it loads,
# otherwise cluster the scaled descriptors into 120 clusters.
# NOTE(review): a cached model is used as-is; nothing here checks that it was
# built with the same scaler or cluster count -- remove the cache after
# changing either.
try:
    kmeans_bovw = bovw.LoadKmeans()
except Exception:
    # `except Exception` (instead of a bare `except:`) still falls back on any
    # loading failure but no longer swallows KeyboardInterrupt / SystemExit.
    n_clusters = 120
    kmeans_bovw = bovw.GenerateKmeans(all_descriptors, n_clusters)

Loading kmeans...
bovw_centers.kmeans Loaded !


In [7]:
# Image representation: load the cached bag-of-visual-words features and labels
# if both files are readable; otherwise compute them and cache them to disk.
try:
    X_bovw = np.load("bovw_X.npy")
    y_bovw = np.load("bovw_y.npy")
except Exception:
    # `except Exception` (instead of a bare `except:`) keeps the "missing or
    # unreadable cache -> recompute" fallback while letting KeyboardInterrupt /
    # SystemExit propagate instead of triggering a recomputation.
    X_bovw, y_bovw = bovw.BovwFeatures(annotations, kmeans_bovw, sc_dscs)
    # np.save appends ".npy", so these write the same files loaded above.
    np.save("bovw_X", X_bovw)
    np.save("bovw_y", y_bovw)

In [8]:
# Classification
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train_bovw, X_test_bovw, y_train_bovw, y_test_bovw = train_test_split(
    X_bovw,
    y_bovw,
    test_size=0.2,
    random_state=0,
)

In [9]:
# Standardize the representation features. The scaler is fitted on the training
# split only and then applied unchanged to both splits, so no test-set
# statistics leak into the scaling.
sc_repr_bovw = StandardScaler().fit(X_train_bovw)
X_train_bovw = sc_repr_bovw.transform(X_train_bovw)
X_test_bovw = sc_repr_bovw.transform(X_test_bovw)



In [13]:
# Random forest classifier: sweep max_depth from 1 to 199 and report train/test
# accuracy plus the test confusion matrix for each depth, to see where deeper
# trees start to over-fit.
# NOTE(review): every depth is scored on the test split, so picking the best
# depth from this output tunes on test data; use a validation split or
# cross-validation on the training data for the actual model selection.
for deep in range(1, 200):
    clf_rf = RandomForestClassifier(max_depth=deep, random_state=0)
    clf_rf.fit(X_train_bovw, y_train_bovw)

    print('==========')
    print('deep:' + str(deep))
    print('train score:' + str(clf_rf.score(X_train_bovw, y_train_bovw)))
    print('test score:' + str(clf_rf.score(X_test_bovw, y_test_bovw)))
    # predict() already returns class labels, so they go straight into the
    # confusion matrix. The previous `y_pred > 0.5` thresholding was only
    # harmless for 0/1 labels and would collapse any other label set.
    y_pred = clf_rf.predict(X_test_bovw)
    cm = confusion_matrix(y_test_bovw, y_pred)
    print(cm)

deep:1
train score:0.684478371501
test score:0.616161616162
[[ 0 38]
 [ 0 61]]
deep:2
train score:0.722646310433
test score:0.646464646465
[[ 3 35]
 [ 0 61]]
deep:3
train score:0.811704834606
test score:0.717171717172
[[10 28]
 [ 0 61]]
deep:4
train score:0.867684478372
test score:0.757575757576
[[15 23]
 [ 1 60]]
deep:5
train score:0.908396946565
test score:0.79797979798
[[20 18]
 [ 2 59]]
deep:6
train score:0.928753180662
test score:0.787878787879
[[20 18]
 [ 3 58]]
deep:7
train score:0.954198473282
test score:0.868686868687
[[27 11]
 [ 2 59]]
deep:8
train score:0.972010178117
test score:0.767676767677
[[21 17]
 [ 6 55]]
deep:9
train score:0.979643765903
test score:0.79797979798
[[23 15]
 [ 5 56]]
deep:10
train score:0.98727735369
test score:0.777777777778
[[23 15]
 [ 7 54]]
deep:11
train score:0.98727735369
test score:0.777777777778
[[23 15]
 [ 7 54]]
deep:12
train score:0.98727735369
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:13
train score:1.0
test score:0.767676767677
[[22

deep:93
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:94
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:95
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:96
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:97
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:98
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:99
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:100
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:101
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:102
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:103
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:104
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:105
train score:0.997455470738

deep:183
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:184
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:185
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:186
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:187
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:188
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:189
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:190
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:191
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:192
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:193
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:194
train score:0.997455470738
test score:0.777777777778
[[24 14]
 [ 8 53]]
deep:195
train score:0.99745