In [1]:
import argparse as ap
import cv2
import imutils2 as imutils 
import numpy as np
import os
from sklearn.svm import LinearSVC
from sklearn.externals import joblib
from scipy.cluster.vq import *
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Root folder of the training set. Each sub-folder holds the images of one
# class, and the sub-folder name is used as the class label.
train_path = "data/train"
# Keep only directories: os.listdir also returns plain files (e.g. a stray
# .DS_Store or Thumbs.db), which would otherwise be treated as a class and
# break the per-class image listing later on. Order is left as returned by
# os.listdir, so class ids follow that order.
training_names = [
    name for name in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, name))
]

In [3]:
# Display the class folder names found under train_path; the position of a
# name in this list is the integer class id assigned to its images below.
training_names

['small', 'big']

In [4]:
# Collect the path of every training image in image_paths and, index-aligned
# with it, the integer class id of that image in image_classes. The class id
# is the position of the image's folder name in training_names.
image_paths = []
image_classes = []
for class_id, training_name in enumerate(training_names):
    # Named class_dir rather than `dir` so the builtin dir() is not shadowed
    # for the rest of the notebook session.
    class_dir = os.path.join(train_path, training_name)
    # NOTE(review): imutils.imlist is assumed to return a list of image file
    # paths inside class_dir - confirm against the imutils2 module.
    class_path = imutils.imlist(class_dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)

In [5]:
# Create the keypoint detector / descriptor extractor objects used on every
# training image: ORB from the core cv2 namespace and SURF from the
# cv2.xfeatures2d namespace, both with their default parameters.
# NOTE(review): cv2.xfeatures2d presumably requires an opencv-contrib build
# with the non-free modules enabled - confirm for the target environment.
orb = cv2.ORB_create()
surf=cv2.xfeatures2d.SURF_create()

In [6]:
# Extract ORB and SURF descriptors for every training image.
# Both lists stay index-aligned with image_paths: entry i is the tuple
# (image_paths[i], descriptor matrix with one row per descriptor).
des_list_orb = []
des_list_surf = []

for image_path in image_paths:
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # instead of raising; stop here and name the offending path rather
        # than failing later with an unrelated-looking error.
        raise IOError("Could not read image: %s" % image_path)

    kpts = orb.detect(im, None)
    kpts, des = orb.compute(im, kpts)
    if des is None:
        # No keypoints were found. Use an empty (0, D) matrix so this image
        # keeps its slot in the list and stacking still works.
        des = np.empty((0, orb.descriptorSize()), dtype=np.uint8)

    skp, sdes = surf.detectAndCompute(im, None)
    if sdes is None:
        sdes = np.empty((0, surf.descriptorSize()), dtype=np.float32)

    # ORB descriptors are cast to float so they can be clustered with
    # k-means alongside the SURF descriptors, which are stored as returned.
    des_list_orb.append((image_path, des.astype(float)))
    des_list_surf.append((image_path, sdes))

In [7]:
# Stack the per-image descriptor matrices vertically into one array per
# detector (one row per descriptor); these are the observations that k-means
# clusters into a visual vocabulary.
# A single np.vstack call copies every row once, whereas stacking inside a
# loop re-copies the whole growing array on each iteration.
descriptors1 = np.vstack([descriptor for _path, descriptor in des_list_orb])
descriptors2 = np.vstack([descriptor for _path, descriptor in des_list_surf])



In [8]:
# Build one visual vocabulary (codebook) per descriptor type with k-means.
# k-means starts from randomly chosen centroids, so the global NumPy RNG is
# seeded first; without it every run produces a different vocabulary and the
# scores reported further down cannot be reproduced.
# NOTE(review): assumes scipy's kmeans draws from NumPy's global RNG when no
# explicit seed is passed - confirm for the installed scipy version.
np.random.seed(0)

# k1 / k2 are the requested vocabulary sizes; the third argument (iter=1)
# asks for a single k-means run per vocabulary.
k1 = 120
voc1, variance1 = kmeans(descriptors1, k1, 1)

k2 = 120
voc2, variance2 = kmeans(descriptors2, k2, 1)

In [9]:
def _bow_histograms(des_list, voc, k):
    """Build a bag-of-visual-words histogram for every image.

    Args:
        des_list: list of (image_path, descriptors) tuples, one per image,
            where descriptors has one row per descriptor.
        voc: codebook of visual words, one centroid per row.
        k: number of histogram bins (the requested vocabulary size).

    Returns:
        float32 array of shape (len(des_list), k); entry [i, w] is the
        number of descriptors of image i whose nearest codebook entry is w.
    """
    features = np.zeros((len(des_list), k), "float32")
    for i, (_path, descriptors) in enumerate(des_list):
        if len(descriptors) == 0:
            # Image without any descriptors: leave its histogram all zeros.
            continue
        words, _distance = vq(descriptors, voc)
        # bincount tallies all word indices at once; minlength pads the
        # result to k bins so it fits the row even if high bins are unused.
        features[i] = np.bincount(words, minlength=k)
    return features


# Calculate the histogram of features for each vocabulary.
# (The helper iterates with enumerate rather than xrange, which does not
# exist on Python 3.)
im_features1 = _bow_histograms(des_list_orb, voc1, k1)
im_features2 = _bow_histograms(des_list_surf, voc2, k2)
        


In [17]:
# Join the two histograms column-wise: one row per image, with the ORB bins
# first and the SURF bins after them.
im_features = np.hstack((im_features1, im_features2))
# (SURF-only alternative: im_features = im_features2)

# Per-word inverse document frequency: count the images in which each word
# occurs at least once, then take the smoothed log ratio.
# NOTE(review): idf is computed here but not used by any later cell shown.
nbr_occurences = (im_features > 0).astype(int).sum(axis=0)
idf = np.log((len(image_paths) + 1.0) / (nbr_occurences + 1.0)).astype('float32')

# Standardise every feature column; the fitted scaler is kept in stdSlr so
# it can be stored together with the model.
stdSlr = StandardScaler()
im_features = stdSlr.fit_transform(im_features)

In [None]:
# Disabled experiment, kept for reference: the whole cell is one
# triple-quoted string, so none of the statements inside it are executed.
# The text computes an idf vector separately for im_features1 and for
# im_features2; as written, the second assignment to `idf` would overwrite
# the first.
'''
# Perform Tf-Idf vectorization
nbr_occurences = np.sum( (im_features1 > 0) * 1, axis = 0)
idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')

nbr_occurences = np.sum( (im_features2 > 0) * 1, axis = 0)
idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')
'''

In [None]:
# Disabled experiment, kept for reference: the whole cell is one
# triple-quoted string, so none of the statements inside it are executed.
# The text standardises im_features1 and im_features2 separately and only
# then concatenates them; `stdSlr` would end up holding the scaler fitted on
# im_features2 only.
'''
# Scaling the words

stdSlr = StandardScaler().fit(im_features1)
im_features1 = stdSlr.transform(im_features1)

stdSlr = StandardScaler().fit(im_features2)
im_features2 = stdSlr.transform(im_features2)


im_features=np.concatenate((im_features1,im_features2),axis=1)
'''

In [None]:
# Reduce the feature matrix to 150 principal components.
# NOTE(review): pca_f is not consumed by any later cell shown here.
from sklearn.decomposition import PCA

pca = PCA(n_components=150)
pca_f = pca.fit(im_features).transform(im_features)

In [None]:
# Alternative reduction: a factor-analysis model with 150 components.
# NOTE(review): fa_f is not consumed by any later cell shown here.
from sklearn.decomposition import FactorAnalysis as FA

fa = FA(n_components=150)
fa_f = fa.fit(im_features).transform(im_features)

In [26]:
from sklearn.model_selection import cross_val_score

# Score an RBF-kernel SVM (C=4) with 10-fold cross-validation on the feature
# matrix; `scores` holds one score per fold.
# (Alternative tried: clf = RandomForestClassifier(max_depth=20))
clf = svm.SVC(C=4, kernel='rbf')
scores = cross_val_score(
    estimator=clf,
    X=im_features,
    y=np.array(image_classes),
    cv=10,
)


In [27]:
# Report the mean fold score together with a two-standard-deviation spread.
mean_accuracy = scores.mean()
two_sigma = scores.std() * 2
print("Accuracy: %0.2f (+/- %0.2f)" % (mean_accuracy, two_sigma))

Accuracy: 0.92 (+/- 0.10)


In [None]:
# cross_val_score only fits internal clones of `clf`, so `clf` itself is
# still unfitted at this point. Train it on the full training set before
# saving; otherwise the pickled model cannot predict once it is loaded.
clf.fit(im_features, np.array(image_classes))

# Persist everything needed at prediction time: the fitted classifier, the
# class names (index = class id), the fitted feature scaler, and the
# vocabulary sizes and codebooks for ORB and SURF (in that order).
joblib.dump((clf, training_names, stdSlr, [k1,k2], [voc1,voc2]), "bof.pkl", compress=3)