In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.cluster import KMeans
from scipy.spatial import distance

In [37]:
def load_images_from_folder(folder, scale=0.2, categories=('cup', 'anticeptik', 'other')):
    """Load the images of each category sub-folder as downscaled grayscale arrays.

    Parameters
    ----------
    folder : str
        Root directory containing one sub-directory per category.
    scale : float, default 0.2
        Resize factor passed to ``cv2.resize`` for both axes (``fx`` and ``fy``).
    categories : iterable of str, default ('cup', 'anticeptik', 'other')
        Names of the sub-directories to read. The returned dict keeps this order.

    Returns
    -------
    dict
        ``{category: [image, ...]}``. Within a category, images are ordered by
        sorted file name. Entries that ``cv2.imread`` cannot decode (it returns
        ``None``) are skipped.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a category directory is missing.
    """
    images = {}
    for category in categories:
        category_dir = os.path.join(folder, category)
        loaded = []
        # os.listdir gives no ordering guarantee; sorting keeps the row order of
        # everything derived from these lists reproducible across machines.
        for filename in sorted(os.listdir(category_dir)):
            img = cv2.imread(os.path.join(category_dir, filename), 0)  # flag 0: grayscale
            if img is not None:
                loaded.append(cv2.resize(img, (0, 0), fx=scale, fy=scale))
        images[category] = loaded
    return images

In [38]:
# Load the training and test image sets with the default 0.2 scale; each result
# is a dict keyed by category folder name holding a list of grayscale images.
images = load_images_from_folder('train')
test = load_images_from_folder("test")

In [4]:
def sift_features(images):
    """Compute SIFT descriptors for every image of a ``{category: [image, ...]}`` dict.

    Parameters
    ----------
    images : dict
        Maps a category name to a list of images accepted by
        ``cv2.SIFT.detectAndCompute``.

    Returns
    -------
    list
        ``[descriptor_list, sift_vectors]`` where ``descriptor_list`` is a flat
        list of every descriptor row from every image, and ``sift_vectors`` maps
        each category to a list holding one descriptor array per image, in the
        same order as the input images.
    """
    sift_vectors = {}
    descriptor_list = []
    sift = cv2.SIFT_create()
    for key, value in images.items():
        features = []
        for img in value:
            _keypoints, des = sift.detectAndCompute(img, None)
            if des is None:
                # No keypoints found: OpenCV returns None instead of an empty
                # matrix. Keep an empty (0, descriptorSize) placeholder so the
                # image still occupies its slot and per-category counts stay
                # aligned with the labels built later.
                des = np.empty((0, sift.descriptorSize()), dtype=np.float32)
            descriptor_list.extend(des)
            features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

In [5]:
def brisk_features(images):
    """Compute BRISK descriptors for every image of a ``{category: [image, ...]}`` dict.

    Parameters
    ----------
    images : dict
        Maps a category name to a list of images accepted by
        ``cv2.BRISK.detectAndCompute``.

    Returns
    -------
    list
        ``[descriptor_list, brisk_vectors]`` where ``descriptor_list`` is a flat
        list of every descriptor row from every image, and ``brisk_vectors`` maps
        each category to a list holding one descriptor array per image, in the
        same order as the input images.
    """
    brisk_vectors = {}
    descriptor_list = []
    brisk = cv2.BRISK_create()
    for key, value in images.items():
        features = []
        for img in value:
            _keypoints, des = brisk.detectAndCompute(img, None)
            if des is None:
                # No keypoints found: OpenCV returns None instead of an empty
                # matrix. Keep an empty (0, descriptorSize) placeholder so the
                # image still occupies its slot and per-category counts stay
                # aligned with the labels built later.
                des = np.empty((0, brisk.descriptorSize()), dtype=np.uint8)
            descriptor_list.extend(des)
            features.append(des)
        brisk_vectors[key] = features
    return [descriptor_list, brisk_vectors]

In [6]:
# SIFT pass: pooled training descriptors (for the vocabulary) plus the
# per-image descriptor arrays of the training and test sets.
sifts = sift_features(images)
descriptor_list, all_bovw_feature = sifts
test_bovw_feature = sift_features(test)[1]

In [7]:
# Number of pooled training descriptors that will be clustered into visual words.
len(descriptor_list)

257309

In [8]:
def kmeans(k, descriptor_list, random_state=0):
    """Cluster descriptors into ``k`` visual words and return the cluster centers.

    Parameters
    ----------
    k : int
        Number of clusters, i.e. the vocabulary size.
    descriptor_list : sequence of array-like
        Descriptor vectors of equal length, one per row.
    random_state : int or None, default 0
        Seed forwarded to ``KMeans`` so the vocabulary is reproducible between
        runs. Pass ``None`` to get scikit-learn's unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        The fitted ``cluster_centers_``, one row per visual word.
    """
    # Named ``model`` rather than ``kmeans`` so the local does not shadow this function.
    model = KMeans(n_clusters=k, n_init=10, random_state=random_state)
    model.fit(descriptor_list)
    return model.cluster_centers_

In [9]:
# Build a 150-word vocabulary from whatever `descriptor_list` currently holds
# (the SIFT descriptors when the cells are run top to bottom).
visual_words = kmeans(150, descriptor_list) 

In [39]:
# BRISK pass: rebinds descriptor_list / all_bovw_feature / test_bovw_feature,
# replacing the SIFT results held under the same names.
brisks = brisk_features(images)
descriptor_list, all_bovw_feature = brisks
test_bovw_feature = brisk_features(test)[1]

In [40]:
# Number of pooled BRISK training descriptors (descriptor_list was rebound above).
len(descriptor_list)

415675

In [41]:
# 150-word vocabulary clustered from the BRISK descriptors.
visual_words_brisk = kmeans(150, descriptor_list) 

In [154]:
# Sanity check: one center per requested visual word.
len(visual_words_brisk)

150

In [10]:
# Persist the vocabulary bound to `visual_words` so clustering need not be repeated.
# NOTE(review): the file name assumes `visual_words` still holds the SIFT centers
# at this point; that depends on cell execution order.
np.save('centers_sift.npy', visual_words)

In [42]:
# Persist the BRISK vocabulary.
np.save('centers_brisk.npy', visual_words_brisk)

In [45]:
# Rebinds `visual_words` to the BRISK centers saved above; from here on the name
# no longer refers to the vocabulary returned by the earlier kmeans call.
visual_words = np.load('centers_brisk.npy')

In [43]:
# Number of visual words currently bound to `visual_words`.
len(visual_words)

150

In [44]:
# Confirm the vocabulary is a plain NumPy array.
type(visual_words)

numpy.ndarray

In [46]:
# (number of visual words, descriptor length) of the BRISK vocabulary.
visual_words_brisk.shape

(150, 64)

In [14]:
def find_index(feature, centers):
    """Return the row index of the center nearest to ``feature``.

    Nearness is measured by squared Euclidean distance; when several centers
    are equally close, the lowest index is returned.
    """
    offsets = centers - feature
    squared_distances = np.sum(np.square(offsets), axis=1)
    return np.argmin(squared_distances)

In [15]:
def image_class(all_bovw, centers):
    """Turn per-image descriptor arrays into bag-of-visual-words histograms.

    Parameters
    ----------
    all_bovw : dict
        Maps a category name to a list with one 2-D descriptor array per image.
        An entry may be ``None`` or empty when an image produced no descriptors.
    centers : array-like
        Visual-word centers, one row per word.

    Returns
    -------
    dict
        Maps each category to a list of float histograms of length
        ``len(centers)``. Bin ``j`` counts the descriptors of that image whose
        nearest center, by squared Euclidean distance, is row ``j``. Images
        without descriptors yield an all-zero histogram.

    Notes
    -----
    The nearest-center search is done for all descriptors of an image at once
    with ``cdist`` in float64, instead of one Python-level lookup per descriptor.
    """
    centers = np.asarray(centers, dtype=float)
    n_words = len(centers)
    dict_feature = {}
    for key, value in all_bovw.items():
        category = []
        for img in value:
            if img is None or len(img) == 0:
                # Keep a slot for the image so rows stay aligned with the labels.
                category.append(np.zeros(n_words))
                continue
            descriptors = np.asarray(img, dtype=float)
            nearest = distance.cdist(descriptors, centers, 'sqeuclidean').argmin(axis=1)
            # bincount yields integer counts; cast to float to match np.zeros-based histograms.
            category.append(np.bincount(nearest, minlength=n_words).astype(float))
        dict_feature[key] = category
    return dict_feature

In [16]:
# Histograms against whatever vocabulary `visual_words` is bound to.
# NOTE(review): on a top-to-bottom run `visual_words` has already been reloaded
# from centers_brisk.npy, and the next cell overwrites both results, so this
# cell duplicates that work. The execution counts suggest it was originally run
# for the SIFT features.
bovw_train = image_class(all_bovw_feature, visual_words)
bovw_test = image_class(test_bovw_feature, visual_words) 

In [47]:
# Histograms of the training and test descriptors against the BRISK vocabulary;
# rebinds bovw_train / bovw_test.
bovw_train = image_class(all_bovw_feature, visual_words_brisk)
bovw_test = image_class(test_bovw_feature, visual_words_brisk) 

In [17]:
# Number of training histograms in the 'other' category.
len(bovw_train['other'])

24

In [18]:
# Feature column names x1 .. x150, one per visual word.
columns = [f'x{i}' for i in range(1, 151)]

In [48]:
# Stack the per-image histograms in the fixed label order anticeptik, cup, other.
# The lists are concatenated into a new list: binding train_array directly to
# bovw_train['anticeptik'] and calling .extend() on it would grow the dict entry
# itself, so re-running the cell would append the other categories again.
train_array = np.array(
    bovw_train['anticeptik'] + bovw_train['cup'] + bovw_train['other']
)
train_array.shape

(198, 150)

In [49]:
# Wrap the training histogram matrix in a DataFrame with the x1..x150 column names.
train = pd.DataFrame(train_array, columns = columns)

In [50]:
# Display the training feature table (before the label column is added).
train

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x141,x142,x143,x144,x145,x146,x147,x148,x149,x150
0,9.0,36.0,17.0,14.0,10.0,7.0,26.0,16.0,9.0,5.0,...,8.0,15.0,17.0,10.0,20.0,5.0,11.0,24.0,69.0,14.0
1,1.0,2.0,7.0,1.0,7.0,6.0,6.0,4.0,1.0,13.0,...,11.0,10.0,11.0,7.0,0.0,6.0,10.0,8.0,0.0,10.0
2,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,...,2.0,0.0,3.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0
3,69.0,62.0,75.0,83.0,58.0,23.0,34.0,79.0,54.0,87.0,...,23.0,105.0,19.0,110.0,85.0,97.0,73.0,16.0,41.0,23.0
4,101.0,100.0,95.0,124.0,77.0,102.0,53.0,68.0,68.0,88.0,...,98.0,111.0,52.0,93.0,88.0,98.0,133.0,46.0,54.0,68.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,2.0,4.0,3.0,1.0,8.0,7.0,7.0,3.0,9.0,8.0,...,3.0,13.0,5.0,2.0,6.0,10.0,5.0,3.0,2.0,1.0
194,7.0,0.0,3.0,3.0,1.0,0.0,1.0,0.0,1.0,1.0,...,4.0,16.0,0.0,5.0,1.0,0.0,13.0,1.0,0.0,0.0
195,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
196,3.0,4.0,9.0,3.0,2.0,5.0,2.0,14.0,4.0,9.0,...,8.0,4.0,3.0,5.0,0.0,5.0,1.0,1.0,3.0,2.0


### Label encoding for column `y`: 1 = anticeptik, -1 = cup, 0 = other

In [55]:
# Labels in the same order as the rows of the training matrix:
# anticeptik -> 1, cup -> -1, other -> 0.
# The counts come from the per-category descriptor lists (one entry per image)
# rather than hard-coded totals, so they follow the data set if it changes.
y = (
    [1] * len(all_bovw_feature['anticeptik'])
    + [-1] * len(all_bovw_feature['cup'])
    + [0] * len(all_bovw_feature['other'])
)

In [56]:
# Convert the label list to an integer Series (default RangeIndex, matching `train`).
y = pd.Series(y, dtype=int)

In [57]:
# Attach the labels as column 'y'; assignment aligns on the index.
train['y'] = y

In [58]:
# Display the training table including the label column.
train

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x142,x143,x144,x145,x146,x147,x148,x149,x150,y
0,9.0,36.0,17.0,14.0,10.0,7.0,26.0,16.0,9.0,5.0,...,15.0,17.0,10.0,20.0,5.0,11.0,24.0,69.0,14.0,1
1,1.0,2.0,7.0,1.0,7.0,6.0,6.0,4.0,1.0,13.0,...,10.0,11.0,7.0,0.0,6.0,10.0,8.0,0.0,10.0,1
2,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,...,0.0,3.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,1
3,69.0,62.0,75.0,83.0,58.0,23.0,34.0,79.0,54.0,87.0,...,105.0,19.0,110.0,85.0,97.0,73.0,16.0,41.0,23.0,1
4,101.0,100.0,95.0,124.0,77.0,102.0,53.0,68.0,68.0,88.0,...,111.0,52.0,93.0,88.0,98.0,133.0,46.0,54.0,68.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,2.0,4.0,3.0,1.0,8.0,7.0,7.0,3.0,9.0,8.0,...,13.0,5.0,2.0,6.0,10.0,5.0,3.0,2.0,1.0,0
194,7.0,0.0,3.0,3.0,1.0,0.0,1.0,0.0,1.0,1.0,...,16.0,0.0,5.0,1.0,0.0,13.0,1.0,0.0,0.0,0
195,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
196,3.0,4.0,9.0,3.0,2.0,5.0,2.0,14.0,4.0,9.0,...,4.0,3.0,5.0,0.0,5.0,1.0,1.0,3.0,2.0,0


In [59]:
# Write the BRISK training table to disk. With the default index=True the row
# index is written as an unnamed first column.
train.to_csv('train_brisk.csv')

In [60]:
# Stack the per-image test histograms in the fixed label order anticeptik, cup, other.
# The lists are concatenated into a new list: binding test_array directly to
# bovw_test['anticeptik'] and calling .extend() on it would grow the dict entry
# itself, so re-running the cell would append the other categories again.
test_array = np.array(
    bovw_test['anticeptik'] + bovw_test['cup'] + bovw_test['other']
)

In [61]:
# (number of test images, vocabulary size).
test_array.shape

(68, 150)

In [62]:
# Wrap the test histogram matrix in a DataFrame.
# NOTE(review): this rebinds `test`, which previously held the dict of test images
# returned by load_images_from_folder; re-running the feature-extraction cells
# after this point would receive a DataFrame instead.
test = pd.DataFrame(test_array, columns = columns)

In [63]:
# Labels in the same order as the rows of the test matrix:
# anticeptik -> 1, cup -> -1, other -> 0.
# The counts come from the per-category test descriptor lists (one entry per
# image) rather than hard-coded totals, so they follow the data set if it changes.
y = (
    [1] * len(test_bovw_feature['anticeptik'])
    + [-1] * len(test_bovw_feature['cup'])
    + [0] * len(test_bovw_feature['other'])
)
y = pd.Series(y, dtype=int)

In [64]:
# Attach the labels as column 'y'; assignment aligns on the index.
test['y'] = y

In [65]:
# Write the BRISK test table to disk. With the default index=True the row index
# is written as an unnamed first column.
test.to_csv('test_brisk.csv')

In [36]:
# Display the test table.
# NOTE(review): this cell's execution count is lower than those of the cells that
# build `test` above, so the saved output may come from an earlier run.
test

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x142,x143,x144,x145,x146,x147,x148,x149,x150,y
0,23.0,0.0,5.0,4.0,0.0,0.0,0.0,2.0,2.0,11.0,...,4.0,11.0,7.0,6.0,1.0,7.0,8.0,10.0,4.0,1
1,0.0,1.0,3.0,8.0,0.0,0.0,1.0,0.0,11.0,13.0,...,2.0,14.0,7.0,9.0,12.0,11.0,14.0,11.0,0.0,1
2,0.0,11.0,34.0,30.0,1.0,1.0,9.0,2.0,67.0,8.0,...,58.0,0.0,8.0,55.0,93.0,36.0,64.0,17.0,9.0,1
3,5.0,2.0,4.0,3.0,0.0,0.0,2.0,1.0,1.0,4.0,...,2.0,3.0,0.0,3.0,2.0,0.0,0.0,5.0,4.0,1
4,2.0,9.0,49.0,10.0,0.0,0.0,9.0,3.0,27.0,11.0,...,19.0,4.0,20.0,31.0,63.0,25.0,35.0,14.0,11.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,1179.0,360.0,17.0,6.0,88.0,0.0,177.0,151.0,60.0,46.0,...,10.0,15.0,3.0,21.0,42.0,17.0,57.0,8.0,2.0,0
64,2.0,0.0,14.0,10.0,0.0,0.0,0.0,0.0,8.0,5.0,...,11.0,0.0,2.0,11.0,59.0,1.0,2.0,5.0,3.0,0
65,57.0,15.0,41.0,17.0,10.0,1.0,28.0,31.0,132.0,105.0,...,20.0,38.0,76.0,48.0,13.0,105.0,119.0,36.0,111.0,0
66,8.0,3.0,2.0,2.0,0.0,0.0,0.0,2.0,2.0,71.0,...,0.0,7.0,12.0,0.0,0.0,21.0,33.0,12.0,36.0,0
