In [4]:
import numpy as np
import cv2 as cv
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
class Dictionary(object):
    """Bag-of-visual-words dictionary built from SIFT descriptors.

    learn() clusters the SIFT descriptors of the training images with K-Means;
    the cluster centres become the visual words. Images are then described by
    a normalised histogram of how often each word is their descriptors'
    nearest word.
    """

    def __init__(self, name, img_filenames, num_words, random_state=None):
        """Store the configuration; nothing is computed until learn() is called.

        name          -- name of the dictionary
        img_filenames -- list of training image filenames
        num_words     -- number of visual words (K-Means clusters)
        random_state  -- optional seed forwarded to K-Means so that learn()
                         can be made repeatable (None keeps it unseeded)
        """
        self.name = name #name of your dictionary
        self.img_filenames = img_filenames #list of image filenames
        self.num_words = num_words #the number of words
        self.random_state = random_state #seed for K-Means (None = unseeded)
        self.training_data = [] #this is the training data required by the K-Means algorithm
        self.words = [] #list of words, which are the centroids of clusters

    @staticmethod
    def _create_sift():
        """Return a SIFT extractor from whichever place this OpenCV build exposes it."""
        if hasattr(cv, 'SIFT_create'):
            return cv.SIFT_create()
        return cv.xfeatures2d.SIFT_create()

    @staticmethod
    def _describe(sift, filename):
        """Return the SIFT descriptors of one image, or None when it has none.

        Raises IOError when the image cannot be read.
        """
        img = cv.imread(filename)
        if img is None:
            #cv.imread reports an unreadable file by returning None
            raise IOError('Could not read image: %s' % filename)
        img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        descriptors = sift.detectAndCompute(img_gray, None)[1]
        if descriptors is None or len(descriptors) == 0:
            return None
        return descriptors

    def _histogram(self, word_ids):
        """Turn a sequence of word IDs into a float32 frequency histogram.

        An empty sequence gives an all-zero histogram.
        """
        word_ids = np.asarray(word_ids, dtype=np.intp)
        histogram = np.bincount(word_ids, minlength=self.num_words).astype(np.float32)
        if len(word_ids) > 0:
            histogram /= len(word_ids) #normalise histogram to frequencies
        return histogram

    def learn(self):
        """Learn the visual words and return the training word histograms.

        Returns a list with exactly one histogram per entry of
        self.img_filenames, in the same order. Images without keypoints get
        an all-zero histogram so the list stays aligned with the labels.

        Raises ValueError when no training image yields any descriptor.
        """
        sift = self._create_sift()
        #start from scratch so that calling learn() again does not duplicate data
        self.training_data = []
        num_keypoints = [] #this is used to store the number of keypoints in each image
        #load training images and compute SIFT descriptors
        for filename in self.img_filenames:
            descriptors = self._describe(sift, filename)
            if descriptors is None:
                num_keypoints.append(0)
            else:
                num_keypoints.append(len(descriptors))
                self.training_data.extend(descriptors)
        if not self.training_data:
            raise ValueError('No SIFT descriptors were found in the training images')
        #cluster SIFT descriptors using K-means algorithm
        #getattr: objects unpickled from before random_state existed lack the attribute
        kmeans = KMeans(n_clusters=self.num_words,
                        random_state=getattr(self, 'random_state', None))
        kmeans.fit(self.training_data)
        self.words = kmeans.cluster_centers_
        #create word histograms for training images
        training_word_histograms = [] #list of word histograms of all training images
        index = 0
        for count in num_keypoints:
            #labels_ follows the order in which descriptors were collected,
            #so each image owns one contiguous slice of it
            labels = kmeans.labels_[index:index + count]
            training_word_histograms.append(self._histogram(labels))
            index += count
        return training_word_histograms

    def create_word_histograms(self, img_filenames):
        """Return one word histogram per image in img_filenames.

        Each descriptor votes for its nearest word (Euclidean distance, the
        lowest word ID wins a tie). Images without keypoints get an all-zero
        histogram.

        Raises IOError when an image cannot be read.
        """
        sift = self._create_sift()
        words = np.asarray(self.words)
        histograms = []
        for filename in img_filenames:
            descriptors = self._describe(sift, filename)
            if descriptors is None:
                word_ids = []
            else:
                #distance from the descriptor to every word at once
                word_ids = [np.argmin(np.linalg.norm(words - des, axis=1))
                            for des in descriptors]
            histograms.append(self._histogram(word_ids))
        return histograms

C:\Users\shane\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.IPBC74C7KURV7CB2PKT5Z5FNR3SIBV4J.gfortran-win_amd64.dll
C:\Users\shane\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\Users\shane\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll
  stacklevel=1)


In [5]:
import os
#class names double as sub-directory names; a class's label is its index in this list
foods = ['Cakes', 'Pasta', 'Pizza']
path = 'FoodImages/'
training_file_names = []
training_food_labels = []
for i, food in enumerate(foods):
    sub_path = path + 'Train/' + food + '/'
    #sort inside each class (os.listdir order is arbitrary) and build the labels
    #alongside, so file/label pairs stay aligned whatever the order of `foods`
    sub_file_names = sorted(os.path.join(sub_path, f) for f in os.listdir(sub_path))
    training_file_names += sub_file_names
    training_food_labels += [i] * len(sub_file_names) #one label per file of this class

In [6]:
#dictionary configuration: 50 visual words, learned from the training images
dictionary_name = 'food'
num_words = 50
dictionary = Dictionary(
    name=dictionary_name,
    img_filenames=training_file_names,
    num_words=num_words,
)

In [7]:
#learn the visual words and keep the word histograms of the training images;
#every classifier below is fitted on these histograms
training_word_histograms = dictionary.learn()

In [8]:
import pickle
#persist the learned dictionary to disk
with open('food_dictionary.dic', 'wb') as dictionary_file: #'wb' = write, binary mode
    pickle.dump(dictionary, dictionary_file)

In [9]:
#reload the dictionary saved above
#NOTE: pickle.load can execute arbitrary code; only load files you created yourself
with open('food_dictionary.dic', 'rb') as dictionary_file: #'rb' = read, binary mode
    dictionary = pickle.load(dictionary_file)

### KNN

In [10]:
from sklearn.neighbors import KNeighborsClassifier
#k-nearest-neighbours classifier over the training word histograms, k = 5
num_nearest_neighbours = 5
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours)
knn.fit(X=training_word_histograms, y=training_food_labels)

KNeighborsClassifier()

In [11]:
#classify a single test image with the fitted k-NN model;
#the printed label is an index into `foods`
test_file_names = ['FoodImages/Test/Pasta/pasta35.jpg']
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = knn.predict(word_histograms)
print('Food label: ', predicted_food_labels)

Food label:  [1]


### SVM

In [12]:
from sklearn import svm
#linear support-vector classifier over the training word histograms, C = 50
svm_classifier = svm.SVC(kernel='linear', C=50)
svm_classifier.fit(X=training_word_histograms, y=training_food_labels)

SVC(C=50, kernel='linear')

In [13]:
#classify a single test image with the fitted SVM;
#the printed label is an index into `foods`
test_file_names = ['FoodImages/Test/Pasta/pasta35.jpg']
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = svm_classifier.predict(word_histograms)
print('Food label: ', predicted_food_labels)

Food label:  [1]


### AdaBoost

In [14]:
from sklearn.ensemble import AdaBoostClassifier
#AdaBoost over the training word histograms: 150 weak classifiers, fixed seed
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=150)
adb_classifier.fit(X=training_word_histograms, y=training_food_labels)

AdaBoostClassifier(n_estimators=150, random_state=0)

In [15]:
#classify a single test image with the fitted AdaBoost model;
#the printed label is an index into `foods`
test_file_names = ['FoodImages/Test/Pasta/pasta35.jpg']
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = adb_classifier.predict(word_histograms)
print('Food label: ', predicted_food_labels)

Food label:  [1]


----

In [16]:
#class names double as sub-directory names; a class's label is its index in this list
foods = ['Cakes', 'Pasta', 'Pizza']
path = 'FoodImages/'
test_file_names = []
test_food_labels = []
for i, food in enumerate(foods):
    sub_path = path + 'Test/' + food + '/'
    #sort inside each class (os.listdir order is arbitrary) and build the labels
    #alongside, so file/label pairs stay aligned whatever the order of `foods`
    sub_file_names = sorted(os.path.join(sub_path, f) for f in os.listdir(sub_path))
    test_file_names += sub_file_names
    test_food_labels += [i] * len(sub_file_names) #one label per file of this class

----

In [26]:
#k-NN, k = 10: fit on the training histograms, then score the test images
num_nearest_neighbours = 10
word_histograms = dictionary.create_word_histograms(test_file_names)
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = knn.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[17  6  7]
 [ 0 28  2]
 [ 0  8 22]]
74.44444444444444


In [27]:
#linear SVM, C = 10: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
svm_classifier = svm.SVC(kernel='linear', C=10).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[24  3  3]
 [ 0 23  7]
 [ 1  3 26]]
81.11111111111111


In [28]:
#AdaBoost, 50 estimators: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=50).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[23  2  5]
 [ 1 20  9]
 [ 3  7 20]]
70.0


----

In [29]:
#k-NN, k = 15: fit on the training histograms, then score the test images
num_nearest_neighbours = 15
word_histograms = dictionary.create_word_histograms(test_file_names)
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = knn.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[16  7  7]
 [ 0 25  5]
 [ 0  8 22]]
70.0


In [30]:
#linear SVM, C = 20: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
svm_classifier = svm.SVC(kernel='linear', C=20).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[25  2  3]
 [ 0 22  8]
 [ 1  1 28]]
83.33333333333334


In [31]:
#AdaBoost, 100 estimators: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=100).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[22  3  5]
 [ 0 22  8]
 [ 4  6 20]]
71.11111111111111


----

In [32]:
#k-NN, k = 20: fit on the training histograms, then score the test images
num_nearest_neighbours = 20
word_histograms = dictionary.create_word_histograms(test_file_names)
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = knn.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[13  9  8]
 [ 0 25  5]
 [ 0 10 20]]
64.44444444444444


In [33]:
#linear SVM, C = 30: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
svm_classifier = svm.SVC(kernel='linear', C=30).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[25  2  3]
 [ 0 23  7]
 [ 1  1 28]]
84.44444444444444


In [34]:
#AdaBoost, 150 estimators: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=150).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[18  2 10]
 [ 0 21  9]
 [ 3  5 22]]
67.77777777777779


----

In [35]:
#k-NN, k = 25: fit on the training histograms, then score the test images
num_nearest_neighbours = 25
word_histograms = dictionary.create_word_histograms(test_file_names)
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = knn.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[12 10  8]
 [ 0 25  5]
 [ 0 10 20]]
63.33333333333333


In [36]:
#linear SVM, C = 40: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
svm_classifier = svm.SVC(kernel='linear', C=40).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[26  2  2]
 [ 0 24  6]
 [ 1  3 26]]
84.44444444444444


In [37]:
#AdaBoost, 200 estimators: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=200).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[18  2 10]
 [ 0 21  9]
 [ 3  5 22]]
67.77777777777779


----

In [38]:
#k-NN, k = 30: fit on the training histograms, then score the test images
num_nearest_neighbours = 30
word_histograms = dictionary.create_word_histograms(test_file_names)
knn = KNeighborsClassifier(n_neighbors=num_nearest_neighbours).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = knn.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[10 10 10]
 [ 0 26  4]
 [ 0  8 22]]
64.44444444444444


In [39]:
#linear SVM, C = 50: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
svm_classifier = svm.SVC(kernel='linear', C=50).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[26  2  2]
 [ 0 24  6]
 [ 1  3 26]]
84.44444444444444


In [40]:
#AdaBoost, 250 estimators: fit on the training histograms, then score the test images
word_histograms = dictionary.create_word_histograms(test_file_names)
adb_classifier = AdaBoostClassifier(random_state=0, n_estimators=250).fit(
    training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
acc = accuracy_score(test_food_labels, predicted_food_labels)
cm = confusion_matrix(test_food_labels, predicted_food_labels)
#confusion matrix first, then accuracy as a percentage
print(cm, acc * 100, sep='\n')

[[21  1  8]
 [ 0 20 10]
 [ 3  3 24]]
72.22222222222221
