In [34]:
import numpy as np 
import cv2 as cv 
from sklearn.cluster import KMeans 
import pickle 
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn import svm 
from sklearn.ensemble import AdaBoostClassifier 

class Dictionary(object): 
    def __init__(self, name, img_filenames, num_words): 
        self.name = name #name of your dictionary 
        self.img_filenames = img_filenames #list of image filenames 
        self.num_words = num_words #the number of words 
         
        self.training_data = [] #training data used to learn clusters 
        self.words = [] #list of words, which are the centroids of clusters 
     
    def learn(self): 
        sift = cv.SIFT_create() 
         
        num_keypoints = [] #used to store the number of keypoints in each image 
         
        #load training images and compute SIFT descriptors 
        for filename in self.img_filenames: 
            img = cv.imread(filename) 
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) 
            list_des = sift.detectAndCompute(img_gray, None)[1] 
            if list_des is None: 
                num_keypoints.append(0) 
            else: 
                num_keypoints.append(len(list_des)) 
                for des in list_des: 
                    self.training_data.append(des) 
             
        #cluster SIFT descriptors using K-means algorithm 
        kmeans = KMeans(self.num_words) 
        kmeans.fit(self.training_data) 
        self.words = kmeans.cluster_centers_ 
         
        #create word histograms for training images 
        training_word_histograms = [] #list of word histograms of all training images 
        index = 0 
        for i in range(0, len(self.img_filenames)): #for each file, create a histogram 
            histogram = np.zeros(self.num_words, np.float32) 
            #if some keypoints exist 
            if num_keypoints[i] > 0: 
                for j in range(0, num_keypoints[i]): 
                    histogram[kmeans.labels_[j + index]] += 1 
                index += num_keypoints[i] 
                histogram /= num_keypoints[i] 
                training_word_histograms.append(histogram) 
         
        return training_word_histograms
    
    def create_word_histograms(self, img_filenames): 
        sift = cv.SIFT_create() 
        histograms = [] 
         
        for filename in img_filenames: 
            img = cv.imread(filename) 
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) 
            descriptors = sift.detectAndCompute(img_gray, None)[1] 
         
            histogram = np.zeros(self.num_words, np.float32) #word histogram  
         
            if descriptors is not None: 
                for des in descriptors: 
                    #find the best matching word 
                    min_distance = 1111111 #this can be any large number 
                    matching_word_ID = -1 #initialise ID with an impractical value 
                     
                    for i in range(0, self.num_words): #find the best matching word 
                        distance = np.linalg.norm(des - self.words[i]) 
                        if distance < min_distance: 
                            min_distance = distance 
                            matching_word_ID = i 
                     
                    histogram[matching_word_ID] += 1 
                 
                histogram /= len(descriptors) #make histogram a prob distribution 
         
            histograms.append(histogram) 
         
        return histograms 

In [35]:
import os 
 
foods = ['Cakes', 'Pasta', 'Pizza'] 
path = 'FoodImages/' 
training_file_names = [] 
training_food_labels = [] 
for i in range(0, len(foods)): 
    sub_path = path + 'Train/' + foods[i] + '/' 
    sub_file_names = [os.path.join(sub_path, f) for f in os.listdir(sub_path)] 
    sub_food_labels = [i] * len(sub_file_names) #create a list of N elements, all are i 
    training_file_names += sub_file_names 
    training_food_labels += sub_food_labels 
     
print(training_file_names) 
print(training_food_labels)

['FoodImages/Train/Cakes/cake1.png', 'FoodImages/Train/Cakes/cake10.jpg', 'FoodImages/Train/Cakes/cake11.jpg', 'FoodImages/Train/Cakes/cake12.jpg', 'FoodImages/Train/Cakes/cake13.jpg', 'FoodImages/Train/Cakes/cake14.jpg', 'FoodImages/Train/Cakes/cake15.jpg', 'FoodImages/Train/Cakes/cake16.jpg', 'FoodImages/Train/Cakes/cake17.jpg', 'FoodImages/Train/Cakes/cake18.jpg', 'FoodImages/Train/Cakes/cake19.jpg', 'FoodImages/Train/Cakes/cake2.png', 'FoodImages/Train/Cakes/cake20.jpg', 'FoodImages/Train/Cakes/cake21.jpg', 'FoodImages/Train/Cakes/cake22.jpg', 'FoodImages/Train/Cakes/cake23.jpg', 'FoodImages/Train/Cakes/cake24.jpg', 'FoodImages/Train/Cakes/cake25.jpg', 'FoodImages/Train/Cakes/cake26.jpg', 'FoodImages/Train/Cakes/cake27.jpg', 'FoodImages/Train/Cakes/cake28.jpg', 'FoodImages/Train/Cakes/cake29.jpg', 'FoodImages/Train/Cakes/cake3.png', 'FoodImages/Train/Cakes/cake30.jpg', 'FoodImages/Train/Cakes/cake4.jpg', 'FoodImages/Train/Cakes/cake5.jpg', 'FoodImages/Train/Cakes/cake6.jpg', 'FoodI

In [36]:
num_words = 50 
dictionary_name = 'food' 
dictionary = Dictionary(dictionary_name, training_file_names, num_words) 

In [37]:
training_word_histograms = dictionary.learn()

In [38]:
#save dictionary 
with open('food_dictionary.dic', 'wb') as f: #'wb' is for binary write 
    pickle.dump(dictionary, f)

In [39]:
with open('food_dictionary.dic', 'rb') as f: #'rb' is for binary read 
    dictionary = pickle.load(f) 

In [40]:
test_file_names = [] 
test_food_labels = []

#load test images and create word histograms
for i in range(0, len(foods)): 
    sub_path = path + 'Test/' + foods[i] + '/' 
    sub_file_names = [os.path.join(sub_path, f) for f in os.listdir(sub_path)] 
    sub_food_labels = [i] * len(sub_file_names) #create a list of N elements, all are i 
    test_file_names += sub_file_names 
    test_food_labels += sub_food_labels 
    
word_histograms = dictionary.create_word_histograms(test_file_names) 

# KNN

In [41]:
num_nearest_neighbours = 5 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 5
[[18  5  7]
 [ 0 26  4]
 [ 1  8 21]]
              precision    recall  f1-score   support

           0       0.95      0.60      0.73        30
           1       0.67      0.87      0.75        30
           2       0.66      0.70      0.68        30

    accuracy                           0.72        90
   macro avg       0.76      0.72      0.72        90
weighted avg       0.76      0.72      0.72        90



In [42]:
num_nearest_neighbours = 10 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 10
[[18  5  7]
 [ 0 26  4]
 [ 0  8 22]]
              precision    recall  f1-score   support

           0       1.00      0.60      0.75        30
           1       0.67      0.87      0.75        30
           2       0.67      0.73      0.70        30

    accuracy                           0.73        90
   macro avg       0.78      0.73      0.73        90
weighted avg       0.78      0.73      0.73        90



In [43]:
num_nearest_neighbours = 15 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 15
[[16  6  8]
 [ 0 25  5]
 [ 0  9 21]]
              precision    recall  f1-score   support

           0       1.00      0.53      0.70        30
           1       0.62      0.83      0.71        30
           2       0.62      0.70      0.66        30

    accuracy                           0.69        90
   macro avg       0.75      0.69      0.69        90
weighted avg       0.75      0.69      0.69        90



In [44]:
num_nearest_neighbours = 20 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 20
[[13 10  7]
 [ 0 26  4]
 [ 0 10 20]]
              precision    recall  f1-score   support

           0       1.00      0.43      0.60        30
           1       0.57      0.87      0.68        30
           2       0.65      0.67      0.66        30

    accuracy                           0.66        90
   macro avg       0.74      0.66      0.65        90
weighted avg       0.74      0.66      0.65        90



In [45]:
num_nearest_neighbours = 25 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 25
[[11 13  6]
 [ 0 26  4]
 [ 0  9 21]]
              precision    recall  f1-score   support

           0       1.00      0.37      0.54        30
           1       0.54      0.87      0.67        30
           2       0.68      0.70      0.69        30

    accuracy                           0.64        90
   macro avg       0.74      0.64      0.63        90
weighted avg       0.74      0.64      0.63        90



In [46]:
num_nearest_neighbours = 30 #number of neighbours 
knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours) 
knn.fit(training_word_histograms, training_food_labels) 
predicted_food_labels = knn.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("num_nearest_neighbour:", num_nearest_neighbours)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

num_nearest_neighbour: 30
[[ 8 14  8]
 [ 0 25  5]
 [ 0  6 24]]
              precision    recall  f1-score   support

           0       1.00      0.27      0.42        30
           1       0.56      0.83      0.67        30
           2       0.65      0.80      0.72        30

    accuracy                           0.63        90
   macro avg       0.73      0.63      0.60        90
weighted avg       0.73      0.63      0.60        90



# SVM

In [47]:
C = 10
svm_classifier = svm.SVC(C = C, #see slide 32 in week 4 handouts 
kernel = 'linear') #see slide 35 in week 4 handouts 
svm_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("C:", C)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

C: 10
[[25  2  3]
 [ 0 23  7]
 [ 1  1 28]]
              precision    recall  f1-score   support

           0       0.96      0.83      0.89        30
           1       0.88      0.77      0.82        30
           2       0.74      0.93      0.82        30

    accuracy                           0.84        90
   macro avg       0.86      0.84      0.85        90
weighted avg       0.86      0.84      0.85        90



In [48]:
C = 20
svm_classifier = svm.SVC(C = C, #see slide 32 in week 4 handouts 
kernel = 'linear') #see slide 35 in week 4 handouts 
svm_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("C:", C)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

C: 20
[[26  2  2]
 [ 0 23  7]
 [ 1  1 28]]
              precision    recall  f1-score   support

           0       0.96      0.87      0.91        30
           1       0.88      0.77      0.82        30
           2       0.76      0.93      0.84        30

    accuracy                           0.86        90
   macro avg       0.87      0.86      0.86        90
weighted avg       0.87      0.86      0.86        90



In [49]:
C = 30
svm_classifier = svm.SVC(C = C, #see slide 32 in week 4 handouts 
kernel = 'linear') #see slide 35 in week 4 handouts 
svm_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("C:", C)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

C: 30
[[26  2  2]
 [ 0 22  8]
 [ 1  1 28]]
              precision    recall  f1-score   support

           0       0.96      0.87      0.91        30
           1       0.88      0.73      0.80        30
           2       0.74      0.93      0.82        30

    accuracy                           0.84        90
   macro avg       0.86      0.84      0.85        90
weighted avg       0.86      0.84      0.85        90



In [50]:
C = 40
svm_classifier = svm.SVC(C = C, #see slide 32 in week 4 handouts 
kernel = 'linear') #see slide 35 in week 4 handouts 
svm_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("C:", C)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

C: 40
[[27  1  2]
 [ 0 24  6]
 [ 1  3 26]]
              precision    recall  f1-score   support

           0       0.96      0.90      0.93        30
           1       0.86      0.80      0.83        30
           2       0.76      0.87      0.81        30

    accuracy                           0.86        90
   macro avg       0.86      0.86      0.86        90
weighted avg       0.86      0.86      0.86        90



In [51]:
C = 50
svm_classifier = svm.SVC(C = C, #see slide 32 in week 4 handouts 
kernel = 'linear') #see slide 35 in week 4 handouts 
svm_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = svm_classifier.predict(word_histograms) 
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("C:", C)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

C: 50
[[27  1  2]
 [ 0 24  6]
 [ 1  3 26]]
              precision    recall  f1-score   support

           0       0.96      0.90      0.93        30
           1       0.86      0.80      0.83        30
           2       0.76      0.87      0.81        30

    accuracy                           0.86        90
   macro avg       0.86      0.86      0.86        90
weighted avg       0.86      0.86      0.86        90



# AdaBoost

In [52]:
n_estimators = 50
adb_classifier = AdaBoostClassifier(n_estimators = n_estimators, #number of weak classifiers 
random_state = 0) 
adb_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("n_estimators:", n_estimators)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

n_estimators: 50
[[22  0  8]
 [ 1 24  5]
 [ 3  1 26]]
              precision    recall  f1-score   support

           0       0.85      0.73      0.79        30
           1       0.96      0.80      0.87        30
           2       0.67      0.87      0.75        30

    accuracy                           0.80        90
   macro avg       0.82      0.80      0.80        90
weighted avg       0.82      0.80      0.80        90



In [53]:
n_estimators = 100
adb_classifier = AdaBoostClassifier(n_estimators = n_estimators, #number of weak classifiers 
random_state = 0) 
adb_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("n_estimators:", n_estimators)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

n_estimators: 100
[[21  1  8]
 [ 0 25  5]
 [ 4  2 24]]
              precision    recall  f1-score   support

           0       0.84      0.70      0.76        30
           1       0.89      0.83      0.86        30
           2       0.65      0.80      0.72        30

    accuracy                           0.78        90
   macro avg       0.79      0.78      0.78        90
weighted avg       0.79      0.78      0.78        90



In [54]:
n_estimators = 150
adb_classifier = AdaBoostClassifier(n_estimators = n_estimators, #number of weak classifiers 
random_state = 0) 
adb_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("n_estimators:", n_estimators)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

n_estimators: 150
[[23  0  7]
 [ 0 25  5]
 [ 4  4 22]]
              precision    recall  f1-score   support

           0       0.85      0.77      0.81        30
           1       0.86      0.83      0.85        30
           2       0.65      0.73      0.69        30

    accuracy                           0.78        90
   macro avg       0.79      0.78      0.78        90
weighted avg       0.79      0.78      0.78        90



In [55]:
n_estimators = 200
adb_classifier = AdaBoostClassifier(n_estimators = n_estimators, #number of weak classifiers 
random_state = 0) 
adb_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("n_estimators:", n_estimators)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

n_estimators: 200
[[22  0  8]
 [ 0 25  5]
 [ 4  3 23]]
              precision    recall  f1-score   support

           0       0.85      0.73      0.79        30
           1       0.89      0.83      0.86        30
           2       0.64      0.77      0.70        30

    accuracy                           0.78        90
   macro avg       0.79      0.78      0.78        90
weighted avg       0.79      0.78      0.78        90



In [56]:
n_estimators = 250
adb_classifier = AdaBoostClassifier(n_estimators = n_estimators, #number of weak classifiers 
random_state = 0) 
adb_classifier.fit(training_word_histograms, training_food_labels)
predicted_food_labels = adb_classifier.predict(word_histograms)
cm = confusion_matrix(test_food_labels, predicted_food_labels) 
print("n_estimators:", n_estimators)
print(cm) 
print(classification_report(test_food_labels, predicted_food_labels))

n_estimators: 250
[[21  1  8]
 [ 0 24  6]
 [ 4  4 22]]
              precision    recall  f1-score   support

           0       0.84      0.70      0.76        30
           1       0.83      0.80      0.81        30
           2       0.61      0.73      0.67        30

    accuracy                           0.74        90
   macro avg       0.76      0.74      0.75        90
weighted avg       0.76      0.74      0.75        90

