In this code, the K-means with a BoW model, KNN and SVM are used for classifying food images. The used dataset is FoodImages which is located in the folder data. Adjust the path, to load the data.
In FoodImages, there are two folders: Train and Test containing training and test images respectively. Each Train/Test folder contains three sub-folders corresponding to three different food types including Cakes, Pasta, and Pizza. For each food category, there are equal numbers of images (30 images) used for training and testing. The Bag-of-Words (BoW) model is built for food image recognition based on the training images of the supplied food database. 

The Dictionary class below is developed to build BoW models using K-means algorithm. 

# 1. Bag-of-Words (BoW) model

In [1]:
import numpy as np
import cv2 as cv
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore")

class Dictionary(object):
    def __init__(self, name, img_filenames, num_words):
        self.name = name
        self.img_filenames = img_filenames
        self.num_words = num_words

        self.training_data = []
        self.words = []

    def learn(self):
        sift = cv.SIFT_create()

        num_keypoints = []

        # load training images and compute SIFT descriptors
        for filename in self.img_filenames:
            img = cv.imread(filename)
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            list_des = sift.detectAndCompute(img_gray, None)[1]
            if list_des is None:
                num_keypoints.append(0)
            else:
                num_keypoints.append(len(list_des))
                for des in list_des:
                    self.training_data.append(des)

        # cluster SIFT descriptors using K-means algorithm
        kmeans = KMeans(self.num_words)
        kmeans.fit(self.training_data)
        self.words = kmeans.cluster_centers_

        # create word histograms for training images
        training_word_histograms = [] # list of word histograms of all training images
        index = 0 
        for i in range(0, len(self.img_filenames)):
            #for each file, create a histogram
            histogram = np.zeros(self.num_words, np.float32)
            # if some keypoints exist
            if num_keypoints[i]>0:
                for j in range(0, num_keypoints[i]):
                    histogram[kmeans.labels_[j+index]] += 1
                index += num_keypoints[i]
                histogram /= num_keypoints[i]
                training_word_histograms.append(histogram)
        return training_word_histograms

    def create_word_histograms(self, img_filenames):
        sift = cv.SIFT_create()
        histograms = []
        
        for filename in img_filenames:
            img = cv.imread(filename)
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            descriptors = sift.detectAndCompute(img_gray, None)[1]

            histogram = np.zeros(self.num_words, np.float32)

            if descriptors is not None:
                for des in descriptors:
                    # find the best matching word
                    min_distance = 1111111
                    matching_word_ID = -1 

                    for i in range(0, self.num_words):
                        distance = np.linalg.norm(des-self.words[i])
                        if distance < min_distance:
                            min_distance = distance
                            matching_word_ID = i

                    histogram[matching_word_ID] += 1

                histogram /= len(descriptors) #normalise histogram to frequencies
            histograms.append(histogram)
        return histograms



#### Load training data

In [2]:

import os

foods = ["Cakes", "Pasta", "Pizza"]
path = "FoodImages/"
training_file_names = []
training_food_labels = []
for i in range(0, len(foods)):
    sub_path = path+"Train/"+foods[i]+"/"
    sub_file_names = [os.path.join(sub_path,f)  for f in os.listdir(sub_path)]
    sub_food_labels = [i] * len(sub_file_names)
    training_file_names += sub_file_names
    training_food_labels += sub_food_labels

print(training_file_names)
print(training_food_labels)


['FoodImages/Train/Cakes/cake1.png', 'FoodImages/Train/Cakes/cake10.jpg', 'FoodImages/Train/Cakes/cake11.jpg', 'FoodImages/Train/Cakes/cake12.jpg', 'FoodImages/Train/Cakes/cake13.jpg', 'FoodImages/Train/Cakes/cake14.jpg', 'FoodImages/Train/Cakes/cake15.jpg', 'FoodImages/Train/Cakes/cake16.jpg', 'FoodImages/Train/Cakes/cake17.jpg', 'FoodImages/Train/Cakes/cake18.jpg', 'FoodImages/Train/Cakes/cake19.jpg', 'FoodImages/Train/Cakes/cake2.png', 'FoodImages/Train/Cakes/cake20.jpg', 'FoodImages/Train/Cakes/cake21.jpg', 'FoodImages/Train/Cakes/cake22.jpg', 'FoodImages/Train/Cakes/cake23.jpg', 'FoodImages/Train/Cakes/cake24.jpg', 'FoodImages/Train/Cakes/cake25.jpg', 'FoodImages/Train/Cakes/cake26.jpg', 'FoodImages/Train/Cakes/cake27.jpg', 'FoodImages/Train/Cakes/cake28.jpg', 'FoodImages/Train/Cakes/cake29.jpg', 'FoodImages/Train/Cakes/cake3.png', 'FoodImages/Train/Cakes/cake30.jpg', 'FoodImages/Train/Cakes/cake4.jpg', 'FoodImages/Train/Cakes/cake5.jpg', 'FoodImages/Train/Cakes/cake6.jpg', 'FoodI

#### Create and learn dictionary

In [3]:
num_words = 50
dictionary_name = "food"
dictionary = Dictionary(dictionary_name, training_file_names, num_words)
training_word_histograms = dictionary.learn()

#### Save dictionary

In [4]:
import pickle
with open("food_dictionary.dic", "wb") as f: # "wb" is for binary write
    pickle.dump(dictionary, f)

#### Load dictionary

In [5]:
import pickle 
with open("food_dictionary.dic", "rb") as f: # "rb" is for binary read
    dictionary = pickle.load(f)

# 2. KNN

#### Apply KNN on whole dataset and test it on a sample image

In [6]:
from sklearn.neighbors import KNeighborsClassifier

num_nearest_neighbours = 5 

knn=KNeighborsClassifier(n_neighbors = num_nearest_neighbours)
knn.fit(training_word_histograms, training_food_labels)

test_file_names = ["FoodImages/Test/Pasta/pasta35.jpg"]
word_histograms = dictionary.create_word_histograms(test_file_names)

predicted_food_labels = knn.predict(word_histograms)
print("Food label: ", predicted_food_labels)

Food label:  [1]


#### Apply and test KNN on the whole dataset

In [7]:
test_file_names, test_food_labels = [], []
for i in range(0, len(foods)):
    sub_path = path + "Test/" + foods[i] + "/"
    sub_file_names = [os.path.join(sub_path, f) for f in os.listdir(sub_path)]
    sub_food_labels = [i]*len(sub_file_names)
    test_file_names += sub_file_names
    test_food_labels += sub_food_labels

print(test_file_names)
print(test_food_labels)

['FoodImages/Test/Cakes/cake31.jpg', 'FoodImages/Test/Cakes/cake32.jpg', 'FoodImages/Test/Cakes/cake33.jpg', 'FoodImages/Test/Cakes/cake34.jpg', 'FoodImages/Test/Cakes/cake35.jpg', 'FoodImages/Test/Cakes/cake36.jpg', 'FoodImages/Test/Cakes/cake37.jpg', 'FoodImages/Test/Cakes/cake38.jpg', 'FoodImages/Test/Cakes/cake39.jpg', 'FoodImages/Test/Cakes/cake40.jpg', 'FoodImages/Test/Cakes/cake41.jpg', 'FoodImages/Test/Cakes/cake42.jpg', 'FoodImages/Test/Cakes/cake43.jpg', 'FoodImages/Test/Cakes/cake44.jpg', 'FoodImages/Test/Cakes/cake45.jpg', 'FoodImages/Test/Cakes/cake46.jpg', 'FoodImages/Test/Cakes/cake47.jpg', 'FoodImages/Test/Cakes/cake48.jpg', 'FoodImages/Test/Cakes/cake49.jpg', 'FoodImages/Test/Cakes/cake50.jpg', 'FoodImages/Test/Cakes/cake51.jpg', 'FoodImages/Test/Cakes/cake52.jpg', 'FoodImages/Test/Cakes/cake53.jpg', 'FoodImages/Test/Cakes/cake54.jpg', 'FoodImages/Test/Cakes/cake55.jpg', 'FoodImages/Test/Cakes/cake56.jpg', 'FoodImages/Test/Cakes/cake57.jpg', 'FoodImages/Test/Cakes/cake

#### Test the knn classifiere on all test food images

In [8]:
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = knn.predict(word_histograms)

#### Evaluation: calculating recognition accuracy

In [9]:
num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
recognition_acc = num_correct_predictions/len(test_food_labels)
print(recognition_acc)

0.7222222222222222


#### Evaluation: compute confusion matrix

In [10]:
from sklearn.metrics import classification_report, confusion_matrix
cm = confusion_matrix(test_food_labels, predicted_food_labels)
print(cm)

[[18  5  7]
 [ 0 25  5]
 [ 1  7 22]]


#### Vary number of neighbours

In [11]:
num_nearest_neighbours = [10, 15, 20, 25, 30]

for k in num_nearest_neighbours:
    knn=KNeighborsClassifier(n_neighbors = k)
    knn.fit(training_word_histograms, training_food_labels)
    predicted_food_labels = knn.predict(word_histograms)
    num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
    recognition_acc = num_correct_predictions/len(test_food_labels)
    print("Number of neighbours: ", k, "    ; accuracy: ", recognition_acc)

Number of neighbours:  10     ; accuracy:  0.7111111111111111
Number of neighbours:  15     ; accuracy:  0.6888888888888889
Number of neighbours:  20     ; accuracy:  0.6888888888888889
Number of neighbours:  25     ; accuracy:  0.6222222222222222
Number of neighbours:  30     ; accuracy:  0.6333333333333333


The best value for num_nearest_neighbours was 5 with an accuracy of 71.1%.

# 3. SVM

In [12]:
from sklearn import svm
svm_classifier = svm.SVC(C=50, kernel = "linear")
svm_classifier.fit(training_word_histograms, training_food_labels)

#### Test with one test image

In [13]:
test_file_name = ["FoodImages/Test/Pasta/pasta35.jpg"]
word_histograms = dictionary.create_word_histograms(test_file_name)

predicted_food_labels = svm_classifier.predict(word_histograms)
print("Food label: ", predicted_food_labels)

Food label:  [1]


#### Test with all test images

In [14]:
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = svm_classifier.predict(word_histograms)

#### Evaluation: calculating recognition accuracy

In [15]:
num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
recognition_acc = num_correct_predictions/len(test_food_labels)
print(recognition_acc)

0.8444444444444444


#### Evaluation: compute confusion matrix

In [16]:
from sklearn.metrics import classification_report, confusion_matrix
cm = confusion_matrix(test_food_labels, predicted_food_labels)
print(cm)

[[26  2  2]
 [ 0 25  5]
 [ 1  4 25]]


#### Vary number of the parameter C

In [17]:
C_list = [10,20,30,40,50]

for c in C_list:
    svm_classifier = svm.SVC(C=c, kernel = "linear")
    svm_classifier.fit(training_word_histograms, training_food_labels)
    word_histograms = dictionary.create_word_histograms(test_file_names)
    predicted_food_labels = svm_classifier.predict(word_histograms)
    num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
    recognition_acc = num_correct_predictions/len(test_food_labels)
    print("Parameter C: ", c, "    ; accuracy: ", recognition_acc)

Parameter C:  10     ; accuracy:  0.8
Parameter C:  20     ; accuracy:  0.8111111111111111
Parameter C:  30     ; accuracy:  0.8222222222222222
Parameter C:  40     ; accuracy:  0.8333333333333334
Parameter C:  50     ; accuracy:  0.8444444444444444


The highest accuracy of 84.4% can be achieved with C=50.

In [18]:
from sklearn.ensemble import AdaBoostClassifier
adb_classifier = AdaBoostClassifier(n_estimators=150, random_state=0)
adb_classifier.fit(training_word_histograms, training_food_labels)

#### Test algorithm with one test image

In [19]:
test_file_name = ["FoodImages/Test/Pasta/pasta35.jpg"]
word_histogram = dictionary.create_word_histograms(test_file_name)

predicted_food_labels = adb_classifier.predict(word_histograms)
print("Food label: ", predicted_food_labels)

Food label:  [0 0 0 2 0 0 0 0 1 0 0 0 0 0 0 2 0 0 0 0 0 2 0 2 0 0 0 0 0 0 1 2 2 1 1 1 1
 1 1 2 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 2 1 1 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2
 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2]


#### Test with all test images

In [20]:
word_histograms = dictionary.create_word_histograms(test_file_names)
predicted_food_labels = adb_classifier.predict(word_histograms)

#### Evaluation: calculating recognition accuracy

In [21]:
num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
recognition_acc = num_correct_predictions/len(test_food_labels)
print(recognition_acc)

0.8


#### Evaluation: compute confusion matrix

In [22]:
from sklearn.metrics import classification_report, confusion_matrix
cm = confusion_matrix(test_food_labels, predicted_food_labels)
print(cm)

[[25  1  4]
 [ 0 22  8]
 [ 1  4 25]]


#### Vary n_estimators

In [23]:
N_estimators = [50,100,150,200,250]

for n in N_estimators:
    adb_classifier = AdaBoostClassifier(n_estimators=n, random_state=0)
    adb_classifier.fit(training_word_histograms, training_food_labels)
    word_histograms = dictionary.create_word_histograms(test_file_names)
    predicted_food_labels = svm_classifier.predict(word_histograms)
    num_correct_predictions = np.sum(predicted_food_labels == test_food_labels)
    recognition_acc = num_correct_predictions/len(test_food_labels)
    print("n_estimators: ", n, "    ; accuracy: ", recognition_acc)

n_estimators:  50     ; accuracy:  0.8444444444444444
n_estimators:  100     ; accuracy:  0.8444444444444444
n_estimators:  150     ; accuracy:  0.8444444444444444
n_estimators:  200     ; accuracy:  0.8444444444444444
n_estimators:  250     ; accuracy:  0.8444444444444444


The same accuracy of 84.4% is achieved for all tested values of n_estimators.

In this example SVM performs as good as KNN. However, SVM is less sensible to the choice of its hyperparameter.