In [1]:
import os
import cv2
import numpy as np


def preprocess_image(image, step='full', resize=64):
    if step not in ['full', 'half']:
        raise ValueError()

    if step == 'half':
        return cv2.resize(image, (resize, resize))
    else:
        return cv2.resize(image, (224, 224)).flatten().astype('float32')


def get_images(t: str = "Train", resize: int = 64):
    img = []
    label = []
    t = t.capitalize()

    if t not in ["Train", "Test", "Validate"]:
        raise ValueError()

    folders = os.listdir(f'/kaggle/input/indian-food-16/{t}')

    for folder in folders:
        for image in os.listdir(f'/kaggle/input/indian-food-16/{t}/{folder}'):
            img_og = cv2.imread(f"/kaggle/input/indian-food-16/{t}/{folder}/{image}")
            img.append(preprocess_image(img_og, resize=resize))
            label.append(folders.index(folder))

    img = np.array(img)
    label = np.array(label)
    return img, label

In [2]:
print("Training Data Loading...")
i_tr, l_tr = get_images("Train")
print("Testing Data Loading...")
i_te, l_te = get_images("Test")
print("Validating Data Loading...")
i_va, l_va = get_images("Validate")

Training Data Loading...
Testing Data Loading...
Validating Data Loading...


In [3]:
i_tr = np.concatenate((i_tr, i_va), 0)
l_tr = np.concatenate((l_tr, l_va), 0)

In [4]:
print("Training Data Information")
print(i_tr.shape)
print(l_tr.shape)
print("Testing Data Information")
print(i_te.shape)
print(l_te.shape)

Training Data Information
(5207, 150528)
(5207,)
Testing Data Information
(1294, 150528)
(1294,)


In [5]:
from sklearn.preprocessing import StandardScaler

scaler = scaler = StandardScaler()
i_tr = scaler.fit_transform(i_tr)
print("Done with Training, Doing Testing")
i_te = scaler.fit_transform(i_te)

Done with Training, Doing Testing


In [6]:
accuracy_li = [[0.815], [0.475]]

In [7]:
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(n_estimators=101)
random_forest.fit(i_tr, l_tr)

In [8]:
from sklearn.metrics import accuracy_score, classification_report

predictions = random_forest.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.36      0.31      0.33        88
           1       0.21      0.21      0.21       108
           2       0.31      0.55      0.40       119
           3       0.47      0.44      0.45        94
           4       0.20      0.15      0.17       102
           5       0.44      0.37      0.40        68
           6       0.00      0.00      0.00        12
           7       0.29      0.22      0.25        94
           8       0.48      0.48      0.48        91
           9       0.52      0.15      0.23        82
          10       0.25      0.42      0.31       126
          11       0.36      0.27      0.31        84
          12       0.21      0.21      0.21       126
          13       0.28      0.25      0.26       100

    accuracy                           0.31      1294
   macro avg       0.31      0.29      0.29      1294
weighted avg       0.32      0.31      0.30      1294



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(i_tr, l_tr)

In [10]:
predictions = gnb.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.18      0.19        88
           1       0.30      0.06      0.09       108
           2       0.22      0.48      0.30       119
           3       0.16      0.51      0.24        94
           4       0.16      0.04      0.06       102
           5       0.11      0.21      0.14        68
           6       0.06      0.17      0.09        12
           7       0.36      0.17      0.23        94
           8       0.34      0.41      0.37        91
           9       0.25      0.06      0.10        82
          10       0.00      0.00      0.00       126
          11       0.21      0.24      0.22        84
          12       0.17      0.02      0.04       126
          13       0.17      0.25      0.20       100

    accuracy                           0.20      1294
   macro avg       0.19      0.20      0.16      1294
weighted avg       0.20      0.20      0.16      1294



In [11]:
from sklearn import tree

dec_tree = tree.DecisionTreeClassifier()
dec_tree.fit(i_tr, l_tr)

In [12]:
predictions = dec_tree.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.25      0.31      0.28        88
           1       0.11      0.11      0.11       108
           2       0.18      0.18      0.18       119
           3       0.20      0.21      0.21        94
           4       0.13      0.13      0.13       102
           5       0.18      0.18      0.18        68
           6       0.10      0.08      0.09        12
           7       0.15      0.14      0.14        94
           8       0.29      0.32      0.30        91
           9       0.15      0.13      0.14        82
          10       0.17      0.17      0.17       126
          11       0.19      0.19      0.19        84
          12       0.14      0.11      0.12       126
          13       0.15      0.16      0.15       100

    accuracy                           0.18      1294
   macro avg       0.17      0.17      0.17      1294
weighted avg       0.17      0.18      0.17      1294



In [13]:
from sklearn.neighbors import KNeighborsClassifier

knn_3 = KNeighborsClassifier(n_neighbors=3)
knn_11 = KNeighborsClassifier(n_neighbors=11)
knn_21 = KNeighborsClassifier(n_neighbors=21)

knn_3.fit(i_tr, l_tr)
knn_11.fit(i_tr, l_tr)
knn_21.fit(i_tr, l_tr)

In [14]:
predictions = knn_3.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report (3):\n{report}")

predictions = knn_11.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report (11):\n{report}")

predictions = knn_21.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report(21):\n{report}")

Classification Report (3):
              precision    recall  f1-score   support

           0       0.10      0.66      0.18        88
           1       0.11      0.13      0.12       108
           2       0.18      0.16      0.17       119
           3       0.26      0.21      0.23        94
           4       0.17      0.10      0.12       102
           5       0.18      0.49      0.26        68
           6       0.00      0.00      0.00        12
           7       0.32      0.18      0.23        94
           8       0.26      0.15      0.19        91
           9       0.00      0.00      0.00        82
          10       0.17      0.04      0.06       126
          11       0.40      0.02      0.04        84
          12       0.13      0.02      0.03       126
          13       0.10      0.01      0.02       100

    accuracy                           0.15      1294
   macro avg       0.17      0.15      0.12      1294
weighted avg       0.18      0.15      0.12      1294

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
from sklearn.ensemble import VotingClassifier

models = [('Random Forest', random_forest),
          ('Gaussian Naive Bayes', gnb),
          ('Decision Tree', dec_tree),
          ('KNN (k=3)', knn_3),
          ('KNN (k=11)', knn_11),
          ('KNN (k=21)', knn_21)]

voting_classifier = VotingClassifier(estimators=models)

voting_classifier.fit(i_tr, l_tr)

predictions = voting_classifier.predict(i_te)

accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)

print(f"Ensemble Learning Classification Report:\n{report}")
accuracy_li.append([accuracy])

Ensemble Learning Classification Report:
              precision    recall  f1-score   support

           0       0.10      0.62      0.17        88
           1       0.28      0.16      0.20       108
           2       0.31      0.34      0.32       119
           3       0.26      0.34      0.29        94
           4       0.21      0.07      0.10       102
           5       0.20      0.53      0.29        68
           6       0.50      0.08      0.14        12
           7       0.41      0.28      0.33        94
           8       0.55      0.25      0.35        91
           9       0.50      0.01      0.02        82
          10       0.22      0.07      0.11       126
          11       0.50      0.04      0.07        84
          12       0.25      0.02      0.04       126
          13       0.22      0.04      0.07       100

    accuracy                           0.20      1294
   macro avg       0.32      0.20      0.18      1294
weighted avg       0.30      0.20      

In [16]:
labels = ["Transfer Learning ResNet", "Our Own CNN Architecture", "RandomForestClassifier", "GaussianNB",
          'DecisionTreeClassifier', 'KNeighborsClassifier_3', "KNeighborsClassifier_11", "KNeighborsClassifier_21",
          "VotingClassifier"]

In [17]:
import pandas as pd

pd.DataFrame(np.array(accuracy_li).T, columns=labels, index=['Accuracy'])

Unnamed: 0,Transfer Learning ResNet,Our Own CNN Architecture,RandomForestClassifier,GaussianNB,DecisionTreeClassifier,KNeighborsClassifier_3,KNeighborsClassifier_11,KNeighborsClassifier_21,VotingClassifier
Accuracy,0.815,0.475,0.309892,0.195518,0.176198,0.150696,0.162287,0.156105,0.199382
