In [1]:
import os
import cv2
import numpy as np

def pre_preprocess_image(processed_image):
    return (processed_image - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])


def preprocess_image(image, step='full', resize=64):
  if step not in ['full', 'half']:
    raise ValueError()
    
  processed_image = cv2.flip(image, 1)
  rotation_angle = np.random.uniform(0, 15)
  rows, cols, _ = processed_image.shape
  rotation_matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotation_angle, 1)
  processed_image = cv2.warpAffine(processed_image, rotation_matrix, (cols, rows))

  processed_image = pre_preprocess_image(processed_image)
  og_image = pre_preprocess_image(image)
  if step == 'half':
    return cv2.resize(processed_image, (resize,resize)), cv2.resize(og_image, (resize,resize))
  else:
    return cv2.resize(processed_image, (224,224)).flatten().astype('float32'), cv2.resize(og_image, (224,224)).flatten().astype('float32')




def get_images(t: str = "Train", resize: int = 64):
    img = []
    label = []
    t = t.capitalize()

    if t not in ["Train", "Test", "Validate"]:
        raise ValueError()

    folders = os.listdir(f'/kaggle/input/indian-food-16/{t}')

    for folder in folders:
        for image in os.listdir(f'/kaggle/input/indian-food-16/{t}/{folder}'):
            img_og = cv2.imread(f"/kaggle/input/indian-food-16/{t}/{folder}/{image}")
            img_set = preprocess_image(img_og, resize = resize)
            img.append(img_set[0])
            img.append(img_set[1])
            label.append(folders.index(folder))
            label.append(folders.index(folder))
            
    img = np.array(img)
    label = np.array(label)
    return img, label

In [2]:
print("Training Data Loading...")
i_tr, l_tr = get_images("Train")
print("Testing Data Loading...")
i_te, l_te = get_images("Test")
print("Validating Data Loading...")
i_va, l_va = get_images("Validate")

Training Data Loading...
Testing Data Loading...
Validating Data Loading...


In [3]:
i_tr = np. concatenate((i_tr, i_va), 0)
l_tr = np. concatenate((l_tr, l_va), 0)

In [4]:
print("Training Data Information")
print(i_tr.shape)
print(l_tr.shape)
print("Testing Data Information")
print(i_te.shape)
print(l_te.shape)

Training Data Information
(10414, 150528)
(10414,)
Testing Data Information
(2588, 150528)
(2588,)


In [5]:
# from sklearn.preprocessing import StandardScaler
# scaler = scaler = StandardScaler()
# i_tr = scaler.fit_transform(i_tr)
# print("Done with Training, Doing Testing")
# i_te = scaler.fit_transform(i_te)

In [6]:
accuracy_li = [[0.815], [0.475]]

In [7]:
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(n_estimators=101)
random_forest.fit(i_tr, l_tr)

In [8]:
from sklearn.metrics import accuracy_score, classification_report
predictions = random_forest.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.37      0.36      0.36       176
           1       0.18      0.16      0.17       216
           2       0.30      0.53      0.38       238
           3       0.39      0.36      0.38       188
           4       0.23      0.13      0.16       204
           5       0.48      0.32      0.39       136
           6       1.00      0.08      0.15        24
           7       0.38      0.26      0.30       188
           8       0.49      0.47      0.48       182
           9       0.40      0.13      0.19       164
          10       0.22      0.36      0.27       252
          11       0.35      0.33      0.34       168
          12       0.19      0.23      0.21       252
          13       0.26      0.24      0.25       200

    accuracy                           0.30      2588
   macro avg       0.37      0.28      0.29      2588
weighted avg       0.32      0.30      0.29      2588



In [9]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(i_tr, l_tr)

In [10]:
from sklearn.metrics import accuracy_score, classification_report
predictions = gnb.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.17      0.16      0.17       176
           1       0.26      0.05      0.08       216
           2       0.21      0.49      0.30       238
           3       0.16      0.52      0.25       188
           4       0.11      0.02      0.04       204
           5       0.11      0.21      0.14       136
           6       0.05      0.17      0.08        24
           7       0.38      0.18      0.24       188
           8       0.32      0.39      0.35       182
           9       0.33      0.05      0.09       164
          10       0.05      0.00      0.01       252
          11       0.19      0.20      0.19       168
          12       0.15      0.02      0.04       252
          13       0.16      0.23      0.19       200

    accuracy                           0.19      2588
   macro avg       0.19      0.19      0.15      2588
weighted avg       0.19      0.19      0.15      2588



In [11]:
from sklearn import tree
dec_tree = tree.DecisionTreeClassifier()
dec_tree.fit(i_tr, l_tr)

In [12]:
predictions = dec_tree.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report:\n{report}")


Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.22      0.21       176
           1       0.10      0.10      0.10       216
           2       0.26      0.26      0.26       238
           3       0.24      0.21      0.23       188
           4       0.12      0.12      0.12       204
           5       0.20      0.20      0.20       136
           6       0.05      0.04      0.04        24
           7       0.16      0.16      0.16       188
           8       0.26      0.24      0.25       182
           9       0.14      0.15      0.14       164
          10       0.14      0.14      0.14       252
          11       0.18      0.18      0.18       168
          12       0.17      0.18      0.17       252
          13       0.13      0.14      0.13       200

    accuracy                           0.17      2588
   macro avg       0.17      0.17      0.17      2588
weighted avg       0.18      0.17      0.17      2588



In [13]:
from sklearn.neighbors import KNeighborsClassifier
knn_3 = KNeighborsClassifier(n_neighbors=3)
knn_11 = KNeighborsClassifier(n_neighbors=11)
knn_21 = KNeighborsClassifier(n_neighbors=21)

knn_3.fit(i_tr, l_tr)
knn_11.fit(i_tr, l_tr)
knn_21.fit(i_tr, l_tr)

In [14]:
predictions = knn_3.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report (3):\n{report}")

predictions = knn_11.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report (11):\n{report}")


predictions = knn_21.predict(i_te)
accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)
accuracy_li.append([accuracy])
print(f"Classification Report(21):\n{report}")

Classification Report (3):
              precision    recall  f1-score   support

           0       0.10      0.54      0.17       176
           1       0.14      0.21      0.17       216
           2       0.17      0.20      0.18       238
           3       0.24      0.21      0.22       188
           4       0.10      0.05      0.07       204
           5       0.17      0.47      0.25       136
           6       0.00      0.00      0.00        24
           7       0.21      0.16      0.18       188
           8       0.22      0.12      0.15       182
           9       0.17      0.02      0.04       164
          10       0.17      0.05      0.08       252
          11       0.50      0.04      0.08       168
          12       0.11      0.02      0.03       252
          13       0.20      0.04      0.07       200

    accuracy                           0.15      2588
   macro avg       0.18      0.15      0.12      2588
weighted avg       0.19      0.15      0.13      2588

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
from sklearn.ensemble import VotingClassifier

models = [('Random Forest', random_forest),
          ('Gaussian Naive Bayes', gnb),
          ('Decision Tree', dec_tree),
          ('KNN (k=3)', knn_3),
          ('KNN (k=11)', knn_11),
          ('KNN (k=21)', knn_21)]

voting_classifier = VotingClassifier(estimators=models)

voting_classifier.fit(i_tr, l_tr)

predictions = voting_classifier.predict(i_te)

accuracy = accuracy_score(l_te, predictions)
report = classification_report(l_te, predictions)

print(f"Ensemble Learning Classification Report:\n{report}")
accuracy_li.append([accuracy])

Ensemble Learning Classification Report:
              precision    recall  f1-score   support

           0       0.10      0.59      0.17       176
           1       0.17      0.14      0.15       216
           2       0.25      0.37      0.30       238
           3       0.26      0.31      0.28       188
           4       0.20      0.04      0.07       204
           5       0.21      0.54      0.30       136
           6       1.00      0.04      0.08        24
           7       0.36      0.24      0.29       188
           8       0.47      0.21      0.29       182
           9       0.17      0.01      0.01       164
          10       0.19      0.07      0.10       252
          11       0.53      0.10      0.17       168
          12       0.18      0.02      0.04       252
          13       0.23      0.05      0.08       200

    accuracy                           0.19      2588
   macro avg       0.31      0.20      0.17      2588
weighted avg       0.26      0.19      

In [16]:
labels = ["Transfer Learning ResNet", "Our Own CNN Architecture", "RandomForestClassifier", "GaussianNB", 'DecisionTreeClassifier', 'KNeighborsClassifier_3', "KNeighborsClassifier_11", "KNeighborsClassifier_21", "VotingClassifier"]

In [17]:
import pandas as pd
pd.DataFrame(np.array(accuracy_li).T, columns=labels, index=['Accuracy'])

Unnamed: 0,Transfer Learning ResNet,Our Own CNN Architecture,RandomForestClassifier,GaussianNB,DecisionTreeClassifier,KNeighborsClassifier_3,KNeighborsClassifier_11,KNeighborsClassifier_21,VotingClassifier
Accuracy,0.815,0.475,0.298686,0.188563,0.174652,0.150696,0.153787,0.161128,0.193586
