In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
image_dir = Path('/Users/davemag9/Desktop/Diploma/archive1/Indian Food Images/Indian Food Images')

In [4]:
filepaths = list(image_dir.glob(r"**/*.jpg"))
os.path.split(os.path.split(filepaths[0])[0])

('/Users/davemag9/Desktop/Diploma/archive1/Indian Food Images/Indian Food Images',
 'shrikhand')

In [5]:
labels = list(map(lambda x : os.path.split(os.path.split(x)[0])[1], filepaths))

In [6]:
filepaths = pd.Series(filepaths, name="FilePath")
labels = pd.Series(labels, name="Label")

images = pd.concat([filepaths, labels], axis=1)
# images = images.iloc[:len(images) // 2].reset_index(drop=True)


In [7]:
images

Unnamed: 0,FilePath,Label
0,/Users/davemag9/Desktop/Diploma/archive1/India...,shrikhand
1,/Users/davemag9/Desktop/Diploma/archive1/India...,shrikhand
2,/Users/davemag9/Desktop/Diploma/archive1/India...,shrikhand
3,/Users/davemag9/Desktop/Diploma/archive1/India...,shrikhand
4,/Users/davemag9/Desktop/Diploma/archive1/India...,shrikhand
...,...,...
3995,/Users/davemag9/Desktop/Diploma/archive1/India...,sheer_korma
3996,/Users/davemag9/Desktop/Diploma/archive1/India...,sheer_korma
3997,/Users/davemag9/Desktop/Diploma/archive1/India...,sheer_korma
3998,/Users/davemag9/Desktop/Diploma/archive1/India...,sheer_korma


In [8]:
category_sample = []
for category in images['Label'].unique():
    category_slice = images.query("Label == @category")
    category_sample.append(category_slice.sample(len(category_slice), random_state = 1))
image_df = pd.concat(category_sample, axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)

In [9]:
image_df

Unnamed: 0,FilePath,Label
0,/Users/davemag9/Desktop/Diploma/archive1/India...,imarti
1,/Users/davemag9/Desktop/Diploma/archive1/India...,chikki
2,/Users/davemag9/Desktop/Diploma/archive1/India...,dal_makhani
3,/Users/davemag9/Desktop/Diploma/archive1/India...,sutar_feni
4,/Users/davemag9/Desktop/Diploma/archive1/India...,bhatura
...,...,...
3995,/Users/davemag9/Desktop/Diploma/archive1/India...,anarsa
3996,/Users/davemag9/Desktop/Diploma/archive1/India...,chikki
3997,/Users/davemag9/Desktop/Diploma/archive1/India...,sheer_korma
3998,/Users/davemag9/Desktop/Diploma/archive1/India...,imarti


In [10]:
image_df['Label'].value_counts()

Label
imarti               50
chikki               50
daal_baati_churma    50
bhindi_masala        50
ariselu              50
                     ..
bandar_laddu         50
dharwad_pedha        50
kofta                50
doodhpak             50
qubani_ka_meetha     50
Name: count, Length: 80, dtype: int64

In [11]:
train_df, test_df = train_test_split(image_df, train_size = 0.7, shuffle=True, random_state=1)

In [12]:
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    validation_split=0.2
)
test_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

In [13]:
train_df['FilePath'] = train_df['FilePath'].astype(str)
test_df['FilePath'] = test_df['FilePath'].astype(str)

In [14]:
train_images = train_gen.flow_from_dataframe(
    dataframe=train_df,
    x_col="FilePath",
    y_col="Label",
    target_size=(224, 224),  # Resize images to 224x224 (MobileNetV2 input size)
    color_mode="rgb",  # Images are in RGB
    class_mode="categorical",  # Multi-class classification (categorical labels)
    batch_size=32,  # Number of images per batch
    shuffle=True,  # Shuffle the data
    seed=42,  # Set a seed for reproducibility
    subset="training"  # Use the training subset of the data (80%)
)

val_images = train_gen.flow_from_dataframe(
    dataframe=test_df,
    x_col="FilePath",
    y_col="Label",
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
    subset="validation" # Don't shuffle the test data
)

# Create the test generator
test_images = test_gen.flow_from_dataframe(
    dataframe=test_df,
    x_col="FilePath",
    y_col="Label",
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
    shuffle=False,# Don't shuffle the test data
)

Found 2240 validated image filenames belonging to 80 classes.
Found 240 validated image filenames belonging to 80 classes.
Found 1200 validated image filenames belonging to 80 classes.


In [15]:
def build_efficientnet_model(input_shape, num_classes):
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.4)(x)
    output = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# ResNet Model
def build_resnet_model(input_shape, num_classes):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.4)(x)
    output = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [16]:
from tensorflow.keras.applications import ResNet50, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

models = {
    'EfficientNet': build_efficientnet_model,
}


In [17]:
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, min_lr=0.00001)
early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [21]:
history_dict = {}
trained_models = {}

for model_name, model_func in models.items():
    # Initialize the model by calling the model function
    model = model_func(input_shape=(224, 224, 3), num_classes=80)

    # Fit the model using the image generators
    history = model.fit(
        train_images,  # Using train_images generator
        epochs=20,
        validation_data=val_images,  # Using val_images generator
        verbose=1,
        callbacks=[learning_rate_reduction, early_stopping_monitor]  # Callbacks
    )

    # Store the training history and the trained model
    history_dict[model_name] = history.history
    trained_models[model_name] = model

    # Save the trained model
    model.save(f"{model_name.lower()}_model.h5")


Epoch 1/20


KeyboardInterrupt: 

In [None]:
for model_name, model in trained_models.items():
    # Evaluate the model on validation data using the validation generator
    loss_v, accuracy_v = model.evaluate(val_images, verbose=1)

    # Evaluate the model on test data using the test generator
    loss, accuracy = model.evaluate(test_images, verbose=1)

    print(f"Validation for {model_name}: accuracy = {accuracy_v:.6f}  ;  loss_v = {loss_v:.6f}")
    print(f"Test for {model_name}: accuracy = {accuracy:.6f}  ;  loss = {loss:.6f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

# Assuming `axes` is predefined for the number of models
for i, (model_name, model) in enumerate(trained_models.items()):
    # Predicting for the test set using the test generator
    Y_pred = model.predict(test_images, verbose=1)
    Y_pred_classes = np.argmax(Y_pred, axis=1)

    # Resetting test_images generator to get true labels (y_true) for the confusion matrix
    test_images.reset()
    y_true = np.concatenate([y for _, y in test_images], axis=0)
    y_true_classes = np.argmax(y_true, axis=1)

    # Computing the confusion matrix
    confusion_mtx = confusion_matrix(y_true_classes, Y_pred_classes)

    # Plotting confusion matrix for each model
    ax = axes[i]
    plot_confusion_matrix(confusion_mtx, classes=range(80), title=f'Confusion Matrix for {model_name}')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()


In [None]:
# from sklearn.metrics import classification_report
#
# for model_name, model in trained_models.items():
#     y_pred = model.predict(x_test)
#     y_pred =y_pred>0.5
#     cm_plot_labels = ['akiec', 'bcc', 'bkl', 'df', 'mel','nv', 'vasc']
#
#     report = classification_report(y_test, y_pred, target_names=cm_plot_labels)
#
#     # print(report)
#
