In [1]:
!pip install scikit-learn




In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import classification_report


In [3]:
image_dir = Path(r'/Users/dipak/Downloads/archive (10)/images')  #Points to your food images folder
filepaths = list(image_dir.glob(r'**/*.jpg'))  # finds all .jpg files recursively in all subfolders Result: A list of all image paths
labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepaths))
'''For each image path, it extracts the parent folder name as the label (food type)
Example: If image is at food/images/pizza/image1.jpg, the label is "pizza"
This assumes each food type is in its own subfolder'''
filepaths = pd.Series(filepaths, name='Filepath').astype(str) #Combines filepaths and labels into a pandas DataFrame with two columns: Filepath and Label
labels = pd.Series(labels, name='Label')
images = pd.concat([filepaths, labels], axis=1)
category_samples = [] #For each unique food category, it randomly selects 100 images
for category in images['Label'].unique(): 
    category_slice = images.query("Label == @category")
    category_samples.append(category_slice.sample(100, random_state=1))
image_df = pd.concat(category_samples, axis=0).sample(
    frac=1.0, random_state=1).reset_index(drop=True)

In [4]:
image_df['Label'].value_counts()

Label
pork_chop        100
bread_pudding    100
club_sandwich    100
french_fries     100
beef_tartare     100
                ... 
creme_brulee     100
hummus           100
clam_chowder     100
caprese_salad    100
foie_gras        100
Name: count, Length: 101, dtype: int64

In [5]:
train_df, test_df = train_test_split(
image_df, train_size=0.7, shuffle=True, random_state=42)
#Splits your balanced dataset into training (70%) and testing (30%) sets
#shuffle=True randomly mixes before splitting (prevents sequential bias)

In [6]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator( #Loads and processes images in batches during training
preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input, #Applies MobileNetV2's specific preprocessing (normalizes pixel values to match what the model expects)
validation_split=0.2
)#From the training data, 20% is reserved for validation
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

In [7]:
'''This code is the bridge between your data and the model. It loads actual image files from disk, preprocesses them, and organizes them into batches ready for training. Let me explain each part:

What flow_from_dataframe() Does
It reads image files from your DataFrame and converts them into batches that TensorFlow can use. Think of it as a smart data loader.'''

train_images = train_generator.flow_from_dataframe(
dataframe=train_df,
x_col='Filepath',
y_col='Label',
target_size=(224, 224),
color_mode='rgb',
class_mode='categorical',
batch_size=32,
shuffle=True,
seed=42,
subset='training'
)
'''Loads images from train_df paths

Resizes each to 224×224 (MobileNetV2's expected input size)

Shuffles order to prevent overfitting to image sequence

Creates batches of 32 images at a time

Subset='training' means: Use only the 80% portion (excluding validation)
'''
val_images = train_generator.flow_from_dataframe(
dataframe=train_df,
x_col='Filepath',
y_col='Label',
target_size=(224, 224),
color_mode='rgb',
class_mode='categorical',
batch_size=32,
shuffle=True,
seed=42,
subset='validation'
)
test_images = test_generator.flow_from_dataframe(
dataframe=test_df,
x_col='Filepath',
y_col='Label',
target_size=(224, 224),
color_mode='rgb',
class_mode='categorical',
batch_size=32,
shuffle=False
)

Found 5656 validated image filenames belonging to 101 classes.
Found 1414 validated image filenames belonging to 101 classes.
Found 3030 validated image filenames belonging to 101 classes.


In [None]:
pretrained_model = tf.keras.applications.MobileNetV2(  #loading a pretrained model that already knows how to recognize patterns in images. Let me break it down:
input_shape=(224, 224, 3),
include_top=False,
weights='imagenet',
pooling='avg'
)
pretrained_model.trainable = False

In [10]:
inputs = pretrained_model.input
x = tf.keras.layers.Dense(128, activation='relu')(pretrained_model.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)
outputs = tf.keras.layers.Dense(101, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)
print(model.summary())

None


In [11]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True)])

Epoch 1/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 256ms/step - accuracy: 0.1363 - loss: 3.8841 - val_accuracy: 0.2786 - val_loss: 2.9732
Epoch 2/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 257ms/step - accuracy: 0.3780 - loss: 2.4391 - val_accuracy: 0.3656 - val_loss: 2.4784
Epoch 3/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 251ms/step - accuracy: 0.5095 - loss: 1.8704 - val_accuracy: 0.4123 - val_loss: 2.3644
Epoch 4/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 260ms/step - accuracy: 0.5964 - loss: 1.4992 - val_accuracy: 0.4399 - val_loss: 2.3084
Epoch 5/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 258ms/step - accuracy: 0.6773 - loss: 1.1930 - val_accuracy: 0.4392 - val_loss: 2.3038
Epoch 6/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 262ms/step - accuracy: 0.7550 - loss: 0.9328 - val_accuracy: 0.4364 - val_loss: 2.3854
Epoch 7/50

In [12]:
results = model.evaluate(test_images, verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

Test Accuracy: 42.41%
