## Load & Preprocess Data

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folder of the dataset on the author's machine. The raw-string prefix
# already keeps backslashes literal, so each separator is written only once
# (escaping them as well would put doubled separators into the path).
DATASET_DIR = r"D:\Education\IIT\3rd yr\Edge AI - CM 3606\EdgeAI - CW - git\garbage-classifier\Dataset"

# Split folders passed to flow_from_directory further down.
train_dir = os.path.join(DATASET_DIR, "train")
validation_dir = os.path.join(DATASET_DIR, "validation")

In [None]:
# Target size every image is resized to when it is loaded from disk.
IMG_SHAPE = (224, 224)

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=40,       # rotate by up to 40 degrees
    width_shift_range=0.2,   # shift horizontally by up to 20% of the width
    height_shift_range=0.2,  # shift vertically by up to 20% of the height
    shear_range=0.2,         # random shearing transformation
    zoom_range=0.2,          # random zoom
    horizontal_flip=True,    # random horizontal flip
    fill_mode='nearest',     # fill pixels created by a transform with the nearest pixel value
)

# Both generators rescale pixel values into the 0-1 range; the validation
# generator applies no augmentation.
data_gen_train = ImageDataGenerator(rescale=1.0 / 255, **augmentation_options)
data_gen_valid = ImageDataGenerator(rescale=1.0 / 255)

# Separate generators for training and validation data. Each one yields
# batches of 32 images read straight from its directory and resized to
# IMG_SHAPE. class_mode="categorical" is used because there are 3 classes
# (Plastic, Metal, Cardboard).
train_generator = data_gen_train.flow_from_directory(
    train_dir,
    target_size=IMG_SHAPE,
    batch_size=32,
    class_mode="categorical",
)

# shuffle=False keeps the batches in the same order as `valid_generator.classes`.
# The classification report and confusion matrix pair model.predict() output
# with that attribute, so a shuffled validation iterator would match
# predictions against the wrong labels.
valid_generator = data_gen_valid.flow_from_directory(
    validation_dir,
    target_size=IMG_SHAPE,
    batch_size=32,
    class_mode="categorical",
    shuffle=False,
)

## Data Visualization

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def count_images_per_class(generator):
    """Count how many images the generator holds for each class.

    Args:
        generator: Object exposing ``class_indices`` (mapping of class name to
            integer class index) and ``classes`` (one class index per image),
            e.g. the iterator returned by ``flow_from_directory``.

    Returns:
        dict mapping every class name in ``class_indices`` to its image count;
        classes without any image are reported as 0.

    Raises:
        KeyError: If ``classes`` contains an index missing from
            ``class_indices``.
    """
    # Invert the name -> index mapping once instead of rebuilding it per image.
    index_to_name = {idx: name for name, idx in generator.class_indices.items()}
    class_counts = dict.fromkeys(generator.class_indices, 0)
    for class_idx in generator.classes:
        # int() lets NumPy integer labels be used as plain dict keys.
        class_counts[index_to_name[int(class_idx)]] += 1
    return class_counts

# Count for training and validation sets
train_counts = count_images_per_class(train_generator)
val_counts = count_images_per_class(valid_generator)

# Combine counts over the union of class names (training order first) so a
# class that only occurs in the validation split is still shown in the chart.
all_classes = list(dict.fromkeys([*train_counts, *val_counts]))
total_counts = {cls: train_counts.get(cls, 0) + val_counts.get(cls, 0) for cls in all_classes}

# Plotting
classes = list(total_counts)
counts = list(total_counts.values())

plt.figure(figsize=(8, 5))
bars = plt.bar(classes, counts, color=['skyblue', 'salmon', 'limegreen'])
plt.xlabel("Class")
plt.ylabel("Number of Images")
plt.title("Image Distribution per Class (Train + Validation)")

# Add counts on top of bars (the text is placed slightly above each bar)
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2.0, yval + 10, str(int(yval)), ha='center', va='bottom')

plt.tight_layout()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

## Load the pre-trained MobileNetV2 model

In [None]:
# Instantiate MobileNetV2 with ImageNet weights and without its classification
# top (include_top=False); the input is IMG_SHAPE plus 3 colour channels.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(*IMG_SHAPE, 3),
    include_top=False,
    weights="imagenet",
)
base_model.summary()

In [None]:
# Freeze the base model: with trainable = False none of its layers' weights are
# updated during training. This retains the features learned on ImageNet and
# lets training focus on the custom head.
base_model.trainable = False

In [None]:
# Custom classification head for the 3-class task, stacked sequentially on top
# of the pre-trained base model.
head_layers = [
    # First layer is the frozen MobileNetV2 base model.
    base_model,
    # Reduce each feature map to a single value by averaging, which flattens
    # the convolutional output and keeps the parameter count low.
    tf.keras.layers.GlobalAveragePooling2D(),
    # Regularisation: randomly set 20% of the input units to zero during
    # training to help prevent overfitting.
    tf.keras.layers.Dropout(0.2),
    # Output layer with 3 softmax units (one per class).
    tf.keras.layers.Dense(3, activation='softmax'),
]
model = tf.keras.models.Sequential(head_layers)

## Compile the Model

In [None]:
# Optimiser for the transfer-learning phase (only the head is trainable here).
transfer_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# NOTE(review): the output layer has 3 units, so a top-3 accuracy is expected
# to be 1.0 for every sample and carries no information. It is kept because
# the evaluation cells unpack exactly three values from model.evaluate().
model.compile(
    optimizer=transfer_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3)],
)

In [None]:
# Train for 25 epochs on the training generator, validating on the validation
# generator; the returned History object is kept for the plots below.
history = model. fit(train_generator, epochs=25, validation_data=valid_generator)

## Track & Display Best Epoch Based on Validation Accuracy

In [None]:
import numpy as np

# Per-epoch validation accuracy recorded by the initial training run.
val_acc = history.history['val_accuracy']

# argmax is 0-based while epochs are reported 1-based.
best_idx = np.argmax(val_acc)
best_epoch = best_idx + 1
best_val_acc = val_acc[best_idx]
print(f"Best Epoch: {best_epoch}, Best Validation Accuracy: {best_val_acc:.4f}")

In [None]:
# Evaluate the current model on the validation generator. The result is
# unpacked as loss, accuracy and top-3 accuracy (the metrics given to compile).
eval_results = model.evaluate(valid_generator)
val_loss, val_acc, val_top3_acc = eval_results

print(f'Validation Accuracy: {val_acc:.4f}')
print(f'Validation Top-3 Accuracy: {val_top3_acc:.4f}')

## Plot Training & Validation Accuracy and Loss

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during the initial training run.
curves = history.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']
epochs_range = range(1, len(acc) + 1)  # epochs are numbered from 1

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: accuracy, with the best validation epoch highlighted in red.
ax_acc.plot(epochs_range, acc, label='Train Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.scatter(best_epoch, best_val_acc, color='red', label='Best Epoch')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Right panel: loss.
ax_loss.plot(epochs_range, loss, label='Train Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()


## Class-wise Accuracy (Confusion Matrix & Classification Report)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import seaborn as sns

# Get class labels (class names in class-index order) and their integer ids.
class_names = list(train_generator.class_indices.keys())
label_ids = list(range(len(class_names)))

# Predict batch by batch and take the ground truth from the very same batch,
# so every prediction is paired with the label of the image it was computed
# from. `valid_generator.classes` is in directory order and only lines up with
# model.predict(valid_generator) when the generator does not shuffle; reading
# the labels from the batches is correct either way.
batch_probs = []
batch_labels = []
for batch_idx in range(len(valid_generator)):
    images, one_hot_labels = valid_generator[batch_idx]
    batch_probs.append(model.predict_on_batch(images))
    batch_labels.append(one_hot_labels)

Y_pred = np.concatenate(batch_probs)
y_pred = np.argmax(Y_pred, axis=1)
y_true = np.argmax(np.concatenate(batch_labels), axis=1)

# Classification Report. Passing `labels` keeps the rows aligned with
# class_names even if a class is missing from y_true / y_pred.
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# Confusion Matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

## Transfer learning model saving

In [None]:
# Save the transfer-learning model (frozen base + trained head) under its own
# name. The fine-tuned model is saved to
# "Model Experiments/fine-tuning-garbage-classifier-25" at the end of the
# notebook, so reusing that path here would let it overwrite this checkpoint.
os.makedirs("Model Experiments", exist_ok=True)  # make sure the target folder exists
model.save("Model Experiments/transfer-learning-garbage-classifier-25")

## Fine-tune the Model

In [None]:
# Make the entire base model trainable, then re-freeze what must stay fixed.
base_model.trainable = True
fine_tune_at = 100  # fine-tune from layer 100 onwards

# Keep the first 100 layers frozen to preserve the learned features, so only
# layers after them are updated during fine-tuning.
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Also keep BatchNormalization layers in the unfrozen part frozen. A trainable
# BatchNormalization layer recomputes its statistics from the small training
# batches, which can undo what the pre-trained network has learned; setting
# trainable = False keeps it in inference mode.
for layer in base_model.layers[fine_tune_at:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Changing `trainable` only takes effect after recompiling. A lower learning
# rate is used for fine-tuning so the pre-trained weights are adjusted in small
# steps rather than drastically changed.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3)],
)

# Fine-tune the model: train again with part of the base model unfrozen, using
# the same training and validation generators.
history_fine = model.fit(train_generator, epochs=25, validation_data=valid_generator)

## Track & Display Best Epoch Based on Validation Accuracy

In [None]:
import numpy as np

# Per-epoch validation accuracy recorded by the fine-tuning run.
val_acc = history_fine.history['val_accuracy']

# argmax is 0-based while epochs are reported 1-based.
best_idx = np.argmax(val_acc)
best_epoch = best_idx + 1
best_val_acc = val_acc[best_idx]
print(f"Best Epoch: {best_epoch}, Best Validation Accuracy: {best_val_acc:.4f}")

In [None]:
# Evaluate the fine-tuned model on the validation generator. The result is
# unpacked as loss, accuracy and top-3 accuracy (the metrics given to compile).
eval_results = model.evaluate(valid_generator)
val_loss, val_acc, val_top3_acc = eval_results

print(f'Validation Accuracy: {val_acc:.4f}')
print(f'Validation Top-3 Accuracy: {val_top3_acc:.4f}')

## Plot Training & Validation Accuracy and Loss

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during the fine-tuning run.
curves = history_fine.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']
epochs_range = range(1, len(acc) + 1)  # epochs are numbered from 1

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: accuracy, with the best validation epoch highlighted in red.
ax_acc.plot(epochs_range, acc, label='Train Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.scatter(best_epoch, best_val_acc, color='red', label='Best Epoch')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Right panel: loss.
ax_loss.plot(epochs_range, loss, label='Train Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()


## Class-wise Accuracy (Confusion Matrix & Classification Report)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import seaborn as sns

# Get class labels (class names in class-index order) and their integer ids.
class_names = list(train_generator.class_indices.keys())
label_ids = list(range(len(class_names)))

# Predict batch by batch and take the ground truth from the very same batch,
# so every prediction is paired with the label of the image it was computed
# from. `valid_generator.classes` is in directory order and only lines up with
# model.predict(valid_generator) when the generator does not shuffle; reading
# the labels from the batches is correct either way.
batch_probs = []
batch_labels = []
for batch_idx in range(len(valid_generator)):
    images, one_hot_labels = valid_generator[batch_idx]
    batch_probs.append(model.predict_on_batch(images))
    batch_labels.append(one_hot_labels)

Y_pred = np.concatenate(batch_probs)
y_pred = np.argmax(Y_pred, axis=1)
y_true = np.argmax(np.concatenate(batch_labels), axis=1)

# Classification Report. Passing `labels` keeps the rows aligned with
# class_names even if a class is missing from y_true / y_pred.
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# Confusion Matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Save the model in its state after the fine-tuning run above.
model.save("Model Experiments/fine-tuning-garbage-classifier-25")