## 1. Import Required Libraries
Import all necessary libraries for deep learning, data processing, and model training

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['ABSL_LOG_LEVEL'] = 'FATAL'
import random
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


## 2. Set Random Seeds
Make experiments as reproducible as possible by seeding the Python, NumPy, and TensorFlow random number generators with the same fixed value

In [2]:
# One seed value shared by every random number generator used in this notebook.
SEED = 42

# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so setting it
# here presumably only influences subprocesses -- confirm whether it should be
# exported before the kernel is launched instead.
os.environ.update({'PYTHONHASHSEED': str(SEED)})

# Seed the TensorFlow, NumPy and Python generators. They are independent of each
# other, so the order of these three calls does not matter.
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

## 3. Data Preprocessing and Augmentation
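Define the image data generators: the training generator applies normalization plus random augmentation (rotation, shifts, shear, zoom, brightness changes, horizontal flips), while the validation/test generator applies normalization only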

In [3]:
# Per-channel statistics used by `pytorch_normalize` (one value per colour channel).
# Presumably the torchvision ImageNet mean/std in RGB order -- TODO confirm.
std = np.array([0.229, 0.224, 0.225])
mean = np.array([0.485, 0.456, 0.406])


def pytorch_normalize(img):
    """Scale pixel values by 1/255, then standardize them with ``mean`` and ``std``.

    Args:
        img: Array of pixel values. ``mean`` and ``std`` (length 3) are broadcast
            against its last axis. Presumably an RGB image in the 0-255 range as
            handed over by ``ImageDataGenerator`` -- TODO confirm.

    Returns:
        A new array equal to ``(img / 255.0 - mean) / std``.

    NOTE(review): the ImageNet weights shipped with Keras' ResNet50 are documented
    to expect ``tf.keras.applications.resnet50.preprocess_input``; confirm that
    this torchvision-style normalization is intentional for the model in this file.
    """
    unit_scaled = img / 255.0
    centered = unit_scaled - mean
    return centered / std

# Validation and test images are only passed through `pytorch_normalize`;
# no random augmentation is applied to them.
val_test_datagen = ImageDataGenerator(preprocessing_function=pytorch_normalize)

# Training images get random augmentation in addition to the same normalization.
train_datagen = ImageDataGenerator(
    # Geometric perturbations.
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    zoom_range=[0.95, 1.05],
    horizontal_flip=True,
    fill_mode='reflect',
    # Photometric perturbations.
    brightness_range=[0.85, 1.15],
    # NOTE(review): Keras appears to apply the channel shift in raw pixel units
    # before `preprocessing_function` runs, so 0.02 on 0-255 data is close to a
    # no-op -- confirm whether a larger value (e.g. ~5) was intended.
    channel_shift_range=0.02,
    # Normalization shared with the validation/test generator.
    preprocessing_function=pytorch_normalize,
)

## 4. Create Data Generators
Create data generators for training, validation, and test sets, and display dataset information

In [4]:
# Dataset locations: `base_dir` is listed for its class folders, the three
# split directories feed the generators below.
base_dir = '../data/garbage-dataset'
train_dir = '../data/garbage-split/train'
test_dir = '../data/garbage-split/test'
val_dir = '../data/garbage-split/val'

# Single definitions for values that were previously repeated as literals; the
# step computation below must use the same batch size as the generators.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Class folder names. `os.listdir` returns entries in arbitrary order and also
# lists stray files, so keep directories only and sort them; `flow_from_directory`
# is documented to map sub-directory names to indices in alphanumeric order.
classes = sorted(
    entry for entry in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, entry))
)

# Options shared by all three generators.
flow_options = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical')

# Only the training stream is shuffled (with a fixed seed).
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, seed=SEED, **flow_options
)

# Validation and test streams keep file order so that predictions line up with
# the generator's `classes` attribute.
val_generator = val_test_datagen.flow_from_directory(
    val_dir, shuffle=False, **flow_options
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir, shuffle=False, **flow_options
)

# Number of batches needed to see every sample once (last batch may be partial).
train_steps = int(np.ceil(train_generator.samples / BATCH_SIZE))
val_steps = int(np.ceil(val_generator.samples / BATCH_SIZE))

print(f"Number of training samples: {train_generator.samples}, Steps per epoch: {train_steps}")
print(f"Number of validation samples: {val_generator.samples}, Validation steps: {val_steps}")

Found 15806 images belonging to 10 classes.
Found 2963 images belonging to 10 classes.
Found 993 images belonging to 10 classes.
Number of training samples: 15806, Steps per epoch: 494
Number of validation samples: 2963, Validation steps: 93


## 5. Build ResNet50 Model
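Use an ImageNet-pretrained ResNet50 (without its original classifier) as the base model, add a pooling + dropout + softmax classification head, and fine-tune all layers with a small learning rate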

In [5]:
# Backbone: ResNet50 with ImageNet weights and without its original top classifier.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Classification head: global average pooling -> dropout (rate 0.5) -> softmax
# with one unit per class found by the training generator.
pooled = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(pooled)
predictions = Dense(units=train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# The whole backbone is fine-tuned together with the new head.
base_model.trainable = True

model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=1e-5),
)

## 6. Model Training
Train the model using callbacks including early stopping, learning rate reduction, and model checkpointing

In [6]:
# Create the output directory up front so that both the checkpoint callback and
# the final `model.save` work on a fresh checkout (Restart & Run All).
os.makedirs('saved_models', exist_ok=True)

# All three callbacks watch validation accuracy; `mode='max'` is spelled out on
# each of them so none relies on name-based auto-detection of the direction.

# Keep only the best-scoring model on disk.
checkpoint = ModelCheckpoint(
    'saved_models/best_resnet50.keras',
    monitor='val_accuracy', mode='max', save_best_only=True, verbose=1,
)
# Stop after 9 epochs without an improvement of at least 0.001 and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_accuracy', mode='max', patience=9,
    restore_best_weights=True, min_delta=0.001, verbose=1,
)
# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
lr_reduce = ReduceLROnPlateau(
    monitor='val_accuracy', mode='max', factor=0.5, patience=3,
    min_lr=1e-6, verbose=1,
)

history = model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    epochs=40,
    callbacks=[checkpoint, early_stop, lr_reduce],
    verbose=1
)

# Save the model as it stands when `fit` returns.
model.save('saved_models/resnet50_final.keras')

  self._warn_if_super_not_called()


Epoch 1/40


I0000 00:00:1753894099.637029   46361 service.cc:145] XLA service 0x7c8bf809f4b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753894099.637103   46361 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti with Max-Q Design, Compute Capability 7.5
I0000 00:00:1753894131.088801   46361 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m494/494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 424ms/step - accuracy: 0.4278 - loss: 1.8480
Epoch 1: val_accuracy improved from -inf to 0.78299, saving model to saved_models/best_resnet50.keras
[1m494/494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 466ms/step - accuracy: 0.4282 - loss: 1.8467 - val_accuracy: 0.7830 - val_loss: 0.6744 - learning_rate: 1.0000e-05
Epoch 2/40
[1m494/494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step - accuracy: 0.8342 - loss: 0.5152
Epoch 2: val_accuracy improved from 0.78299 to 0.92103, saving model to saved_models/best_resnet50.keras
[1m494/494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 384ms/step - accuracy: 0.8343 - loss: 0.5150 - val_accuracy: 0.9210 - val_loss: 0.2480 - learning_rate: 1.0000e-05
Epoch 3/40
[1m494/494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445ms/step - accuracy: 0.9003 - loss: 0.3117
Epoch 3: val_accuracy improved from 0.92103 to 0.94060, saving model to 

## 7. Model Evaluation
Evaluate model performance on test set and generate detailed classification report

In [7]:
# Rewind the generator so predictions start at the first test file; the test
# generator was created with shuffle=False, so its `classes` attribute lines up
# with the prediction order.
test_generator.reset()

# NOTE(review): this evaluates the in-memory `model`; whether it holds the best
# weights depends on how EarlyStopping's `restore_best_weights` behaved -- load
# 'saved_models/best_resnet50.keras' explicitly if that must be guaranteed.
Y_pred = model.predict(test_generator)   # per-class probabilities from the softmax head
y_pred = np.argmax(Y_pred, axis=1)       # predicted class index per sample
y_true = test_generator.classes          # ground-truth class index per sample

# Order class names by their integer index and pass the full label set, so the
# report still works when a class is absent from both `y_true` and `y_pred`
# (otherwise `target_names` would not match the labels inferred from the data).
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 217ms/step
              precision    recall  f1-score   support

     battery       0.96      1.00      0.98        48
  biological       1.00      0.94      0.97        50
   cardboard       0.98      0.95      0.96        92
     clothes       0.99      0.98      0.99       267
       glass       0.98      0.97      0.98       154
       metal       0.98      0.94      0.96        51
       paper       0.90      0.98      0.94        84
     plastic       0.98      0.97      0.97       100
       shoes       0.95      0.99      0.97        99
       trash       0.96      0.96      0.96        48

    accuracy                           0.97       993
   macro avg       0.97      0.97      0.97       993
weighted avg       0.97      0.97      0.97       993

