In [None]:
# Import necessary libraries
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Set random seed for reproducibility
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
# TensorFlow keeps its own random state (weight initialisation, dropout), so it
# has to be seeded as well; seeding NumPy and `random` alone leaves training
# results different from run to run.
tf.random.set_seed(SEED)

In [None]:
# Define dataset path
dataset_path = "/content/drive/MyDrive/Colab Notebooks/Machine Learning /project/resized_dataset"

# File extensions accepted as images (lower-case, leading dot included so that
# a name merely ending in the letters "jpg" is not mistaken for an image)
IMAGE_EXTENSIONS = (".jpg", ".jpeg")

# Get all garbage type folders.
# os.listdir returns entries in arbitrary order, so both listings are sorted:
# the seeded shuffle and split further down are only reproducible when the
# rows always arrive in the same order.
garbage_types = sorted(
    d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))
)

# Load dataset: Collect image file paths and labels (the folder name is the label).
# The extension check is case-insensitive so files such as "IMG_001.JPG" are kept.
data = [
    (os.path.join(dataset_path, garbage_type, file), garbage_type)
    for garbage_type in garbage_types
    for file in sorted(os.listdir(os.path.join(dataset_path, garbage_type)))
    if file.lower().endswith(IMAGE_EXTENSIONS)
]

# Fail here with a clear message instead of an obscure error in a later cell
if not data:
    raise FileNotFoundError(f"No {IMAGE_EXTENSIONS} images found under {dataset_path!r}")

In [None]:
# Tabulate the (filepath, label) pairs, then shuffle the rows with a fixed seed
# and renumber the index from 0
labelled_paths = pd.DataFrame(data, columns=["filepath", "label"])
df = labelled_paths.sample(frac=1, random_state=SEED).reset_index(drop=True)
df.head()

Unnamed: 0,filepath,label
0,/content/drive/MyDrive/Colab Notebooks/Machine...,organic
1,/content/drive/MyDrive/Colab Notebooks/Machine...,paper
2,/content/drive/MyDrive/Colab Notebooks/Machine...,organic
3,/content/drive/MyDrive/Colab Notebooks/Machine...,glass
4,/content/drive/MyDrive/Colab Notebooks/Machine...,textile


In [None]:
# Display sample images from each class
def show_sample_images(df, num_images=7):
    """Show up to ``num_images`` randomly sampled images for every label in ``df``.

    One figure is drawn per label: a single row of images with the label as
    the figure title. Sampling uses the notebook-level ``SEED``, so the same
    images are picked on every run.

    Args:
        df: DataFrame with a "filepath" column (image paths) and a "label" column.
        num_images: Maximum number of images shown per label; labels with
            fewer rows show all of their images.
    """
    for label in df["label"].unique():
        class_rows = df[df["label"] == label]
        n_shown = min(num_images, len(class_rows))
        if n_shown <= 0:
            # Nothing to draw (num_images <= 0); an empty subplot grid is invalid
            continue
        sample_images = class_rows.sample(n=n_shown, random_state=SEED)["filepath"].tolist()

        # squeeze=False always yields a 2-D array of Axes. With the default
        # squeezing, a single-image row comes back as one bare Axes object,
        # which cannot be zipped with the list of paths.
        fig, axes = plt.subplots(1, n_shown, figsize=(15, 3), squeeze=False)
        fig.suptitle(label, fontsize=16, y=1.05)

        for ax, img_path in zip(axes[0], sample_images):
            # Release the file handle as soon as the pixels have been drawn
            with Image.open(img_path) as img:
                ax.imshow(img)
            ax.axis("off")

        plt.tight_layout()
        plt.show()
        plt.close(fig)  # one figure is created per label; do not let them pile up

# Show sample images from dataset
show_sample_images(df)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Stratified split: Train (70%), Validation (20%), Test (10%)
# Step 1: hold out 30% of all rows, keeping the label proportions.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    random_state=SEED,
    stratify=df["label"],
)
# Step 2: divide the held-out rows into validation and test (0.33 of them go to test).
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.33,
    random_state=SEED,
    stratify=temp_df["label"],
)

# Display dataset distribution
def display_class_distribution(df, title):
    """Print each label's share of ``df`` as a percentage rounded to 2 decimals.

    Args:
        df: DataFrame with a "label" column.
        title: Heading printed above the percentages.
    """
    percentages = df["label"].value_counts(normalize=True).mul(100)
    rounded = percentages.round(2)
    print(f"\n{title}:\n{rounded}")

# Report the label mix of the full dataset and of every split, in that order
for _frame, _title in (
    (df, "Overall Class Distribution"),
    (train_df, "Training Set Distribution"),
    (val_df, "Validation Set Distribution"),
    (test_df, "Test Set Distribution"),
):
    display_class_distribution(_frame, _title)


Overall Class Distribution:
label
plastic    20.52
paper      20.42
textile    20.37
glass      20.17
organic    18.51
Name: proportion, dtype: float64

Training Set Distribution:
label
plastic    20.49
paper      20.42
textile    20.35
glass      20.20
organic    18.55
Name: proportion, dtype: float64

Validation Set Distribution:
label
plastic    20.55
textile    20.55
paper      20.55
glass      20.05
organic    18.30
Name: proportion, dtype: float64

Test Set Distribution:
label
plastic    20.71
glass      20.20
paper      20.20
textile    20.20
organic    18.69
Name: proportion, dtype: float64


In [None]:
# Image augmentation for training set.
# The ImageNet-pretrained ResNet50 built later in this notebook was trained on
# inputs prepared by its own `preprocess_input` (RGB -> BGR plus per-channel
# mean subtraction on the 0-255 scale). Feeding it pixels rescaled to [0, 1]
# does not match what the pretrained weights expect, so `preprocess_input`
# replaces the former `rescale=1./255`.
# ImageDataGenerator applies the random transforms first and the
# preprocessing function afterwards, so `channel_shift_range` and
# `brightness_range` still operate on 0-255 pixel values.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=45,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.05,
    brightness_range=[0.9, 1.1],
    channel_shift_range=10,
    fill_mode="nearest"
)

# Validation and test sets get the same input preprocessing but no augmentation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Batch size and the (height, width) every image is resized to
BATCH_SIZE = 32
IMAGE_SIZE = (224, 224)

# Arguments that are identical for the train, validation and test streams
_flow_options = dict(
    x_col="filepath",
    y_col="label",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    seed=SEED,
)

# Training batches are shuffled; validation and test keep the DataFrame order
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df, shuffle=True, **_flow_options
)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df, shuffle=False, **_flow_options
)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df, shuffle=False, **_flow_options
)

Found 1391 validated image filenames belonging to 5 classes.
Found 399 validated image filenames belonging to 5 classes.
Found 198 validated image filenames belonging to 5 classes.


In [None]:
# Check batch distribution
def check_batches(generator, df, set_name, batch_size=None):
    """Print the generator's batch count next to the count expected for ``df``.

    Args:
        generator: Batch iterator supporting ``len()`` (its number of batches).
        df: Collection backing the generator; only ``len(df)`` is used.
        set_name: Label printed at the start of the line.
        batch_size: Batch size used to compute the expectation. When omitted,
            ``generator.batch_size`` is used if the generator has one,
            otherwise the notebook-level ``BATCH_SIZE``.
    """
    if batch_size is None:
        # Prefer what the generator was actually built with over the global constant
        batch_size = getattr(generator, "batch_size", None) or BATCH_SIZE

    n_samples = len(df)
    expected_batches = -(-n_samples // batch_size)  # ceiling division
    # An empty set has no batches at all, so report 0 rather than a full batch
    last_batch_size = (n_samples % batch_size or batch_size) if n_samples else 0
    print(f"{set_name}: {len(generator)} batches (Expected: {expected_batches}) | Last batch size: {last_batch_size}")

# Run the batch sanity check for every split
for _gen, _frame, _set_name in (
    (train_generator, train_df, "Train Set"),
    (val_generator, val_df, "Validation Set"),
    (test_generator, test_df, "Test Set"),
):
    check_batches(_gen, _frame, _set_name)

Train Set: 44 batches (Expected: 44) | Last batch size: 15
Validation Set: 13 batches (Expected: 13) | Last batch size: 15
Test Set: 7 batches (Expected: 7) | Last batch size: 6


In [None]:
# Take the label -> index mapping from the training generator and write it to
# class_indices.json for future use
class_indices = train_generator.class_indices
serialized_indices = json.dumps(class_indices)
with open("class_indices.json", "w") as f:
    f.write(serialized_indices)

print("Class Indices Mapping:", class_indices)

Class Indices Mapping: {'glass': 0, 'organic': 1, 'paper': 2, 'plastic': 3, 'textile': 4}


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Number of layers at the end of the backbone left trainable in this first stage
N_TRAINABLE_BASE_LAYERS = 10

# Load the pre-trained ResNet50 model (without the classification head).
# The input shape is derived from IMAGE_SIZE so it always matches the
# target_size the data generators resize to.
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(*IMAGE_SIZE, 3))

# Freeze everything except the last N_TRAINABLE_BASE_LAYERS layers.
# BatchNormalization layers stay frozen even inside that window: a frozen
# BatchNormalization layer runs in inference mode and keeps its pretrained
# moving statistics, whereas a trainable one re-estimates them from small
# batches of the new data and can wreck the pretrained features.
for layer in base_model.layers[:-N_TRAINABLE_BASE_LAYERS]:
    layer.trainable = False
for layer in base_model.layers[-N_TRAINABLE_BASE_LAYERS:]:
    layer.trainable = not isinstance(layer, BatchNormalization)

# Add custom layers for waste classification
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Converts feature maps to 1D vector
x = BatchNormalization()(x)  # Normalize activations to improve stability
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)  # Dropout to prevent overfitting
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)

# Output layer (softmax for multi-class classification); one unit per class
# found by the training generator
output = Dense(len(train_generator.class_indices), activation='softmax')(x)

# Define the final model
model = Model(inputs=base_model.input, outputs=output)

# Compile with Adam at a fixed initial learning rate of 1e-4
model.compile(optimizer=Adam(learning_rate=0.0001), loss="categorical_crossentropy", metrics=["accuracy"])

# Show model summary
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# First training stage. No `batch_size` is passed: the generators already
# yield batches of their own size, and Keras documents that `batch_size` must
# not be specified for generator / Sequence inputs.
EPOCHS = 100
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    verbose=1
)

  self._warn_if_super_not_called()


Epoch 1/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 11s/step - accuracy: 0.2803 - loss: 1.8355 - val_accuracy: 0.1830 - val_loss: 1.6609
Epoch 2/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 565ms/step - accuracy: 0.3976 - loss: 1.5262 - val_accuracy: 0.1830 - val_loss: 1.6554
Epoch 3/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 557ms/step - accuracy: 0.3980 - loss: 1.5177 - val_accuracy: 0.1830 - val_loss: 1.6510
Epoch 4/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 575ms/step - accuracy: 0.4195 - loss: 1.4180 - val_accuracy: 0.1805 - val_loss: 1.6274
Epoch 5/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 582ms/step - accuracy: 0.4265 - loss: 1.4023 - val_accuracy: 0.1805 - val_loss: 1.6101
Epoch 6/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 575ms/step - accuracy: 0.4266 - loss: 1.3764 - val_accuracy: 0.1830 - val_loss: 1.6314
Epoch 7/100
[1m4

In [None]:
# Unfreeze deeper layers: the last FINE_TUNE_LAYERS layers of the backbone.
# BatchNormalization layers are explicitly kept (or put back) frozen so they
# stay in inference mode with their pretrained moving statistics; letting them
# update on small batches during fine-tuning can destroy what the backbone
# has learned.
FINE_TUNE_LAYERS = 30
FINE_TUNE_EPOCHS = 100
for layer in base_model.layers[-FINE_TUNE_LAYERS:]:
    layer.trainable = not isinstance(layer, BatchNormalization)

# Recompile with lower learning rate (changes to `trainable` only take effect
# after compile)
model.compile(optimizer=Adam(learning_rate=1e-5), loss="categorical_crossentropy", metrics=["accuracy"])

# Continue training for fine-tuning
history_fine_tune = model.fit(train_generator, validation_data=val_generator, epochs=FINE_TUNE_EPOCHS)

Epoch 1/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 857ms/step - accuracy: 0.3832 - loss: 1.7856 - val_accuracy: 0.2957 - val_loss: 3.1699
Epoch 2/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 587ms/step - accuracy: 0.4962 - loss: 1.3539 - val_accuracy: 0.2456 - val_loss: 3.6329
Epoch 3/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 574ms/step - accuracy: 0.4940 - loss: 1.3438 - val_accuracy: 0.2381 - val_loss: 3.1182
Epoch 4/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 596ms/step - accuracy: 0.4699 - loss: 1.3455 - val_accuracy: 0.2381 - val_loss: 2.7486
Epoch 5/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 593ms/step - accuracy: 0.4813 - loss: 1.2927 - val_accuracy: 0.2882 - val_loss: 1.9205
Epoch 6/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 598ms/step - accuracy: 0.5376 - loss: 1.1893 - val_accuracy: 0.2982 - val_loss: 1.8426
Epoch 7/100
[1m

In [None]:
# Score the model on the validation split first, then on the test split;
# each evaluate call yields the loss followed by the accuracy metric
val_metrics = model.evaluate(val_generator)
test_metrics = model.evaluate(test_generator)

val_loss, val_acc = val_metrics
test_loss, test_acc = test_metrics

# Report both accuracies as percentages with two decimals
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Test Accuracy: {test_acc:.2%}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 115ms/step - accuracy: 0.5978 - loss: 1.0923
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6s/step - accuracy: 0.5528 - loss: 1.1018
Validation Accuracy: 57.39%
Test Accuracy: 54.04%
