In [None]:
!pip install -q kagglehub tensorflow matplotlib scikit-learn


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.resnet50 import preprocess_input
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import kagglehub


In [None]:
# Look up the visible GPUs and, when any exist, turn on memory growth for
# each one before anything else touches the devices.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("⚠️ GPU not found")
else:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    print("✅ GPU detected")


✅ GPU detected


In [None]:
# Fetch the dataset through kagglehub; the returned path is used directly as
# the image root directory (both names refer to the same location).
data_dir = kagglehub.dataset_download("sumn2u/garbage-classification-v2")
dataset_path = data_dir
print("Dataset path:", data_dir)


Using Colab cache for faster access to the 'garbage-classification-v2' dataset.
Dataset path: /kaggle/input/garbage-classification-v2


In [None]:
from PIL import Image

def clean_dataset(root_dir):
    """Delete files under ``root_dir`` that are not usable images.

    Walks ``root_dir`` recursively and removes, in place:

    * every file whose name does not end in one of the accepted image
      extensions (case-insensitive), and
    * every file with an accepted extension that Pillow fails to open or
      verify.

    A one-line summary with the number of deleted files is printed at the end.

    Args:
        root_dir: Directory to scan. Matching files are permanently deleted.

    Raises:
        OSError: If a file selected for deletion cannot be removed.
    """
    valid_ext = (".jpg", ".jpeg", ".png", ".bmp")
    removed = 0

    for root, _, files in os.walk(root_dir):
        for file in files:
            path = os.path.join(root, file)

            keep = file.lower().endswith(valid_ext)
            if keep:
                try:
                    # The context manager closes the file handle even when
                    # verify() raises, so the file is never still open when
                    # it is deleted below.
                    with Image.open(path) as img:
                        img.verify()
                except Exception:
                    # Deliberately not a bare ``except``: KeyboardInterrupt /
                    # SystemExit must propagate instead of being treated as
                    # "corrupt image" and causing a valid file to be deleted.
                    keep = False

            if not keep:
                os.remove(path)
                removed += 1

    print(f"Removed {removed} corrupted / invalid files")

# Run the cleanup over the dataset root; this deletes files on disk in place.
clean_dataset(data_dir)


Removed 0 corrupted / invalid files


In [None]:
# Split / loading configuration.
SEED = 42
IMG_SIZE = (256, 256)
BATCH_SIZE = 32

# Arguments shared by both loader calls. The same seed and validation_split
# are passed to each call; only `subset` differs.
split_kwargs = dict(
    validation_split=0.2,
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **split_kwargs
)
temp_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **split_kwargs
)

class_names = train_ds.class_names
NUM_CLASSES = len(class_names)

# Cut the held-out 20% in two by batch count: the first half of the batches
# becomes validation, the remainder becomes test.
# NOTE(review): temp_ds is built with the loader's default shuffling. If that
# shuffle reorders elements on every pass, take()/skip() may not produce
# disjoint, stable val/test sets -- TODO confirm.
temp_batches = temp_ds.cardinality().numpy()
half = temp_batches // 2
val_ds = temp_ds.take(half)
test_ds = temp_ds.skip(half)

print("Classes:", class_names)
for title, ds in (("Train", train_ds), ("Val", val_ds), ("Test", test_ds)):
    print(f"{title} batches:", ds.cardinality().numpy())


Found 20212 files belonging to 10 classes.
Using 16170 files for training.
Found 20212 files belonging to 10 classes.
Using 4042 files for validation.
Classes: ['battery', 'biological', 'cardboard', 'clothes', 'glass', 'metal', 'paper', 'plastic', 'shoes', 'trash']
Train batches: 506
Val batches: 63
Test batches: 64


In [None]:
def preprocess(image, label):
    """Run ``preprocess_input`` (imported from the ResNet50 module) on the image; return the label unchanged."""
    return preprocess_input(image), label

AUTOTUNE = tf.data.AUTOTUNE

# Each pipeline gets the same three stages: preprocess -> skip elements that
# raise an error -> prefetch.
# Note: the datasets are rebound to their mapped versions, so running this
# cell a second time would apply `preprocess` twice.
# NOTE(review): the loaders above already batch, so an element skipped by
# ignore_errors() is presumably a whole batch -- confirm.
train_ds = (
    train_ds
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .ignore_errors()
    .prefetch(AUTOTUNE)
)
val_ds = (
    val_ds
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .ignore_errors()
    .prefetch(AUTOTUNE)
)
test_ds = (
    test_ds
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .ignore_errors()
    .prefetch(AUTOTUNE)
)



In [None]:
# Random horizontal flip, rotation and zoom, bundled into one Sequential so
# the model can apply them as a single callable layer.
data_augmentation = keras.Sequential(
    layers=[
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)


In [None]:
# Derive the input shape from IMG_SIZE so the model always matches the size
# the data loaders resize images to (previously hard-coded as 256x256 twice).
INPUT_SHAPE = IMG_SIZE + (3,)

# ImageNet-pretrained ResNet50 without its classification head, used as a
# frozen feature extractor.
base_model = keras.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=INPUT_SHAPE
)

base_model.trainable = False

# Head: augmentation -> frozen backbone -> global average pool -> dropout ->
# softmax over NUM_CLASSES.
inputs = keras.Input(shape=INPUT_SHAPE)
x = data_augmentation(inputs)
# training=False keeps the backbone (notably its BatchNormalization layers)
# in inference mode, which stays correct even if the backbone is unfrozen
# later for fine-tuning.
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = keras.Model(inputs, outputs)


In [None]:
# Labels are integer class ids, hence the sparse categorical cross-entropy;
# the final Dense layer already applies softmax, so the loss keeps its
# default (probability) inputs.
adam = keras.optimizers.Adam(learning_rate=5e-5)
sparse_ce = keras.losses.SparseCategoricalCrossentropy()

model.compile(optimizer=adam, loss=sparse_ce, metrics=["accuracy"])

model.summary()


In [None]:
EPOCHS = 8

# Fit on the training pipeline and score val_ds after every epoch; the
# returned History object is kept for later inspection.
history = model.fit(x=train_ds, epochs=EPOCHS, validation_data=val_ds)


Epoch 1/8
    504/Unknown [1m80s[0m 138ms/step - accuracy: 0.2449 - loss: 2.5351



[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 162ms/step - accuracy: 0.2452 - loss: 2.5339 - val_accuracy: 0.7197 - val_loss: 0.9208
Epoch 2/8
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 149ms/step - accuracy: 0.6494 - loss: 1.0821 - val_accuracy: 0.8199 - val_loss: 0.5880
Epoch 3/8
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 145ms/step - accuracy: 0.7597 - loss: 0.7288 - val_accuracy: 0.8596 - val_loss: 0.4489
Epoch 4/8
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 142ms/step - accuracy: 0.8126 - loss: 0.5820 - val_accuracy: 0.8765 - val_loss: 0.3937
Epoch 5/8
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 143ms/step - accuracy: 0.8390 - loss: 0.5027 - val_accuracy: 0.8869 - val_loss: 0.3522
Epoch 6/8
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 143ms/step - accuracy: 0.8572 - loss: 0.4433 - val_accuracy: 0.8983 - val_loss: 0.3212
Epoch 7/8
[1m504/504[0m 

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_loss, test_acc = model.evaluate(x=test_ds)
print(f"\n✅ Test Accuracy: {test_acc*100:.2f}%")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 160ms/step - accuracy: 0.9069 - loss: 0.2990

✅ Test Accuracy: 91.07%
