<a href="https://colab.research.google.com/github/NethmiAmasha/Waste-Image-Classification-CNN/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import urllib.request
import zipfile

# File and folder names
zip_file = "realwaste.zip"
dataset_folder = "realwaste_dataset"
url = "https://archive.ics.uci.edu/static/public/908/realwaste.zip"

# Download the zip file only if it doesn't exist.
# The archive is written under a temporary name and renamed only after the
# download finished, so an interrupted download can never leave a truncated
# realwaste.zip behind that a later run would mistake for a complete one.
if not os.path.exists(zip_file):
    partial_zip = zip_file + ".part"
    try:
        urllib.request.urlretrieve(url, partial_zip)
        os.replace(partial_zip, zip_file)
    finally:
        # Only still present if the download or the rename failed.
        if os.path.exists(partial_zip):
            os.remove(partial_zip)

# Unzip only if the dataset folder doesn't exist.
# Extraction goes to a scratch folder first for the same reason: a failed
# extraction must not leave a half-filled dataset folder that skips this step
# on the next run.
if not os.path.exists(dataset_folder):
    partial_folder = dataset_folder + ".part"
    shutil.rmtree(partial_folder, ignore_errors=True)  # clear leftovers of an earlier failure
    with zipfile.ZipFile(zip_file) as archive:
        archive.extractall(partial_folder)
    os.replace(partial_folder, dataset_folder)

# Reached only when both steps succeeded (any failure above raises).
print("Dataset is ready in:", dataset_folder)



Dataset is ready in: realwaste_dataset


In [None]:
import tensorflow as tf
import os

# Path to the dataset, derived from the folder the download cell extracted
# into. From Colab's default working directory this resolves to
# /content/realwaste_dataset/realwaste-main/RealWaste.
base_path = os.path.abspath(
    os.path.join(dataset_folder, "realwaste-main", "RealWaste")
)

# Image size and batch size
img_size = (128, 128)
batch_size = 32

# Training set (70%)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    base_path,
    validation_split=0.30,   # 30% reserved for val+test
    subset="training",
    seed=42,
    image_size=img_size,
    batch_size=batch_size
)

# Validation + test (30%); same seed and split fraction as the training call
# so the two subsets do not share files.
val_test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    base_path,
    validation_split=0.30,
    subset="validation",
    seed=42,
    image_size=img_size,
    batch_size=batch_size
)

# Split validation+test 30% → 15% + 15%
#
# val_test_ds is shuffled, and the shuffle is redone on every pass over the
# dataset. Calling take()/skip() on it directly would therefore draw the
# validation and the test batches from two different orderings, so the two
# sets could share images (and some images would end up in neither).
# Read the hold-out data exactly once and slice fixed tensors instead.
val_batches = tf.data.experimental.cardinality(val_test_ds)

holdout_images = []
holdout_labels = []
for image_batch, label_batch in val_test_ds:
    holdout_images.append(image_batch)
    holdout_labels.append(label_batch)
holdout_images = tf.concat(holdout_images, axis=0)
holdout_labels = tf.concat(holdout_labels, axis=0)

# Same boundary as a batch-level take/skip: the first val_batches // 2 full
# batches form the validation set, everything after that is the test set.
num_val_images = int(val_batches // 2) * batch_size

val_ds = tf.data.Dataset.from_tensor_slices(
    (holdout_images[:num_val_images], holdout_labels[:num_val_images])
).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices(
    (holdout_images[num_val_images:], holdout_labels[num_val_images:])
).batch(batch_size)


print("Classes:", train_ds.class_names)
print("Train batches:", tf.data.experimental.cardinality(train_ds))
print("Val batches:", tf.data.experimental.cardinality(val_ds))
print("Test batches:", tf.data.experimental.cardinality(test_ds))

Found 4752 files belonging to 9 classes.
Using 3327 files for training.
Found 4752 files belonging to 9 classes.
Using 1425 files for validation.
Classes: ['Cardboard', 'Food Organics', 'Glass', 'Metal', 'Miscellaneous Trash', 'Paper', 'Plastic', 'Textile Trash', 'Vegetation']
Train batches: tf.Tensor(104, shape=(), dtype=int64)
Val batches: tf.Tensor(22, shape=(), dtype=int64)
Test batches: tf.Tensor(23, shape=(), dtype=int64)


In [None]:
# The output layer needs one neuron per class, so count the labels
# exposed by train_ds.class_names.

num_classes = len(train_ds.class_names)
print(f"Number of classes: {num_classes}")


Number of classes: 9


In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Prefetching and caching for performance.
#
# cache() replays the batches in the order of the first pass, which would give
# the model the identical batch sequence in every epoch. The training pipeline
# therefore shuffles again *after* the cache. train_ds is already batched, so
# this reorders whole batches; the buffer is larger than the number of training
# batches, which makes it a full shuffle of the batch order each epoch.
# The seed keeps runs reproducible.
train_ds = (
    train_ds
    .cache()
    .shuffle(buffer_size=1000, seed=42)
    .prefetch(buffer_size=AUTOTUNE)
)
# Evaluation data needs no shuffling: caching and prefetching only.
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

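Cache - keep the data in memory after the first pass so later epochs do not reload it from disk.

Prefetch - prepare the next batch while the current one is being used for training.

AUTOTUNE - let TensorFlow choose the prefetch buffer size automatically.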

In [None]:
# Normalize pixel values
from tensorflow.keras import layers

normalization_layer = layers.Rescaling(1./255)


def scale_pixels(image_batch, label_batch):
    """Rescale one batch of images with normalization_layer; labels pass through unchanged."""
    return normalization_layer(image_batch), label_batch


# Apply the same rescaling to all three splits.
train_ds = train_ds.map(scale_pixels)
val_ds = val_ds.map(scale_pixels)
test_ds = test_ds.map(scale_pixels)

# Sanity check on a single training batch: report its shape and value range.
for images, labels in train_ds.take(1):
    lowest_pixel = tf.reduce_min(images).numpy()
    highest_pixel = tf.reduce_max(images).numpy()
    print("Shape of images batch:", images.shape)
    print("Min pixel value:", lowest_pixel)
    print("Max pixel value:", highest_pixel)



Shape of images batch: (32, 128, 128, 3)
Min pixel value: 0.0
Max pixel value: 1.0


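Since we are only rescaling the pixel values by a fixed constant (dividing by 255), it doesn't matter whether we split the dataset into training, validation and test sets before or after normalizing: the result does not depend on any statistic of the data.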

With standardization (zero mean, unit variance), however, we have to split the data first: the mean and standard deviation must be computed on the training set only and then applied unchanged to the validation and test sets. Otherwise information from the held-out data leaks into the preprocessing and the evaluation becomes overly optimistic.

In [None]:
from tensorflow.keras import models, layers


Here we use `Sequential` to stack the layers in order, so the output of each layer is the input of the next one.

In [None]:
# CNN classifier: three Conv2D + MaxPooling stages followed by a dense head.
model = models.Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Conv2D, which
    # Keras warns against for Sequential models. The spatial size comes from
    # img_size so the model always matches the size the images were loaded at;
    # the trailing 3 is the RGB channel axis.
    layers.Input(shape=tuple(img_size) + (3,)),

    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # drop half of the dense activations during training
    # One output neuron per class; softmax turns them into class probabilities.
    layers.Dense(num_classes, activation='softmax')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()
