<a href="https://colab.research.google.com/github/nipuni1313/CNN-for-image-classification/blob/main/Simple_convolutional_neural_network_to_perform_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the dataset folders under MyDrive can be read and written.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

In [None]:
# Root of the source dataset and the folder that will receive the processed images.
dataset_path = '/content/drive/MyDrive/RealWaste/'
output_path = '/content/drive/MyDrive/RealWaste_output/'
# List the dataset root as a quick check that the Drive path resolves.
files = os.listdir(dataset_path)
print(files)

['Vegetation', 'Textile Trash', 'Plastic', 'Paper', 'Miscellaneous Trash', 'Metal', 'Glass', 'Food Organics', 'Cardboard', 'RealWaste output']


In [None]:
# Install necessary libraries
!pip install Augmentor tensorflow opencv-python

import os
import shutil
import random
import cv2
import numpy as np
import Augmentor
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import img_to_array, array_to_img
from tensorflow.image import resize
from PIL import Image



In [None]:
# Function to preprocess image (resize and normalize)
def preprocess_image(img_path, target_size=(128, 128)):
    """Load an image, resize it and scale its pixel values to the range [0, 1].

    Args:
        img_path: Path of the image file to read.
        target_size: Size tuple handed to ``PIL.Image.resize``
            (PIL interprets it as ``(width, height)``).

    Returns:
        The array produced by ``img_to_array`` for the resized RGB image,
        divided by 255.0.
    """
    # The context manager releases the file handle as soon as the pixels are
    # read; without it one descriptor stays open per processed image.
    with Image.open(img_path) as source:
        # Force three RGB channels so palette, greyscale and RGBA files all
        # yield the same array layout (and can later be saved as JPEG).
        resized = source.convert('RGB').resize(target_size)
    pixels = img_to_array(resized)
    return pixels / 255.0

# Function for data augmentation
def augment_data(input_directory, augmented_directory, num_augmented_images=5):
    """
    Preprocess every class of the dataset and add Augmentor samples to it.

    For each class sub-folder of ``input_directory`` this function:
      1. writes a preprocessed copy of every original image to
         ``augmented_directory/<class>/<name>_original.jpg`` (copies that
         already exist are kept, so an interrupted run can be resumed);
      2. draws ``num_augmented_images`` random samples for the class with an
         Augmentor pipeline (rotate, flips, zoom, contrast, brightness);
      3. writes a preprocessed copy of every image found in Augmentor's
         output folder to ``augmented_directory/<class>/<name>_augmented.jpg``.

    Augmentor stores its samples in ``<class>/output`` inside
    ``input_directory``. Those files are left in place, so samples created by
    earlier runs are picked up again on the next run.

    Args:
        input_directory: Dataset root containing one sub-folder per class.
        augmented_directory: Root folder that receives the processed images.
        num_augmented_images: Number of Augmentor samples to draw per class.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')

    def _is_image(file_name):
        # Case-insensitive and dot-anchored: "IMG.JPG" is accepted, "notajpg" is not.
        return file_name.lower().endswith(image_suffixes)

    def _save_preprocessed(source_path, destination_path):
        # preprocess_image returns values in [0, 1]. Convert back to 8-bit
        # explicitly: array_to_img's default scale=True would min-max stretch
        # each image and silently change its contrast.
        pixels = np.asarray(preprocess_image(source_path), dtype='float32')
        pixels = np.clip(np.rint(pixels * 255.0), 0, 255).astype('uint8')
        # JPEG cannot store alpha or palette modes, so always write RGB.
        array_to_img(pixels, scale=False).convert('RGB').save(destination_path)

    for class_folder in os.listdir(input_directory):
        class_path = os.path.join(input_directory, class_folder)

        if not os.path.isdir(class_path):
            continue
        # Never treat the destination itself as a class when it lives inside the dataset root.
        if os.path.abspath(class_path) == os.path.abspath(augmented_directory):
            continue

        print(f"Processing class: {class_folder}...")

        # Create the same class folder in the augmented dataset if it doesn't exist
        augmented_class_folder = os.path.join(augmented_directory, class_folder)
        os.makedirs(augmented_class_folder, exist_ok=True)

        # Copy and preprocess the originals that are not there yet
        for image_file in os.listdir(class_path):
            if not _is_image(image_file):
                continue
            # splitext keeps dots inside the stem; split('.')[0] would map
            # "a.1.jpg" and "a.2.jpg" onto the same output name.
            img_name = os.path.splitext(image_file)[0] + "_original.jpg"
            save_path = os.path.join(augmented_class_folder, img_name)
            if os.path.exists(save_path):
                continue
            _save_preprocessed(os.path.join(class_path, image_file), save_path)

        # Create Augmentor pipeline for this class folder
        pipeline = Augmentor.Pipeline(class_path)
        pipeline.rotate(probability=0.7, max_left_rotation=15, max_right_rotation=15)
        pipeline.flip_left_right(probability=0.5)
        pipeline.flip_top_bottom(probability=0.5)
        pipeline.zoom_random(probability=0.5, percentage_area=0.8)
        pipeline.random_contrast(probability=0.5, min_factor=0.7, max_factor=1.3)
        pipeline.random_brightness(probability=0.5, min_factor=0.7, max_factor=1.3)

        # Generate the samples; Augmentor writes them to <class_path>/output
        pipeline.sample(num_augmented_images)

        # Preprocess the generated samples into the augmented folder
        augmented_folder = os.path.join(class_path, "output")  # Augmentor stores images here
        if not os.path.isdir(augmented_folder):
            continue
        for augmented_image in os.listdir(augmented_folder):
            if not _is_image(augmented_image):
                continue
            # Augmentor names embed the source file name (extension included)
            # before a unique id, so only the final extension may be stripped;
            # otherwise samples of the same source overwrite one another.
            img_name = os.path.splitext(augmented_image)[0] + "_augmented.jpg"
            save_path = os.path.join(augmented_class_folder, img_name)  # Save in augmented folder
            _save_preprocessed(os.path.join(augmented_folder, augmented_image), save_path)


# Function to split dataset into training, validation, and testing sets (by class)
def split_dataset(input_directory, output_directory, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2, seed=None):
    """
    Copy the images of every class folder into train/val/test sub-folders.

    The images of each class are shuffled and copied (the sources are kept) to
    ``output_directory/{train,val,test}/<class>/``.

    Args:
        input_directory: Root folder containing one sub-folder per class.
        output_directory: Root folder that receives ``train``, ``val`` and
            ``test``. It may be the same folder as ``input_directory``; the
            three split folders are then never treated as classes.
        train_ratio: Fraction of each class copied to ``train``.
        val_ratio: Fraction of each class copied to ``val``.
        test_ratio: Kept for backward compatibility only. The test set always
            receives the images left over after the train and validation
            slices, so this value does not influence the split.
        seed: Optional seed. When given, the shuffle is reproducible, so
            re-running the split selects the same files for every subset.
            When ``None`` the module-level ``random`` state is used.

    Raises:
        ValueError: If a ratio is negative or ``train_ratio + val_ratio``
            exceeds 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    image_suffixes = ('.jpg', '.jpeg', '.png')
    split_names = ('train', 'val', 'test')
    # Folders this function creates itself; they must not be mistaken for
    # classes when input_directory and output_directory are the same folder.
    split_roots = {os.path.abspath(os.path.join(output_directory, name)) for name in split_names}
    shuffler = random.Random(seed) if seed is not None else random

    # Sorted so that a seeded run visits classes (and consumes random numbers) in a fixed order
    for class_folder in sorted(os.listdir(input_directory)):
        class_path = os.path.join(input_directory, class_folder)

        if not os.path.isdir(class_path) or os.path.abspath(class_path) in split_roots:
            continue

        print(f"Splitting {class_folder} images into train, val, and test sets...")

        # List all the images in the current class folder (sorted: os.listdir order is arbitrary)
        image_files = sorted(f for f in os.listdir(class_path) if f.lower().endswith(image_suffixes))
        shuffler.shuffle(image_files)

        total_images = len(image_files)
        train_size = int(total_images * train_ratio)
        val_size = int(total_images * val_ratio)

        subsets = {
            'train': image_files[:train_size],
            'val': image_files[train_size:train_size + val_size],
            'test': image_files[train_size + val_size:],  # remainder
        }

        # Copy images to the respective directories (the sources stay in place)
        for split_name in split_names:
            subset_dir = os.path.join(output_directory, split_name, class_folder)
            os.makedirs(subset_dir, exist_ok=True)
            for image in subsets[split_name]:
                shutil.copy(os.path.join(class_path, image), os.path.join(subset_dir, image))

        test_size = len(subsets['test'])
        print(f"Dataset split complete for {class_folder}: {train_size} training, {val_size} validation, {test_size} test images.")


In [None]:


# Path to the RealWASTE dataset
input_directory = dataset_path  # Root of the original images; augment_data reads one sub-folder per class from here

# Path to save augmented and split images
output_directory =  output_path # Receives the preprocessed/augmented images and, later, the train/val/test folders



In [None]:
# Step 1: Perform data augmentation
# Writes preprocessed copies of the originals plus 5 Augmentor samples per class into output_directory.
augment_data(input_directory, output_directory, num_augmented_images=5)



Processing class: Vegetation...
Initialised with 436 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Vegetation/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B67F70>: 100%|██████████| 5/5 [00:00<00:00, 24.73 Samples/s]


Processing class: Textile Trash...
Initialised with 318 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Textile Trash/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B7C580>: 100%|██████████| 5/5 [00:00<00:00, 18.22 Samples/s]


Processing class: Plastic...
Initialised with 921 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Plastic/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B65F00>: 100%|██████████| 5/5 [00:00<00:00, 16.90 Samples/s]


Processing class: Paper...
Initialised with 500 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Paper/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B7CFD0>: 100%|██████████| 5/5 [00:00<00:00, 22.24 Samples/s]


Processing class: Miscellaneous Trash...
Initialised with 495 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Miscellaneous Trash/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B657E0>: 100%|██████████| 5/5 [00:00<00:00, 21.60 Samples/s]


Processing class: Metal...
Initialised with 790 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Metal/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B64CA0>: 100%|██████████| 5/5 [00:00<00:00, 30.29 Samples/s]


Processing class: Glass...
Initialised with 420 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Glass/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B67A00>: 100%|██████████| 5/5 [00:00<00:00, 24.85 Samples/s]


Processing class: Food Organics...
Initialised with 411 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Food Organics/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B64D90>: 100%|██████████| 5/5 [00:00<00:00, 19.70 Samples/s]


Processing class: Cardboard...
Initialised with 461 image(s) found.
Output directory set to /content/drive/MyDrive/RealWaste/Cardboard/output.

Processing <PIL.Image.Image image mode=RGB size=524x524 at 0x7C8DC1B677F0>: 100%|██████████| 5/5 [00:00<00:00, 16.20 Samples/s]


In [None]:
# Step 2: Split the dataset into training, validation, and testing sets
# NOTE: input and output are the same folder here, so train/, val/ and test/ are created
# next to the class folders and will appear in the directory listing on a later run of this cell.
split_dataset(output_directory, output_directory, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2)

Splitting Vegetation images into train, val, and test sets...
Dataset split complete for Vegetation: 267 training, 89 validation, 90 test images.
Splitting Textile Trash images into train, val, and test sets...
Dataset split complete for Textile Trash: 196 training, 65 validation, 67 test images.
Splitting Plastic images into train, val, and test sets...
Dataset split complete for Plastic: 558 training, 186 validation, 187 test images.
Splitting Paper images into train, val, and test sets...
Dataset split complete for Paper: 306 training, 102 validation, 102 test images.
Splitting Miscellaneous Trash images into train, val, and test sets...
Dataset split complete for Miscellaneous Trash: 303 training, 101 validation, 101 test images.
Splitting Metal images into train, val, and test sets...
Dataset split complete for Metal: 480 training, 160 validation, 160 test images.
Splitting Glass images into train, val, and test sets...
Dataset split complete for Glass: 258 training, 86 validation