The purpose of this notebook is to apply all the transformations the raw images need (an integrity check, conversion to RGB and resizing to a fixed size) so that they can be consumed directly during model building.

In [None]:
#Connect to Google Drive: mount it at /content/drive so the Drive paths used below resolve
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Import all necessary libraries
import os
import cv2
import shutil
import random
import numpy as np
from PIL import Image

In [None]:
#Set-up directory
#Make the Colab Notebooks folder the working directory so that relative paths
#(e.g. the default 'Preprocessed_Images' output folder) are created inside it
os.chdir('/content/drive/My Drive/Colab Notebooks')

In [None]:
#Set-up input folder: root directory that is scanned as one sub-folder per class
extracted_folder_path = '/content/drive/My Drive/Colab Notebooks/Renamed_Images'

In [None]:
#Function to check if image is valid
def is_valid_image(image_path):
    """Return True if Pillow can open and verify the file at image_path.

    Args:
        image_path: Path of the image file to check.

    Returns:
        True when both Image.open() and Image.verify() succeed; False when
        either of them raises any Exception (e.g. the file cannot be opened
        or fails verification).
    """
    try:
        #The context manager closes the underlying file even when verify()
        #raises, so checking a large folder does not leak open file handles
        with Image.open(image_path) as img:
            img.verify()  # Verify the image integrity
        return True
    except Exception:
        #Deliberately broad: any failure simply means "not a valid image"
        return False

In [None]:
#Function to check data integrity
def perform_data_integrity_check(image_folder):
    """Print the path of every image under image_folder that fails validation.

    image_folder is scanned as one sub-folder per class; every entry inside a
    class folder is checked with is_valid_image() and reported when invalid.

    Args:
        image_folder: Root folder that holds the class sub-folders.

    Returns:
        None. Invalid image paths are reported with print().
    """
    #sorted() keeps the report order stable between runs
    for class_folder in sorted(os.listdir(image_folder)):
        class_path = os.path.join(image_folder, class_folder)

        #Skip stray files sitting next to the class folders: calling
        #os.listdir() on a regular file would raise NotADirectoryError
        if not os.path.isdir(class_path):
            continue

        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            if not is_valid_image(image_path):
                print(f"Invalid image: {image_path}")

In [None]:
#Run the integrity check on the input folder; paths of invalid images are printed
perform_data_integrity_check(extracted_folder_path)

In [None]:
#Function for preprocessing
def preprocess_images(image_folder, target_size=(224, 224), output_folder='Preprocessed_Images'):
    """Convert every image under image_folder to RGB, resize it and save a copy.

    image_folder is scanned as one sub-folder per class. The same class
    sub-folders are created under output_folder and each processed image is
    saved there under its original file name.

    Pixel values are NOT rescaled to [0, 1] here. The saved files hold 8-bit
    values, so a divide-by-255 / multiply-by-255 round trip cannot be
    persisted and only risks losing a level to float truncation; scale the
    pixels when the images are loaded for training instead.

    Args:
        image_folder: Root folder that holds the class sub-folders.
        target_size: (width, height) passed to Image.resize().
        output_folder: Folder that receives the processed copies; created if
            it does not exist. A relative path is resolved against the
            current working directory.

    Returns:
        None. Images that cannot be processed are reported with print() and
        skipped.
    """
    #Create output folder if it does not already exist
    os.makedirs(output_folder, exist_ok=True)

    #Iterate through each class folder of the input directory
    for class_folder in os.listdir(image_folder):
        class_path = os.path.join(image_folder, class_folder)

        #Skip stray files sitting next to the class folders: calling
        #os.listdir() on a regular file would raise NotADirectoryError
        if not os.path.isdir(class_path):
            continue

        #Mirror the class folder in the output directory
        output_class_folder = os.path.join(output_folder, class_folder)
        os.makedirs(output_class_folder, exist_ok=True)

        #Preprocess each image in the class folder
        for image_name in os.listdir(class_path):
            image_path = os.path.join(class_path, image_name)

            try:
                #Open with a context manager so the source file is closed
                #even when conversion fails; convert() returns a loaded copy
                #that stays usable after the file is closed
                with Image.open(image_path) as source_image:
                    has_byte_transparency = (
                        source_image.mode == 'P'
                        and isinstance(source_image.info.get('transparency'), bytes)
                    )
                    if has_byte_transparency:
                        #Palette images with per-entry transparency go through
                        #RGBA first, which avoids Pillow's "should be converted
                        #to RGBA images" warning
                        image = source_image.convert('RGBA').convert('RGB')
                    else:
                        image = source_image.convert('RGB')

                #Resize the image to the target size
                image = image.resize(target_size)

                #Save the preprocessed image in the output folder, preserving the original folder structure
                output_image_path = os.path.join(output_class_folder, image_name)
                image.save(output_image_path)

            except Exception as e:
                print(f"Error occurred while processing {image_path}: {str(e)}")  #Print error message and path of the image when the preprocessing is not successful
                continue


In [None]:
#Preprocess the images with the default target size (224, 224); the output goes to
#the default 'Preprocessed_Images' folder, relative to the current working directory
preprocess_images(extracted_folder_path)



We get the following warning: /usr/local/lib/python3.10/dist-packages/PIL/Image.py:996: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images
  warnings.warn(

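This is a warning, not an error: the affected images are still processed and saved. Pillow emits it when a palette-based ("P" mode) image that stores its transparency as per-palette-entry bytes is converted directly to RGB, because that transparency cannot be carried over and is dropped. Converting such images to RGBA first handles the transparency explicitly and avoids the warning.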

In [None]:
#Folder holding the preprocessed images: the default 'Preprocessed_Images' output
#folder of preprocess_images(), resolved against the working directory set earlier
preprocessed_folder_path = '/content/drive/My Drive/Colab Notebooks/Preprocessed_Images'