## Milestone 1 - Steps for the Image Preprocessing

## Importing the Dataset from Either Google Drive or Kaggle

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Extracting the Dataset from either Google Drive or Kaggle

In [2]:
# importing libraries
import zipfile
import os

# Location of the zipped dataset on Drive and the local folder to unpack it into
zip_path = '/content/drive/MyDrive/Plants.zip'
extract_path = '/content/Extract'

# Unpack every member of the archive (ZipFile opens in read mode by default)
with zipfile.ZipFile(zip_path) as zip_ref:
    zip_ref.extractall(path=extract_path)

# List the target folder to confirm that the archive was unpacked
extracted_entries = os.listdir(extract_path)
print(extracted_entries)

['Plants']


## Showing the Number of Images and Classes in the Dataset

In [3]:
# importing libraries
import os

def count_images_in_directory(directory, extensions=('.jpg', '.jpeg', '.png')):
    """Count the image files found anywhere under ``directory``.

    The directory tree is walked recursively and a file is counted when its
    name ends with one of ``extensions``. The comparison ignores case, so
    ``photo.JPG`` and ``photo.jpg`` are both counted.

    Args:
        directory: Root folder to scan.
        extensions: A single suffix or an iterable of suffixes that mark a
            file as an image. Defaults to ``('.jpg', '.jpeg', '.png')``.

    Returns:
        int: Number of matching files. A missing or empty ``directory``
        gives 0, because ``os.walk`` yields nothing for it.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # Lower-case the suffixes once so the per-file test stays case-insensitive.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Each endswith() result is a bool; summing them yields the match count.
    return sum(
        filename.lower().endswith(suffixes)
        for _root, _dirs, files in os.walk(directory)
        for filename in files
    )

# Define the path to the raw dataset directory (the extracted, not yet
# preprocessed 'Plants' folder)
Raw_data_data_dir = '/content/Extract/Plants'

# Count the total number of images (recursively, across all sub-folders)
total_images = count_images_in_directory(Raw_data_data_dir)

# Print the total number of images
print(f"Total number of images in '{Raw_data_data_dir}': {total_images}")

Total number of images in '/content/Extract/Plants': 87900


The image data in the dataset was already split into train, validation, and test folders, so we don't need to perform a data-splitting operation.

## Specifying the Path for Training, Validation and Testing


In [None]:
# Build the training, validation and testing folder paths from the shared
# 'Plants' root inside the extraction folder
plants_root = os.path.join(extract_path, 'Plants')
train_dir = os.path.join(plants_root, 'train')
valid_dir = os.path.join(plants_root, 'valid')
test_dir = os.path.join(plants_root, 'test')

# Show what each split folder contains
print(f"Folders in train_dir '{os.listdir(train_dir)}'")
print(f"Folders in valid_dir '{os.listdir(valid_dir)}'")
print(f"Folders in test_dir '{os.listdir(test_dir)}'")

## Importing the Dependencies and Libraries

In [5]:
# importing libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Image Preprocessing Code for Quality Enhancement

In [None]:
# importing libraries
import os
import cv2
import numpy as np

def _enhance_image(image, image_size):
    """Blur, min-max stretch and resize one image array loaded by OpenCV."""
    # Step 1: Noise reduction with a 5x5 Gaussian kernel
    blurred_image = cv2.GaussianBlur(image, (5, 5), 0)

    # Step 2: Color normalization - stretch the pixel values to span 0..255
    stretched_image = cv2.normalize(blurred_image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)

    # Step 3: Resize to the requested dimensions
    return cv2.resize(stretched_image, image_size)


def preprocess_images(input_dir, output_dir, image_size=(224, 224), verbose=True):
    """Clean every image under ``input_dir`` and save it under ``output_dir``.

    The input tree is walked recursively. Each ``.jpg`` / ``.jpeg`` / ``.png``
    file (case-insensitive) is read with OpenCV, blurred, min-max stretched,
    resized, and written to the same relative location and file name below
    ``output_dir``. Files that cannot be read or written are reported with a
    warning and skipped.

    The results are stored as ordinary 8-bit image files, so no scaling to
    [0, 1] is kept on disk; apply that scaling when the images are loaded
    for training.

    Args:
        input_dir: Root folder containing the source images.
        output_dir: Root folder for the processed images; created if missing.
        image_size: Target size passed to ``cv2.resize``.
            Defaults to ``(224, 224)``.
        verbose: When True (default) print one line per visited folder and
            per saved image. Warnings are printed regardless.

    Returns:
        None
    """
    # Create the output directory (no error if it already exists)
    os.makedirs(output_dir, exist_ok=True)

    # Walk through the input directory to find images
    for root, _dirs, files in os.walk(input_dir):
        if verbose:
            print(f"Processing folder: {root}")

        # Mirror the current input sub-folder below output_dir
        output_subdir = os.path.join(output_dir, os.path.relpath(root, input_dir))

        for filename in files:
            if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue  # not one of the handled image formats

            img_path = os.path.join(root, filename)
            image = cv2.imread(img_path)

            # cv2.imread signals an unreadable file by returning None
            if image is None:
                print(f"Warning: Unable to load image at {img_path}. Skipping.")
                continue

            # imread with default flags yields 8-bit data and the enhancement
            # steps keep that depth, so the result can be written as-is.
            # Dividing by 255 and multiplying back before saving would be a
            # no-op, so it is not done here.
            processed_image = _enhance_image(image, image_size)

            # Only create the sub-folder once there is something to store in it
            os.makedirs(output_subdir, exist_ok=True)
            output_path = os.path.join(output_subdir, filename)

            # cv2.imwrite returns False when the file could not be written;
            # report that instead of claiming success.
            if not cv2.imwrite(output_path, processed_image):
                print(f"Warning: Unable to save image at {output_path}. Skipping.")
                continue

            if verbose:
                print(f"Processed and saved: {output_path}")

# Source folders holding the raw dataset splits
train_dir = '/content/Extract/Plants/train'
valid_dir = '/content/Extract/Plants/valid'
test_dir  = '/content/Extract/Plants/test'

# Destination folders for the cleaned dataset splits
train_output_dir = '/content/Plant_disease_detection/train'
valid_output_dir = '/content/Plant_disease_detection/valid'
test_output_dir  = '/content/Plant_disease_detection/test'

# Preprocess each split in turn: train, then valid, then test
for split_input_dir, split_output_dir in (
    (train_dir, train_output_dir),
    (valid_dir, valid_output_dir),
    (test_dir, test_output_dir),
):
    preprocess_images(split_input_dir, split_output_dir)

## Finding the Total Number of Images after Image Preprocessing

In [7]:
import os

def count_images_in_directory(directory, extensions=('.jpg', '.jpeg', '.png')):
    """Count the image files found anywhere under ``directory``.

    The directory tree is walked recursively and a file is counted when its
    name ends with one of ``extensions``. The comparison ignores case, so
    ``photo.JPG`` and ``photo.jpg`` are both counted.

    Args:
        directory: Root folder to scan.
        extensions: A single suffix or an iterable of suffixes that mark a
            file as an image. Defaults to ``('.jpg', '.jpeg', '.png')``.

    Returns:
        int: Number of matching files. A missing or empty ``directory``
        gives 0, because ``os.walk`` yields nothing for it.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # Lower-case the suffixes once so the per-file test stays case-insensitive.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Each endswith() result is a bool; summing them yields the match count.
    return sum(
        filename.lower().endswith(suffixes)
        for _root, _dirs, files in os.walk(directory)
        for filename in files
    )

# Define the path to the cleaned_data directory (the parent of the
# train/valid/test output folders written by the preprocessing step)
cleaned_data_dir = '/content/Plant_disease_detection'

# Count the total number of images
# NOTE(review): a total lower than the raw dataset count would suggest that
# some images were skipped during preprocessing - confirm against the raw total.
total_images = count_images_in_directory(cleaned_data_dir)

# Print the total number of images
print(f"Total number of images in '{cleaned_data_dir}': {total_images}")

Total number of images in '/content/Plant_disease_detection': 87900


## Zipping the Cleaned Dataset for Download from Google Colab

In [8]:
import shutil
import os

def zip_directory(folder_path, zip_file_path):
    """Zip the folder itself, including its contents.

    Every entry in the archive is stored under the folder's own name, so
    unzipping recreates the folder rather than spilling its contents.

    Args:
        folder_path: Folder to archive. Relative paths and paths that end
            with a path separator are accepted.
        zip_file_path: Destination of the archive WITHOUT the ``.zip``
            extension; ``shutil.make_archive`` appends it.

    Returns:
        None
    """
    # Normalise first: a trailing separator would leave basename() empty and
    # a bare relative name would leave dirname() empty, and in both cases the
    # folder would not be archived under its own name.
    folder_path = os.path.abspath(folder_path)
    folder_parent = os.path.dirname(folder_path)
    folder_name = os.path.basename(folder_path)

    # root_dir=parent + base_dir=name keeps the folder name inside the archive
    shutil.make_archive(zip_file_path, 'zip', folder_parent, folder_name)

# Define the path to the cleaned_data directory
cleaned_data_dir = '/content/Plant_disease_detection'

# Define the path for the zip file, without the '.zip' extension
# (shutil.make_archive, used by zip_directory, appends it)
zip_file_path = '/content/Plant_disease_detection'

# Create the zip file
zip_directory(cleaned_data_dir, zip_file_path)

# '.zip' is added in the message because zip_file_path holds only the base name
print(f"successfully Zipped '{cleaned_data_dir}' into '{zip_file_path}.zip'")

successfully Zipped '/content/Plant_disease_detection' into '/content/Plant_disease_detection.zip'
