<a href="https://colab.research.google.com/github/Beenaa99/Strawberry_Leaf_Disease_Detection/blob/main/Data_splitting_strawberry_leaves.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
import random
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split


In [2]:
# Mount Google Drive
drive.mount('/content/drive')

# Define the path to your dataset
data_path = '/content/drive/MyDrive/omdena/Image Collection/Strawberry Leaf - Beenaa'

# Root folder for the split output. It lives inside the dataset folder and is
# derived from data_path so the two locations cannot drift apart if the
# dataset is moved or renamed.
base_dir = os.path.join(data_path, 'Split_data')
os.makedirs(base_dir, exist_ok=True)

# Paths of the train, validation, and test sets. Only the paths are defined
# here; the folders themselves are created together with their per-class
# subfolders in the next cell.
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

Mounted at /content/drive


In [3]:
# Build the <split>/<class> folder tree for the train, val, and test sets.
classes = [
    'angular_leaf_spots_bacteria',
    'healthy_leaf',
    'leaf_scorch_fungus',
    'leaf_spot_fungus',
]
for category in classes:
    # exist_ok=True makes this cell safe to re-run on an existing tree.
    for split_root in (train_dir, val_dir, test_dir):
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

In [4]:
# Function to resize images and re-save them per class
def process_and_save_images(src_dir, dest_dir, image_size=(256, 256), categories=None):
    """Resize every image under ``src_dir/<category>`` and save it to ``dest_dir/<category>``.

    Each output file keeps the name of its source file. ``dest_dir/<category>``
    must already exist. ``src_dir`` and ``dest_dir`` may be the same folder, in
    which case the images are overwritten in place.

    No pixel "normalisation" is performed: scaling to [0, 1] and straight back
    to uint8 before saving is at best a no-op on the stored file (and the
    uint8 cast truncates), so value scaling is left to whatever loads the
    images later.

    Args:
        src_dir: Folder containing one subfolder per category.
        dest_dir: Folder containing one (existing) subfolder per category.
        image_size: Target ``(width, height)`` passed to ``Image.resize``.
        categories: Iterable of category subfolder names. Defaults to the
            notebook-level ``classes`` list.

    Returns:
        None. Images that cannot be processed are reported with ``print`` and
        skipped so that one bad file does not abort the whole run.
    """
    if categories is None:
        categories = classes

    for category in categories:
        category_path = os.path.join(src_dir, category)
        for image_name in os.listdir(category_path):
            img_path = os.path.join(category_path, image_name)
            # Skip sub-directories (e.g. checkpoint folders); they are not images.
            if not os.path.isfile(img_path):
                continue
            try:
                # The context manager closes the source file handle even if
                # decoding or resizing fails.
                with Image.open(img_path) as source_img:
                    # Palette, alpha, CMYK, ... modes are converted to RGB so
                    # colours survive and the result can be saved as JPEG.
                    if source_img.mode in ('RGB', 'L'):
                        pixels = source_img
                    else:
                        pixels = source_img.convert('RGB')
                    # resize() returns an independent, fully loaded copy, so it
                    # stays usable after the source file is closed.
                    resized = pixels.resize(image_size)
                # Save after the source is closed: src and dest may be the same path.
                resized.save(os.path.join(dest_dir, category, image_name))
            except Exception as e:
                print(f"Error processing image {img_path}: {e}")

In [5]:
# Read and split the data: 60% train / 20% val / 20% test within each class.
# NOTE: if the split folders already hold files from an earlier run that used a
# different listing order, clear them first so no image ends up in two subsets.
for category in classes:
    category_path = os.path.join(data_path, category)
    # os.listdir returns entries in arbitrary order, so sort them: with a fixed
    # random_state the same files then land in the same subset on every run.
    # Non-file entries are dropped because shutil.copy cannot copy a directory.
    images = sorted(
        entry
        for entry in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, entry))
    )
    train_imgs, test_imgs = train_test_split(images, test_size=0.2, random_state=42)
    train_imgs, val_imgs = train_test_split(train_imgs, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

    # Copy each subset into <split>/<category>, keeping the original file names.
    for split_root, split_imgs in (
        (train_dir, train_imgs),
        (val_dir, val_imgs),
        (test_dir, test_imgs),
    ):
        for image_name in split_imgs:
            shutil.copy(
                os.path.join(category_path, image_name),
                os.path.join(split_root, category, image_name),
            )


In [6]:
# Resize every split in place: source and destination are the same folder,
# so the copied images are overwritten by their resized versions.
for split_root in (train_dir, val_dir, test_dir):
    process_and_save_images(split_root, split_root)

# Sanity check: how many entries ended up in each split?
def count_images(directory):
    """Return the total number of entries across the class subfolders of ``directory``.

    One subfolder per name in the notebook-level ``classes`` list is expected
    to exist under ``directory``; every directory entry in it is counted.
    """
    return sum(
        len(os.listdir(os.path.join(directory, label)))
        for label in classes
    )

# Report the size of each split, in train / val / test order.
for split_name, split_root in (("train", train_dir), ("val", val_dir), ("test", test_dir)):
    print(f"Number of images in {split_name} set: {count_images(split_root)}")

Number of images in train set: 2788
Number of images in val set: 930
Number of images in test set: 930
