## **Data Splitting**

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths
# NOTE(review): these are absolute, machine-specific paths; adjust them before running elsewhere.
data_dir = r"D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Detection\datasets\data"  # Path to your dataset folder
output_dir = r"D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Detection\datasets\processed_data_v2"  # Output folder for train, val, test splits

# Define split ratios (train / val / test fractions of every class)
train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "Split ratios must sum to 1"

# Only sub-directories of data_dir are class folders; stray files (e.g. Thumbs.db,
# .DS_Store) are ignored instead of crashing the listing below.
# Sorting makes the processing order independent of the filesystem.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Create output directories: <output_dir>/<split>/<category>
for split in ['train', 'val', 'test']:
    for category in categories:
        os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)

# Split data
# NOTE(review): files from an earlier run are not removed; if the dataset, ratios or
# seed changed since then, stale copies may remain in output_dir — clear it first.
for category in categories:
    category_path = os.path.join(data_dir, category)
    # os.listdir returns names in arbitrary order, so sort them: otherwise
    # random_state=42 would not yield the same split on every machine/run.
    # Nested directories are skipped because shutil.copy only handles files.
    images = sorted(
        name for name in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, name))
    )

    # First cut off the training share, then divide the remainder between val and test.
    train_images, temp_images = train_test_split(images, test_size=(1 - train_ratio), random_state=42)
    val_images, test_images = train_test_split(temp_images, test_size=(test_ratio / (val_ratio + test_ratio)), random_state=42)

    # Copy (the originals stay in data_dir) each file into its split/category folder
    for split, split_images in (('train', train_images), ('val', val_images), ('test', test_images)):
        for image in split_images:
            shutil.copy(os.path.join(category_path, image), os.path.join(output_dir, split, category, image))

print("Data successfully split into train, val, and test sets!")

Data successfully split into train, val, and test sets!


In [17]:
# Resolve the three split folders underneath output_dir.
train_dir, val_dir, test_dir = (
    os.path.join(output_dir, split_name) for split_name in ('train', 'val', 'test')
)

# Report how many files each split holds, walking all nested class folders.
for split_dir in (train_dir, val_dir, test_dir):
    image_count = sum(len(filenames) for _root, _subdirs, filenames in os.walk(split_dir))
    print(f"Number of images in {split_dir}: {image_count}")

Number of images in D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Detection\datasets\processed_data_v2\train: 3348
Number of images in D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Detection\datasets\processed_data_v2\val: 419
Number of images in D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Detection\datasets\processed_data_v2\test: 421


# **Oversampling the Minority Class**

- Takes each image in your minority class folder
- Applies random transformations (like rotating, flipping, zooming)
- Generates 3 new versions per image
- Saves those new images to a specified directory

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import os

minority_class_dir = r"D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Classification\datasets\data\Gray_Leaf_Spot"
# NOTE(review): this rebinds `output_dir`; any other cell that reads `output_dir`
# will see this folder if it is executed after this cell — confirm that is intended.
output_dir = r"D:\School\ITC\Y3\Semet 2\Mini Project\Project Folder\Corn-Disease-Classification\datasets\data_oversampled\Gray_Leaf_Spot"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Random geometric augmentations applied to every generated copy.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Number of augmented copies written for every source image.
augmentations_per_image = 3
# Base seed; every copy gets its own derived seed so a re-run reproduces the same transforms.
augmentation_seed = 42
# Only files with these extensions are treated as images; anything else is skipped
# instead of crashing load_img.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

# Sorted so that the seed assigned to each source image does not depend on listing order.
source_files = sorted(
    name for name in os.listdir(minority_class_dir)
    if name.lower().endswith(image_extensions)
)

# NOTE(review): this augments the un-split source folder. If the oversampled folder is
# split into train/val/test afterwards, variants of one photo could end up in different
# splits — confirm augmentation is only used for training data.
for source_index, img_file in enumerate(source_files):
    img = tf.keras.utils.load_img(os.path.join(minority_class_dir, img_file), target_size=(224, 224))
    x = tf.keras.utils.img_to_array(img)

    # Files are written under deterministic names derived from the source file.
    # Saving through flow(save_to_dir=..., save_prefix='aug') names every file
    # "aug_0_<random int>", so different images can collide and silently overwrite
    # each other, and every re-run of the cell adds yet more files.
    source_tag = img_file.replace('.', '_')
    for copy_index in range(augmentations_per_image):
        augmented = datagen.random_transform(
            x, seed=augmentation_seed + source_index * augmentations_per_image + copy_index
        )
        tf.keras.utils.save_img(
            os.path.join(output_dir, f"aug_{source_tag}_{copy_index}.jpeg"),
            augmented,
        )