In [None]:
import os
import random
import shutil

# Set the desired count for data augmentation
desired_augmentation_count = 3000

# Set the desired train-test-validation ratios.
# Validation receives whatever is left once the train and test shares have
# been taken, so val_ratio is informational (expected: 1 - train - test).
train_ratio = 0.7
test_ratio = 0.2
val_ratio = 0.1

# Seed for the private RNG that drives oversampling and shuffling, so that
# re-running this cell reproduces the same files in the same splits.
random_seed = 42

# Specify the paths
dataset_path = r'D:\SETU Assignments\Dissertation\Dataset\Data'
output_folder = r'D:\SETU Assignments\Dissertation\Dataset\ProcessedDS'

# Define the classes and their corresponding counts
classes = [
    'Bacterial Blight', 'Bacterial Streak', 'Brown Spot', 'Healthy', 'Hispa',
    'Leaf Blast', 'Leaf Scald', 'Leaf Smut', 'Neck Blast', 'Tungro'
]
# Reference image counts per class. The pipeline below does not read this
# mapping; it counts the files that are actually present on disk.
class_counts = {
    'Bacterial Blight': 1833,
    'Bacterial Streak': 628,
    'Brown Spot': 5041,
    'Healthy': 8294,
    'Hispa': 3572,
    'Leaf Blast': 6666,
    'Leaf Scald': 438,
    'Leaf Smut': 80,
    'Neck Blast': 2000,
    'Tungro': 360
}

# Split destinations; each one receives one sub-folder per class.
train_path = os.path.join(output_folder, 'train')
test_path = os.path.join(output_folder, 'test')
val_path = os.path.join(output_folder, 'val')


def augment_and_split_dataset(source_root, staging_root, train_dir, test_dir,
                              val_dir, class_names, target_count,
                              train_fraction=0.7, test_fraction=0.2, seed=None):
    """Oversample small classes, then split every class into train/test/val.

    For each class the original images are copied from
    ``source_root/<class>`` into the staging folder ``staging_root/<class>``.
    If the class has fewer than ``target_count`` images, randomly chosen
    originals are duplicated (as ``augmented_<n><ext>``, keeping the source
    extension) until the target is reached. The staged files are shuffled and
    copied to ``<split_dir>/<class>/`` so that classes never share file names
    inside a split folder.

    NOTE(review): the "augmentation" is plain duplication done *before* the
    split, so copies of one image can end up in different splits (train/test
    leakage). Consider oversampling only the training split.

    Args:
        source_root: Folder containing one sub-folder of images per class.
        staging_root: Folder in which the per-class staging folders are made.
        train_dir, test_dir, val_dir: Destination roots for the three splits.
        class_names: Iterable of class (sub-folder) names to process.
        target_count: Minimum number of staged images wanted per class.
        train_fraction: Share of each class copied to ``train_dir``.
        test_fraction: Share of each class copied to ``test_dir``; the
            remainder goes to ``val_dir``.
        seed: Seed for the private random generator; ``None`` means
            non-reproducible.

    Returns:
        dict mapping class name to ``(n_train, n_test, n_val)``.

    Raises:
        ValueError: If the fractions are negative or exceed 1 in total, or if
            a class folder contains no files.
        FileNotFoundError: If a class folder does not exist.
    """
    # Small tolerance so that float sums such as 0.7 + 0.3 are not rejected.
    if (train_fraction < 0 or test_fraction < 0
            or train_fraction + test_fraction > 1 + 1e-9):
        raise ValueError(
            "train_fraction and test_fraction must be non-negative and sum to at most 1"
        )

    # Private generator: reproducible without reseeding the global `random`.
    rng = random.Random(seed)

    split_dirs = (train_dir, test_dir, val_dir)
    for split_dir in split_dirs:
        os.makedirs(split_dir, exist_ok=True)

    split_sizes = {}
    for class_name in class_names:
        class_folder = os.path.join(source_root, class_name)
        staging_folder = os.path.join(staging_root, class_name)
        os.makedirs(staging_folder, exist_ok=True)

        # List once (not once per augmented copy) and sort so that the seeded
        # shuffle does not depend on the file-system enumeration order.
        originals = sorted(
            entry for entry in os.listdir(class_folder)
            if os.path.isfile(os.path.join(class_folder, entry))
        )
        if not originals:
            raise ValueError(f"No image files found in {class_folder!r}")

        # Stage the originals first; without this only the duplicated copies
        # would ever reach the splits.
        staged = []
        for name in originals:
            shutil.copy(os.path.join(class_folder, name),
                        os.path.join(staging_folder, name))
            staged.append(name)

        # Top the class up to target_count with duplicates of random originals.
        for i in range(max(0, target_count - len(originals))):
            source_image = rng.choice(originals)
            extension = os.path.splitext(source_image)[1]
            target_name = f'augmented_{i+1}{extension}'
            shutil.copy(os.path.join(class_folder, source_image),
                        os.path.join(staging_folder, target_name))
            staged.append(target_name)

        # Split only what was staged in this run, so stale files left in the
        # staging folder by an earlier run are not picked up.
        rng.shuffle(staged)
        train_count = int(train_fraction * len(staged))
        test_count = int(test_fraction * len(staged))
        partitions = (
            staged[:train_count],
            staged[train_count:train_count + test_count],
            staged[train_count + test_count:],
        )

        for split_dir, names in zip(split_dirs, partitions):
            # Per-class sub-folder keeps the label and prevents the
            # identically named augmented files of different classes from
            # overwriting each other.
            class_dir = os.path.join(split_dir, class_name)
            os.makedirs(class_dir, exist_ok=True)
            for name in names:
                shutil.copy(os.path.join(staging_folder, name),
                            os.path.join(class_dir, name))

        split_sizes[class_name] = tuple(len(names) for names in partitions)

    return split_sizes


# Inside a Jupyter kernel __name__ is "__main__", so executing the cell runs
# the pipeline; the guard only keeps the function importable without
# touching the file system.
# NOTE: this cell and the plain splitting cell further down write into the
# same train/test/val folders -- run only one of them per output folder.
if __name__ == "__main__":
    augment_and_split_dataset(
        dataset_path, output_folder, train_path, test_path, val_path,
        classes, desired_augmentation_count,
        train_fraction=train_ratio, test_fraction=test_ratio,
        seed=random_seed,
    )
    print("Data augmentation and dataset splitting completed successfully.")


In [1]:
import os
import random
import shutil

# Specify the paths
dataset_path = r'D:\SETU Assignments\Dissertation\Dataset\Data'
output_folder = r'D:\SETU Assignments\Dissertation\Dataset\ProcessedDS'

# Destination roots for the train, test, and val splits
train_path = os.path.join(output_folder, 'train')
test_path = os.path.join(output_folder, 'test')
val_path = os.path.join(output_folder, 'val')

# Define the train, test, and val ratios.
# Validation receives whatever is left once the train and test shares have
# been taken, so val_ratio is informational (expected: 1 - train - test).
train_ratio = 0.7
test_ratio = 0.2
val_ratio = 0.1

# Seed for the private RNG used for shuffling, so that re-running this cell
# sends every image to the same split instead of scattering it across several.
split_seed = 42

# Specify the classes to be used for splitting
classes = [
    'Bacterial Blight', 'Bacterial Streak', 'Brown Spot', 'Healthy', 'Hispa',
    'Leaf Blast', 'Leaf Scald', 'Leaf Smut', 'Neck Blast', 'Tungro'
]


def split_dataset(source_root, class_names, train_dir, test_dir, val_dir,
                  train_fraction=0.7, test_fraction=0.2, seed=None):
    """Copy each class's images into per-class train/test/val folders.

    The files in ``source_root/<class>`` are listed in sorted order, shuffled
    with a private seeded generator, and copied to ``<split_dir>/<class>/``.
    Sub-directories inside a class folder are ignored.

    Args:
        source_root: Folder containing one sub-folder of images per class.
        class_names: Iterable of class (sub-folder) names to process.
        train_dir, test_dir, val_dir: Destination roots for the three splits.
        train_fraction: Share of each class copied to ``train_dir``.
        test_fraction: Share of each class copied to ``test_dir``; the
            remainder goes to ``val_dir``.
        seed: Seed for the private random generator; ``None`` means
            non-reproducible.

    Returns:
        dict mapping class name to ``(n_train, n_test, n_val)``.

    Raises:
        ValueError: If the fractions are negative or exceed 1 in total.
        FileNotFoundError: If a class folder does not exist.
    """
    # Small tolerance so that float sums such as 0.7 + 0.3 are not rejected.
    if (train_fraction < 0 or test_fraction < 0
            or train_fraction + test_fraction > 1 + 1e-9):
        raise ValueError(
            "train_fraction and test_fraction must be non-negative and sum to at most 1"
        )

    # Private generator: reproducible without reseeding the global `random`.
    rng = random.Random(seed)

    split_dirs = (train_dir, test_dir, val_dir)
    for split_dir in split_dirs:
        os.makedirs(split_dir, exist_ok=True)

    split_sizes = {}
    for class_name in class_names:
        class_folder = os.path.join(source_root, class_name)

        # Sorted listing makes the seeded shuffle independent of the order in
        # which the file system happens to enumerate entries.
        images = sorted(
            entry for entry in os.listdir(class_folder)
            if os.path.isfile(os.path.join(class_folder, entry))
        )
        rng.shuffle(images)

        train_count = int(len(images) * train_fraction)
        test_count = int(len(images) * test_fraction)
        partitions = (
            images[:train_count],
            images[train_count:train_count + test_count],
            images[train_count + test_count:],
        )

        for split_dir, names in zip(split_dirs, partitions):
            # Create the class folder once per split (not once per file), and
            # even when the slice is empty so that every split exposes the
            # same set of class folders.
            class_dir = os.path.join(split_dir, class_name)
            os.makedirs(class_dir, exist_ok=True)
            for name in names:
                shutil.copy(os.path.join(class_folder, name),
                            os.path.join(class_dir, name))

        split_sizes[class_name] = tuple(len(names) for names in partitions)

    return split_sizes


# Inside a Jupyter kernel __name__ is "__main__", so executing the cell runs
# the split; the guard only keeps the function importable without touching
# the file system.
if __name__ == "__main__":
    split_counts = split_dataset(
        dataset_path, classes, train_path, test_path, val_path,
        train_fraction=train_ratio, test_fraction=test_ratio, seed=split_seed,
    )
    print("Dataset splitting completed successfully.")


Dataset splitting completed successfully.
