## Splitting the dataset

In [6]:
import os
import shutil
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Source images, one sub-directory per class, and the root of the processed output.
original_dataset_dir = './raw/color'
new_base_dir = './PlantVillage-Dataset-processed'

# Splitting ratio
train_ratio = 0.8
test_ratio = 0.2

# In the cells visible here only `test_ratio` is handed to train_test_split;
# `train_ratio` is informational. Fail fast if the two ever drift apart so that
# editing one without the other cannot silently produce an unexpected split.
assert abs(train_ratio + test_ratio - 1.0) < 1e-9, (
    f'train_ratio ({train_ratio}) + test_ratio ({test_ratio}) must equal 1'
)

# Ensure the new directory exists (no error if it is already there)
os.makedirs(new_base_dir, exist_ok=True)

In [7]:
def resize_and_save_image(src_path, dest_path, new_size=(256, 256)):
    """Load one image resized to ``new_size`` and write it to ``dest_path``.

    Args:
        src_path: Path of the image file to read.
        dest_path: Path the resized image is saved to.
        new_size: Passed to Keras ``load_img`` as ``target_size``;
            defaults to ``(256, 256)``.
    """
    # load_img performs the resize while loading; the returned image object
    # is saved straight to the destination without further processing.
    tf.keras.preprocessing.image.load_img(src_path, target_size=new_size).save(dest_path)

# Split every class directory into train/test subsets and write resized copies.
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order. Both the
# class names and the image names are therefore sorted, otherwise the fixed
# random_state below would still yield a different split on another machine.
for class_name in sorted(os.listdir(original_dataset_dir)):
    class_dir = os.path.join(original_dataset_dir, class_name)

    # Skip stray files in the dataset root (e.g. .DS_Store, README);
    # listing them as if they were class directories would raise.
    if not os.path.isdir(class_dir):
        continue

    print(f'Processing {class_name} ...')

    # Keep image files only. The leading dot makes this a real extension check
    # (a bare 'png' suffix would also match names such as 'foopng').
    images = sorted(
        img for img in os.listdir(class_dir)
        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # train_test_split raises when either side of the split would be empty,
    # so classes with fewer than two images are reported and skipped.
    if len(images) < 2:
        print(f'Skipping {class_name}: need at least 2 images to split, found {len(images)}')
        continue

    # Split images into train and test sets (seeded for reproducibility)
    train_images, test_images = train_test_split(images, test_size=test_ratio, random_state=42)

    # Resize and save each subset under <new_base_dir>/<split>/<class_name>/
    for split_name, split_images in (('train', train_images), ('test', test_images)):
        split_dir = os.path.join(new_base_dir, split_name, class_name)
        os.makedirs(split_dir, exist_ok=True)
        for img in split_images:
            resize_and_save_image(os.path.join(class_dir, img), os.path.join(split_dir, img))


Processing Apple___Apple_scab ...
Processing Apple___Black_rot ...
Processing Apple___Cedar_apple_rust ...
Processing Apple___healthy ...
Processing Blueberry___healthy ...
Processing Cherry_(including_sour)___healthy ...
Processing Cherry_(including_sour)___Powdery_mildew ...
Processing Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot ...
Processing Corn_(maize)___Common_rust_ ...
Processing Corn_(maize)___healthy ...
Processing Corn_(maize)___Northern_Leaf_Blight ...
Processing Grape___Black_rot ...
Processing Grape___Esca_(Black_Measles) ...
Processing Grape___healthy ...
Processing Grape___Leaf_blight_(Isariopsis_Leaf_Spot) ...
Processing Orange___Haunglongbing_(Citrus_greening) ...
Processing Peach___Bacterial_spot ...
Processing Peach___healthy ...
Processing Pepper,_bell___Bacterial_spot ...
Processing Pepper,_bell___healthy ...
Processing Potato___Early_blight ...
Processing Potato___healthy ...
Processing Potato___Late_blight ...
Processing Raspberry___healthy ...
Processing

### Merging the files

In [9]:
import os
import shutil

def copy_images(source_folder, target_folder):
    """Copy the files of every class sub-directory into a matching target sub-directory.

    For each directory ``source_folder/<subdir>`` the directory
    ``target_folder/<subdir>`` is created if needed and every regular file in the
    source sub-directory is copied into it. Calling this repeatedly with the same
    ``target_folder`` merges several sources into one tree.

    Entries of ``source_folder`` that are not directories, and entries inside a
    sub-directory that are not regular files, are skipped instead of raising.

    Note: a file that already exists in the target under the same name is
    overwritten by ``shutil.copy``.

    Args:
        source_folder: Directory containing one sub-directory per class.
        target_folder: Destination root; created on demand per sub-directory.
    """
    # Sorted so the copy order (and therefore which file wins on a name clash
    # within one call) does not depend on filesystem listing order.
    for subdir in sorted(os.listdir(source_folder)):
        source_subdir = os.path.join(source_folder, subdir)
        # Ignore stray files next to the class directories (e.g. .DS_Store).
        if not os.path.isdir(source_subdir):
            continue

        # Create the corresponding subdirectory in the target folder
        target_subdir = os.path.join(target_folder, subdir)
        os.makedirs(target_subdir, exist_ok=True)

        for filename in sorted(os.listdir(source_subdir)):
            source_file = os.path.join(source_subdir, filename)
            # shutil.copy cannot copy directories; skip anything that is not a file.
            if not os.path.isfile(source_file):
                continue
            shutil.copy(source_file, os.path.join(target_subdir, filename))
            
            
# Merge the original training split and the two generated variants into a
# single 'train2' tree, copying the sources in this order.
merged_train_dir = './PlantVillage-Dataset-processed/train2'
for source_dir in (
    './PlantVillage-Dataset-processed/train',
    './PlantVillage-Dataset-processed/generated_dataset_cloudy',
    './PlantVillage-Dataset-processed/generated_dataset_sunny',
):
    copy_images(source_dir, merged_train_dir)
