## Splitting the datset

In [6]:
import os
import shutil
from sklearn.model_selection import train_test_split
import tensorflow as tf

original_dataset_dir = './raw/color'
new_base_dir = './PlantVillage-Dataset-processed'

# Splitting ratio
train_ratio = 0.8
test_ratio = 0.2

# Ensure the new directory exists
os.makedirs(new_base_dir, exist_ok=True)

In [7]:
# Function to resize and save an image
def resize_and_save_image(src_path, dest_path, new_size=(256, 256)):
    image = tf.keras.preprocessing.image.load_img(src_path, target_size=new_size)
    image.save(dest_path)

# Loop through each class directory
for class_name in os.listdir(original_dataset_dir):
    print(f'Processing {class_name} ...')
    # Create new directories for train and test sets for the current class
    os.makedirs(os.path.join(new_base_dir, 'train', class_name), exist_ok=True)
    os.makedirs(os.path.join(new_base_dir, 'test', class_name), exist_ok=True)

    # List all images in the current class directory
    images = os.listdir(os.path.join(original_dataset_dir, class_name))
    images = [img for img in images if img.lower().endswith(('png', 'jpg', 'jpeg'))]  # Filter out non-image files

    # Split images into train and test sets
    train_images, test_images = train_test_split(images, test_size=test_ratio, random_state=42)

    # Resize and save train images
    for img in train_images:
        src_path = os.path.join(original_dataset_dir, class_name, img)
        dest_path = os.path.join(new_base_dir, 'train', class_name, img)
        resize_and_save_image(src_path, dest_path)

    # Resize and save test images
    for img in test_images:
        src_path = os.path.join(original_dataset_dir, class_name, img)
        dest_path = os.path.join(new_base_dir, 'test', class_name, img)
        resize_and_save_image(src_path, dest_path)


Processing Apple___Apple_scab ...
Processing Apple___Black_rot ...
Processing Apple___Cedar_apple_rust ...
Processing Apple___healthy ...
Processing Blueberry___healthy ...
Processing Cherry_(including_sour)___healthy ...
Processing Cherry_(including_sour)___Powdery_mildew ...
Processing Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot ...
Processing Corn_(maize)___Common_rust_ ...
Processing Corn_(maize)___healthy ...
Processing Corn_(maize)___Northern_Leaf_Blight ...
Processing Grape___Black_rot ...
Processing Grape___Esca_(Black_Measles) ...
Processing Grape___healthy ...
Processing Grape___Leaf_blight_(Isariopsis_Leaf_Spot) ...
Processing Orange___Haunglongbing_(Citrus_greening) ...
Processing Peach___Bacterial_spot ...
Processing Peach___healthy ...
Processing Pepper,_bell___Bacterial_spot ...
Processing Pepper,_bell___healthy ...
Processing Potato___Early_blight ...
Processing Potato___healthy ...
Processing Potato___Late_blight ...
Processing Raspberry___healthy ...
Processing

### Merging the files

In [3]:
import os
import shutil

def copy_images(source_folder, target_folder,type):
    # Loop through each subdirectory in the source folder
    for subdir in os.listdir(source_folder):
        # Create the corresponding subdirectory in the target folder
        target_subdir = os.path.join(target_folder, subdir)
        os.makedirs(target_subdir, exist_ok=True)

        # Loop through each image file in the subdirectory
        for filename in os.listdir(os.path.join(source_folder, subdir)):
            new_file = f"{type}_{filename}"
            # Copy the image file to the target subdirectory
            source_file = os.path.join(source_folder, subdir, filename)
            target_file = os.path.join(target_subdir, new_file)
            shutil.copy(source_file, target_file)
            
            
copy_images('./PlantVillage-Dataset-processed/train', './PlantVillage-Dataset-processed/train2','train')
copy_images('./PlantVillage-Dataset-processed/generated_dataset_cloudy', './PlantVillage-Dataset-processed/train2','cloudy')
copy_images('./PlantVillage-Dataset-processed/generated_dataset_sunny', './PlantVillage-Dataset-processed/train2','sunny')


# Generating count table

In [None]:
import os
import pandas as pd

# Paths to the training and testing directories
train_dir = './PlantVillage-Dataset-processed/train2'
test_dir = './PlantVillage-Dataset-processed/test'

# Initialize a dictionary to keep the counts
class_counts = {}

# Iterate over the training directory
for class_name in os.listdir(train_dir):
    train_class_dir = os.path.join(train_dir, class_name)
    if os.path.isdir(train_class_dir):
        # Count the number of images in the training class directory
        train_count = len([name for name in os.listdir(train_class_dir) if os.path.isfile(os.path.join(train_class_dir, name))])
        # Initialize the dictionary entry
        class_counts[class_name] = {'Ntr': train_count, 'Nte': 0}

# Repeat the process for the testing directory
for class_name in os.listdir(test_dir):
    test_class_dir = os.path.join(test_dir, class_name)
    if os.path.isdir(test_class_dir):
        # Count the number of images in the testing class directory
        test_count = len([name for name in os.listdir(test_class_dir) if os.path.isfile(os.path.join(test_class_dir, name))])
        # Update the dictionary entry
        class_counts[class_name]['Nte'] = test_count

# Convert the dictionary to a pandas DataFrame
df_counts = pd.DataFrame.from_dict(class_counts, orient='index').reset_index()
df_counts.rename(columns={'index': 'Class', 'Ntr': 'Train Count', 'Nte': 'Test Count'}, inplace=True)

df_counts['Species'] = df_counts['Class'].apply(lambda x: x.split('___')[0])
df_counts['Disease'] = df_counts['Class'].apply(lambda x: x.split('___')[1])
# Re-order the DataFrame columns 
df_counts = df_counts[['Species', 'Disease', 'Train Count', 'Test Count']]


print(df_counts)
df_counts.to_csv('PlantVillage-Dataset-processed/train_test_counts.csv', index=False)


                    Species                               Disease  \
0                     Apple                            Apple_scab   
1                     Apple                             Black_rot   
2                     Apple                      Cedar_apple_rust   
3                     Apple                               healthy   
4                 Blueberry                               healthy   
5   Cherry_(including_sour)                               healthy   
6   Cherry_(including_sour)                        Powdery_mildew   
7              Corn_(maize)   Cercospora_leaf_spot Gray_leaf_spot   
8              Corn_(maize)                          Common_rust_   
9              Corn_(maize)                               healthy   
10             Corn_(maize)                  Northern_Leaf_Blight   
11                    Grape                             Black_rot   
12                    Grape                  Esca_(Black_Measles)   
13                    Grape       

In [None]:
print(df_counts.to_latex(
    index=False,  # To not include the DataFrame index as a column in the table
    caption="Training and Testing Image Counts",  # The caption to appear above the table in the LaTeX document
    label="tab:trainTestCounts",  # A label used for referencing the table within the LaTeX document
    position="htbp",  # The preferred positions where the table should be placed in the document ('here', 'top', 'bottom', 'page')
    column_format="|l|l|l|l|",  # The format of the columns: left-aligned with vertical lines between them
    escape=False,  # Disable escaping LaTeX special characters in the DataFrame
    # float_format="{:0.2f}".format  # Formats floats to two decimal places
))

\begin{table}[htbp]
\caption{Training and Testing Image Counts}
\label{tab:trainTestCounts}
\begin{tabular}{|l|l|l|l|}
\toprule
Species & Disease & Train Count & Test Count \\
\midrule
Apple & Apple_scab & 534 & 126 \\
Apple & Black_rot & 526 & 125 \\
Apple & Cedar_apple_rust & 250 & 55 \\
Apple & healthy & 1346 & 329 \\
Blueberry & healthy & 1231 & 301 \\
Cherry_(including_sour) & healthy & 713 & 171 \\
Cherry_(including_sour) & Powdery_mildew & 871 & 211 \\
Corn_(maize) & Cercospora_leaf_spot Gray_leaf_spot & 440 & 103 \\
Corn_(maize) & Common_rust_ & 983 & 239 \\
Corn_(maize) & healthy & 959 & 233 \\
Corn_(maize) & Northern_Leaf_Blight & 818 & 197 \\
Grape & Black_rot & 974 & 236 \\
Grape & Esca_(Black_Measles) & 1136 & 277 \\
Grape & healthy & 368 & 85 \\
Grape & Leaf_blight_(Isariopsis_Leaf_Spot) & 890 & 216 \\
Orange & Haunglongbing_(Citrus_greening) & 4435 & 1102 \\
Peach & Bacterial_spot & 1867 & 460 \\
Peach & healthy & 318 & 72 \\
Pepper,_bell & Bacterial_spot & 827 & 200 \\
