In [None]:
import numpy as np  # For numerical computations and array operations
import pickle  # For saving and loading serialized Python objects (like models or encoders)
import cv2  # OpenCV library for image processing
from os import listdir  # For listing files in directories

# Preprocessing and encoding
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer  # For one-hot encoding of labels

# Keras deep learning imports
from keras.models import Sequential  # For creating a linear stack of layers
from keras.layers.normalization import BatchNormalization  # For normalizing layer activations
from keras.layers.convolutional import Conv2D, MaxPooling2D  # For convolution and pooling layers
from keras.layers.core import Activation, Flatten, Dropout, Dense  # Core layers: activations, flattening, dropout, dense
from keras import backend as K  # For backend configuration and utilities

# Keras utilities for preprocessing images
from keras.preprocessing.image import ImageDataGenerator  # For real-time image data augmentation
from keras.optimizers import Adam  # Optimizer for training the model
from keras.preprocessing import image  # For image loading and preprocessing
from keras.preprocessing.image import img_to_array  # Converts a PIL image to a NumPy array

# Scikit-learn utilities
from sklearn.model_selection import train_test_split  # For splitting data into training and test sets

# Matplotlib for plotting
import matplotlib.pyplot as plt  # For visualization of training progress, images, etc.

In [None]:
# --- Training hyperparameters -------------------------------------------
EPOCHS = 25      # number of full passes over the training dataset
INIT_LR = 1e-3   # initial learning rate for the optimizer
BS = 32          # batch size: samples processed before each weight update

# --- Input image geometry -----------------------------------------------
# Width and height in pixels, and depth (number of color channels, 3 for RGB)
width, height, depth = 256, 256, 3

# Target size for all input images, as a (width, height) tuple
default_image_size = (width, height)

# --- Dataset location and bookkeeping -----------------------------------
# Root directory containing the subfolders of image data
directory_root = '..'

# Placeholder; reassigned further down once the images have been loaded
image_size = 0
  



In [None]:
# Load the image dataset into memory.
#
# Expected layout: <directory_root>/<plant_folder>/<disease_folder>/<image file>
# For every disease folder, at most the first 200 directory entries are considered
# and only those ending in ".jpg" / ".JPG" are converted. Each converted image is
# appended to `image_list`, and the disease folder name is appended to `label_list`
# at the same position.
image_list, label_list = [], []


def list_visible_entries(path):
    """Return the entries of `path` in sorted order, without macOS ``.DS_Store`` files.

    A new list is built instead of calling ``remove()`` on the list being iterated,
    which is fragile because every removal shifts the remaining elements under the
    loop. Sorting makes the traversal (and therefore the 200-entry subset chosen
    below) reproducible, since ``os.listdir`` returns entries in arbitrary order.
    """
    return sorted(entry for entry in listdir(path) if entry != ".DS_Store")


try:
    print("[INFO] Loading images ...")

    # Top-level folders of the dataset (e.g. plant types)
    root_dir = list_visible_entries(directory_root)

    for plant_folder in root_dir:
        plant_path = f"{directory_root}/{plant_folder}"

        # Disease-specific folders inside the current plant folder
        plant_disease_folder_list = list_visible_entries(plant_path)

        for plant_disease_folder in plant_disease_folder_list:
            print(f"[INFO] Processing {plant_disease_folder} ...")

            disease_path = f"{plant_path}/{plant_disease_folder}"
            plant_disease_image_list = list_visible_entries(disease_path)

            # Cap the number of directory entries examined per class. The cap is
            # applied before the extension filter, so non-JPG entries count too.
            # The loop variable is deliberately NOT called `image`: that name is
            # bound to the `keras.preprocessing.image` module by the import cell,
            # and rebinding it here would leave a plain string in its place.
            for image_name in plant_disease_image_list[:200]:
                image_directory = f"{disease_path}/{image_name}"

                # Only process JPG files
                if image_directory.endswith((".jpg", ".JPG")):
                    # convert_image_to_array is defined outside this cell
                    image_array = convert_image_to_array(image_directory)
                    image_list.append(image_array)

                    # The label is the name of the disease folder
                    label_list.append(plant_disease_folder)

    print("[INFO] Image loading completed")

except Exception as e:
    # Best-effort loading: report the problem instead of stopping the notebook.
    # NOTE: an error aborts the traversal, so the lists may be only partially filled.
    print(f"Error : {e}")


In [None]:
# Record how many entries the loading step collected in image_list
# (replaces the placeholder value 0 assigned in the configuration cell)
image_size = len(image_list)

In [None]:
# Encode the string labels collected during loading as a binary indicator matrix
label_binarizer = LabelBinarizer()

# Learn the set of classes from label_list and transform the labels in one step
image_labels = label_binarizer.fit_transform(label_list)

# Save the fitted label binarizer so it can be reused later (e.g., during inference).
# The context manager guarantees the file is flushed and closed even if pickling
# fails; the previous bare open() call never closed its handle.
with open('label_transform.pkl', 'wb') as label_transform_file:
    pickle.dump(label_binarizer, label_transform_file)

# Total number of unique classes found in label_list
n_classes = len(label_binarizer.classes_)


In [None]:
# Display the class labels the binarizer learned from label_list
print(label_binarizer.classes_)

In [None]:
# Stack the loaded images into one float16 array and scale the pixel values.
# The divisor is 255.0, the maximum of an 8-bit channel, so values land in [0, 1];
# the previous divisor 225.0 was a typo that let values reach ~1.13.
# NOTE(review): assumes image_list holds equally shaped arrays of 0-255 pixel
# values - confirm against convert_image_to_array. Any inference code that
# still divides by 225.0 must be updated to match.
np_image_list = np.array(image_list, dtype=np.float16) / 255.0

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle performed by train_test_split repeatable between runs.
print("[INFO] Spliting data to train, test")
x_train, x_test, y_train, y_test = train_test_split(
    np_image_list,
    image_labels,
    test_size=0.2,
    random_state=42,
)