In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Function to read images from double folders and label them based on folder names
def read_images_and_label(folder_path):
    image_data = []
    labels = []

    # Create a label encoder to convert string labels to numerical values
    label_encoder = LabelEncoder()

    # Iterate through the first-level folders in the specified path
    for first_level_label in os.listdir(folder_path):
        first_level_path = os.path.join(folder_path, first_level_label)

        # Check if the item in the directory is a folder
        if os.path.isdir(first_level_path):
            # Iterate through second-level folders (subfolders) in the first-level folder
            for second_level_label in os.listdir(first_level_path):
                second_level_path = os.path.join(first_level_path, second_level_label)

                # Check if the item in the directory is a folder
                if os.path.isdir(second_level_path):
                    # Iterate through images in the second-level folder
                    for image_file in os.listdir(second_level_path):
                        image_path = os.path.join(second_level_path, image_file)

                        # Read the image using OpenCV
                        img = cv2.imread(image_path)

                        # Resize the image to a consistent size (e.g., 128x128)
                        img = cv2.resize(img, (64, 64))

                        # Normalize pixel values to the range [0, 1]
                        img = img.astype('float32') / 64

                        # Append the image data and corresponding label
                        image_data.append(img)
                        labels.append(first_level_label)  # Use the second-level folder name as the label

    # Convert labels to numerical values using label encoder
    encoded_labels = label_encoder.fit_transform(labels)

    return np.array(image_data), np.array(encoded_labels)

# Specify the path to the main folder containing double folders of images
main_folder_path = "C:\\Users\\glowi\Desktop\\SYNC_internship\\Plant disease\\archive\\plantvillage dataset"



In [2]:
# Call the function to read images and labels
images, labels = read_images_and_label(main_folder_path)


In [3]:
images


array([[[[3.09375 , 2.578125, 2.640625],
         [3.0625  , 2.546875, 2.609375],
         [3.09375 , 2.578125, 2.640625],
         ...,
         [3.34375 , 2.890625, 2.9375  ],
         [3.34375 , 2.890625, 2.9375  ],
         [3.3125  , 2.859375, 2.90625 ]],

        [[3.140625, 2.625   , 2.6875  ],
         [3.15625 , 2.640625, 2.703125],
         [3.125   , 2.609375, 2.671875],
         ...,
         [3.328125, 2.875   , 2.921875],
         [3.3125  , 2.859375, 2.90625 ],
         [3.296875, 2.84375 , 2.890625]],

        [[3.0625  , 2.546875, 2.609375],
         [3.078125, 2.5625  , 2.625   ],
         [3.125   , 2.609375, 2.671875],
         ...,
         [3.296875, 2.84375 , 2.890625],
         [3.265625, 2.8125  , 2.859375],
         [3.21875 , 2.765625, 2.8125  ]],

        ...,

        [[2.015625, 1.484375, 1.640625],
         [2.      , 1.46875 , 1.625   ],
         [1.96875 , 1.4375  , 1.59375 ],
         ...,
         [2.09375 , 1.671875, 1.921875],
         [2.15625 , 1.

In [4]:
labels

array([0, 0, 0, ..., 2, 2, 2], dtype=int64)

0 is colored
1 is grayscaled
2 is segmented


In [5]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

In [6]:

X_train, X_test, y_train, y_test = train_test_split(images, encoded_labels, test_size=0.3, random_state=42, shuffle = True)


In [7]:
# Define a learning rate scheduler
def lr_schedule(epoch):
    initial_lr = 0.0001
    drop = 0.5
    epochs_drop = 5
    lr = initial_lr * (drop ** (epoch // epochs_drop))
    return lr

# Create a learning rate scheduler callback
lr_scheduler = LearningRateScheduler(lr_schedule)

In [8]:
# Create a CNN model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    layers.BatchNormalization(),  
    
    layers.Dropout(0.2),
    
    layers.Flatten(),
    
    layers.Dropout(0.2),
    
    layers.Dense(128, activation='relu'),
    
    layers.Dense(64, activation='relu'),
    
    layers.Dense(32, activation='relu'),
    
    layers.Dense(len(label_encoder.classes_), activation='softmax')  # Output layer with softmax for classification
])

# Specify the optimizer with gradient clipping
optimizer = Adam(clipvalue=0.5)

# Compile the model
model.compile(optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
Train_History=model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), callbacks=[lr_scheduler])

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)

#Saving the model
model.save('CNN_Choosing_PlantDisease_64.h5')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
1528/1528 - 20s - loss: 0.0347 - accuracy: 0.9839 - 20s/epoch - 13ms/step

Test accuracy: 0.9838567972183228
