In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pickle
import cv2
import os
from os import listdir

from sklearn.preprocessing import LabelBinarizer,MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.svm import SVC

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input,Conv2D,Activation,LeakyReLU,BatchNormalization,MaxPooling2D,Flatten,Dense,Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array

In [46]:
# Mount Google Drive inside the Colab VM; the dataset path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [47]:
# Root folder of the dataset on the mounted Drive; each sub-folder name is used as a class label later on.
root_dir = '/content/drive/My Drive/PlantVillage'

# Make the dataset folder the working directory, then list what it contains.
os.chdir(root_dir)
os.listdir(os.curdir)

['Pepper__bell___Bacterial_spot',
 'Pepper__bell___healthy',
 'Potato___Early_blight',
 'Potato___healthy',
 'Potato___Late_blight',
 'Tomato_Bacterial_spot',
 'Tomato_Early_blight',
 'Tomato_healthy',
 'Tomato_Late_blight',
 'Tomato_Leaf_Mold',
 'Tomato_Septoria_leaf_spot',
 'Tomato_Spider_mites_Two_spotted_spider_mite',
 'Tomato__Target_Spot',
 'Tomato__Tomato_mosaic_virus',
 'Tomato__Tomato_YellowLeaf__Curl_Virus']

In [48]:
# Target size passed to cv2.resize for every loaded image.
DEFAULT_IMAGE_SIZE = (256, 256)

# Upper bound on the number of files taken from each class folder.
# (Author's note: most folders hold up to 500 images, about 7000 in total.)
N_IMAGES = 50

# The class folders sit directly under the dataset root.
data_dir = root_dir

# Resize an image to the size `DEFAULT_IMAGE_SIZE`
def convert_image_to_array(image_dir):
    """Read an image file and return it as an array resized to DEFAULT_IMAGE_SIZE.

    Args:
        image_dir: Path of the image file; it is handed to ``cv2.imread``.

    Returns:
        The result of ``img_to_array`` on the resized image, or an empty
        numpy array (``size == 0``) when the file cannot be read or when any
        step raises. The same sentinel is used for both failure modes so that
        callers can always test ``.size``.
    """
    try:
        image = cv2.imread(image_dir)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            return np.array([])
        image = cv2.resize(image, DEFAULT_IMAGE_SIZE)
        return img_to_array(image)
    except Exception as e:
        print(f"Error : {e}")
        # Return the empty-array sentinel instead of None: callers check `.size`.
        return np.array([])

In [49]:
# Count the entries directly under the dataset root (one per class folder).
print("Load images from all classes ...")
plant_disease_folder_list = os.listdir(data_dir)
print(len(plant_disease_folder_list))

Load images from all classes ...
15


# Loading Training & Validation Data

In [50]:
# Accumulators filled by the loading loop: one image array and one label per loaded file.
image_list, label_list = [], []

# NOTE(review): this redefines the helper of the same name declared earlier in
# the notebook; the later definition is the one in effect from here on.
def convert_image_to_array(image_path):
    """Load `image_path` with OpenCV and return it resized to DEFAULT_IMAGE_SIZE.

    Returns the output of `img_to_array` for the resized image. When the file
    cannot be read, or any step raises, a message is printed and an empty
    numpy array is returned instead.
    """
    try:
        raw = cv2.imread(image_path)
        if raw is None:
            print(f"⚠️ cv2 failed to read: {image_path}")
            return np.array([])
        resized = cv2.resize(raw, DEFAULT_IMAGE_SIZE)
        return img_to_array(resized)
    except Exception as err:
        print(f"Error converting image: {err}")
        return np.array([])

# Walk every class folder under `data_dir` and load up to N_IMAGES images from each.
print("Loading images...")
image_extensions = (".jpg", ".jpeg", ".png")

# listdir() returns entries in arbitrary order; sorting makes the selected
# sample (and therefore the seeded train/test split) reproducible across runs.
for folder in sorted(listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    if not os.path.isdir(folder_path):
        continue

    print(f"📁 Processing folder: {folder}")
    # Filter by extension BEFORE truncating, so that stray non-image files do
    # not eat into the per-class quota of N_IMAGES.
    image_filenames = [
        name for name in sorted(listdir(folder_path))
        if name.lower().endswith(image_extensions)
    ]
    for image_filename in image_filenames[:N_IMAGES]:
        image_path = os.path.join(folder_path, image_filename)
        image_array = convert_image_to_array(image_path)
        # Skip failed loads: the helper signals failure with an empty array
        # (None is tolerated as well).
        if image_array is not None and image_array.size != 0:
            image_list.append(image_array)
            label_list.append(folder)  # the folder name is the class label

print("✅ Finished loading!")
print("Total images loaded:", len(image_list))

Loading images...
📁 Processing folder: Pepper__bell___Bacterial_spot
📁 Processing folder: Pepper__bell___healthy
📁 Processing folder: Potato___Early_blight
📁 Processing folder: Potato___healthy
📁 Processing folder: Potato___Late_blight
📁 Processing folder: Tomato_Bacterial_spot
📁 Processing folder: Tomato_Early_blight
📁 Processing folder: Tomato_healthy
📁 Processing folder: Tomato_Late_blight
📁 Processing folder: Tomato_Leaf_Mold
📁 Processing folder: Tomato_Septoria_leaf_spot
📁 Processing folder: Tomato_Spider_mites_Two_spotted_spider_mite
📁 Processing folder: Tomato__Target_Spot
📁 Processing folder: Tomato__Tomato_mosaic_virus
📁 Processing folder: Tomato__Tomato_YellowLeaf__Curl_Virus
✅ Finished loading!
Total images loaded: 750


In [52]:
# Stack the loaded images into a single float16 array and divide by 255.
np_image_list = np.asarray(image_list, dtype=np.float16) / 255.0

# Report how many images were loaded.
image_len = len(image_list)
print(f"Total number of images: {image_len}")

Total number of images: 750


# One-Hot Encoding the labels

In [53]:
# Fit a binarizer on the folder-name labels and encode every label with it.
label_binarizer = LabelBinarizer().fit(label_list)
image_labels = label_binarizer.transform(label_list)

# pickle.dump(label_binarizer,open('plant_disease_label_transform.pkl', 'wb'))

# Number of distinct classes seen by the binarizer.
n_classes = len(label_binarizer.classes_)
print("Total number of classes: ", n_classes)

Total number of classes:  15


In [54]:
# Augmentation generator: random geometric transformations, used later through `augment.flow(...)`.
augment = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

In [55]:
# Hold out 20% of the samples; the fixed seed keeps the split repeatable for identical input order.
x_train, x_test, y_train, y_test = train_test_split(
    np_image_list,
    image_labels,
    test_size=0.2,
    random_state=42,
)
print('Successfully split data into TRAIN & TEST')

Successfully split data into TRAIN & TEST


In [56]:
# Build Model: training hyper-parameters.
# NOTE(review): in the cells visible here STEPS is never referenced and the
# fit call passes a literal epoch count rather than EPOCHS; confirm intent.
EPOCHS, STEPS = 10, 100
LR = 1e-3
BATCH_SIZE = 32

# Geometry of the network input (matches the resize target used when loading).
HEIGHT = WIDTH = 256
DEPTH = 3

In [96]:
# Build a Sequential CNN for multi-class classification.

# Default layout is channels-last: (height, width, channels), normalizing over the last axis.
input_shape = (HEIGHT, WIDTH, DEPTH)
chanDim = -1

# With a channels-first backend both the input shape and the BatchNormalization
# axis must change. The shape has to be written to `input_shape`, which is the
# name the first layer reads.
if K.image_data_format() == "channels_first":
    input_shape = (DEPTH, HEIGHT, WIDTH)
    chanDim = 1

model = Sequential()

# Block 1: one 32-filter convolution, 3x3 pooling.
model.add(Conv2D(32, (3, 3), padding="same",input_shape=input_shape))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))

# Block 2: two 64-filter convolutions, 2x2 pooling.
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=chanDim))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Block 3: two 128-filter convolutions, 2x2 pooling.
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=chanDim))

model.add(Conv2D(128, (3, 3), padding="same"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())

# Classifier head. The Dense layer is named so that its output can be looked
# up with `model.get_layer('my_dense')` for feature extraction.
model.add(Dense(1024, name = 'my_dense'))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# One output unit per class, turned into probabilities by softmax.
model.add(Dense(n_classes))
model.add(Activation("softmax"))

model.summary()

## Feature Extraction from the Dense Layer

In [97]:
# Call the model once on an all-zero image to initialize it before `model.input` is accessed.
# NOTE(review): the dummy shape is written channels-last; confirm if a channels-first backend is ever used.
dummy_input = np.zeros(shape=(1, HEIGHT, WIDTH, DEPTH))
model.predict(dummy_input)

# Sub-model that stops at the 'my_dense' layer. That layer is declared without
# an activation, so its output is taken before the LeakyReLU that follows it.
feature_layer = model.get_layer('my_dense')
intermediate_layer_model = Model(inputs=model.input, outputs=feature_layer.output)
intermediate_layer_model.summary()

In [None]:
# Train Model
# Initialize optimizer.
# NOTE(review): the `decay` keyword is not accepted by every Keras optimizer
# implementation; confirm it is supported by the installed version.
opt = Adam(learning_rate=LR, decay=LR / EPOCHS)

# Compile model.
# The network ends in a softmax over `n_classes` units and the labels come from
# LabelBinarizer (one column per class), so the matching loss is categorical
# cross-entropy. Binary cross-entropy would score each output unit as an
# independent yes/no problem, which does not fit a single-label softmax output.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

# Train model on augmented batches.
# NOTE(review): the epoch count is the literal 20 while EPOCHS is 10 (and EPOCHS
# feeds the decay above); confirm which value is intended.
# NOTE(review): the held-out test split doubles as validation data here.
print("Training CNN...")
history = model.fit(augment.flow(x_train, y_train, batch_size=BATCH_SIZE),
                              validation_data=(x_test, y_test),
                              epochs=20,
                              verbose=1)

# scores[0] is the loss, scores[1] the accuracy metric requested at compile time.
scores = model.evaluate(x_test, y_test)
print(f"Test Accuray: {scores[1]*100}")

In [None]:
# Evaluate Model
# Per-epoch curves recorded by `model.fit`.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accurarcy')
plt.plot(epochs, val_acc, 'r', label='Validation accurarcy')
plt.title('Training and Validation accurarcy')
plt.legend()
# Open a second figure so the loss curves are not drawn over the accuracy curves.
plt.figure()

# Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
# Without this call the labels given to plot() are never displayed on the loss figure.
plt.legend()
plt.show()

# Evaluate model accuracy using the `evaluate` method
print("[INFO] Calculating model accuracy")
scores = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {scores[1]*100}")

In [None]:
# Run both splits through the truncated network; its 'my_dense' outputs become
# the feature vectors for the classical classifiers below.
x_train_predict = intermediate_layer_model.predict(x_train)
print(f"shape of x_train is :{x_train_predict.shape}")
x_test_predict = intermediate_layer_model.predict(x_test)
print(f"shape of x_test is :{x_test_predict.shape}")

# Support Vector Machine

In [None]:
# SVC expects one class index per sample, so collapse the one-hot rows with argmax.
train_class_ids = y_train.argmax(axis=1)
svm = SVC(kernel='rbf').fit(x_train_predict, train_class_ids)
print('SVM Fit Complete')

In [None]:
# Mean accuracy of the SVM on the training features.
svm.score(x_train_predict, y_train.argmax(axis=1))

In [None]:
# Mean accuracy of the SVM on the held-out features.
svm.score(x_test_predict, y_test.argmax(axis=1))

In [None]:
# Predicted class indices for the held-out features, wrapped in a DataFrame for display.
Pred_labels = pd.DataFrame(svm.predict(x_test_predict), index=None)
Pred_labels.head()

# XGBoost Integration

In [None]:
# Train a gradient-boosted classifier on the CNN features.
# The targets are class indices obtained by collapsing the one-hot rows with argmax.
xb = xgb.XGBClassifier(use_label_encoder=False)
xb.fit(x_train_predict,np.argmax(y_train,axis=1))
# Completion message, in the same form as the one printed after the SVM fit.
print('XGBoost Fit Complete')

In [None]:
# Mean accuracy of the XGBoost classifier on the training features.
xb.score(x_train_predict, y_train.argmax(axis=1))

In [None]:
# Mean accuracy of the XGBoost classifier on the held-out features.
xb.score(x_test_predict, y_test.argmax(axis=1))

In [None]:
# Predicted class indices from XGBoost for the held-out features, wrapped in a DataFrame for display.
Pred_labels = pd.DataFrame(xb.predict(x_test_predict), index=None)
Pred_labels.head()