#### **Plant Disease Prediction Using Deep Learning Convolutional Neural Networks (CNN) and Keras**

#### **Purpose:** This project looks to develop a deep learning model built with TensorFlow and Keras for classifying plant diseases from images using Convolutional Neural Networks (CNN).

##### **Source:** New Plant Diseases dataset from Kaggle.

##### **Author:** Bello Oluwatobi

##### **Date:** December 25, 2025

### #1 Importing Libraries

In [None]:
# importing the required libraries
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os

In [None]:
# Ask TensorFlow which GPUs are visible and report whether the notebook
# will run on GPU or fall back to CPU
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU found. Running on CPU.")
else:
    print(f"GPU detected: {len(physical_devices)} device(s) available.")

### #2 Downloading and unzipping the dataset

In [None]:
# Google Drive direct-download link for the New Plant Diseases dataset archive
_URL = 'https://drive.usercontent.google.com/download?id=1d8q3147YuLAms4hPzANMRw9sCUSMwBiw&export=download&authuser=0&confirm=t'

# Download the archive and extract it under ../unzipped_dataset/
# (a path relative to the notebook's working directory, not an absolute one)
path_to_zip = tf.keras.utils.get_file(
    origin=_URL,
    fname='new-plant-diseases-dataset.zip',
    cache_dir='../',
    cache_subdir='unzipped_dataset',
    extract=True,
)

### #3 Data Preprocessing

##### Training Image Preprocessing

In [None]:
# Build the training dataset from the class-named sub-folders of train/:
# one-hot ("categorical") labels, RGB images resized to 128x128, shuffled batches of 32
training_set = tf.keras.utils.image_dataset_from_directory(
    directory="../unzipped_dataset/new-plant-diseases-dataset_extracted/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)/train",
    # labelling: class labels are inferred from the folder names
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # image decoding and resizing
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    # batching and ordering
    batch_size=32,
    shuffle=True,
    seed=None,
    # no split is carved out here; validation images are loaded from valid/ separately
    validation_split=None,
    subset=None,
    # directory traversal and logging
    follow_links=False,
    verbose=True,
)

##### Validation Image Preprocessing

In [None]:
# Build the validation dataset from the class-named sub-folders of valid/,
# using the same label encoding, image size and batch size as the training set
validation_set = tf.keras.utils.image_dataset_from_directory(
    directory="../unzipped_dataset/new-plant-diseases-dataset_extracted/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)/valid",
    # labelling: class labels are inferred from the folder names
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # image decoding and resizing
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    # batching and ordering
    batch_size=32,
    shuffle=True,
    seed=None,
    # the whole folder is used; no further split is taken from it
    validation_split=None,
    subset=None,
    # directory traversal and logging
    follow_links=False,
    verbose=True,
)

In [None]:
# Display the training dataset object (the last expression of a notebook cell is
# echoed) to confirm it was created and inspect its element specification
training_set

### #4 Building Model

In [None]:
# importing components for building a Sequential CNN architecture
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Create an empty Sequential model; layers are appended one by one in the cells below
model = Sequential()

##### Building Convolution Layers

In [None]:
# Convolution block 1: two 3x3 convolutions with 32 filters each, then 2x2 max-pooling.
# The first layer also declares the 128x128 RGB input shape; only the first
# convolution uses 'same' padding, the second keeps the layer's default padding.
model.add(
    Conv2D(
        filters=32,
        kernel_size=3,
        padding='same',
        activation='relu',
        input_shape=(128, 128, 3),
    )
)
model.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=2, strides=2))

In [None]:
# Convolution block 2: filter count doubled to 64 (two 3x3 convolutions),
# followed by 2x2 max-pooling that halves the spatial resolution again
model.add(
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')
)
model.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=2, strides=2))

In [None]:
# Convolution block 3: two 3x3 convolutions with 128 filters each,
# followed by 2x2 max-pooling
model.add(
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')
)
model.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=2, strides=2))

In [None]:
# Convolution block 4: two 3x3 convolutions with 256 filters each,
# followed by 2x2 max-pooling
model.add(
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu')
)
model.add(Conv2D(filters=256, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=2, strides=2))

In [None]:
# Convolution block 5: the deepest block, two 3x3 convolutions with 512 filters each,
# followed by the final 2x2 max-pooling
model.add(
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu')
)
model.add(Conv2D(filters=512, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=2, strides=2))

In [None]:
# Regularisation after the convolutional stack: randomly drop 25% of the
# activations during training to reduce overfitting
model.add(Dropout(rate=0.25))

In [None]:
# Flatten the 3D feature maps (height x width x channels) into a 1D vector per image
# so they can be fed to the fully connected layers
model.add(Flatten())

In [None]:
# Fully connected hidden layer with 1500 ReLU units that combines the flattened features
model.add(Dense(units=1500, activation='relu'))

In [None]:
# Regularisation before the output layer: randomly drop 40% of the dense
# layer's activations during training to reduce overfitting
model.add(Dropout(rate=0.4))

In [None]:
# Output layer: 38 units (one per plant disease class) with softmax, so each
# prediction is a probability distribution over the classes.
# NOTE(review): 38 is hard-coded; it must match the number of class folders in the dataset.
model.add(Dense(units=38, activation='softmax'))

### #5 Compiling Model

In [None]:
# Configure training: Adam optimizer with a 1e-4 learning rate, categorical
# cross-entropy loss (the labels are loaded as "categorical") and accuracy as the metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model (layers, output shapes, parameter counts)
model.summary()

### #6 Model Training

In [None]:
# Train for 10 epochs on the training set, with the validation set passed as
# validation data; the returned object's .history holds the per-epoch metrics
training_history = model.fit(
    x=training_set,
    validation_data=validation_set,
    epochs=10,
)

### #7 Model Evaluation

In [None]:
# Evaluate the trained model on the full training set; the result is unpacked as
# the loss followed by the accuracy metric configured in model.compile
train_loss, train_acc = model.evaluate(training_set)

In [None]:
# Print the accuracy and loss measured on the training set
print(f"Training Accuracy: {train_acc}")
print(f"Training Loss: {train_loss}")

In [None]:
# Evaluate the trained model on the validation set; the result is unpacked as
# the loss followed by the accuracy metric configured in model.compile
val_loss, val_acc = model.evaluate(validation_set)

In [None]:
# Print the accuracy and loss measured on the validation set
print(f"Validation Accuracy: {val_acc}")
print(f"Validation Loss: {val_loss}")

### #8 Saving Model

In [None]:
# Export the trained model to storage in the native Keras format.
# The target folder is created first (no-op if it already exists) because
# saving raises an error when the parent directory is missing.
os.makedirs("../trained_model", exist_ok=True)
model.save("../trained_model/plant_disease_trained_model.keras")

In [None]:
# Display the dictionary of per-epoch values recorded during model.fit
# (the plotting cells read its 'accuracy', 'val_accuracy', 'loss' and 'val_loss' entries)
training_history.history

In [None]:
# Store the training history as JSON so the curves can be re-plotted later
import json

# Create the output folder first (no-op if it already exists); opening the
# file for writing fails with FileNotFoundError when the folder is missing.
os.makedirs("../training_history", exist_ok=True)
with open("../training_history/plant_disease_training_history.json", "w") as f:
    json.dump(training_history.history, f)

### #9 Accuracy and Loss Visualization

In [None]:
# Visualize the model's accuracy across training and validation sets.
# The epoch axis is derived from the number of recorded values rather than
# hard-coded, so the plot stays correct if the epoch count in model.fit changes.
epochs = list(range(1, len(training_history.history['accuracy']) + 1))
plt.plot(epochs, training_history.history['accuracy'], color='red', label='Training Accuracy')
plt.plot(epochs, training_history.history['val_accuracy'], color='blue', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
import json

# Load the saved history from disk into its own name. Rebinding
# `training_history` here would replace the object returned by model.fit with a
# plain dict and break any cell that reads `training_history.history` on a re-run.
with open('../training_history/plant_disease_training_history.json', 'r') as file:
    saved_history = json.load(file)


# Visualize the model's loss across training and validation sets.
# The epoch axis is derived from the number of recorded values instead of a fixed 10.
epochs = list(range(1, len(saved_history['loss']) + 1))
plt.plot(epochs, saved_history['loss'], color='green', label='Training Loss')
plt.plot(epochs, saved_history['val_loss'], color='purple', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

### #10 Other Model Evaluation Metrics

In [None]:
# Read the list of class names from the validation dataset
# (expected to be the 38 plant disease classes) and display it
class_names = validation_set.class_names
class_names

In [None]:
# Reload the valid/ folder as a stand-in test set for the detailed metrics below.
# Shuffling is turned off so the dataset yields images in a fixed order, which keeps
# the predictions and the labels collected in later cells aligned.
test_valid_set = tf.keras.utils.image_dataset_from_directory(
    directory="../unzipped_dataset/new-plant-diseases-dataset_extracted/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)/valid",
    # labelling: class labels are inferred from the folder names
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # image decoding and resizing
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    # batching and ordering (no shuffling, see note above)
    batch_size=32,
    shuffle=False,
    seed=None,
    # the whole folder is used; no further split is taken from it
    validation_split=None,
    subset=None,
    # directory traversal and logging
    follow_links=False,
    verbose=True,
)

In [None]:
# Run the trained model over the unshuffled test dataset to get one row of
# class probabilities per image, then display the predictions and their shape
y_pred = model.predict(test_valid_set)
y_pred, y_pred.shape

In [None]:
# Select the predicted class for each image: argmax along axis 1 returns, for every
# row of y_pred, the index of the class with the highest predicted probability
predicted_categories = tf.argmax(y_pred, axis=1)

predicted_categories

In [None]:
# Gather the ground-truth label batches from the test dataset, in iteration order,
# and join them along the batch axis into a single tensor for comparison
true_categories = tf.concat(
    [label_batch for _, label_batch in test_valid_set],
    axis=0,
)
true_categories

In [None]:
# Convert the ground-truth labels (loaded with label_mode="categorical") into class
# indices: argmax along axis 1 gives the position of the largest entry in each row
Y_true = tf.argmax(true_categories, axis=1)
Y_true

### #11 Precision, Recall and F1 Scores on the Test Set (Validation Images Reused)

In [None]:
# loading metrics to calculate F1-score and category-specific accuracy
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Print per-class precision, recall and F1-score, labelling each row with its class name
print(
    classification_report(
        y_true=Y_true,
        y_pred=predicted_categories,
        target_names=class_names,
    )
)

### #12 Confusion Matrix

In [None]:
# Cross-tabulate actual class indices (rows) against predicted class indices (columns);
# with 38 classes this yields a 38x38 matrix, displayed below
cm = confusion_matrix(y_true=Y_true, y_pred=predicted_categories)
cm

##### Confusion Matrix Visualization

In [None]:
# Visualize the confusion matrix to compare predicted labels against actual labels.
# Both axes are labelled with the class names instead of bare integer indices so
# individual cells can be read without looking up the class order.
plt.figure(figsize=(40, 40))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    annot_kws={"size": 10},
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted Class', fontsize=20)
plt.ylabel('Actual Class', fontsize=20)
plt.title('Plant Disease Prediction Confusion Matrix', fontsize=25)
plt.show()