In [None]:
# Restart the runtime
# !kill -9 -1

In [1]:
from google.colab import drive  # 40s

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# 1. Import Libraries 5s
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import matplotlib.pyplot as plt

In [3]:
# 2. Dataset Loading

dataset_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/a_novel_groundnut_leaf_dataset"
# train_dir = os.path.join(dataset_path, "train")
# test_dir = os.path.join(dataset_path, "test")

# Get the class names (disease names)
class_names = os.listdir(dataset_path)
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")
print(f"Class names: {class_names}")

Number of classes: 5
Class names: ['RUST', 'LEAF SPOT (EARLY AND LATE)', 'HEALTHY', 'ALTERNARIA LEAF SPOT', 'ROSETTE']


In [4]:
import os

def count_images_in_subfolders(folder_path):
    image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".gif"}
    folder_image_counts = {}

    # Loop through each subfolder inside the given folder
    for sub_folder in os.listdir(folder_path):
        sub_folder_path = os.path.join(folder_path, sub_folder)

        if os.path.isdir(sub_folder_path):  # Ensure it's a folder
            image_count = sum(1 for file in os.listdir(sub_folder_path)
                              if os.path.splitext(file)[1].lower() in image_extensions)
            folder_image_counts[sub_folder] = image_count

    return folder_image_counts

folder_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/a_novel_groundnut_leaf_dataset"
folder_counts = count_images_in_subfolders(folder_path)
for folder_name, count in folder_counts.items():
    print(f"Folder '{folder_name}' has {count} images.")

Folder 'RUST' has 120 images.
Folder 'LEAF SPOT (EARLY AND LATE)' has 450 images.
Folder 'HEALTHY' has 600 images.
Folder 'ALTERNARIA LEAF SPOT' has 450 images.
Folder 'ROSETTE' has 100 images.


In [5]:
# 3. Data Preprocessing (6m for 2160 imgs)

images = []
labels = []

for class_name in class_names:
    class_dir = os.path.join(dataset_path, class_name)
    for filename in os.listdir(class_dir):
        if filename.endswith((".jpg", ".png", ".jpeg")):
            img_path = os.path.join(class_dir, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, (224, 224))

                height, width = img.shape[:2]
                size = min(height, width)
                x = (width - size) // 2
                y = (height - size) // 2
                img = img[y : y + size, x : x + size]

                original_shape = img.shape
                image_2d = img.reshape(-1, 3)
                scaler = MinMaxScaler(feature_range=(0, 1))
                normalized_2d = scaler.fit_transform(image_2d)
                normalized_image = normalized_2d.reshape(original_shape)

                images.append(normalized_image)
                labels.append(class_name)
            else:
                print(f"Error reading image: {img_path}")

images = np.array(images)
labels = np.array(labels)
print(images.shape)
print(labels.shape)

(1720, 224, 224, 3)
(1720,)


In [6]:
# 4. Label Encoding
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)
print(labels_encoded)
print(set(labels_encoded))

[4 4 4 ... 3 3 3]
{0, 1, 2, 3, 4}


In [7]:
# 5. Split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    images, labels_encoded, test_size=0.2, random_state=42, stratify=labels_encoded
)
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, random_state=42, stratify=y_test
)

print("Train data shape:", X_train.shape)
print("Validation data shape:", X_val.shape)
print("Test data shape:", X_test.shape)

Train data shape: (1376, 224, 224, 3)
Validation data shape: (172, 224, 224, 3)
Test data shape: (172, 224, 224, 3)


In [8]:
# 3. Data Augmentation (Important for small datasets):
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

datagen.fit(X_train)  # Fit the datagen on the training data

In [9]:
model = tf.keras.models.Sequential(   # 3s
    [
        tf.keras.layers.Conv2D(
            32, (3, 3), activation="relu", input_shape=(224, 224, 3)
        ),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation="softmax"),
    ]
)

model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# 7. Model Training # 1m
epochs = 30
batch_size = 32

history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
)

Epoch 1/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 114ms/step - accuracy: 0.3306 - loss: 1.9382 - val_accuracy: 0.3547 - val_loss: 1.3685
Epoch 2/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 55ms/step - accuracy: 0.3745 - loss: 1.3526 - val_accuracy: 0.3663 - val_loss: 1.1999
Epoch 3/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.4767 - loss: 1.1539 - val_accuracy: 0.6337 - val_loss: 0.8818
Epoch 4/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.5926 - loss: 0.9478 - val_accuracy: 0.6570 - val_loss: 0.9284
Epoch 5/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - accuracy: 0.6741 - loss: 0.8143 - val_accuracy: 0.7558 - val_loss: 0.6536
Epoch 6/30
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 59ms/step - accuracy: 0.7501 - loss: 0.6325 - val_accuracy: 0.7151 - val_loss: 0.6424
Epoch 7/30
[1m43/43[0m [32m━

In [11]:
# 8. Model Evaluation:

model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss * 100}")
print(f"Test Accuracy: {accuracy * 100}")

Test Loss: 82.86530375480652
Test Accuracy: 81.39534592628479


In [12]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

y_pred = model.predict(X_test)

y_pred_classes = y_pred.argmax(axis=-1)
y_true = y_test

precision = precision_score(y_true, y_pred_classes, average='weighted')  # For multi-class classification
recall = recall_score(y_true, y_pred_classes, average='weighted')
f1 = f1_score(y_true, y_pred_classes, average='weighted')
accuracy = accuracy_score(y_true, y_pred_classes)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Accuracy: {accuracy * 100:.2f}%")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step
Precision: 0.8165
Recall: 0.8140
F1-Score: 0.8132
Accuracy: 81.40%


In [14]:
# 7. Saving the Model and Label Encoder:

import pickle
models_dir = "/content/drive/MyDrive//Projects/Plant Disease Prediction/saved_models"
os.makedirs(models_dir, exist_ok=True)

model_save_path = os.path.join(models_dir, "81acc_groundnut_disease_model.keras")
model.save(model_save_path)

le_save_path = os.path.join(models_dir, "label_encoder.pkl")

with open(le_save_path, "wb") as f:
    pickle.dump(le, f)

# print(f"Model saved to: {model_save_path}")
print(f"Label encoder saved to: {le_save_path}")

Label encoder saved to: /content/drive/MyDrive//Projects/Plant Disease Prediction/saved_models/label_encoder.pkl


In [None]:
# 9. Plotting Training History

plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [20]:
import cv2
import numpy as np
from tensorflow.keras.preprocessing import image
from sklearn.preprocessing import MinMaxScaler

def preprocess_image(img_path):
    # Load image
    img = cv2.imread(img_path)
    img = cv2.resize(img, (224, 224))  # Resize image to (224, 224)
    height, width = img.shape[:2]
    size = min(height, width)
    x = (width - size) // 2
    y = (height - size) // 2
    img = img[y : y + size, x : x + size]  # Crop image to square

    # Normalize the image
    original_shape = img.shape
    image_2d = img.reshape(-1, 3)
    scaler = MinMaxScaler(feature_range=(0, 1))
    normalized_2d = scaler.fit_transform(image_2d)
    normalized_image = normalized_2d.reshape(original_shape)

    # Add an extra dimension for batch size (1 image in this case)
    normalized_image = np.expand_dims(normalized_image, axis=0)

    return normalized_image

# Path to the input image
# img_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/Groundnut_Leaf_dataset/test/nutriti=on_deficiency_1/dr_4_9961.jpg"
# img_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/Groundnut_Leaf_dataset/test/rust_1/IMG_9015.jpg"
# img_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/Groundnut_Leaf_dataset/test/healthy_leaf_1/dr_4_1013.jpg"
img_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/Groundnut_Leaf_dataset/test/late_leaf_spot_1/dr_4_9915.jpg"

# Preprocess the image
input_img = preprocess_image(img_path)

# Make prediction
predictions = model.predict(input_img)
predicted_class = np.argmax(predictions)  # Get class with highest probability
confidence = np.max(predictions)  # Get confidence score

print(f"Predicted Class: {predicted_class}, Confidence: {confidence * 100:.2f}%")

import pickle

# Load label encoder
le_path = "/content/drive/MyDrive/Projects/Plant Disease Prediction/saved_models/label_encoder.pkl"
with open(le_path, "rb") as f:
    le = pickle.load(f)

# Get the disease name
predicted_label = le.inverse_transform([predicted_class])[0]
print(f"Predicted Disease: {predicted_label}, Confidence: {confidence * 100:.2f}%")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Predicted Class: 2, Confidence: 67.70%
Predicted Disease: LEAF SPOT (EARLY AND LATE), Confidence: 67.70%
