In [2]:
%cd /content/drive/MyDrive/train

/content/drive/MyDrive/train


In [3]:
!ls

kaggle.json  train.ipynb


In [None]:
# ==============================
# Plant Disease Detection System
# ==============================

# -------------------------
# Imports
# -------------------------
import os
import json
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, callbacks
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from PIL import Image

# -------------------------
# Reproducibility
# -------------------------
# A single named seed feeds every RNG seeded here (Python's `random`, NumPy
# and TensorFlow), so changing it in one place changes all three.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# -------------------------
# Global Params
# -------------------------
# Edge length (pixels) of the square images: used as the generators'
# target_size, the model's input_shape, and the Grad-CAM resize.
IMG_SIZE = 224
# Default number of images per batch yielded by the data generators.
BATCH_SIZE = 32
# Default upper bound on training epochs passed to model.fit.
EPOCHS = 10
# Directory handed to flow_from_directory as the image root.
BASE_DIR = "plantvillage dataset/color"
# File the final trained model is saved to.
MODEL_PATH = "plant_disease_prediction_model.h5"

# -------------------------
# Kaggle Dataset Download
# -------------------------
def download_and_extract_dataset(dataset_name="abdallahalidev/plantvillage-dataset", zip_name="plantvillage-dataset.zip"):
    """
    Download a dataset archive from Kaggle and extract it into the working directory.

    Kaggle credentials are read from ``kaggle.json`` in the working directory
    and exported as the ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` environment
    variables. Each step is skipped when its result is already on disk:
    the download when ``zip_name`` exists, the extraction when the
    ``plantvillage dataset`` directory exists.

    Args:
        dataset_name: Kaggle dataset identifier passed to
            ``kaggle datasets download -d``.
        zip_name: Name of the archive expected in the working directory.

    Raises:
        FileNotFoundError: If ``kaggle.json`` is missing (or the ``kaggle``
            executable cannot be found when a download is needed).
        KeyError: If ``kaggle.json`` lacks the ``username`` or ``key`` entry.
        subprocess.CalledProcessError: If the ``kaggle`` command exits with a
            non-zero status.
    """
    import subprocess
    from zipfile import ZipFile

    # Load Kaggle credentials; the context manager closes the file promptly
    # instead of leaving the handle to the garbage collector.
    with open("kaggle.json") as credentials_file:
        kaggle_credentials = json.load(credentials_file)
    os.environ["KAGGLE_USERNAME"] = kaggle_credentials["username"]
    os.environ["KAGGLE_KEY"] = kaggle_credentials["key"]

    # Download dataset
    if not os.path.exists(zip_name):
        print("Downloading dataset from Kaggle...")
        # check=True: fail here with the real cause rather than later with a
        # misleading "file not found" from ZipFile when the download failed.
        subprocess.run(
            ["kaggle", "datasets", "download", "-d", dataset_name, "-p", "."],
            check=True,
        )

    # Extract dataset
    if not os.path.exists("plantvillage dataset"):
        print("Extracting dataset...")
        with ZipFile(zip_name, "r") as zip_ref:
            zip_ref.extractall()
        print("Extraction complete.")

# -------------------------
# Data Preprocessing
# -------------------------
def create_data_generators(base_dir=BASE_DIR, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """
    Create the training and validation directory iterators.

    Training images are rescaled to [0, 1] and randomly augmented. Validation
    images are only rescaled, so validation metrics are measured on
    undistorted images, and are yielded in a fixed (unshuffled) order so that
    predictions made over the iterator line up with ``val_gen.classes``.

    Args:
        base_dir: Root directory containing one sub-directory per class.
        img_size: Edge length images are resized to (square).
        batch_size: Number of images per batch.

    Returns:
        Tuple ``(train_gen, val_gen)`` of one-hot ('categorical') iterators
        over the 'training' and 'validation' subsets respectively.
    """
    # Both generators must use the same fraction so the two subsets are
    # complementary slices of the same file listing.
    validation_split = 0.2

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=validation_split
    )

    # No augmentation here: held-out images must be evaluated as they are.
    val_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split
    )

    train_gen = train_datagen.flow_from_directory(
        base_dir,
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training'
    )

    val_gen = val_datagen.flow_from_directory(
        base_dir,
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        # Keep directory order so model.predict(val_gen) matches val_gen.classes.
        shuffle=False
    )

    return train_gen, val_gen

# -------------------------
# Model Definition
# -------------------------
def build_model(num_classes, img_size=IMG_SIZE):
    """
    Build and compile a frozen EfficientNetB0 backbone with a small classifier head.

    The model accepts images scaled to [0, 1], which is what the rest of this
    pipeline feeds it (``rescale=1./255`` in the generators, ``/ 255.0`` in
    Grad-CAM). Keras' EfficientNet applications normalise their input
    internally and expect raw [0, 255] pixels, so the first layer multiplies
    the input back by 255 before it reaches the pretrained backbone.

    Args:
        num_classes: Number of output classes (size of the softmax layer).
        img_size: Edge length of the square RGB input images.

    Returns:
        A compiled ``Model`` (Adam optimizer, categorical cross-entropy loss,
        accuracy metric) mapping ``(img_size, img_size, 3)`` images to class
        probabilities.
    """
    inputs = layers.Input(shape=(img_size, img_size, 3))
    # Undo the pipeline's 1/255 scaling so the ImageNet weights see the pixel
    # range they were trained for.
    raw_pixels = layers.Rescaling(255.0)(inputs)

    # Passing input_tensor (rather than calling the backbone as a nested
    # layer) keeps the backbone's layers directly in the final model's graph,
    # so they remain reachable through model.layers / model.get_layer.
    base_model = EfficientNetB0(weights="imagenet", include_top=False, input_tensor=raw_pixels)
    base_model.trainable = False  # Freeze base

    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# -------------------------
# Class Weights for Imbalance
# -------------------------
def compute_class_weights(generator):
    """
    Compute "balanced" class weights for the samples held by a generator.

    A class with ``count`` samples gets the weight
    ``n_samples / (n_present_classes * count)``, i.e. the usual balanced
    heuristic in which rarer classes weigh more. The result is keyed by the
    actual class index and covers every index from 0 up to the number of
    classes, so a class without any sample cannot shift the weights of the
    classes after it; such a class gets the neutral weight 1.0.

    Args:
        generator: Object exposing ``classes`` (integer class index of every
            sample) and, optionally, ``num_classes``.

    Returns:
        Dict mapping each class index (int) to its weight (float).

    Raises:
        ValueError: If the generator holds no samples.
    """
    labels = np.asarray(generator.classes).astype(int).ravel()
    if labels.size == 0:
        raise ValueError("Cannot compute class weights: the generator has no samples.")

    # Trust the generator's own class count when it has one, but never use
    # fewer slots than the highest label actually seen.
    declared = int(getattr(generator, "num_classes", 0) or 0)
    num_classes = max(declared, int(labels.max()) + 1)

    counts = np.bincount(labels, minlength=num_classes)
    present = counts > 0

    weights = np.ones(num_classes, dtype=float)
    weights[present] = labels.size / (present.sum() * counts[present])
    return {class_idx: float(weight) for class_idx, weight in enumerate(weights)}

# -------------------------
# Training Pipeline
# -------------------------
def train_model(model, train_gen, val_gen, epochs=EPOCHS):
    """
    Fit the model on the training generator and return the Keras history.

    Training is class-weighted (weights derived from ``train_gen``) and
    validated on ``val_gen`` after every epoch. Four callbacks are attached:
    early stopping on ``val_loss``, learning-rate reduction on a ``val_loss``
    plateau, best-only checkpointing to ``best_model.h5``, and TensorBoard
    logging to ``./logs``.

    Args:
        model: Compiled Keras model to train.
        train_gen: Training data generator.
        val_gen: Validation data generator.
        epochs: Maximum number of epochs to run.

    Returns:
        The object returned by ``model.fit``.
    """
    # Stop after 3 epochs without val_loss improvement, rolling back to the best weights.
    early_stopping = callbacks.EarlyStopping(
        monitor="val_loss", patience=3, restore_best_weights=True
    )
    # Multiply the learning rate by 0.2 after 2 stagnant epochs, never below 1e-6.
    lr_on_plateau = callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.2, patience=2, min_lr=1e-6
    )
    best_checkpoint = callbacks.ModelCheckpoint("best_model.h5", save_best_only=True)
    tensorboard_logger = callbacks.TensorBoard(log_dir="./logs")

    fit_kwargs = {
        "validation_data": val_gen,
        "epochs": epochs,
        "class_weight": compute_class_weights(train_gen),
        "callbacks": [early_stopping, lr_on_plateau, best_checkpoint, tensorboard_logger],
    }
    return model.fit(train_gen, **fit_kwargs)

# -------------------------
# Evaluation
# -------------------------
def evaluate_model(model, val_gen):
    """
    Print a classification report and plot a confusion matrix for ``val_gen``.

    Labels and predictions are gathered from the same batches, so they stay
    aligned even when the generator shuffles its samples (its ``classes``
    attribute is in directory order, not iteration order). The full list of
    class indices is passed to the metrics explicitly so the report rows and
    the matrix axes always match ``class_labels``, including classes that
    never occur in the evaluated data.

    Args:
        model: Trained Keras model.
        val_gen: Indexable batch generator yielding ``(images, labels)`` and
            exposing ``class_indices``; labels may be one-hot or integer.

    Raises:
        ValueError: If ``val_gen`` yields no batches.
    """
    class_labels = list(val_gen.class_indices.keys())
    label_ids = np.arange(len(class_labels))

    val_gen.reset()
    pred_batches, true_batches = [], []
    for batch_idx in range(len(val_gen)):
        images, batch_labels = val_gen[batch_idx]
        batch_probs = model.predict_on_batch(images)
        pred_batches.append(np.argmax(batch_probs, axis=1))

        batch_labels = np.asarray(batch_labels)
        if batch_labels.ndim > 1:
            # One-hot rows -> class index.
            batch_labels = np.argmax(batch_labels, axis=1)
        true_batches.append(batch_labels.astype(int))

    if not pred_batches:
        raise ValueError("Cannot evaluate: the validation generator yielded no batches.")

    y_pred = np.concatenate(pred_batches)
    y_true = np.concatenate(true_batches)

    print(classification_report(y_true, y_pred, labels=label_ids,
                                target_names=class_labels, zero_division=0))

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(12, 8))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
    plt.title("Confusion Matrix")
    plt.ylabel("True Label")
    plt.xlabel("Predicted Label")
    plt.show()

# -------------------------
# Grad-CAM Visualization
# -------------------------
def grad_cam(model, img_path, layer_name=None):
    """
    Overlay a Grad-CAM heatmap for the model's top predicted class on an image.

    The image is loaded as RGB, resized to ``IMG_SIZE`` and scaled to [0, 1].
    The gradient of the top class score with respect to the chosen layer's
    output is averaged per channel and used to weight that layer's
    activations; the resulting map is clipped at zero, normalised to [0, 1],
    upsampled to the image size and drawn semi-transparently over the image.

    Args:
        model: Trained Keras model.
        img_path: Path of the image to explain.
        layer_name: Name of the layer whose output is visualised. Defaults to
            the last layer in ``model.layers`` whose name contains "conv".

    Raises:
        ValueError: If ``layer_name`` is None and no layer name contains "conv".
    """
    # convert("RGB") makes grayscale / RGBA / palette images 3-channel, which
    # is what the model's input expects.
    img = Image.open(img_path).convert("RGB").resize((IMG_SIZE, IMG_SIZE))
    img_array = np.expand_dims(np.asarray(img, dtype=np.float32) / 255.0, axis=0)

    if layer_name is None:
        conv_layer_names = [layer.name for layer in model.layers if "conv" in layer.name]
        if not conv_layer_names:
            raise ValueError("No layer with 'conv' in its name; pass layer_name explicitly.")
        layer_name = conv_layer_names[-1]

    grad_model = Model(inputs=model.inputs, outputs=[model.get_layer(layer_name).output, model.output])

    with tf.GradientTape() as tape:
        conv_output, predictions = grad_model(img_array, training=False)
        class_idx = tf.argmax(predictions[0])
        loss = predictions[:, class_idx]

    grads = tape.gradient(loss, conv_output)[0]
    weights = tf.reduce_mean(grads, axis=(0, 1))
    cam = tf.reduce_sum(tf.multiply(weights, conv_output[0]), axis=-1)

    # Clip first, then normalise by the clipped maximum; skip the division
    # when nothing is positive so the map is all zeros instead of NaN.
    cam = np.maximum(cam.numpy(), 0)
    peak = cam.max()
    if peak > 0:
        cam = cam / peak

    # The map has the layer's spatial resolution; upsample it to the image
    # size so both imshow calls share the same pixel grid and the overlay
    # lines up with the picture.
    cam = tf.image.resize(cam[..., np.newaxis], (IMG_SIZE, IMG_SIZE)).numpy()[..., 0]

    plt.imshow(img)
    plt.imshow(cam, cmap="jet", alpha=0.5)
    plt.title("Grad-CAM")
    plt.show()

# -------------------------
# Main Execution
# -------------------------
if __name__ == "__main__":
    # Step 1: Download dataset
    download_and_extract_dataset()

    # Step 2: Create data generators
    train_gen, val_gen = create_data_generators()

    # Step 3: Build and train model
    model = build_model(num_classes=train_gen.num_classes)
    history = train_model(model, train_gen, val_gen)

    # Step 4: Save final model and class indices
    model.save(MODEL_PATH)
    # The class-name -> index mapping is needed to decode predictions later;
    # the context manager guarantees the file is flushed and closed.
    with open("class_indices.json", "w") as class_indices_file:
        json.dump(train_gen.class_indices, class_indices_file)

    # Step 5: Evaluate
    evaluate_model(model, val_gen)

    # Optional: Grad-CAM visualization
    # grad_cam(model, "sample_leaf.jpg")
