# AI-Based Plant Disease Detection System

## Project Details
- **Roll No:** 28, 29, 30, 32
- **Problem Statement:** Classify plant leaf diseases from images using Deep Learning.
- **Dataset:** [Plant Disease Dataset](https://www.kaggle.com/datasets/emmarex/plantdisease)

## 1. Setup and Dependencies

In [1]:
!pip install tensorflow pandas numpy matplotlib seaborn kagglehub opencv-python



In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import os
import kagglehub

print(f"TensorFlow Version: {tf.__version__}")

TensorFlow Version: 2.20.0


  from .autonotebook import tqdm as notebook_tqdm


## 2. Dataset Download
We use `kagglehub` to download the dataset directly.

In [13]:
# Download latest version
path = kagglehub.dataset_download("emmarex/plantdisease")

print("Path to dataset files:", path)

# Check directory structure
for root, dirs, files in os.walk(path):
    level = root.replace(path, '').count(os.sep)
    indent = ' ' * 4 * (level)
    print('{}{}/'.format(indent, os.path.basename(root)))
    subindent = ' ' * 4 * (level + 1)
    if level < 2: # Only print top levels to avoid spam
        for f in files[:2]:
            print('{}{}'.format(subindent, f))
    if level > 2:
        break

Resuming download from 593494016 bytes (96018674 bytes left)...
Resuming download from https://www.kaggle.com/api/v1/datasets/download/emmarex/plantdisease?dataset_version_number=1 (593494016/689512690) bytes left.


100%|███████████████████████████████████████████████████████████████████████████████| 658M/658M [00:15<00:00, 6.35MB/s]

Extracting files...





Path to dataset files: C:\Users\dnitr\.cache\kagglehub\datasets\emmarex\plantdisease\versions\1
1/
    PlantVillage/
        Pepper__bell___Bacterial_spot/
        Pepper__bell___healthy/
        PlantVillage/
            Pepper__bell___Bacterial_spot/


## 3. Data Preprocessing
We will use `ImageDataGenerator` for data augmentation and normalization.

In [16]:
import json  # Added this line
import os
# Define parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# IMPORTANT: Adjust 'dataset_path' based on the output of the previous cell.
# The kaggle dataset usually extracts to a specific folder. 
# Let's assume the download path contains the 'PlantVillage' or similar structure.
# We will try to find the directory containing the class folders automatically.

dataset_root = path
# Simple heuristic to find the root containing classes (if nested)
for root, dirs, files in os.walk(path):
    if len(dirs) > 2: # If contains multiple class folders
        dataset_root = root
        break

print(f"Using dataset root: {dataset_root}")

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80/20 split
)

train_generator = train_datagen.flow_from_directory(
    dataset_root,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    dataset_root,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

class_names = list(train_generator.class_indices.keys())
print("Classes:", class_names)

# Save class names to file for the app
with open('class_names.json', 'w') as f:
    json.dump(class_names, f)

Using dataset root: C:\Users\dnitr\.cache\kagglehub\datasets\emmarex\plantdisease\versions\1\PlantVillage
Found 33027 images belonging to 16 classes.
Found 8249 images belonging to 16 classes.
Classes: ['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'PlantVillage', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Tomato_Bacterial_spot', 'Tomato_Early_blight', 'Tomato_Late_blight', 'Tomato_Leaf_Mold', 'Tomato_Septoria_leaf_spot', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato__Target_Spot', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato__Tomato_mosaic_virus', 'Tomato_healthy']


## 4. Model Building (Transfer Learning)
Using MobileNetV2 for efficiency.

In [None]:
import os

checkpoint_path = 'plant_disease_checkpoint.keras'

if os.path.exists(checkpoint_path):
    # LOAD PREVIOUS PROGRESS
    print("Checkpoint found! Loading existing model...")
    model = tf.keras.models.load_model(checkpoint_path)
    print("Resumed model from checkpoint!")
else:
    # BUILD FROM SCRATCH (Only if no checkpoint exists)
    print("No checkpoint found. Building model from scratch...")
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
    base_model.trainable = False
    
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.2)(x)
    predictions = Dense(len(class_names), activation='softmax')(x)
    
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])

## 5. Training

In [None]:
# Create a callback to save the model after every epoch
checkpoint_path = "plant_disease_checkpoint.h5"
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False, # Saves the whole model (architecture + weights)
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,    # Only overwrites if the model improved
    verbose=1
)

In [None]:
EPOCHS = 10 # Increase this for better results

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    epochs=10, 
    callbacks=[checkpoint_callback] # Add this line
) 

## 6. Evaluation

In [None]:
# Plot accuracy and loss
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
## 7. Save Model
model.save('plant_disease_model.h5')
print("Model saved as plant_disease_model.h5")