# Plant Disease Detection — Colab Training Notebook (TF 2.19 compatible)
**Instructions**
1. Upload `archive.zip` to your Drive root (or adjust `zip_path` in the extraction cell), then mount Google Drive (cell below).
2. Run cells top-to-bottom. Use GPU runtime (Runtime → Change runtime type → GPU).
3. This notebook trains a 16-class classifier, saves `plant_disease_model.keras` (Keras native format),
   and writes `class_indices.json` for frontend mapping.


In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write training outputs to it.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Adjust zip_path if your archive is stored in a different Drive folder
import zipfile, os, shutil
zip_path = "/content/drive/MyDrive/archive.zip"  # change if needed
extract_path = "/content/plant_dataset"

# Only a non-empty folder counts as an existing dataset. An empty folder is what
# a previously failed run leaves behind; skipping extraction for it would leave
# the notebook with no data to train on.
if os.path.isdir(extract_path) and os.listdir(extract_path):
    print("Dataset folder already exists:", extract_path)
else:
    # Check the archive before creating anything on disk so a wrong zip_path
    # fails with a clear message and leaves no empty dataset folder behind.
    if not os.path.isfile(zip_path):
        raise FileNotFoundError(
            f"Archive not found: {zip_path} (mount Drive and check zip_path)"
        )
    os.makedirs(extract_path, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_path, 'r') as z:
            z.extractall(extract_path)
    except BaseException:
        # Remove the partial extraction (also on a manual interrupt) so the next
        # run starts clean instead of treating it as a complete dataset.
        shutil.rmtree(extract_path, ignore_errors=True)
        raise
    print("✅ Extracted to:", extract_path)

# Quick check - print top folders (class folders)
print('Top-level folders in dataset:')
print(sorted(os.listdir(extract_path))[:20])


In [None]:
# Report the installed TensorFlow version (the notebook title targets TF 2.19).
import tensorflow as tf
print('TensorFlow version:', tf.__version__)


In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger


In [None]:
# ---------- CONFIG ----------
# Paths and hyperparameters shared by the cells below.
DATASET_PATH = "/content/plant_dataset"   # where archive.zip was extracted
IMG_SIZE = (224, 224)  # generator target size; also fixes the model's input shape
BATCH_SIZE = 32  # batch size for both the training and validation generators
EPOCHS = 12           # start with 12 epochs; increase if you have time
LEARNING_RATE = 1e-4  # Adam learning rate for the first run; fine-tuning uses LEARNING_RATE/10
OUTPUT_DIR = "/content/drive/MyDrive/plant_training_outputs"  # checkpoints, CSV logs, class map, final model
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Dataset path:", DATASET_PATH)
print("Output dir:", OUTPUT_DIR)


In [None]:
# Fraction of each class folder held out for validation. Both generators below
# must use the same value so they agree on which files belong to which subset.
VALIDATION_SPLIT = 0.20

# Training pipeline: rescale pixels to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.12,
    height_shift_range=0.12,
    shear_range=0.12,
    zoom_range=0.12,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

# Validation pipeline: rescale only. Validation images must not be randomly
# rotated/shifted/flipped, otherwise val_loss and val_accuracy (which drive the
# checkpoint, LR schedule and early stopping) become noisy and are measured on
# distorted inputs rather than on the real images.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=42
)

# shuffle=False keeps the validation order fixed from epoch to epoch.
val_generator = val_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)

# The number of output units is taken from the class folders actually found.
num_classes = len(train_generator.class_indices)
print('Found classes:', num_classes)
print(train_generator.class_indices)

# Save mapping for frontend (class name -> integer index)
with open(os.path.join(OUTPUT_DIR, 'class_indices.json'), 'w') as f:
    json.dump(train_generator.class_indices, f, indent=2)
print('Saved class_indices.json to', OUTPUT_DIR)


In [None]:
# ImageNet-pretrained ResNet50V2 backbone without its classification top.
base_model = ResNet50V2(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
)

# Freeze every backbone layer so the first training run only updates the new
# classification head added below.
for layer in base_model.layers:
    layer.trainable = False

# Classification head: pooled backbone features -> two dense blocks with
# batch normalization and dropout -> softmax over num_classes.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.4)(x)
x = BatchNormalization()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
preds = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=preds)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model.summary()


In [None]:
# Configure training: Adam at LEARNING_RATE, categorical cross-entropy loss
# (the generators use class_mode='categorical'), and accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
import time

# A per-run timestamp in the file names keeps this run's checkpoint and CSV log
# from overwriting those of earlier runs in OUTPUT_DIR.
timestamp = int(time.time())
checkpoint_path = os.path.join(OUTPUT_DIR, f'best_model_{timestamp}.keras')
csv_log_path = os.path.join(OUTPUT_DIR, f'training_log_{timestamp}.csv')

callbacks = [
    # Write the model to disk only when validation accuracy improves.
    ModelCheckpoint(
        checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
    ),
    # Halve the learning rate after 2 epochs without val_loss improvement.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=2,
        min_lr=1e-7,
        verbose=1,
    ),
    # Stop after 4 epochs without val_loss improvement and restore the best weights.
    EarlyStopping(
        monitor='val_loss',
        patience=4,
        restore_best_weights=True,
    ),
    # Append per-epoch metrics to a CSV file.
    CSVLogger(csv_log_path),
]

print('Checkpoint will be saved to:', checkpoint_path)


In [None]:
# First training run: fit the model on train_generator for up to EPOCHS epochs,
# validating on val_generator after each epoch. The callbacks handle
# checkpointing, learning-rate reduction, early stopping and CSV logging.
# The returned History object is kept in `history` for the plotting cell.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks
)


In [None]:
# Optional fine-tune: unfreeze last N layers of base_model
N_UNFREEZE = 30
FT_EPOCHS = 5  # maximum number of fine-tuning epochs (early stopping may end sooner)
for layer in base_model.layers[-N_UNFREEZE:]:
    # Keep BatchNormalization layers frozen: Keras' fine-tuning guidance is to
    # leave them in inference mode, because updating their statistics on the new
    # data can undo what the pretrained backbone has learned.
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile with lower LR (recompiling is also what makes the trainable changes take effect)
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE/10), loss='categorical_crossentropy', metrics=['accuracy'])

# Same per-run timestamp as the first training run, with separate file names.
ft_checkpoint = os.path.join(OUTPUT_DIR, f'best_model_finetune_{timestamp}.keras')
ft_csv = os.path.join(OUTPUT_DIR, f'finetune_log_{timestamp}.csv')

from tensorflow.keras.callbacks import ModelCheckpoint as MCP
ft_callbacks = [
    # Write the model to disk only when validation accuracy improves.
    MCP(ft_checkpoint, monitor='val_accuracy', save_best_only=True),
    # Halve the learning rate after 2 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-8, verbose=1),
    # Stop after 4 epochs without val_loss improvement and restore the best weights.
    EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True),
    CSVLogger(ft_csv)
]

print('Starting fine-tuning, unfreezing last', N_UNFREEZE, 'layers (BatchNormalization layers stay frozen).')
history_ft = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=FT_EPOCHS,
    callbacks=ft_callbacks
)


In [None]:
# Load best model (finetune if exists else head).
# The fine-tune cell is optional, so `ft_checkpoint` may never have been defined;
# look it up defensively instead of raising NameError when that cell was skipped.
_ft_path = globals().get('ft_checkpoint')
if _ft_path and os.path.exists(_ft_path):
    best_model_path = _ft_path
else:
    best_model_path = checkpoint_path
print('Loading best model from:', best_model_path)
model = tf.keras.models.load_model(best_model_path)

# Report the reloaded model's metrics on the validation generator.
loss, acc = model.evaluate(val_generator, verbose=1)
print(f'Final validation accuracy: {acc:.4f}, loss: {loss:.4f}')


In [None]:
# Save the currently loaded model under a fixed file name in OUTPUT_DIR
# (Keras native .keras format). The name carries no timestamp, so every run
# overwrites the previous final model.
final_model_path = os.path.join(OUTPUT_DIR, 'plant_disease_model.keras')
print('Saving final model to:', final_model_path)
model.save(final_model_path)
print('Saved model. You can download it from Drive at:', final_model_path)


In [None]:
# Optionally download to local machine (may be large)
# Uses Colab's files helper to send the saved final model to the browser.
from google.colab import files
files.download(final_model_path)


In [None]:
import matplotlib.pyplot as plt

def plot_hist(h):
    """Show accuracy and loss curves for one training run, side by side.

    h: an object whose ``history`` attribute is a dict of per-epoch metric
       lists (what ``model.fit`` returned in the cells above). A metric that
       is missing from the dict is drawn as an empty curve.
    """
    # (panel title, ((history key, legend label), ...)) for each subplot,
    # listed left to right.
    panels = (
        ('Accuracy', (('accuracy', 'train_acc'), ('val_accuracy', 'val_acc'))),
        ('Loss', (('loss', 'train_loss'), ('val_loss', 'val_loss'))),
    )

    plt.figure(figsize=(12,5))
    for position, (title, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for key, label in curves:
            plt.plot(h.history.get(key, []), label=label)
        plt.legend()
        plt.title(title)
    plt.show()

# Plot head and finetune if available: each History object exists only if its
# training cell was run, so check the notebook globals before plotting.
for _run_name in ('history', 'history_ft'):
    if _run_name in globals():
        plot_hist(globals()[_run_name])


In [None]:
# Quick test prediction helper
from PIL import Image
import numpy as np

with open(os.path.join(OUTPUT_DIR, 'class_indices.json')) as f:
    class_idx = json.load(f)
# class_indices.json maps class name -> integer index; invert it so a predicted
# index can be turned back into its class name.
idx_to_class = {int(v): k for k, v in class_idx.items()}

def predict_image(img_path, model_path=final_model_path, model=None):
    """Classify one image file and return ``(class_name, confidence)``.

    img_path:   path of the image to classify.
    model_path: saved model to load when ``model`` is not given.
    model:      optional already-loaded model; pass it to avoid reloading the
                model from disk on every call.

    The image is converted to RGB, resized to IMG_SIZE and scaled to [0, 1],
    matching the ``rescale=1./255`` used by the training generators.
    ``confidence`` is the highest softmax output as a Python float.
    """
    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(img_path) as src:
        # PIL expects (width, height) while IMG_SIZE is used as (height, width)
        # for the model input shape.
        img = src.convert('RGB').resize((IMG_SIZE[1], IMG_SIZE[0]))
    arr = np.array(img)/255.0
    arr = np.expand_dims(arr, 0)  # add the batch dimension
    m = model if model is not None else tf.keras.models.load_model(model_path)
    preds = m.predict(arr)
    idx = int(np.argmax(preds))
    return idx_to_class[idx], float(np.max(preds))

# Example usage:
# print(predict_image('/content/plant_dataset/Apple___Black_rot/0a...jpg'))
