# 🌾 Rice Disease Detection - Model Training (Google Colab)

This notebook will train the CNN model on your rice disease dataset.

**Your dataset:** Already linked from Google Drive! ✅

**Steps:**
1. Click `Runtime → Change runtime type → GPU`
2. Run all cells (Ctrl+F9 or Runtime → Run all)
3. Wait 10-15 minutes for training
4. Download the trained model files
5. Place the downloaded files in your local `models/` folder

In [None]:
# Install dependencies
!pip install -q tensorflow numpy pillow matplotlib seaborn scikit-learn gdown

In [None]:
# Download and unzip dataset from Google Drive
import zipfile
import os
import gdown

# Your dataset file ID from Google Drive
file_id = '1YcJ_spCXpE9IfvhuLz31h2kzx5oF9Dky'
output_zip = '/content/rice_dataset.zip'

# Download from Google Drive
print("📥 Downloading dataset from Google Drive...")
url = f'https://drive.google.com/uc?id={file_id}'
gdown.download(url, output_zip, quiet=False)

# Extract dataset
print("\n📦 Extracting dataset...")
with zipfile.ZipFile(output_zip, 'r') as zip_ref:
    zip_ref.extractall('/content/')

print("\n✅ Dataset ready!")
!ls -la /content/

## ⚠️ Important Note
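After running the download cell above, check its output to see the exact name of the extracted folder. The next cell auto-detects the dataset by looking in `/content/` for a name containing "Rice" or "rice", so a differently named folder (like "rice_dataset" instead of "Rice Leaf Disease Images") is still found. If your folder name contains neither, set `DATA_DIR` manually in the next cell.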

In [None]:
# Training code
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Configuration
IMG_SIZE = 224          # images are resized to IMG_SIZE x IMG_SIZE pixels
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.001

# Auto-detect dataset directory
import glob


def find_dataset_dirs(search_root='/content'):
    """Return directories directly under `search_root` whose name contains 'Rice' or 'rice'.

    Only directories are returned: the downloaded archive itself
    (e.g. 'rice_dataset.zip') also matches the '*rice*' pattern and must not
    be mistaken for the dataset folder. Matches for '*Rice*' come first,
    each pattern's matches are sorted so the result is deterministic, and
    duplicates are removed.

    Args:
        search_root: Directory whose immediate children are inspected.

    Returns:
        A list of matching directory paths (empty if none are found).
    """
    escaped_root = glob.escape(search_root)
    found = []
    for pattern in ('*Rice*', '*rice*'):
        for path in sorted(glob.glob(os.path.join(escaped_root, pattern))):
            if os.path.isdir(path) and path not in found:
                found.append(path)
    return found


possible_dirs = find_dataset_dirs()
if possible_dirs:
    DATA_DIR = possible_dirs[0]
    print(f"✅ Found dataset at: {DATA_DIR}")
else:
    # Nothing matched: fall back to the expected folder name
    DATA_DIR = "/content/Rice Leaf Disease Images"
    print(f"⚠️ Using default path: {DATA_DIR}")

# Check GPU: report whether TensorFlow can see a GPU device and, if so, its name
gpus = tf.config.list_physical_devices('GPU')
gpu_count = len(gpus)
print(f"🎮 GPU Available: {gpu_count > 0}")
if gpu_count:
    first_gpu = gpus[0]
    print(f"   GPU: {first_gpu.name}")

In [None]:
# Data Loading
# Both generators must use the same validation fraction so that the
# 'training' and 'validation' subsets stay complementary.
VALIDATION_SPLIT = 0.2

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Validation images are only rescaled: applying random augmentation to them
# would make validation metrics noisy and not comparable between epochs.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# shuffle=False keeps the batch order identical to `val_generator.classes`,
# which is required when predictions are compared against those labels.
val_generator = val_datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

class_names = list(train_generator.class_indices.keys())
num_classes = len(class_names)

# Fail early if the directory does not contain one sub-folder of images per class
if num_classes == 0 or train_generator.samples == 0:
    raise RuntimeError(
        f"No training images found under '{DATA_DIR}'. "
        "DATA_DIR must point to a folder containing one sub-folder per class."
    )

print(f"Classes: {class_names}")
print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {val_generator.samples}")

In [None]:
# Build Model
# Four Conv -> BatchNorm -> MaxPool blocks with increasing filter counts,
# followed by two dense layers and a softmax over the disease classes.
model = keras.Sequential([
    # Input size follows the IMG_SIZE setting used by the data generators,
    # so changing IMG_SIZE does not leave the model with a mismatched input.
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    # Block 1
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    # Block 2
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 3
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 4
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),

    # Dense layers
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    # One output probability per class found by the training generator
    layers.Dense(num_classes, activation='softmax')
])

# categorical_crossentropy matches the one-hot labels produced by
# class_mode='categorical' in the generators.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [None]:
# Callbacks
# Multiply the learning rate by 0.5 once val_loss has stalled for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)

# Stop training after 10 epochs without val_loss improvement and roll the
# model back to its best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)

# Write 'best_model.h5' only when val_accuracy improves
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)

In [None]:
# Train Model
# Fit for at most EPOCHS epochs; the callbacks may end training earlier,
# lower the learning rate, and checkpoint the best model along the way.
training_callbacks = [checkpoint, early_stop, reduce_lr]
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    callbacks=training_callbacks,
    validation_data=val_generator
)

In [None]:
# Save Model and Class Indices
model.save('rice_disease_model.h5')

# The generator maps class name -> index; store the inverse (index -> name)
# so a predicted index can be turned back into a label.
class_indices = dict(
    (index, label) for label, index in train_generator.class_indices.items()
)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json.dumps(class_indices, indent=4))

for message in ("✓ Model saved", "✓ Class indices saved"):
    print(message)

In [None]:
# Evaluate
# model.evaluate returns the loss followed by the compiled metrics (accuracy)
evaluation_results = model.evaluate(val_generator)
val_loss, val_accuracy = evaluation_results
print(f"\nValidation Accuracy: {val_accuracy*100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")

In [None]:
# Confusion Matrix
# Labels and predictions are collected batch by batch so that every prediction
# is paired with the label of the same image. Reading `val_generator.classes`
# instead is only correct when the generator does not shuffle, because that
# attribute is in file order while a shuffling generator yields batches in
# random order.
val_generator.reset()
proba_batches = []
true_batches = []
for batch_index in range(len(val_generator)):
    batch_images, batch_labels = val_generator[batch_index]
    proba_batches.append(model.predict(batch_images, verbose=0))
    # Labels are one-hot (class_mode='categorical'); convert to class indices
    true_batches.append(np.argmax(batch_labels, axis=1))

y_pred_proba = np.concatenate(proba_batches)
y_pred = np.argmax(y_pred_proba, axis=1)
y_true = np.concatenate(true_batches)

# Pass the full label set explicitly so the matrix and the report always have
# one row per class, even if a class has no validation samples.
label_ids = list(range(len(class_names)))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

In [None]:
# Training Curves
plt.figure(figsize=(14, 5))

# One side-by-side panel per metric:
# (training history key, validation history key, panel title, y-axis label)
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]
for position, (train_key, val_key, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label='Training')
    plt.plot(history.history[val_key], label='Validation')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.savefig('training_curves.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Download files
from google.colab import files

# Artifacts written by the earlier cells, sent to the browser in this order
artifacts = (
    'rice_disease_model.h5',
    'class_indices.json',
    'confusion_matrix.png',
    'training_curves.png',
)

print("Downloading trained model...")
for artifact in artifacts:
    files.download(artifact)

print("\n✓ All files downloaded!")
print("\nNext steps:")
next_steps = (
    "1. Place 'rice_disease_model.h5' in your local 'models/' folder",
    "2. Place 'class_indices.json' in your local 'models/' folder",
    "3. Run: python app_simple.py",
    "4. Open: http://localhost:5000",
)
for step in next_steps:
    print(step)