# Rice Leaf Disease Detection — Complete Colab Notebook

This single notebook runs the entire project end-to-end in Google Colab. It:

1. Installs small missing packages (not TensorFlow)
2. Lets you upload the 3 ZIP files (one by one if needed)
3. Extracts and auto-fixes nested folders
4. Verifies dataset counts
5. Builds generators with augmentation
6. Trains a Transfer Learning model (MobileNetV2)
7. Evaluates and saves outputs and model

**How to use:** Open this notebook in Colab, set Runtime → GPU, then click `Runtime → Run all`.


In [1]:
# 0. Install helper packages (do NOT install TensorFlow)
import sys
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    print('Installing helper packages...')
    !pip install -q seaborn opencv-python
else:
    print('Not running in Colab. Make sure required packages are installed.')


Installing helper packages...


In [2]:
# 1. Imports and configuration
from pathlib import Path
import os, random, json, zipfile, shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.metrics import classification_report, confusion_matrix

random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

WORK_DIR = Path('/content')
DATA_ROOT = WORK_DIR / 'dataset' / 'train'   # we will extract zips here
OUTPUT_DIR = WORK_DIR / 'outputs'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

IMG_SIZE = (224,224)
BATCH_SIZE = 16
EPOCHS = 15

print('Working dir:', WORK_DIR)
print('Data root:', DATA_ROOT)
print('Output dir:', OUTPUT_DIR)


Working dir: /content
Data root: /content/dataset/train
Output dir: /content/outputs


## 2. Upload ZIP files

Run the cell and use the file picker to upload your three zip files. If you cannot select multiple files at once, run the cell multiple times (once per zip).

In [None]:
from google.colab import files
uploaded = files.upload()  # pick 1 or more ZIP files
print('Uploaded files:', list(uploaded.keys()))


In [None]:
from google.colab import files
uploaded = files.upload()  # pick 1 or more ZIP files
print('Uploaded files:', list(uploaded.keys()))


Saving Leaf smut_zip.zip to Leaf smut_zip.zip
Uploaded files: ['Leaf smut_zip.zip']


In [None]:
from google.colab import files
uploaded = files.upload()  # pick 1 or more ZIP files
print('Uploaded files:', list(uploaded.keys()))


Saving Bacterial leaf blight.zip to Bacterial leaf blight.zip
Uploaded files: ['Bacterial leaf blight.zip']


## 3. Extract uploaded zips into `/content/dataset/train/` and auto-fix nested folders

This cell extracts the uploaded ZIP files into class-named folders and moves images up if the zip contained an extra folder layer.

In [None]:
# ensure destination
os.makedirs(str(DATA_ROOT), exist_ok=True)

# Extract any uploaded zips (the uploaded dict keys are available)
for filename in list(uploaded.keys()):
    try:
        with zipfile.ZipFile(filename, 'r') as z:
            temp_dir = WORK_DIR / 'temp_extract'
            if temp_dir.exists():
                shutil.rmtree(temp_dir)
            temp_dir.mkdir(parents=True, exist_ok=True)
            z.extractall(temp_dir)
            extracted_items = list(temp_dir.iterdir())
            if len(extracted_items) == 1 and extracted_items[0].is_dir():
                src_folder = extracted_items[0]
                # Handle the case where the single extracted folder is named 'Data'
                if src_folder.name.lower() == 'data':
                    # Assuming class folders are directly within 'Data' or at the top level
                    for item in src_folder.iterdir():
                        if item.is_dir():
                            dest_folder = DATA_ROOT / item.name
                            dest_folder.mkdir(parents=True, exist_ok=True)
                            moved = 0
                            for f in item.iterdir():
                                if f.is_file():
                                    shutil.move(str(f), str(dest_folder / f.name))
                                    moved += 1
                            print(f"Extracted {moved} images from '{item.name}' within 'Data' into {dest_folder}")
                        elif item.is_file(): # Handle files directly in 'Data' if any
                             # Determine class name from filename or assume a default
                             base_class_name = filename.replace('.zip','').replace('.rar','').replace('.RAR', '') # Handle .rar as well
                             dest_folder = DATA_ROOT / base_class_name
                             dest_folder.mkdir(parents=True, exist_ok=True)
                             shutil.move(str(item), str(dest_folder / item.name))
                             print(f"Moved file '{item.name}' from 'Data' into {dest_folder}")

                else: # Original logic for a single nested folder not named 'Data'
                    dest_folder = DATA_ROOT / src_folder.name
                    dest_folder.mkdir(parents=True, exist_ok=True)
                    for f in src_folder.iterdir():
                        if f.is_file():
                            shutil.move(str(f), str(dest_folder / f.name))
                    print(f"Extracted into existing folder: {dest_folder}")
                shutil.rmtree(temp_dir)
            else:
                base_class_name = filename.replace('.zip','').replace('.rar','').replace('.RAR', '') # Handle .rar as well
                dest_folder = DATA_ROOT / base_class_name
                dest_folder.mkdir(parents=True, exist_ok=True)
                moved = 0
                for root, dirs, files in os.walk(temp_dir):
                    for f in files:
                        if f.lower().endswith(('.jpg','.jpeg','.png')):
                            src = Path(root) / f
                            shutil.move(str(src), str(dest_folder / f))
                            moved += 1
                shutil.rmtree(temp_dir)
                print(f"Extracted {moved} images into {dest_folder}")
    except Exception as e:
        print('Error extracting', filename, e)

# Auto-fix nested folders (keep original logic as it might still be useful)
for cls in os.listdir(DATA_ROOT):
    cls_path = DATA_ROOT / cls
    if not cls_path.is_dir():
        continue
    subitems = list(cls_path.iterdir())
    # Add a check to ensure the nested folder is not empty before trying to move
    if len(subitems) == 1 and subitems[0].is_dir() and len(list(subitems[0].iterdir())) > 0:
        nested = subitems[0]
        moved = 0
        for f in nested.iterdir():
            if f.is_file():
                shutil.move(str(f), str(cls_path / f.name))
                moved += 1
        shutil.rmtree(nested)
        print(f'Fixed nested folder for {cls}, moved {moved} files up')
    # Also handle the case where images might be directly in the class folder but nested folders exist
    # This part might need further refinement based on expected structures
    elif len(subitems) > 1 and any(item.is_dir() for item in subitems):
        print(f"Warning: Folder '{cls}' contains both images and subfolders. Manual inspection might be needed.")


print('\nFinal classes in dataset:')
final_classes = [p.name for p in DATA_ROOT.iterdir() if p.is_dir()]
print(sorted(final_classes))

if not final_classes:
    print("No class folders found. Please check the extraction process and the contents of your zip files.")

Extracted into existing folder: /content/dataset/train/Brown spot

Final classes in dataset:
['Brown spot', 'Data']


## 4. Verify dataset counts

This will print number of images found in each class folder. If counts are zero, re-check uploads.

In [None]:
total = 0
for cls in sorted(DATA_ROOT.iterdir()):
    if cls.is_dir():
        imgs = [p for p in cls.glob('*.*') if p.suffix.lower() in ['.jpg','.jpeg','.png']]
        print(cls.name, len(imgs))
        total += len(imgs)
print('Total images found:', total)
if total == 0:
    raise RuntimeError('No images found. Please upload and extract your zip files correctly.')


Brown spot 40
Data 0
Total images found: 40


## 5. Build generators with augmentation (uses validation_split)

We will use ImageDataGenerator with `validation_split=0.2` so we don't need separate test folders.

In [None]:
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=20, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.15, horizontal_flip=True, vertical_flip=True, validation_split=0.2)

train_gen = train_datagen.flow_from_directory(str(DATA_ROOT), target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical', subset='training')
val_gen = train_datagen.flow_from_directory(str(DATA_ROOT), target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical', subset='validation', shuffle=False)

print('Train samples:', train_gen.n)
print('Validation samples:', val_gen.n)


Found 32 images belonging to 2 classes.
Found 8 images belonging to 2 classes.
Train samples: 32
Validation samples: 8


## 6. Build the Transfer Learning model (MobileNetV2)

We freeze the base model and train top layers first.

In [None]:
base = tf.keras.applications.MobileNetV2(include_top=False, weights='imagenet', input_shape=(*IMG_SIZE,3))
base.trainable = False
inputs = layers.Input(shape=(*IMG_SIZE,3))
x = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(train_gen.num_classes, activation='softmax')(x)
model = models.Model(inputs, outputs)
model.compile(optimizer=optimizers.Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


## 7. Callbacks and training

The notebook uses EarlyStopping and ModelCheckpoint. Training will save best model to `/content/outputs/best_model.h5`.

In [None]:
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mc = callbacks.ModelCheckpoint(str(OUTPUT_DIR / 'best_model.keras'), monitor='val_accuracy', save_best_only=True) # Save in .keras format

history = model.fit(train_gen, epochs=EPOCHS, validation_data=val_gen, callbacks=[es, mc])
with open(OUTPUT_DIR / 'mobilenetv2_top_history.json','w') as f:
    json.dump(history.history, f)
model.save(OUTPUT_DIR / 'mobilenetv2_top.keras') # Save in .keras format
print('Training complete. Models and history saved to', OUTPUT_DIR)

  self._warn_if_super_not_called()


Epoch 1/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3s/step - accuracy: 1.0000 - loss: 0.1239 - val_accuracy: 1.0000 - val_loss: 0.0865
Epoch 2/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 1.0000 - loss: 0.1065 - val_accuracy: 1.0000 - val_loss: 0.0758
Epoch 3/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 932ms/step - accuracy: 1.0000 - loss: 0.1320 - val_accuracy: 1.0000 - val_loss: 0.0666
Epoch 4/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 817ms/step - accuracy: 1.0000 - loss: 0.1297 - val_accuracy: 1.0000 - val_loss: 0.0593
Epoch 5/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 869ms/step - accuracy: 1.0000 - loss: 0.0984 - val_accuracy: 1.0000 - val_loss: 0.0524
Epoch 6/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 851ms/step - accuracy: 1.0000 - loss: 0.0901 - val_accuracy: 1.0000 - val_loss: 0.0450
Epoch 7/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━

## 8. Evaluate on validation set and save outputs

This will produce classification report and confusion matrix, and save them to the outputs folder.

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import tensorflow as tf

best = OUTPUT_DIR / 'best_model.keras' # Changed to .keras
if best.exists():
    final_model = load_model(best) # .keras format doesn't need custom_objects for these layers
else:
    final_model = model

val_gen.reset()
preds = final_model.predict(val_gen, steps=len(val_gen), verbose=1)
y_pred = np.argmax(preds, axis=1)
y_true = val_gen.classes
labels = list(train_gen.class_indices.keys())

cr = classification_report(y_true, y_pred, target_names=labels)
cm = confusion_matrix(y_true, y_pred)
print('\nClassification Report:\n')
print(cr)

with open(OUTPUT_DIR / 'classification_report.txt','w') as f:
    f.write(cr)

plt.figure(figsize=(6,6))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'confusion_matrix.png')
plt.show()

print('Saved outputs to', OUTPUT_DIR)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


ValueError: Number of classes, 1, does not match size of target_names, 2. Try specifying the labels parameter

## 9. Download or copy outputs

Use the left Files pane in Colab to download files from `/content/outputs/` or move them to your Google Drive.