In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import kagglehub

# ==========================================
# 1. Data Collection & Path Configuration
# ==========================================
print("Downloading dataset via kagglehub...")
dataset_path = kagglehub.dataset_download("vipoooool/new-plant-diseases-dataset")
print("Path to dataset files:", dataset_path)

train_dir = None
valid_dir = None

# Dynamically locate the train and valid directories to avoid hardcoded path errors
for root, dirs, files in os.walk(dataset_path):
    if 'train' in dirs and 'valid' in dirs:
        train_dir = os.path.join(root, 'train')
        valid_dir = os.path.join(root, 'valid')
        break

if not train_dir or not valid_dir:
    print("Error: Could not find 'train' and 'valid' directories within the downloaded dataset.")
    sys.exit(1)

print(f"Train directory found: {train_dir}")
print(f"Valid directory found: {valid_dir}")

# Configuration variables
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 5
N_LAST_LAYERS = 10
NUM_CLASSES = 38
SEED = 1337

# ==========================================
# 2. Data Preprocessing & Augmentation
# ==========================================
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode='reflect'
)

valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=SEED
)

valid_gen = valid_datagen.flow_from_directory(
    valid_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# ==========================================
# 3. Model Building (Transfer Learning)
# ==========================================
base_model = MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights='imagenet'
)

# Freeze layers and unfreeze the last N layers for fine-tuning
for layer in base_model.layers:
    layer.trainable = False

if N_LAST_LAYERS > 0:
    for layer in base_model.layers[-N_LAST_LAYERS:]:
        layer.trainable = True

# Build the complete architecture
inputs = keras.Input(shape=IMG_SIZE + (3,))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.35)(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.25)(x)
outputs = layers.Dense(NUM_CLASSES, activation='softmax')(x)

model = keras.Model(inputs, outputs, name="mobilenetv2_plant_disease_classifier")

# Compile the model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ==========================================
# 4. Model Training & Callbacks
# ==========================================
callbacks = [
    keras.callbacks.ModelCheckpoint(
        'mobilenetv2_best.keras', # Saving locally in the working directory
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        verbose=1
    ),
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=6,
        restore_best_weights=True,
        verbose=1
    )
]

history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=valid_gen,
    callbacks=callbacks
)

# Save the final model explicitly
final_path = "mobilenetv2_final.keras"
model.save(final_path)
print(f"Saved final model to: {final_path}")

Downloading dataset via kagglehub...
Using Colab cache for faster access to the 'new-plant-diseases-dataset' dataset.
Path to dataset files: /kaggle/input/new-plant-diseases-dataset
Train directory found: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train
Valid directory found: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid
Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/5
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497ms/step - accuracy: 0.6686 - loss: 1.2307
Epoch 1: val_accuracy improved from -inf to 0.90621, saving model to mobilenetv2_best.keras
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1212s[0m 541ms/step - accuracy: 0.6686 - loss: 1.2305 - val_accuracy: 0.9062 - val_loss: 0.3215 - learning_rate: 1.0000e-04
Epoch 2/5
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 399ms/step - accuracy: 0.9178 - loss: 0.2521
Epoch 2: val_accuracy improved from 0.90621 to 0.93421, saving model to mobilenetv2_best.keras
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m910s[0m 414ms/step - accuracy: 0.9178 - loss: 0.2521 - val_accuracy: 0.9342 - val_loss: 0.2072 - learning_rate: 1.0000e-04
Epoch 3/5
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 391ms/step - accuracy: 0.9389 - loss: 0.1873
Epoch 3: val_accuracy improved from 0.93421 to 0.94935, saving model to m

In [2]:
from tensorflow.keras.models import load_model

# 1. Load the model from your last best checkpoint
print("Loading saved model...")
model = load_model('mobilenetv2_best.keras')

# 2. Resume training
# We set epochs=5 and initial_epoch=4 so Keras knows to just run the 5th epoch.
print("Resuming training for the final epoch...")
history_continued = model.fit(
    train_gen,
    epochs=5,
    initial_epoch=4,
    validation_data=valid_gen,
    callbacks=callbacks
)

# 3. Save the final model when done
model.save("mobilenetv2_final.keras")
print("Finished and saved!")

Loading saved model...
Resuming training for the final epoch...
Epoch 5/5
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437ms/step - accuracy: 0.9558 - loss: 0.1304
Epoch 5: val_accuracy did not improve from 0.96529
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1019s[0m 458ms/step - accuracy: 0.9558 - loss: 0.1304 - val_accuracy: 0.9601 - val_loss: 0.1231 - learning_rate: 1.0000e-04
Restoring model weights from the end of the best epoch: 5.
Finished and saved!


In [4]:
print("Evaluating model performance on the validation set...")
val_loss, val_acc = model.evaluate(valid_gen)
print(f"Validation loss: {val_loss:.4f}, accuracy: {val_acc:.4f}")

Evaluating model performance on the validation set...
[1m550/550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 71ms/step - accuracy: 0.9731 - loss: 0.0796
Validation loss: 0.1231, accuracy: 0.9601


In [6]:
model_name = "mobilenetv2_best.keras"
model.save(model_name)
print(f"Saved model as: {model_name}")

Saved model as: mobilenetv2_best.keras


In [7]:
from google.colab import files
files.download('mobilenetv2_best.keras')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>