In [None]:
# Install tensorflowjs for model conversion
!pip install tensorflowjs -q

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import numpy as np
import os
import json

# Report the runtime environment: the TensorFlow version and whether
# TensorFlow can see at least one GPU device.
tf_version = tf.__version__
gpu_devices = tf.config.list_physical_devices('GPU')

print(f"TensorFlow version: {tf_version}")
print(f"GPU Available: {len(gpu_devices) > 0}")

In [None]:
# Configuration
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE before entering the network
BATCH_SIZE = 32
EPOCHS = 15  # Enough for good accuracy, fast training
NUM_CLASSES = 38  # size of the softmax output; must match the class folders in DATASET_PATH
SEED = 42

# Seed Python, NumPy and TensorFlow in one call so that shuffling, augmentation
# and weight initialisation start from the same state on every
# "Restart Kernel -> Run All". (Some GPU kernels can still be non-deterministic.)
tf.keras.utils.set_random_seed(SEED)

# Dataset path - CHANGE THIS to match your Kaggle dataset path
# Common paths:
# /kaggle/input/plantvillage-dataset/color
# /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/train
DATASET_PATH = '/kaggle/input/plantvillage-dataset/color'

In [None]:
# Check dataset structure
import os

if os.path.isdir(DATASET_PATH):
    # Only sub-directories are classes. A stray file in the dataset root
    # (README, .DS_Store, ...) would otherwise make os.listdir() below raise
    # NotADirectoryError.
    classes = sorted(
        entry for entry in os.listdir(DATASET_PATH)
        if os.path.isdir(os.path.join(DATASET_PATH, entry))
    )
    print(f"Found {len(classes)} classes:")
    for i, c in enumerate(classes):
        count = len(os.listdir(os.path.join(DATASET_PATH, c)))
        print(f"{i}: {c} ({count} images)")
else:
    print(f"Dataset not found at {DATASET_PATH}")
    print("\nLooking for dataset...")
    # Pure-Python, case-insensitive search for directories whose name contains
    # "plant" (so "New Plant Diseases Dataset..." is found as well).
    # os.walk() silently yields nothing if /kaggle/input does not exist.
    max_hits = 20
    candidates = []
    for root, dirs, _files in os.walk('/kaggle/input'):
        dirs.sort()  # deterministic traversal order
        candidates.extend(
            os.path.join(root, d) for d in dirs if 'plant' in d.lower()
        )
        if len(candidates) >= max_hits:
            break
    for candidate in candidates[:max_hits]:
        print(candidate)

In [None]:
# Data Generators with PROPER preprocessing for TensorFlow.js
# IMPORTANT: Use rescale=1./255 - this is what we'll use in the browser too

# Training pipeline: [0,1] scaling plus random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,  # CRITICAL: Normalize to [0,1]
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation pipeline: same scaling and same split fraction, no augmentation.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Fail fast when the directory layout does not match the configured class
# count; otherwise the mismatch only shows up later as a shape error in fit().
if train_generator.num_classes != NUM_CLASSES:
    raise ValueError(
        f"NUM_CLASSES is {NUM_CLASSES} but {train_generator.num_classes} class folders "
        f"were found in {DATASET_PATH}; update NUM_CLASSES or DATASET_PATH."
    )

# Save class names for later, ordered explicitly by class index so that
# class_names[i] is the label of model output i.
class_names = [
    name
    for name, _ in sorted(train_generator.class_indices.items(), key=lambda item: item[1])
]
print(f"\nClass indices: {train_generator.class_indices}")
print(f"\nTotal training samples: {train_generator.samples}")
print(f"Total validation samples: {val_generator.samples}")

In [None]:
# Build Model - MobileNetV2 with custom head
# Using weights='imagenet' but NOT using imagenet preprocessing
# The model will adapt during training

# ImageNet-pretrained backbone without its original classifier.
base_model = MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights='imagenet',
)

# Freeze base model initially
base_model.trainable = False

# Classification head: pooling -> dropout -> 256-unit ReLU -> dropout -> softmax.
# The layers are instantiated in the listed order and then applied one after
# another to the backbone output.
head_layers = (
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(NUM_CLASSES, activation='softmax'),
)
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = x

model = Model(inputs=base_model.input, outputs=predictions)

head_optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=head_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Callbacks

# Keep only the checkpoint with the highest validation accuracy on disk.
checkpoint_cb = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a val_accuracy improvement and restore the best weights.
early_stopping_cb = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 2 epochs without a val_loss improvement (floor: 1e-7).
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]

In [None]:
# Phase 1: Train only the classification head
print("Phase 1: Training classification head only...")

# Short 5-epoch run against the validation split, using the shared callbacks.
phase1_fit_options = dict(
    epochs=5,
    validation_data=val_generator,
    callbacks=callbacks,
    verbose=1,
)
history1 = model.fit(train_generator, **phase1_fit_options)

In [None]:
# Phase 2: Fine-tune the entire model
print("\nPhase 2: Fine-tuning entire model...")

# Make the backbone trainable again ...
base_model.trainable = True

# ... and recompile so the change takes effect, with a 10x smaller
# learning rate than the one used for the head-only phase.
fine_tune_optimizer = Adam(learning_rate=0.0001)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for up to EPOCHS epochs with the same callbacks as phase 1.
phase2_fit_options = dict(
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=callbacks,
    verbose=1,
)
history2 = model.fit(train_generator, **phase2_fit_options)

In [None]:
# Evaluate final model
print("\nFinal Evaluation:")

# evaluate() returns the loss followed by the compiled metrics (accuracy only).
evaluation = model.evaluate(val_generator)
loss, accuracy = evaluation

print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy*100:.2f}%")

In [None]:
# Load best model
# The checkpoint written by ModelCheckpoint replaces the in-memory model, so it
# is the checkpoint that gets exported by the following cells.
if os.path.exists('best_model.h5'):
    model = tf.keras.models.load_model('best_model.h5')
    print("Loaded best model from checkpoint")

    # The evaluation cell ran BEFORE this reload, so its numbers may describe
    # different weights. Re-evaluate so the reported metrics belong to the
    # model that is actually exported.
    best_loss, best_accuracy = model.evaluate(val_generator, verbose=0)
    print(f"Checkpoint Validation Loss: {best_loss:.4f}")
    print(f"Checkpoint Validation Accuracy: {best_accuracy*100:.2f}%")
else:
    # Make the fallback explicit instead of silently exporting the in-memory model.
    print("best_model.h5 not found - keeping the in-memory model")

In [None]:
# Convert to TensorFlow.js format
import tensorflowjs as tfjs
import shutil

# Start from an empty output directory so nothing from a previous export remains.
output_dir = 'tfjs_model'
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Write the converted model into output_dir.
tfjs.converters.save_keras_model(model, output_dir)

print(f"\nModel converted to TensorFlow.js format in: {output_dir}")
print("\nGenerated files:")
# List every generated artifact together with its size in MB.
for artifact in os.listdir(output_dir):
    artifact_path = os.path.join(output_dir, artifact)
    artifact_mb = os.path.getsize(artifact_path) / 1024 / 1024
    print(f"  {artifact} ({artifact_mb:.2f} MB)")

In [None]:
# Save class names JSON
# The label list is written next to the converted model files in output_dir.
class_names_path = os.path.join(output_dir, 'class_names.json')
class_names_json = json.dumps(class_names, indent=2)
with open(class_names_path, 'w') as json_file:
    json_file.write(class_names_json)

print(f"\nSaved class_names.json with {len(class_names)} classes:")
# Echo the index -> label mapping that was just saved.
for class_idx, class_label in enumerate(class_names):
    print(f"{class_idx}: {class_label}")

In [None]:
# Create a zip file for easy download
import zipfile

# Every file under output_dir is stored beneath a top-level "model/" folder
# inside the archive.
zip_path = 'plantvillage_model_tfjs.zip'
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(output_dir):
        for file in files:
            file_path = os.path.join(root, file)
            # Keep the path relative to output_dir so that files living in
            # sub-folders keep their location and same-named files cannot
            # overwrite each other inside the archive.
            arcname = os.path.join('model', os.path.relpath(file_path, output_dir))
            zipf.write(file_path, arcname)

zip_size = os.path.getsize(zip_path) / 1024 / 1024
# The emoji below were stored as mojibake (UTF-8 bytes decoded as cp1252); restored.
print(f"\n✅ Created {zip_path} ({zip_size:.2f} MB)")
print("\n📥 Download this file and extract to your public/model/ folder")

In [None]:
# Test the model with a sample prediction
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

# Get a sample batch: element 0 holds the images, element 1 the one-hot labels
sample_batch = next(iter(val_generator))
sample_images = sample_batch[0]
sample_labels = sample_batch[1]

# Show at most five images; the batch can be smaller (e.g. a small BATCH_SIZE)
n_show = min(5, len(sample_images))

# Make predictions. Stored under its own name so the `predictions` output
# tensor created in the model-building cell is not overwritten.
sample_predictions = model.predict(sample_images[:n_show])

# Display results: one panel per image, title green when the prediction
# matches the label and red otherwise.
# squeeze=False keeps `axes` two-dimensional even when n_show == 1.
fig, axes = plt.subplots(1, n_show, figsize=(4 * n_show, 4), squeeze=False)
for i, ax in enumerate(axes[0]):
    ax.imshow(sample_images[i])
    pred_idx = np.argmax(sample_predictions[i])
    true_idx = np.argmax(sample_labels[i])
    pred_conf = sample_predictions[i][pred_idx] * 100

    color = 'green' if pred_idx == true_idx else 'red'
    ax.set_title(f'Pred: {class_names[pred_idx][:20]}\n{pred_conf:.1f}%', color=color, fontsize=8)
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_predictions.png')
plt.show()

# The emoji below were stored as mojibake (UTF-8 bytes decoded as cp1252); restored.
print("\n✅ Model training complete!")
print("\n📋 Next steps:")
print("1. Download 'plantvillage_model_tfjs.zip'")
print("2. Extract files to your project's public/model/ folder")
print("3. Make sure preprocessing uses rescale=1./255 (div by 255)")