# 🥔 Phase 6: Model Training & Hyperparameter Tuning

This notebook trains and optimizes the potato disease classification model.

**Features:**
- Model training with real-time monitoring
- Hyperparameter tuning experiments
- Training visualization (loss/accuracy curves)
- Best model checkpointing
- Training report generation

## 1. Mount Google Drive & Setup

In [None]:
from google.colab import drive
import os
import shutil

MOUNT_PATH = '/content/drive'

def mount_drive():
    """Mount Google Drive at ``MOUNT_PATH``, clearing a stale mount point first.

    Returns:
        bool: True when Drive is already mounted or ``drive.mount`` succeeds,
        False when ``drive.mount`` raises.
    """
    # A visible ``MyDrive`` folder means an earlier mount is still usable.
    if os.path.exists(os.path.join(MOUNT_PATH, 'MyDrive')):
        print('‚úÖ Google Drive is already mounted!')
        return True

    # The mount point exists but has no ``MyDrive``: treat it as a leftover
    # from a previous attempt and try to clear it. Both cleanup steps are
    # best-effort; the mount call below is what reports a real failure.
    if os.path.exists(MOUNT_PATH):
        try:
            drive.flush_and_unmount()
        except Exception:
            # Deliberately ignored, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass
        if os.path.exists(MOUNT_PATH):
            try:
                shutil.rmtree(MOUNT_PATH)
            except OSError:
                # Directory could not be removed; let drive.mount decide.
                pass

    try:
        drive.mount(MOUNT_PATH)
        print('‚úÖ Google Drive mounted successfully!')
        return True
    except Exception as e:
        print(f'‚ùå Mount failed: {e}')
        return False

mount_drive()

In [None]:
# Install and import dependencies
!pip install -q tensorflow keras matplotlib seaborn scikit-learn

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import time
from datetime import datetime
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import (
    EarlyStopping, 
    ModelCheckpoint, 
    ReduceLROnPlateau,
    TensorBoard,
    CSVLogger
)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print(f"‚úÖ TensorFlow version: {tf.__version__}")

# Check GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"üöÄ GPU detected: {gpus[0].name}")
    # Enable memory growth so TensorFlow allocates GPU memory on demand.
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be configured before the GPUs are initialized;
        # re-running this cell in a live session raises RuntimeError. Report
        # it instead of aborting the cell so the seeds below are still set.
        print(f"Memory growth could not be changed: {e}")
else:
    print("‚ö†Ô∏è No GPU detected, training will be slow")

# Set random seeds for reproducibility (NumPy and TensorFlow generators).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

## 2. Configuration

In [None]:
# ===== CONFIGURATION =====
# Notebook-wide constants. Later cells read these names directly, so they are
# kept as plain module-level globals.

# Paths (all under the mounted Google Drive)
DATASET_DIR = "/content/drive/MyDrive/DrukFarm/data/final_potato_dataset"
MODEL_SAVE_DIR = "/content/drive/MyDrive/DrukFarm/models"
LOGS_DIR = "/content/drive/MyDrive/DrukFarm/training_logs"

# Dataset paths: one sub-directory per split
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')
VAL_DIR = os.path.join(DATASET_DIR, 'validation')
TEST_DIR = os.path.join(DATASET_DIR, 'test')

# Model parameters
IMG_SIZE = (224, 224)       # (height, width) passed to the data generators
IMG_SHAPE = (224, 224, 3)   # IMG_SIZE plus a channel axis; keep the two in sync
NUM_CLASSES = 3
# NOTE(review): presumably matches the class sub-directory names inside each
# split — confirm against train_gen.class_indices.
CLASS_NAMES = ['Early_Blight', 'Healthy', 'Late_Blight']

# Training hyperparameters (initial values)
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 50
FINE_TUNE_LAYERS = 20       # number of trailing backbone layers left trainable

# Create output directories (no error if they already exist)
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
os.makedirs(LOGS_DIR, exist_ok=True)

# Echo the active configuration for the run log
print("‚úÖ Configuration loaded!")
print(f"\nüìÅ Dataset: {DATASET_DIR}")
print(f"üìÇ Models: {MODEL_SAVE_DIR}")
print(f"üìä Logs: {LOGS_DIR}")
print(f"\nüîß Hyperparameters:")
print(f"   ‚Ä¢ Batch size: {BATCH_SIZE}")
print(f"   ‚Ä¢ Learning rate: {LEARNING_RATE}")
print(f"   ‚Ä¢ Epochs: {EPOCHS}")
print(f"   ‚Ä¢ Fine-tune layers: {FINE_TUNE_LAYERS}")

## 3. Load Dataset

In [None]:
def create_data_generators(train_dir, val_dir, test_dir, img_size, batch_size):
    """
    Build the directory-backed image iterators for the three dataset splits.

    The training iterator applies augmentation and shuffles using the
    notebook-level SEED; the validation and test iterators only rescale
    pixels and keep directory order.

    Returns:
        (train_generator, val_generator, test_generator)
    """
    # Options shared by every split.
    flow_options = dict(
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    # Training split: rescale plus light geometric augmentation.
    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        zoom_range=0.1,
        fill_mode='reflect',
    )
    train_generator = augmenting_datagen.flow_from_directory(
        train_dir, shuffle=True, seed=SEED, **flow_options
    )

    # Validation then test: rescale only, deterministic order.
    val_generator, test_generator = [
        ImageDataGenerator(rescale=1./255).flow_from_directory(
            split_dir, shuffle=False, **flow_options
        )
        for split_dir in (val_dir, test_dir)
    ]

    return train_generator, val_generator, test_generator


# Create data generators for the three splits using the notebook configuration
print("üìÇ Loading dataset...\n")
train_gen, val_gen, test_gen = create_data_generators(
    TRAIN_DIR, VAL_DIR, TEST_DIR, IMG_SIZE, BATCH_SIZE
)

# Report how many images each split contains and which class folders were found
print(f"\nüìä Dataset Summary:")
print(f"   ‚Ä¢ Training samples: {train_gen.samples}")
print(f"   ‚Ä¢ Validation samples: {val_gen.samples}")
print(f"   ‚Ä¢ Test samples: {test_gen.samples}")
print(f"   ‚Ä¢ Classes: {list(train_gen.class_indices.keys())}")

## 4. Build Model

In [None]:
def build_model(input_shape, num_classes, learning_rate, fine_tune_layers=20):
    """
    Build and compile a MobileNetV2-based classifier for potato disease images.

    Args:
        input_shape: Shape of one input image, e.g. (224, 224, 3).
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.
        fine_tune_layers: How many trailing layers of the MobileNetV2 backbone
            stay trainable. 0 freezes the whole backbone; a value at or above
            the backbone's layer count leaves every layer trainable.

    Returns:
        The compiled Keras model.

    Raises:
        ValueError: If ``fine_tune_layers`` is negative.
    """
    if fine_tune_layers < 0:
        raise ValueError(
            f"fine_tune_layers must be >= 0, got {fine_tune_layers}"
        )

    # Load pre-trained base
    base_model = MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet'
    )

    # Freeze early layers, fine-tune later layers.
    # An explicit end index is used instead of ``[:-fine_tune_layers]``:
    # with fine_tune_layers == 0 that slice is ``[:0]`` (empty), which would
    # leave the entire backbone trainable instead of fully frozen.
    base_model.trainable = True
    freeze_until = max(len(base_model.layers) - fine_tune_layers, 0)
    for layer in base_model.layers[:freeze_until]:
        layer.trainable = False

    # Classification head on top of the pooled backbone features
    model = models.Sequential([
        layers.InputLayer(input_shape=input_shape),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax', name='output')
    ], name='PotatoDisease_MobileNetV2')

    # Compile; the explicit metric names fix the history keys
    # ('precision', 'recall') that the plotting and summary cells look up.
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )

    return model


# Build the model from the notebook-level configuration
print("üî® Building model...")
model = build_model(IMG_SHAPE, NUM_CLASSES, LEARNING_RATE, FINE_TUNE_LAYERS)

# Print the layer-by-layer summary framed by a 60-character rule
print("\n" + "=" * 60)
print("üì± Model Architecture")
print("=" * 60)
model.summary()

# count_params() is cast to int so the thousands separator format applies
print(f"\nüìä Total parameters: {int(model.count_params()):,}")

## 5. Define Training Callbacks

In [None]:
def create_callbacks(model_name, save_dir, logs_dir):
    """
    Assemble the callback list used during training.

    Returns:
        (callbacks, timestamp): the callbacks in the order early stopping,
        best-model checkpoint, learning-rate reduction, CSV logger, plus the
        run timestamp string that is embedded in the CSV log file name.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    checkpoint_file = os.path.join(save_dir, f'{model_name}_best.keras')
    csv_file = os.path.join(logs_dir, f'training_log_{timestamp}.csv')

    # Stop when validation loss stalls and roll back to the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
    )
    # Keep only the checkpoint with the highest validation accuracy.
    best_checkpoint = ModelCheckpoint(
        filepath=checkpoint_file,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1,
    )
    # Shrink the learning rate when validation loss plateaus.
    lr_reducer = ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7, verbose=1
    )
    # Per-epoch metrics written to a fresh CSV file.
    csv_logger = CSVLogger(csv_file, separator=',', append=False)

    return [early_stop, best_checkpoint, lr_reducer, csv_logger], timestamp


# Create callbacks; `timestamp` identifies this run and is reused by later
# cells when naming the curves image, the report and the final model file.
callbacks, timestamp = create_callbacks('PotatoDisease_MobileNetV2', MODEL_SAVE_DIR, LOGS_DIR)

# Human-readable recap of the callbacks configured above
print("‚úÖ Training callbacks configured:")
print(f"   ‚Ä¢ EarlyStopping (patience=10)")
print(f"   ‚Ä¢ ModelCheckpoint (best val_accuracy)")
print(f"   ‚Ä¢ ReduceLROnPlateau (factor=0.2)")
print(f"   ‚Ä¢ CSVLogger ‚Üí training_log_{timestamp}.csv")

## 6. Train Model 🚀

In [None]:
# Banner and recap of the run configuration
print("\n" + "=" * 60)
print("üöÄ STARTING MODEL TRAINING")
print("=" * 60)
print(f"\nüìÖ Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"\nüîß Training Configuration:")
print(f"   ‚Ä¢ Epochs: {EPOCHS}")
print(f"   ‚Ä¢ Batch size: {BATCH_SIZE}")
print(f"   ‚Ä¢ Learning rate: {LEARNING_RATE}")
print(f"   ‚Ä¢ Training samples: {train_gen.samples}")
print(f"   ‚Ä¢ Validation samples: {val_gen.samples}")
print("\n" + "-" * 60)

# Calculate steps.
# len(generator) is the number of batches in one full pass, including the
# final partial batch. Floor division (samples // BATCH_SIZE) silently dropped
# that batch: because val_gen is unshuffled the same tail images were never
# validated, and a split smaller than one batch produced 0 steps.
steps_per_epoch = len(train_gen)
validation_steps = len(val_gen)

# Start timer
start_time = time.time()

# Train model
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=callbacks,
    verbose=1
)

# Training time in seconds (wall clock); later cells convert it to minutes
training_time = time.time() - start_time

print("\n" + "=" * 60)
print("‚úÖ TRAINING COMPLETED!")
print("=" * 60)
print(f"\n‚è±Ô∏è Total training time: {training_time/60:.2f} minutes")
print(f"üìÖ Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 7. Training Visualization

In [None]:
def plot_training_history(history):
    """
    Draw a 2x2 grid of training-vs-validation curves and save it as a PNG.

    Accuracy and loss are always drawn; precision and recall are drawn only
    when present in ``history.history``. The figure is written to LOGS_DIR
    using the notebook-level ``timestamp`` and then shown.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    recorded = history.history

    # (history key, display label, target axes, always drawn?)
    panels = [
        ('accuracy', 'Accuracy', axes[0, 0], True),
        ('loss', 'Loss', axes[0, 1], True),
        ('precision', 'Precision', axes[1, 0], False),
        ('recall', 'Recall', axes[1, 1], False),
    ]

    for key, label, ax, always_drawn in panels:
        # Optional metrics are skipped when they were not tracked.
        if not always_drawn and key not in recorded:
            continue
        ax.plot(recorded[key], label='Training', linewidth=2)
        ax.plot(recorded[f'val_{key}'], label='Validation', linewidth=2)
        ax.set_title(f'Model {label}', fontsize=14)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    # Title is placed slightly above the grid (y=1.02) after layout is fixed.
    plt.suptitle('Training History', fontsize=16, y=1.02)
    output_file = os.path.join(LOGS_DIR, f'training_curves_{timestamp}.png')
    plt.savefig(output_file, dpi=150, bbox_inches='tight')
    plt.show()


# Plot training history; the function itself writes the PNG into LOGS_DIR,
# the print below only echoes the same location for the run log.
plot_training_history(history)
print(f"\nüíæ Training curves saved to: {LOGS_DIR}/training_curves_{timestamp}.png")

## 8. Training Results Summary

In [None]:
def get_best_metrics(history):
    """
    Summarize a training run from its per-epoch history.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists.

    Returns:
        dict of plain Python numbers: the 1-based epoch with the highest
        validation accuracy, the epoch count, best validation accuracy/loss,
        final-epoch train/validation accuracy and loss, and the best
        validation precision/recall when those metrics were tracked.
    """
    log = history.history
    val_accuracy = log['val_accuracy']
    train_accuracy = log['accuracy']
    val_loss = log['val_loss']
    train_loss = log['loss']

    metrics = {
        # argmax is 0-based; epochs are reported 1-based.
        'best_epoch': int(np.argmax(val_accuracy) + 1),
        'total_epochs': len(train_accuracy),
        'best_val_accuracy': float(max(val_accuracy)),
        'best_val_loss': float(min(val_loss)),
        'final_train_accuracy': float(train_accuracy[-1]),
        'final_val_accuracy': float(val_accuracy[-1]),
        'final_train_loss': float(train_loss[-1]),
        'final_val_loss': float(val_loss[-1]),
    }

    # Optional metrics: included only when the training-side key exists.
    for optional in ('precision', 'recall'):
        if optional in log:
            metrics[f'best_val_{optional}'] = float(max(log[f'val_{optional}']))

    return metrics


# Get best metrics (plain Python numbers extracted from the Keras history)
best_metrics = get_best_metrics(history)

print("\n" + "=" * 60)
print("üìä TRAINING RESULTS SUMMARY")
print("=" * 60)

# Best values reached at any epoch
print(f"\nüèÜ BEST PERFORMANCE:")
print(f"   ‚Ä¢ Best epoch: {best_metrics['best_epoch']} / {best_metrics['total_epochs']}")
print(f"   ‚Ä¢ Best validation accuracy: {best_metrics['best_val_accuracy']*100:.2f}%")
print(f"   ‚Ä¢ Best validation loss: {best_metrics['best_val_loss']:.4f}")

# Precision/recall entries exist only when those metrics were tracked
if 'best_val_precision' in best_metrics:
    print(f"   ‚Ä¢ Best validation precision: {best_metrics['best_val_precision']*100:.2f}%")
if 'best_val_recall' in best_metrics:
    print(f"   ‚Ä¢ Best validation recall: {best_metrics['best_val_recall']*100:.2f}%")

# Values from the last epoch that actually ran
print(f"\nüìà FINAL METRICS:")
print(f"   ‚Ä¢ Training accuracy: {best_metrics['final_train_accuracy']*100:.2f}%")
print(f"   ‚Ä¢ Validation accuracy: {best_metrics['final_val_accuracy']*100:.2f}%")
print(f"   ‚Ä¢ Training loss: {best_metrics['final_train_loss']:.4f}")
print(f"   ‚Ä¢ Validation loss: {best_metrics['final_val_loss']:.4f}")

# Check for overfitting: gap = final train accuracy minus final val accuracy.
# More than 10 points is flagged as overfitting.
# NOTE(review): a negative gap (validation above training) is reported as
# underfitting; with dropout and augmentation applied only during training a
# small negative gap may be normal — confirm this heuristic is intended.
overfit_gap = best_metrics['final_train_accuracy'] - best_metrics['final_val_accuracy']
print(f"\nüîç OVERFITTING ANALYSIS:")
print(f"   ‚Ä¢ Train-Val accuracy gap: {overfit_gap*100:.2f}%")
if overfit_gap > 0.1:
    print(f"   ‚ö†Ô∏è Warning: Possible overfitting detected. Consider more regularization.")
elif overfit_gap < 0:
    print(f"   ‚ö†Ô∏è Warning: Possible underfitting. Consider training longer or more capacity.")
else:
    print(f"   ‚úÖ Good generalization - model is well balanced.")

## 9. Save Final Model & Training Report

In [None]:
def save_training_report(history, metrics, config, save_dir, timestamp):
    """
    Write a JSON report describing one training run.

    Args:
        history: Accepted for interface compatibility; not read here.
        metrics: Result dictionary stored under ``training_results``.
        config: Mapping with the run settings, sample counts and the
            training time in seconds (``training_time``).
        save_dir: Directory that receives the report file.
        timestamp: Run identifier used in the file name and as ``training_id``.

    Returns:
        Path of the written ``training_report_<timestamp>.json`` file.
    """
    model_name = 'PotatoDisease_MobileNetV2'

    # Values are coerced to built-in types so they serialize cleanly.
    configuration = {
        'model_name': model_name,
        'input_shape': list(config['img_shape']),
        'num_classes': int(config['num_classes']),
        'batch_size': int(config['batch_size']),
        'learning_rate': float(config['learning_rate']),
        'epochs_configured': int(config['epochs']),
        'fine_tune_layers': int(config['fine_tune_layers']),
    }
    dataset = {
        f'{split}_samples': int(config[f'{split}_samples'])
        for split in ('train', 'val', 'test')
    }

    report = {
        'phase': 'Phase 6: Model Training & Hyperparameter Tuning',
        'timestamp': datetime.now().isoformat(),
        'training_id': timestamp,
        'configuration': configuration,
        'dataset': dataset,
        'training_results': metrics,
        # Stored in minutes; config carries seconds.
        'training_time_minutes': float(config['training_time'] / 60),
        'model_saved_to': os.path.join(save_dir, f'{model_name}_best.keras'),
    }

    report_path = os.path.join(save_dir, f'training_report_{timestamp}.json')
    with open(report_path, 'w') as report_file:
        report_file.write(json.dumps(report, indent=2))

    return report_path


# Prepare config: everything save_training_report() reads, gathered from the
# notebook-level settings, the data generators and the measured training time
# (seconds).
training_config = {
    'img_shape': IMG_SHAPE,
    'num_classes': NUM_CLASSES,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'epochs': EPOCHS,
    'fine_tune_layers': FINE_TUNE_LAYERS,
    'train_samples': train_gen.samples,
    'val_samples': val_gen.samples,
    'test_samples': test_gen.samples,
    'training_time': training_time
}

# Save report (JSON file inside MODEL_SAVE_DIR, named after this run's timestamp)
report_path = save_training_report(history, best_metrics, training_config, MODEL_SAVE_DIR, timestamp)

# Save final model (in addition to best checkpoint); the timestamp in the
# name keeps this file separate from the fixed-name "_best" checkpoint.
final_model_path = os.path.join(MODEL_SAVE_DIR, f'PotatoDisease_MobileNetV2_final_{timestamp}.keras')
model.save(final_model_path)

# Recap of every artifact written by this notebook run
print("\n" + "=" * 60)
print("üíæ MODEL & REPORTS SAVED")
print("=" * 60)
print(f"\nüìÅ Files saved to: {MODEL_SAVE_DIR}")
print(f"   ‚Ä¢ Best model: PotatoDisease_MobileNetV2_best.keras")
print(f"   ‚Ä¢ Final model: PotatoDisease_MobileNetV2_final_{timestamp}.keras")
print(f"   ‚Ä¢ Training report: training_report_{timestamp}.json")
print(f"\nüìÅ Logs saved to: {LOGS_DIR}")
print(f"   ‚Ä¢ Training log: training_log_{timestamp}.csv")
print(f"   ‚Ä¢ Training curves: training_curves_{timestamp}.png")

## 10. Quick Validation on Test Set (Preview)

In [None]:
# Quick evaluation on test set using the in-memory model (not a reloaded
# checkpoint file).
print("\nüß™ Quick Test Set Evaluation...")

test_results = model.evaluate(test_gen, verbose=1)

# Index 0 is read as the loss and index 1 as accuracy.
# NOTE(review): indices 2 and 3 are presumed to be precision and recall, in the
# order the metrics were passed to compile() — confirm if that list changes.
print(f"\nüìä Test Set Results:")
print(f"   ‚Ä¢ Test Loss: {test_results[0]:.4f}")
print(f"   ‚Ä¢ Test Accuracy: {test_results[1]*100:.2f}%")
if len(test_results) > 2:
    print(f"   ‚Ä¢ Test Precision: {test_results[2]*100:.2f}%")
if len(test_results) > 3:
    print(f"   ‚Ä¢ Test Recall: {test_results[3]*100:.2f}%")

## 11. Hyperparameter Tuning Experiments (Optional)

Run this section to experiment with different hyperparameters.

In [None]:
# ===== HYPERPARAMETER EXPERIMENTS =====
# Optional, short training runs that compare a few hand-picked settings by
# their best validation accuracy. Nothing is saved to disk by this cell.

RUN_EXPERIMENTS = False  # Set to True to run hyperparameter search

if RUN_EXPERIMENTS:
    # Define hyperparameter grid: one dict per run
    # (learning rate, batch size, number of fine-tuned backbone layers, label)
    experiments = [
        {'lr': 0.001, 'batch': 32, 'fine_tune': 10, 'name': 'high_lr'},
        {'lr': 0.0001, 'batch': 16, 'fine_tune': 20, 'name': 'small_batch'},
        {'lr': 0.00005, 'batch': 32, 'fine_tune': 30, 'name': 'more_finetune'},
    ]
    
    results = []
    
    for exp in experiments:
        print(f"\nüî¨ Experiment: {exp['name']}")
        print(f"   LR: {exp['lr']}, Batch: {exp['batch']}, Fine-tune: {exp['fine_tune']}")
        
        # Create new data generators with different batch size; the test
        # generator is built as well but discarded, since experiments are
        # compared on validation accuracy only.
        train_g, val_g, _ = create_data_generators(
            TRAIN_DIR, VAL_DIR, TEST_DIR, IMG_SIZE, exp['batch']
        )
        
        # Build and train a fresh model for this configuration
        exp_model = build_model(IMG_SHAPE, NUM_CLASSES, exp['lr'], exp['fine_tune'])
        
        # Only early stopping here: no checkpointing or CSV logging
        exp_callbacks = [
            EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        ]
        
        # No step counts are passed, so Keras uses its defaults for the
        # generators; verbose=0 keeps the per-epoch output silent.
        exp_history = exp_model.fit(
            train_g,
            epochs=15,  # Reduced epochs for experiments
            validation_data=val_g,
            callbacks=exp_callbacks,
            verbose=0
        )
        
        # Score the run by its highest validation accuracy over all epochs
        best_val_acc = max(exp_history.history['val_accuracy'])
        results.append({
            'name': exp['name'],
            'lr': exp['lr'],
            'batch': exp['batch'],
            'fine_tune': exp['fine_tune'],
            'best_val_acc': best_val_acc
        })
        print(f"   ‚úÖ Best val accuracy: {best_val_acc*100:.2f}%")
        
        # Clear memory before the next experiment builds another model
        del exp_model
        tf.keras.backend.clear_session()
    
    # Show experiment results, best validation accuracy first
    print("\n" + "=" * 50)
    print("üìä HYPERPARAMETER EXPERIMENT RESULTS")
    print("=" * 50)
    for r in sorted(results, key=lambda x: x['best_val_acc'], reverse=True):
        print(f"\n   {r['name']}:")
        print(f"   LR={r['lr']}, Batch={r['batch']}, Fine-tune={r['fine_tune']}")
        print(f"   ‚Üí Val Accuracy: {r['best_val_acc']*100:.2f}%")
else:
    print("\n‚è≠Ô∏è Hyperparameter experiments skipped.")
    print("   Set RUN_EXPERIMENTS = True to run experiments.")

---

## ✅ Phase 6 Complete!

**Training Summary:**
- ✅ Model trained successfully
- ✅ Best model checkpoint saved
- ✅ Training curves generated
- ✅ Training report saved

**Saved Files:**
```
/content/drive/MyDrive/DrukFarm/models/
├── PotatoDisease_MobileNetV2_best.keras     (best checkpoint)
├── PotatoDisease_MobileNetV2_final_*.keras  (final model)
└── training_report_*.json                   (training report)

/content/drive/MyDrive/DrukFarm/training_logs/
├── training_log_*.csv                       (epoch-by-epoch metrics)
└── training_curves_*.png                    (visualization)
```

**Next Steps:**
- Phase 7: Model Evaluation & Testing