# Coconut Mite Detection Model - Proper Training

**Model:** MobileNetV2 (Transfer Learning)  
**Dataset:** Coconut Mite vs Healthy Leaves  

## Dataset Structure (Pre-organized by User):
- **Train:** 8,975 images (augmented)
- **Validation:** 188 images (originals only)
- **Test:** 189 images (originals only)

## Madam's Requirements:
1. P/R/F1 close for each class
2. Similar F1 across classes
3. Accuracy close to F1
4. Step-by-step notebook with outputs
5. No hardcoded values
6. Charts included

## 1. Setup and Imports

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import json
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support

# Seed every random number generator used in this notebook so runs repeat
SEED = 42
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Query the GPU list once and reuse it for both report lines
gpu_devices = tf.config.list_physical_devices('GPU')
gpu_found = len(gpu_devices) > 0

print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {gpu_found}")
if gpu_found:
    print(f"GPU Device: {gpu_devices}")
run_started_at = datetime.now()
print(f"Timestamp: {run_started_at.strftime('%Y-%m-%d %H:%M:%S')}")

## 2. Configuration

All hyperparameters defined here - NO hardcoded values in training code.

In [None]:
# Filesystem layout - reuses the EXISTING folder structure on disk
BASE_DIR = Path(r"D:\SLIIT\Reaserch Project\CoconutHealthMonitor\Research\ml")
_RAW_DIR = BASE_DIR / 'data' / 'raw'
DATA_DIR = _RAW_DIR / 'pest'
COMBINED_DIR = _RAW_DIR / 'pest_combined'  # TF-compatible tree is built here later
MODEL_DIR = BASE_DIR / 'models' / 'coconut_mite_v6'

# Make sure the output folder for the model and charts exists
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Hyperparameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 16
EPOCHS = 50
LEARNING_RATE = 1e-4
DROPOUT_RATE = 0.5
L2_REG = 0.01

# Class names
CLASS_NAMES = ['coconut_mite', 'healthy']

# Echo the settings so the notebook output records what was used
print("Configuration:")
for _label, _value in (
    ("Data Directory", DATA_DIR),
    ("Model Directory", MODEL_DIR),
    ("Image Size", IMG_SIZE),
    ("Batch Size", BATCH_SIZE),
    ("Epochs", EPOCHS),
    ("Learning Rate", LEARNING_RATE),
    ("Dropout Rate", DROPOUT_RATE),
    ("L2 Regularization", L2_REG),
):
    print(f"  {_label}: {_value}")

## 3. Analyze Existing Dataset Structure

Count images in each pre-organized folder.

In [None]:
def count_images(directory):
    """Count the image files located directly inside a directory.

    A file counts as an image when its extension is .jpg, .jpeg or .png,
    compared case-insensitively. Matching on the lowercased suffix (instead
    of globbing separate lower- and upper-case patterns) ensures each file is
    counted exactly once, including on filesystems where glob matching is
    case-insensitive and both patterns would hit the same file.

    Args:
        directory: Folder to scan, as a ``pathlib.Path`` or a path string.
            Sub-folders are not searched.

    Returns:
        The number of image files found, or 0 when ``directory`` does not
        exist or is not a directory.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return 0

    image_suffixes = {'.jpg', '.jpeg', '.png'}
    return sum(
        1
        for entry in directory.iterdir()
        # is_file() keeps folders that merely look like images out of the count
        if entry.is_file() and entry.suffix.lower() in image_suffixes
    )

print("Analyzing Existing Dataset Structure...")
print("="*60)

# Logical split name -> folder name actually used on disk, per class.
# The 'healthy' class keeps its test images in a lowercase 'test' folder.
folder_mapping = {
    'coconut_mite': {'train': 'Train', 'validation': 'Validation', 'test': 'Test'},
    'healthy': {'train': 'Train', 'validation': 'Validation', 'test': 'test'},
}

split_names = ('train', 'validation', 'test')
dataset_counts = {name: {} for name in split_names}

# Per-class listing: one line per split with the folder that was scanned
for cls in CLASS_NAMES:
    print(f"\nClass: {cls}")
    for split in split_names:
        folder_path = DATA_DIR / cls / folder_mapping[cls][split]
        n_images = count_images(folder_path)
        dataset_counts[split][cls] = n_images
        print(f"  {split.capitalize():12}: {n_images:,} images ({folder_path})")

# Per-split totals, accumulated into the grand total as we go
print("\n" + "="*60)
print("DATASET SUMMARY:")
print("="*60)
total_all = 0
for split, per_class in dataset_counts.items():
    total = sum(per_class.values())
    total_all += total
    print(f"{split.capitalize():12}: {per_class} = {total:,} images")

print(f"\nTOTAL: {total_all:,} images")

## 4. Create TensorFlow-Compatible Directory Structure

TensorFlow's `image_dataset_from_directory` expects:
```
split/
  class1/
  class2/
```

We'll create hard links (falling back to plain file copies when linking fails) to avoid duplicating data.

In [None]:
import os

def create_combined_structure(data_dir, combined_dir, class_names, folder_mapping):
    """Build a ``combined_dir/<split>/<class>/`` tree of image files.

    Each image found in ``data_dir/<class>/<folder_mapping[class][split]>`` is
    hard-linked into the matching destination folder; when a hard link cannot
    be created (``OSError``), the file is copied instead. Any existing
    ``combined_dir`` is deleted first so stale files from an earlier run
    cannot remain.

    Args:
        data_dir: Root folder holding one sub-folder per class.
        combined_dir: Destination root. It is removed and recreated.
        class_names: Iterable of class folder names to process.
        folder_mapping: ``{class: {split: folder_name}}`` giving the on-disk
            folder name for each of 'train', 'validation' and 'test'.

    Returns:
        ``combined_dir`` as a ``pathlib.Path``.

    Raises:
        ValueError: If ``combined_dir`` is ``data_dir`` itself or one of its
            parents, because clearing it would delete the source images.
    """
    data_dir = Path(data_dir)
    combined_dir = Path(combined_dir)
    image_suffixes = {'.jpg', '.jpeg', '.png'}

    # Refuse to wipe a folder that contains the source data.
    src_root = data_dir.resolve()
    dst_root = combined_dir.resolve()
    if dst_root == src_root or dst_root in src_root.parents:
        raise ValueError(
            f"combined_dir ({combined_dir}) must not contain data_dir ({data_dir})"
        )

    # Clean existing output
    if combined_dir.exists():
        shutil.rmtree(combined_dir)

    print("Creating TensorFlow-compatible directory structure...")
    print("="*60)

    for split in ['train', 'validation', 'test']:
        for cls in class_names:
            src_path = data_dir / cls / folder_mapping[cls][split]

            # The destination is always created so every split has every class folder.
            dst_path = combined_dir / split / cls
            dst_path.mkdir(parents=True, exist_ok=True)

            if not src_path.is_dir():
                # Surface the problem instead of silently leaving an empty class.
                print(f"  WARNING: {split}/{cls}: source folder not found ({src_path})")
                continue

            linked = 0
            copied = 0
            for img_file in sorted(src_path.iterdir()):
                if not img_file.is_file() or img_file.suffix.lower() not in image_suffixes:
                    continue
                dst_file = dst_path / img_file.name
                try:
                    # Hard link first: no extra disk space is used
                    os.link(str(img_file), str(dst_file))
                    linked += 1
                except OSError:
                    # Fall back to a real copy if the link cannot be created
                    shutil.copy2(img_file, dst_file)
                    copied += 1

            print(f"  {split}/{cls}: {linked + copied} files "
                  f"({linked} linked, {copied} copied)")

    print("\n" + "="*60)
    print(f"Combined directory created: {combined_dir}")
    return combined_dir

# Create the structure: builds COMBINED_DIR/<split>/<class>/ from the
# per-class source folders and keeps the returned root path
combined_dir = create_combined_structure(DATA_DIR, COMBINED_DIR, CLASS_NAMES, folder_mapping)

## 5. Visualize Dataset Distribution

In [None]:
# Visualize distribution: one bar chart per split, one bar per class
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
colors = ['#e74c3c', '#2ecc71']  # Red for mite, green for healthy

for ax, split in zip(axes, ['train', 'validation', 'test']):
    # Counts come from dataset_counts, in CLASS_NAMES order so colors line up
    vals = [dataset_counts[split][c] for c in CLASS_NAMES]
    total = sum(vals)
    bars = ax.bar(CLASS_NAMES, vals, color=colors, edgecolor='black', linewidth=1.5)
    
    # Only the training split is labelled as augmented
    title_suffix = "(augmented)" if split == 'train' else "(originals)"
    ax.set_title(f'{split.capitalize()} Set\n{total:,} images {title_suffix}', 
                 fontsize=11, fontweight='bold')
    ax.set_ylabel('Number of Images')
    # Leave 20% headroom for the value labels; fall back to 10 when the split is empty
    ax.set_ylim([0, max(vals) * 1.2 if max(vals) > 0 else 10])
    
    # Write each count just above its bar
    for bar, v in zip(bars, vals):
        ax.text(bar.get_x() + bar.get_width()/2, v + max(vals)*0.02, 
                f'{v:,}', ha='center', fontweight='bold', fontsize=11)

plt.suptitle('Dataset Distribution (Pre-organized - No Data Leakage)', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
# Save before show() so the figure is written to MODEL_DIR as displayed
plt.savefig(MODEL_DIR / 'dataset_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"Chart saved: {MODEL_DIR / 'dataset_distribution.png'}")

## 6. Load Datasets

In [None]:
# Read the three splits from the combined split/class directory tree
print("Loading datasets...")

# Settings shared by every split
_loader_args = dict(image_size=IMG_SIZE, batch_size=BATCH_SIZE)

# Training data is shuffled with a fixed seed
train_ds = tf.keras.utils.image_dataset_from_directory(
    str(COMBINED_DIR / 'train'), shuffle=True, seed=SEED, **_loader_args
)

# Validation and test keep their on-disk order
val_ds = tf.keras.utils.image_dataset_from_directory(
    str(COMBINED_DIR / 'validation'), shuffle=False, **_loader_args
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    str(COMBINED_DIR / 'test'), shuffle=False, **_loader_args
)

# Class names as detected from the training folders
detected_classes = train_ds.class_names
print(f"\nDetected Classes: {detected_classes}")
for _split_label, _split_ds in (
    ("Training", train_ds),
    ("Validation", val_ds),
    ("Test", test_ds),
):
    print(f"{_split_label} batches: {len(_split_ds)}")

In [None]:
# Show sample images: up to 8 images from the first training batch in a 2x4 grid
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
fig.suptitle('Sample Images from Training Set', fontsize=14, fontweight='bold')

for images, labels in train_ds.take(1):
    for i, ax in enumerate(axes.flat):
        # Guard against a batch smaller than the grid; unused axes stay untouched
        if i < len(images):
            # Images are cast to uint8 for display
            ax.imshow(images[i].numpy().astype('uint8'))
            # The label is used as an index into the detected class names
            cls_name = detected_classes[labels[i]]
            color = 'red' if 'mite' in cls_name else 'green'
            ax.set_title(cls_name, fontsize=10, color=color)
            ax.axis('off')

plt.tight_layout()
plt.savefig(MODEL_DIR / 'sample_images.png', dpi=150, bbox_inches='tight')
plt.show()

## 7. Data Preprocessing

- **Training:** On-the-fly augmentation + Normalization
- **Validation/Test:** Normalization ONLY (no augmentation)

In [None]:
# On-the-fly data augmentation (ONLY for training)
# Note: Training data already has augmented images, but we add light augmentation
# for additional variation during training
# Layers applied in order: random horizontal flip, random rotation (factor 0.1),
# random zoom (factor 0.1)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
], name='data_augmentation')

# Normalization (MobileNetV2 expects [-1, 1])
# Computes pixel * (1/127.5) - 1, which maps 0 -> -1 and 255 -> 1
normalization = tf.keras.layers.Rescaling(1./127.5, offset=-1, name='normalization')

# Preprocessing functions
def preprocess_train(image, label):
    """Augment a training image batch, then normalize it; the label is unchanged."""
    augmented = data_augmentation(image, training=True)
    return normalization(augmented), label

def preprocess_val(image, label):
    """Normalize a validation/test image batch (no augmentation); the label is unchanged."""
    return normalization(image), label

# Apply preprocessing
# AUTOTUNE lets tf.data choose the parallelism and prefetch buffer sizes
AUTOTUNE = tf.data.AUTOTUNE

# Only the training pipeline goes through preprocess_train (augmentation);
# validation and test share preprocess_val (normalization only)
train_ds_prep = train_ds.map(preprocess_train, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
val_ds_prep = val_ds.map(preprocess_val, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
test_ds_prep = test_ds.map(preprocess_val, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

print("Preprocessing Applied:")
print("  Training:   Light Augmentation + Normalization [-1, 1]")
print("  Validation: Normalization [-1, 1] only")
print("  Test:       Normalization [-1, 1] only")

## 8. Build Model

MobileNetV2 with strong regularization to prevent overfitting.

In [None]:
def build_model(img_size, dropout_rate, l2_reg):
    """
    Assemble a binary classifier: frozen MobileNetV2 backbone + regularized head.

    Args:
        img_size: (height, width) of the input images; 3 channels are assumed.
        dropout_rate: Dropout rate for the first two dropout layers; the last
            dropout layer uses half of it.
        l2_reg: L2 penalty applied to the kernels of the two hidden Dense layers.

    Returns:
        (model, base_model): the full Keras model and its MobileNetV2 backbone.
    """
    input_shape = (img_size[0], img_size[1], 3)

    # ImageNet-pretrained feature extractor without its classification top
    backbone = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )
    # Backbone weights are frozen
    backbone.trainable = False

    image_input = tf.keras.Input(shape=input_shape)

    # Classification head, listed in the order the layers are applied
    head_layers = [
        GlobalAveragePooling2D(),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(128, activation='relu', kernel_regularizer=l2(l2_reg)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(64, activation='relu', kernel_regularizer=l2(l2_reg)),
        Dropout(dropout_rate / 2),
        Dense(1, activation='sigmoid'),
    ]

    # training=False runs the backbone in inference mode
    tensor = backbone(image_input, training=False)
    for layer in head_layers:
        tensor = layer(tensor)

    return Model(image_input, tensor), backbone

# Instantiate the network from the configured hyperparameters
model, base_model = build_model(IMG_SIZE, DROPOUT_RATE, L2_REG)

# Report the size of the backbone and of the full model
print("Model Architecture:")
print("  Base Model: MobileNetV2 (ImageNet weights)")
print(f"  Base Model Layers: {len(base_model.layers)}")
print(f"  Total Parameters: {model.count_params():,}")
trainable = sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)
print(f"  Trainable Parameters: {trainable:,}")

In [None]:
# Print the Keras summary of the assembled model
model.summary()

## 9. Training

In [None]:
# Callback settings are named once so the callbacks and the printed summary
# below cannot drift apart.
EARLY_STOPPING_PATIENCE = 10  # epochs without val_loss improvement before stopping
LR_REDUCE_PATIENCE = 5        # epochs without val_loss improvement before lowering the LR
LR_REDUCE_FACTOR = 0.5        # multiplier applied to the LR on each reduction
MIN_LEARNING_RATE = 1e-7      # floor for the reduced learning rate

# Compile model: a single sigmoid output is trained with binary cross-entropy
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Callbacks
# NOTE: EarlyStopping and ReduceLROnPlateau watch val_loss, while
# ModelCheckpoint keeps the epoch with the highest val_accuracy, so the file
# on disk and the weights restored in memory can come from different epochs.
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=EARLY_STOPPING_PATIENCE,
        restore_best_weights=True,
        verbose=1
    ),
    ModelCheckpoint(
        str(MODEL_DIR / 'best_model.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=LR_REDUCE_FACTOR,
        patience=LR_REDUCE_PATIENCE,
        min_lr=MIN_LEARNING_RATE,
        verbose=1
    )
]

print("Training Configuration:")
print("  Optimizer: Adam")
print(f"  Learning Rate: {LEARNING_RATE}")
print("  Loss: Binary Crossentropy")
print(f"  Max Epochs: {EPOCHS}")
print(f"  Early Stopping Patience: {EARLY_STOPPING_PATIENCE}")
print(f"  LR Reduction Patience: {LR_REDUCE_PATIENCE}")

In [None]:
# Fit the model on the preprocessed training data, validating every epoch
print("\nStarting Training...")
print("="*60)

history = model.fit(
    train_ds_prep,
    epochs=EPOCHS,
    validation_data=val_ds_prep,
    callbacks=callbacks,
    verbose=1,
)

# Early stopping may end training before EPOCHS, so count what actually ran
epochs_run = len(history.history['accuracy'])
peak_val_accuracy = max(history.history['val_accuracy'])

print("\n" + "="*60)
print("Training Complete!")
print(f"Epochs trained: {epochs_run}")
print(f"Best Validation Accuracy: {peak_val_accuracy*100:.2f}%")

## 10. Training History Visualization

In [None]:
# Persist the per-epoch curves as plain floats so they are JSON-serializable
tracked_metrics = ('accuracy', 'val_accuracy', 'loss', 'val_loss')
history_dict = {
    metric: list(map(float, history.history[metric]))
    for metric in tracked_metrics
}

history_path = MODEL_DIR / 'training_history.json'
with open(history_path, 'w') as history_file:
    json.dump(history_dict, history_file, indent=2)

print(f"Training history saved: {history_path}")

In [None]:
# Plot training history: accuracy (left) and loss (right) per epoch
epochs_range = range(1, len(history_dict['accuracy']) + 1)  # epochs numbered from 1

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Accuracy
axes[0].plot(epochs_range, history_dict['accuracy'], 'b-o', label='Training', markersize=4, linewidth=2)
axes[0].plot(epochs_range, history_dict['val_accuracy'], 'r-s', label='Validation', markersize=4, linewidth=2)

# Mark the first epoch that reached the highest validation accuracy
best_epoch = history_dict['val_accuracy'].index(max(history_dict['val_accuracy'])) + 1
best_val_acc = max(history_dict['val_accuracy'])
axes[0].scatter([best_epoch], [best_val_acc], color='green', s=200, zorder=5, marker='*',
                label=f'Best: {best_val_acc*100:.2f}%')

axes[0].set_xlabel('Epoch', fontsize=12)
axes[0].set_ylabel('Accuracy', fontsize=12)
axes[0].set_title('Model Accuracy', fontsize=14, fontweight='bold')
axes[0].legend(loc='lower right')
axes[0].grid(True, alpha=0.3)

# Loss
axes[1].plot(epochs_range, history_dict['loss'], 'b-o', label='Training', markersize=4, linewidth=2)
axes[1].plot(epochs_range, history_dict['val_loss'], 'r-s', label='Validation', markersize=4, linewidth=2)
axes[1].set_xlabel('Epoch', fontsize=12)
axes[1].set_ylabel('Loss', fontsize=12)
axes[1].set_title('Model Loss', fontsize=14, fontweight='bold')
axes[1].legend(loc='upper right')
axes[1].grid(True, alpha=0.3)

# Calculate gap
# NOTE: uses the LAST recorded epoch's accuracies, not the best epoch's
final_train_acc = history_dict['accuracy'][-1]
final_val_acc = history_dict['val_accuracy'][-1]
gap = abs(final_train_acc - final_val_acc)

plt.suptitle(f'Training History (Train-Val Gap: {gap*100:.2f}%)', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(MODEL_DIR / 'training_history.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nFinal Training Accuracy: {final_train_acc*100:.2f}%")
print(f"Final Validation Accuracy: {final_val_acc*100:.2f}%")
print(f"Train-Val Gap: {gap*100:.2f}%")

## 11. Test Set Evaluation

In [None]:
# Load best model: the checkpoint file written by the ModelCheckpoint callback
# (the epoch with the highest val_accuracy) is used for all evaluation below
best_model = tf.keras.models.load_model(str(MODEL_DIR / 'best_model.keras'))
print(f"Loaded best model: {MODEL_DIR / 'best_model.keras'}")

In [None]:
# Get predictions on test set
# Labels and probabilities are collected in the same pass so they stay paired.
y_true = []
y_pred_probs = []

print("Running predictions on test set...")
for images, labels in test_ds_prep:
    # Call the model directly on each batch: Keras documents this as the
    # faster path for a single batch, whereas predict() sets up a full
    # prediction loop on every call. training=False keeps BatchNormalization
    # and Dropout in inference mode.
    batch_probs = best_model(images, training=False).numpy()
    y_true.extend(labels.numpy())
    y_pred_probs.extend(batch_probs.flatten())

y_true = np.array(y_true)
y_pred_probs = np.array(y_pred_probs)

print(f"Test samples: {len(y_true)}")
print(f"Class distribution: {dict(zip(*np.unique(y_true, return_counts=True)))}")

In [None]:
# Find optimal threshold for balanced Precision/Recall.
# The search runs on the VALIDATION set. Tuning the threshold on the test set
# and then reporting test metrics at that same threshold would leak test
# labels into model selection and make the reported scores optimistic.
print("Finding optimal threshold for balanced Precision/Recall (validation set)...")
print("-"*60)

# Collect validation labels and predicted probabilities in one pass
val_true = []
val_probs = []
for images, labels in val_ds_prep:
    val_true.extend(labels.numpy())
    val_probs.extend(best_model(images, training=False).numpy().flatten())
val_true = np.array(val_true)
val_probs = np.array(val_probs)

thresholds = np.arange(0.1, 0.9, 0.05)
results = []

for thresh in thresholds:
    val_pred = (val_probs > thresh).astype(int)
    p_val, r_val, f1_val, _ = precision_recall_fscore_support(
        val_true, val_pred, average=None, zero_division=0
    )

    # Largest per-class |precision - recall| plus the F1 difference between
    # classes; the length guards cover a single-class result.
    if len(p_val) > 1:
        max_pr_gap = max(abs(p_val[0] - r_val[0]), abs(p_val[1] - r_val[1]))
    else:
        max_pr_gap = abs(p_val[0] - r_val[0])
    f1_diff = abs(f1_val[0] - f1_val[1]) if len(f1_val) > 1 else 0
    combined = max_pr_gap + f1_diff
    acc = np.mean(val_true == val_pred)

    results.append({
        'threshold': float(thresh),
        'max_pr_gap': max_pr_gap,
        'f1_diff': f1_diff,
        'combined': combined,
        'accuracy': acc,
        'macro_f1': np.mean(f1_val)
    })

    print(f"Thresh {thresh:.2f}: P-R Gap={max_pr_gap:.3f}, F1 Diff={f1_diff:.3f}, Acc={acc*100:.1f}%, F1={np.mean(f1_val)*100:.1f}%")

# Find best threshold (minimize combined gap); ties keep the lowest threshold
best_result = min(results, key=lambda x: x['combined'])
OPTIMAL_THRESHOLD = best_result['threshold']

print("-"*60)
print(f"Optimal Threshold: {OPTIMAL_THRESHOLD:.2f} (selected on validation set)")

In [None]:
# Convert probabilities to hard 0/1 labels at the selected threshold
y_pred = (y_pred_probs > OPTIMAL_THRESHOLD).astype(int)

# Per-class precision / recall / F1 / support, then the overall figures
p, r, f1, support = precision_recall_fscore_support(
    y_true, y_pred, average=None, zero_division=0
)
accuracy = (y_true == y_pred).mean()
macro_f1 = f1.mean()

report_rule = "="*60
print(report_rule)
print("CLASSIFICATION REPORT")
print(report_rule)
print(classification_report(y_true, y_pred, target_names=detected_classes, digits=4))

In [None]:
# Confusion Matrix: raw counts (left) and row-normalized rates (right)
cm = confusion_matrix(y_true, y_pred)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Counts
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0],
            xticklabels=detected_classes, yticklabels=detected_classes, annot_kws={'size': 20})
axes[0].set_title('Confusion Matrix (Counts)', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Predicted')
axes[0].set_ylabel('Actual')

# Normalized
# Each row is divided by its own total (the number of actual samples of that
# class); the 1e-10 term avoids division by zero for an empty row
cm_norm = cm.astype('float') / (cm.sum(axis=1)[:, np.newaxis] + 1e-10)
sns.heatmap(cm_norm, annot=True, fmt='.2%', cmap='Blues', ax=axes[1],
            xticklabels=detected_classes, yticklabels=detected_classes, annot_kws={'size': 16})
axes[1].set_title('Confusion Matrix (Normalized)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Predicted')
axes[1].set_ylabel('Actual')

plt.tight_layout()
plt.savefig(MODEL_DIR / 'confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Performance Metrics Visualization: overall scores (left), per-class scores (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Overall metrics
# Precision and Recall shown here are unweighted means over the classes
metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
metrics_values = [accuracy, np.mean(p), np.mean(r), macro_f1]
colors_metrics = ['#3498db', '#2ecc71', '#f39c12', '#e74c3c']

bars = axes[0].bar(metrics_names, metrics_values, color=colors_metrics, edgecolor='black')
axes[0].set_ylim([0, 1.1])  # headroom above 1.0 for the value labels
axes[0].set_title('Overall Model Performance', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Score')
for bar, val in zip(bars, metrics_values):
    axes[0].text(bar.get_x() + bar.get_width()/2, val + 0.02, f'{val:.2%}', ha='center', fontweight='bold')

# Per-class metrics
# Three bars per class, offset by one bar width around each tick
x = np.arange(len(detected_classes))
width = 0.25
axes[1].bar(x - width, p, width, label='Precision', color='#3498db', edgecolor='black')
axes[1].bar(x, r, width, label='Recall', color='#2ecc71', edgecolor='black')
axes[1].bar(x + width, f1, width, label='F1-Score', color='#e74c3c', edgecolor='black')
axes[1].set_ylim([0, 1.1])
axes[1].set_title('Per-Class Performance', fontsize=14, fontweight='bold')
axes[1].set_xticks(x)
axes[1].set_xticklabels(detected_classes)
axes[1].legend()
axes[1].set_ylabel('Score')

plt.tight_layout()
plt.savefig(MODEL_DIR / 'performance_metrics.png', dpi=150, bbox_inches='tight')
plt.show()

## 12. Madam's Requirements Validation

In [None]:
print("="*60)
print("MADAM'S REQUIREMENTS VALIDATION")
print("="*60)

# Limits are named once and interpolated into every message below, so the
# printed criteria always match the values that are actually enforced.
TOLERANCE = 0.10          # max gap allowed by checks 1-3
OVERFIT_TOLERANCE = 0.15  # max train/validation accuracy gap allowed by check 4


def _gap_status(gap, limit):
    """Return "PASS" when gap is strictly below limit, otherwise "FAIL"."""
    return "PASS" if gap < limit else "FAIL"


# One boolean per individual check. The overall verdict is derived from the
# same comparison that produces each printed label, so the two cannot
# disagree (for example when a gap is NaN).
check_outcomes = []

# Requirement 1: P/R/F1 balanced per class
print(f"\n[1] P/R/F1 should be close for EACH class (gap < {TOLERANCE:.0%})")
print("-"*60)
for i, cls in enumerate(detected_classes):
    pr_gap = abs(p[i] - r[i])
    status = _gap_status(pr_gap, TOLERANCE)
    check_outcomes.append(status == "PASS")
    print(f"  {cls}:")
    print(f"    Precision: {p[i]:.4f}")
    print(f"    Recall:    {r[i]:.4f}")
    print(f"    F1-Score:  {f1[i]:.4f}")
    print(f"    P-R Gap:   {pr_gap:.4f} [{status}]")

# Requirement 2: F1 similar across classes
print(f"\n[2] F1-Scores should be similar across classes (diff < {TOLERANCE:.0%})")
print("-"*60)
f1_diff = abs(f1[0] - f1[1]) if len(f1) > 1 else 0
status = _gap_status(f1_diff, TOLERANCE)
check_outcomes.append(status == "PASS")
for i, cls in enumerate(detected_classes):
    print(f"  {cls} F1: {f1[i]:.4f}")
print(f"  F1 Difference: {f1_diff:.4f} [{status}]")

# Requirement 3: Accuracy close to F1
print(f"\n[3] Accuracy should be close to F1-Score (diff < {TOLERANCE:.0%})")
print("-"*60)
acc_f1_diff = abs(accuracy - macro_f1)
status = _gap_status(acc_f1_diff, TOLERANCE)
check_outcomes.append(status == "PASS")
print(f"  Accuracy:  {accuracy:.4f}")
print(f"  Macro F1:  {macro_f1:.4f}")
print(f"  Difference: {acc_f1_diff:.4f} [{status}]")

# Requirement 4: No severe overfitting
print(f"\n[4] Train-Val gap should be small (gap < {OVERFIT_TOLERANCE:.0%})")
print("-"*60)
train_val_gap = abs(final_train_acc - final_val_acc)
status = _gap_status(train_val_gap, OVERFIT_TOLERANCE)
check_outcomes.append(status == "PASS")
print(f"  Training Accuracy:   {final_train_acc:.4f}")
print(f"  Validation Accuracy: {final_val_acc:.4f}")
print(f"  Gap: {train_val_gap:.4f} [{status}]")

all_pass = all(check_outcomes)

print("\n" + "="*60)
if all_pass:
    print("ALL REQUIREMENTS PASSED!")
else:
    print("SOME REQUIREMENTS NEED ATTENTION")
print("="*60)

## 13. Save Model Info

In [None]:
# Per-class test metrics, converted to plain Python numbers for JSON
per_class_results = {
    class_name: {
        'precision': float(p[idx]),
        'recall': float(r[idx]),
        'f1': float(f1[idx]),
        'support': int(support[idx]),
    }
    for idx, class_name in enumerate(detected_classes)
}

# Split sizes derived from the counts gathered during dataset analysis
split_totals = {
    split_key: sum(dataset_counts[split_key].values())
    for split_key in ('train', 'validation', 'test')
}

# Everything needed to reproduce or deploy this run, in one record
model_info = {
    'model_name': 'Coconut Mite Detection Model v6',
    'base_model': 'MobileNetV2',
    'input_size': list(IMG_SIZE) + [3],
    'classes': detected_classes,
    'threshold': float(OPTIMAL_THRESHOLD),
    'hyperparameters': {
        'batch_size': BATCH_SIZE,
        'learning_rate': LEARNING_RATE,
        'dropout_rate': DROPOUT_RATE,
        'l2_regularization': L2_REG,
        'epochs_trained': len(history_dict['accuracy']),
    },
    'dataset': {
        'train': split_totals['train'],
        'validation': split_totals['validation'],
        'test': split_totals['test'],
        'per_class': dataset_counts,
    },
    'training_results': {
        'final_train_accuracy': float(final_train_acc),
        'final_val_accuracy': float(final_val_acc),
        'best_val_accuracy': float(best_val_acc),
        'train_val_gap': float(train_val_gap),
    },
    'test_results': {
        'accuracy': float(accuracy),
        'per_class': per_class_results,
        'macro_f1': float(macro_f1),
    },
    'requirements_validation': {
        'all_passed': all_pass,
    },
    'timestamp': datetime.now().isoformat(),
}

info_path = MODEL_DIR / 'model_info.json'
with open(info_path, 'w') as info_file:
    json.dump(model_info, info_file, indent=2)

print(f"Model info saved: {info_path}")

## 14. Final Summary

In [None]:
# Recap of the whole run: model, data, training curve end-points, test scores
summary_rule = "="*60
print(summary_rule)
print("TRAINING SUMMARY")
print(summary_rule)

img_height, img_width = IMG_SIZE
print("\n--- Model ---")
print("  Architecture: MobileNetV2 (Transfer Learning)")
print(f"  Input Size: {img_height}x{img_width}x3")
print(f"  Dropout: {DROPOUT_RATE}")
print(f"  L2 Regularization: {L2_REG}")

print("\n--- Dataset ---")
for split_label, split_key, split_note in (
    ("Training", 'train', "augmented"),
    ("Validation", 'validation', "originals"),
    ("Test", 'test', "originals"),
):
    split_total = sum(dataset_counts[split_key].values())
    print(f"  {split_label}: {split_total:,} images ({split_note})")

print("\n--- Training ---")
print(f"  Epochs: {len(history_dict['accuracy'])}")
for metric_label, fraction in (
    ("Final Train Accuracy", final_train_acc),
    ("Final Val Accuracy", final_val_acc),
    ("Best Val Accuracy", best_val_acc),
    ("Train-Val Gap", train_val_gap),
):
    print(f"  {metric_label}: {fraction*100:.2f}%")

print("\n--- Test Results ---")
print(f"  Threshold: {OPTIMAL_THRESHOLD:.2f}")
print(f"  Test Accuracy: {accuracy*100:.2f}%")
print(f"  Macro F1-Score: {macro_f1*100:.2f}%")

print("\n--- Per-Class Results ---")
for idx, class_name in enumerate(detected_classes):
    print(
        f"  {class_name}:\n"
        f"    Precision: {p[idx]*100:.2f}%\n"
        f"    Recall: {r[idx]*100:.2f}%\n"
        f"    F1-Score: {f1[idx]*100:.2f}%"
    )

print("\n--- Files Saved ---")
for artifact_name in (
    'best_model.keras',
    'model_info.json',
    'training_history.json',
    'training_history.png',
    'confusion_matrix.png',
    'performance_metrics.png',
    'dataset_distribution.png',
):
    print(f"  {MODEL_DIR / artifact_name}")

print("\n" + summary_rule)
print("ALL MADAM'S REQUIREMENTS: PASSED" if all_pass else "SOME REQUIREMENTS NEED ATTENTION")
print(summary_rule)