In [None]:
```xml
<VSCode.Cell language="markdown">
# 🍎 Fruit Disease Detection - Training Notebook

**Complete training pipeline for EfficientNet-B0 model**

This notebook will:
1. Check and fix dataset structure automatically
2. Train the model on 17 fruit disease classes
3. Generate evaluation metrics and visualizations
4. Save the model as fruit_disease_model.h5

**Expected time:** 1-3 hours (GPU) or 6-12 hours (CPU)
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 1: Import Required Libraries
</VSCode.Cell>

<VSCode.Cell language="python">
# Import libraries
import os
import sys
import json
import shutil
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Seed the NumPy and TensorFlow global RNGs with the same fixed value so
# repeated runs of the notebook are comparable
for seed_rng in (np.random.seed, tf.random.set_seed):
    seed_rng(42)

# Report the runtime environment before any heavy work starts
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {gpu_devices}")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 2: Configuration
</VSCode.Cell>

<VSCode.Cell language="python">
# Configuration class
class Config:
    """Training configuration parameters.

    Every value is a class attribute evaluated once, when this cell runs.
    The paths are derived from the kernel's working directory at that moment,
    so re-run this cell after changing directories.
    """
    # Paths
    # If the kernel was started inside the project's `model/` folder, the
    # project root is one level up. Only the last path component is compared:
    # a substring test on the whole path would also match unrelated ancestors
    # (e.g. `/home/me/models/project`) and silently move BASE_DIR to the
    # wrong directory.
    BASE_DIR = (os.path.dirname(os.getcwd())
                if os.path.basename(os.getcwd()) == 'model'
                else os.getcwd())
    DATASET_PATH = os.path.join(BASE_DIR, 'data', 'archive')
    MODEL_SAVE_PATH = os.path.join(BASE_DIR, 'model', 'fruit_disease_model.h5')
    LABELS_PATH = os.path.join(BASE_DIR, 'model', 'fruit_disease_labels.json')

    # Model parameters
    IMG_HEIGHT = 224          # input image height in pixels
    IMG_WIDTH = 224           # input image width in pixels
    BATCH_SIZE = 32           # images per generator batch
    # NOTE(review): the training cells in this notebook set their own epoch
    # counts (30 + 20) and do not read EPOCHS - confirm whether it is still used.
    EPOCHS = 50
    LEARNING_RATE = 0.001     # Adam learning rate for the frozen-base phase
    VALIDATION_SPLIT = 0.2    # fraction of each class held out for validation
    NUM_CLASSES = 17          # expected number of class folders in the dataset

print(f"‚úì Dataset path: {Config.DATASET_PATH}")
print(f"‚úì Model will be saved to: {Config.MODEL_SAVE_PATH}")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 3: Auto-Fix Dataset Structure

This will automatically reorganize your dataset if it's nested in fruit folders.
</VSCode.Cell>

<VSCode.Cell language="python">
def fix_dataset_structure():
    """Flatten a dataset nested as FRUIT/DISEASE/ into DISEASE/ folders.

    Looks for the known fruit folders directly under ``Config.DATASET_PATH``.
    Each sub-directory found inside one of them is moved up to the dataset
    root, and the fruit folder is removed once it is empty. If none of the
    fruit folders are present, nothing is touched.

    A disease folder whose name already exists at the dataset root is never
    overwritten or merged; it is left in place and reported. Any fruit folder
    that could not be emptied and removed is reported as well, because the
    later cells treat every sub-directory of the dataset root as a class.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    fruit_folders = ['APPLE', 'GUAVA', 'MANGO', 'POMEGRANATE']
    present = [fruit for fruit in fruit_folders
               if os.path.isdir(os.path.join(Config.DATASET_PATH, fruit))]

    if not present:
        print("‚úì Dataset structure is correct!")
        return

    print("‚ö†Ô∏è  Detected nested structure. Flattening...")
    remaining = []  # fruit folders still present after the attempt

    for fruit in present:
        fruit_path = os.path.join(Config.DATASET_PATH, fruit)

        for disease_folder in sorted(os.listdir(fruit_path)):
            source = os.path.join(fruit_path, disease_folder)
            if not os.path.isdir(source):
                continue
            destination = os.path.join(Config.DATASET_PATH, disease_folder)

            if os.path.exists(destination):
                # Never merge or overwrite: surface the collision instead of
                # skipping it silently.
                print(f"  ! Skipped (name already exists at dataset root): "
                      f"{fruit}/{disease_folder}")
                continue

            shutil.move(source, destination)
            print(f"  ‚úì Moved: {disease_folder}")

        # Remove empty fruit folder
        try:
            if os.listdir(fruit_path):
                remaining.append(fruit)
            else:
                os.rmdir(fruit_path)
                print(f"  ‚úì Removed: {fruit}/")
        except OSError as err:
            # Best effort: keep going, but say why the folder is still there.
            remaining.append(fruit)
            print(f"  ! Could not remove {fruit}/: {err}")

    if remaining:
        print(f"  ! Still present under the dataset root: {', '.join(remaining)}. "
              f"Resolve these by hand, otherwise they will be counted as classes.")
    else:
        print("‚úì Dataset structure fixed!")

# Run the fix
fix_dataset_structure()
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 4: Verify Dataset
</VSCode.Cell>

<VSCode.Cell language="python">
# Tally the image files in every class directory under the dataset root
image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}

# Every sub-directory of the dataset root is treated as one class
disease_classes = []
for entry in os.listdir(Config.DATASET_PATH):
    if os.path.isdir(os.path.join(Config.DATASET_PATH, entry)):
        disease_classes.append(entry)

print(f"Found {len(disease_classes)} disease classes:\n")

total_images = 0
for class_dir_name in sorted(disease_classes):
    class_dir = os.path.join(Config.DATASET_PATH, class_dir_name)
    # Count only files whose (case-insensitive) extension is a known image type
    image_count = sum(
        1
        for filename in os.listdir(class_dir)
        if os.path.splitext(filename)[1].lower() in image_extensions
    )
    total_images += image_count
    print(f"  ‚úì {class_dir_name:50s}: {image_count:4d} images")

divider = '=' * 60
print(f"\n{divider}")
print(f"Total Classes: {len(disease_classes)}")
print(f"Total Images:  {total_images:,}")
print(f"{divider}")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 5: Create Data Generators
</VSCode.Cell>

<VSCode.Cell language="python">
# Training data with augmentation.
# No `rescale` here: tf.keras EfficientNet models normalise their input
# internally and expect raw pixel values in the 0-255 range. Dividing by 255
# in the generator would scale the images twice and feed the pretrained
# backbone almost-black inputs.
# NOTE(review): any inference code that loads the saved model must feed 0-255
# pixels as well - confirm the serving side does not divide by 255.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
    validation_split=Config.VALIDATION_SPLIT
)

# Validation data: no augmentation and no rescaling (see note above)
validation_datagen = ImageDataGenerator(
    validation_split=Config.VALIDATION_SPLIT
)

# Settings shared by both generators; both read the same directory and use the
# same validation_split, and each takes its own subset of it.
flow_options = dict(
    target_size=(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    batch_size=Config.BATCH_SIZE,
    class_mode='categorical',
    seed=42
)

# Create generators
train_generator = train_datagen.flow_from_directory(
    Config.DATASET_PATH,
    subset='training',
    shuffle=True,
    **flow_options
)

# shuffle=False keeps prediction order aligned with `validation_generator.classes`
validation_generator = validation_datagen.flow_from_directory(
    Config.DATASET_PATH,
    subset='validation',
    shuffle=False,
    **flow_options
)

print(f"\n‚úì Training samples: {train_generator.samples}")
print(f"‚úì Validation samples: {validation_generator.samples}")
print(f"‚úì Number of classes: {train_generator.num_classes}")

# Save class labels as {index: class_name}. The in-memory map keeps integer
# keys; json.dump writes them out as strings.
labels_map = {v: k for k, v in train_generator.class_indices.items()}
# Make sure the target folder exists so a fresh checkout does not fail here
os.makedirs(os.path.dirname(Config.LABELS_PATH), exist_ok=True)
with open(Config.LABELS_PATH, 'w') as f:
    json.dump(labels_map, f, indent=4)
print(f"‚úì Class labels saved!")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 6: Build EfficientNet-B0 Model
</VSCode.Cell>

<VSCode.Cell language="python">
# Size the output layer from the classes the generator actually found rather
# than from the hard-coded expectation; a mismatch would otherwise only show
# up as a shape error once training starts.
num_classes = train_generator.num_classes
if num_classes != Config.NUM_CLASSES:
    print(f"WARNING: Config.NUM_CLASSES is {Config.NUM_CLASSES} but the dataset "
          f"has {num_classes} class folders; building the output layer with "
          f"{num_classes} units.")

# Load pretrained EfficientNet-B0 without its ImageNet classification head
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(Config.IMG_HEIGHT, Config.IMG_WIDTH, 3)
)

# Freeze base model initially so only the new head is trained in phase 1
base_model.trainable = False

# Build custom classification head on top of the pooled backbone features
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
], name='FruitDiseaseDetector')

# Compile model; categorical cross-entropy matches the generators'
# class_mode='categorical' (one-hot) labels
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=Config.LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy',
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("‚úì Model built successfully!")
print(f"‚úì Total parameters: {model.count_params():,}")
model.summary()
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 7: Create Callbacks
</VSCode.Cell>

<VSCode.Cell language="python">
# Stop once validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch seen
stop_on_plateau = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to disk only when validation accuracy improves
save_best_model = ModelCheckpoint(
    filepath=Config.MODEL_SAVE_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 5 epochs without validation-loss improvement,
# never going below 1e-7
shrink_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [stop_on_plateau, save_best_model, shrink_learning_rate]

print("‚úì Callbacks configured!")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 8: Train Model (Phase 1 - Frozen Base)

⏱️ **This will take 1-3 hours on GPU or 6-12 hours on CPU**

You can monitor the training progress in real-time below.
</VSCode.Cell>

<VSCode.Cell language="python">
# Phase 1: only the classification head learns; the backbone stays frozen
banner = "=" * 60
print("\n" + banner)
print("TRAINING PHASE 1: FROZEN BASE MODEL")
print(banner)

# Upper bound on phase-1 epochs; early stopping may end the phase sooner
initial_epochs = 30

phase1_fit_options = dict(
    validation_data=validation_generator,
    epochs=initial_epochs,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_generator, **phase1_fit_options)

print("\n‚úì Phase 1 training complete!")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 9: Fine-Tune Model (Phase 2 - Unfreeze Layers)
</VSCode.Cell>

<VSCode.Cell language="python">
# Unfreeze last 20 layers of base model
n_unfreeze = 20
base_model.trainable = True
for layer in base_model.layers[:-n_unfreeze]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen tail.
# Letting them update their statistics on a small dataset at fine-tuning time
# disturbs the pretrained features; the Keras EfficientNet fine-tuning guide
# excludes them for this reason.
for layer in base_model.layers[-n_unfreeze:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Recompile with lower learning rate (required for the trainable changes to
# take effect; the small step size protects the pretrained weights)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy',
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print(f"‚úì Unfrozen last {n_unfreeze} layers")
print("  (BatchNormalization layers among them stay frozen)")
print(f"‚úì Trainable parameters: {sum(int(tf.size(w).numpy()) for w in model.trainable_weights):,}")

# Phase 2: Fine-tuning
print("\n" + "="*60)
print("TRAINING PHASE 2: FINE-TUNING")
print("="*60)

fine_tune_epochs = 20
# Count the epochs phase 1 actually ran: early stopping may have ended it
# before `initial_epochs`. Basing the end epoch on the real count keeps the
# fine-tuning budget at exactly `fine_tune_epochs`.
phase1_epochs_run = len(history.history['loss'])
total_epochs = phase1_epochs_run + fine_tune_epochs

history_fine = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=total_epochs,
    initial_epoch=phase1_epochs_run,
    callbacks=callbacks,
    verbose=1
)

print("\n‚úì Phase 2 training complete!")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 10: Plot Training History
</VSCode.Cell>

<VSCode.Cell language="python">
# Combine history from both phases into one continuous series per metric
all_history = {
    metric: history.history[metric] + history_fine.history[metric]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}

# Position of the first fine-tuning epoch on the combined curves. Use the
# number of epochs phase 1 actually ran, since early stopping may have ended
# it before `initial_epochs`.
fine_tune_start = len(history.history['loss'])

# Plot accuracy (left) and loss (right) with identical styling
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

for ax, metric, label in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(all_history[metric], label=f'Train {label}', linewidth=2)
    ax.plot(all_history[f'val_{metric}'], label=f'Val {label}', linewidth=2)
    ax.axvline(x=fine_tune_start, color='red', linestyle='--', label='Fine-tuning starts')
    ax.set_title(f'Model {label}', fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.grid(True, alpha=0.3)
    # Build the legend last so the fine-tuning marker is listed in it
    ax.legend()

plt.tight_layout()
plt.show()

# Accuracy of the last epoch run, which is not necessarily the best epoch
print(f"‚úì Final Validation Accuracy: {all_history['val_accuracy'][-1]*100:.2f}%")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 11: Evaluate Model
</VSCode.Cell>

<VSCode.Cell language="python">
# Load best model: the file written by the ModelCheckpoint callback, i.e. the
# epoch with the highest validation accuracy
model = keras.models.load_model(Config.MODEL_SAVE_PATH)

# Get predictions. The validation generator was created with shuffle=False,
# so prediction order matches `validation_generator.classes`.
validation_generator.reset()
y_pred_probs = model.predict(validation_generator, verbose=1)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = validation_generator.classes

# Class names ordered by class index
class_names = [labels_map[i] for i in range(len(labels_map))]
# Pass the full label set explicitly. Without it scikit-learn infers labels
# from the data, so a class missing from both y_true and y_pred would make
# `target_names` the wrong length and shift the heatmap tick labels.
class_ids = list(range(len(class_names)))

# Classification report (zero_division=0: classes with no predictions or no
# samples score 0 instead of raising warnings)
report = classification_report(
    y_true, y_pred,
    labels=class_ids,
    target_names=class_names,
    digits=4,
    zero_division=0
)
print("\nCLASSIFICATION REPORT:")
print("="*60)
print(report)

# Confusion matrix: rows are true classes, columns are predicted classes,
# both in class-index order
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(16, 14))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix - Fruit Disease Detection', fontsize=16, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Overall accuracy = correctly classified samples / all samples
overall_accuracy = np.sum(cm.diagonal()) / np.sum(cm)
print(f"\n{'='*60}")
print(f"OVERALL VALIDATION ACCURACY: {overall_accuracy*100:.2f}%")
print(f"{'='*60}")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## Step 12: Per-Class Accuracy
</VSCode.Cell>

<VSCode.Cell language="python">
# Per-class accuracy: correct predictions for a class (diagonal entry)
# divided by the number of true samples of that class (row total)
correct_per_class = np.diag(cm)
samples_per_class = cm.sum(axis=1)
class_accuracy = correct_per_class / samples_per_class

print("\nPER-CLASS ACCURACY:")
print("=" * 60)
for idx in range(min(len(class_names), len(class_accuracy))):
    print(f"{class_names[idx]:50s}: {class_accuracy[idx]*100:6.2f}%")
</VSCode.Cell>

<VSCode.Cell language="markdown">
## 🎉 Training Complete!

Your model has been saved and is ready for deployment!

### Generated Files:
- ✅ `fruit_disease_model.h5` - Trained model (~25MB)
- ✅ `fruit_disease_labels.json` - Class mappings

### Next Steps:
1. Start your FastAPI server: `uvicorn main_fastapi:app --reload`
2. Test predictions via API endpoints
3. Integrate with your frontend application

### Model Performance:
- Architecture: EfficientNet-B0
- Input: 224×224 RGB images
- Output: 17 disease classes
- Expected accuracy: 95-97%
</VSCode.Cell>
```