In [None]:
# ============================================================================
# CELL 1: Setup & Installation
# Purpose: Install required packages, import libraries, and verify GPU availability
# Instructions: Run this cell first. Wait for all packages to install.
# ============================================================================

# ============================================================================
# GOURD FLOWER MULTI-CLASS CLASSIFICATION MODEL TRAINING V3
# Multi-Class Classification: Variety + Gender + Non-Flower Rejection
# Classes: ampalaya_bilog_female, ampalaya_bilog_male, patola_female,
#          patola_male, upo_smooth_female, upo_smooth_male, not_flower
# Compatible with Expo React Native Mobile App + Gemini AI Validation
# ============================================================================

# ⭐⭐⭐ CRITICAL: Install TensorFlow with Keras 2.x compatibility
# ============================================================================
# Announce the install step. Nothing here inspects the environment; these are
# fixed banner messages.
print("=" * 70)
print("üîß INSTALLING TENSORFLOW WITH KERAS 2.x COMPATIBILITY")
print("=" * 70)
print("‚ö†Ô∏è  This prevents Keras 3.x format issues with TFLite conversion")
print("‚ö†Ô∏è  DO NOT SKIP THIS STEP!")
print()

# Set environment variable BEFORE importing TensorFlow.
# The flag is written into this process's environment here, ahead of the
# `import tensorflow` statement further down in this cell, so the order of
# these statements must not be changed.
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'

# Install TensorFlow (latest version) and required packages.
# The `!` lines are notebook shell escapes (not plain Python); `-q` keeps pip quiet.
print("Installing TensorFlow and dependencies...")
!pip install -q tensorflow tensorflow-hub
!pip install -q pillow
!pip install -q scikit-learn  # For confusion matrix

# NOTE: these success messages are printed unconditionally; the exit status of
# the pip commands above is not checked.
print("\n‚úÖ TensorFlow installed successfully!")
print("‚úÖ Keras 2.x compatibility mode enabled!")
print("‚úÖ Ready for TFLite conversion")
print()

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import os
import json
from google.colab import drive
from datetime import datetime
import zipfile
import shutil

# Report the runtime environment: TensorFlow build, visible GPUs, CUDA support.
_rule = "=" * 70
tf_version = tf.__version__

print(_rule)
print("SYSTEM CONFIGURATION")
print(_rule)
print(f"TensorFlow Version: {tf_version}")
# The Keras version is intentionally not printed (it caused issues under the
# legacy-Keras compatibility mode).
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
print(f"Built with CUDA: {tf.test.is_built_with_cuda()}")

print(f"\n‚úÖ TensorFlow version: {tf_version}")

# Read the legacy-Keras flag back from the process environment and report it.
if os.environ.get('TF_USE_LEGACY_KERAS') != '1':
    print("‚ö†Ô∏è  WARNING: Keras 2.x compatibility not detected!")
    print("‚ö†Ô∏è  TFLite conversion may fail!")
else:
    print("‚úÖ Keras 2.x compatibility mode ENABLED")
    print("‚úÖ TFLite conversion will work correctly")

print(_rule)
print("\n‚úì All libraries imported successfully!")
print("‚úì Ready to proceed to next cell")


In [None]:
# ============================================================================
# CELL 2: Mount Drive & Verify Dataset
# Purpose: Connect to Google Drive and verify your dataset folders exist
# Instructions: 
#   1. Run this cell
#   2. Click the authorization link
#   3. Grant access to Google Drive
#   4. Wait for dataset verification
# ============================================================================

# Mount Google Drive to access datasets
# force_remount=True allows you to choose which Google account to use
drive.mount('/content/drive', force_remount=True)

# ============================================================================
# DATASET CONFIGURATION - UPDATE THESE PATHS IF NEEDED
# ============================================================================

# Root folder on the mounted Drive that holds one sub-folder per class.
DRIVE_BASE_PATH = '/content/drive/MyDrive/EGourd/Datasets'

# Define all class folders (UPDATE FOLDER NAMES TO MATCH YOUR STRUCTURE)
# Maps class label -> source folder on Drive. The keys are used as the
# per-class sub-directory names when the dataset is copied into the local
# train/validation/test folders.
CLASS_FOLDERS = {
    'ampalaya_bilog_female': f'{DRIVE_BASE_PATH}/ampalaya_bilog_female',
    'ampalaya_bilog_male': f'{DRIVE_BASE_PATH}/ampalaya_bilog_male',
    'patola_female': f'{DRIVE_BASE_PATH}/patola_female',
    'patola_male': f'{DRIVE_BASE_PATH}/patola_male',
    'upo_smooth_female': f'{DRIVE_BASE_PATH}/upo_smooth_female',
    'upo_smooth_male': f'{DRIVE_BASE_PATH}/upo_smooth_male',
    'not_flower': f'{DRIVE_BASE_PATH}/not_flower',
}

# Local paths for organized data
# (on the Colab VM's disk; each split directory gets one sub-folder per class)
LOCAL_DATA_PATH = '/content/gourd_data'
TRAIN_DIR = f'{LOCAL_DATA_PATH}/train'
VALIDATION_DIR = f'{LOCAL_DATA_PATH}/validation'
TEST_DIR = f'{LOCAL_DATA_PATH}/test'

# Verify dataset paths: count the images available for every configured class
# and report how balanced the classes are.
print("=" * 70)
print("VERIFYING DATASET FOLDERS")
print("=" * 70)

total_images = 0
class_counts = {}  # class name -> number of image files (0 when the folder is missing)

for class_name, folder_path in CLASS_FOLDERS.items():
    if os.path.exists(folder_path):
        count = sum(
            1 for f in os.listdir(folder_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        class_counts[class_name] = count
        total_images += count
        print(f"‚úì {class_name}: {count} images")
    else:
        print(f"‚ö†Ô∏è {class_name} not found at {folder_path}")
        class_counts[class_name] = 0

print("=" * 70)
print(f"Total images: {total_images}")

# Check class balance
if total_images > 0:
    # total_images > 0 guarantees CLASS_FOLDERS is non-empty and avg_count > 0.
    avg_count = total_images / len(CLASS_FOLDERS)
    print(f"Average per class: {avg_count:.0f} images")
    print("\nClass Balance Check:")
    for class_name, count in class_counts.items():
        balance = count / avg_count * 100
        status = "‚úì" if balance > 70 else "‚ö†Ô∏è"
        print(f"  {status} {class_name}: {balance:.1f}% of average")

    smallest_count = min(class_counts.values())
    largest_count = max(class_counts.values())

    print("\nüí° Dataset Statistics:")
    print(f"   ‚Ä¢ Smallest class: {smallest_count} images")
    print(f"   ‚Ä¢ Largest class: {largest_count} images")

    # A missing or empty class folder yields a count of 0. Dividing by it would
    # abort the cell with ZeroDivisionError, so treat that case as an unbounded
    # imbalance instead.
    if smallest_count > 0:
        imbalance_ratio = largest_count / smallest_count
        print(f"   ‚Ä¢ Imbalance ratio: {imbalance_ratio:.2f}x")
    else:
        imbalance_ratio = float('inf')
        print("   ‚Ä¢ Imbalance ratio: undefined (at least one class has 0 images)")

    if imbalance_ratio > 2:
        print("\n‚ö†Ô∏è  WARNING: High class imbalance detected (>2x difference)")
        print("   ‚Üí Class weights will be automatically applied during training")
        print("   ‚Üí Consider data augmentation or collecting more samples for smaller classes")

    if any(count < 300 for count in class_counts.values()):
        print("\n‚ö†Ô∏è  Warning: Some classes have <300 images. Consider collecting more data.")
else:
    print("‚ö†Ô∏è No images found. Please check your folder paths.")

print("\n‚úì Drive mounted and dataset verified!")
print("‚úì Ready to organize dataset in next cell")


In [None]:
# ============================================================================
# CELL 3: Organize Dataset & Visualize Samples
# Purpose: Split images into train/validation/test sets and preview samples
# Instructions: 
#   1. Run this cell once (takes 2-5 minutes to copy files)
#   2. Check the summary statistics
#   3. Review the sample images displayed
#   4. WARNING: Do not rerun this cell unless you want to re-split the data
# ============================================================================

def extract_and_organize_dataset():
    """
    Organize the multi-class dataset into train/validation/test splits.

    For every class in CLASS_FOLDERS, the image files (.png/.jpg/.jpeg,
    case-insensitive) in its source folder are shuffled with a fixed seed and
    copied into ``<split dir>/<class_name>/`` where the split directories are
    TRAIN_DIR, VALIDATION_DIR and TEST_DIR.

    Split: 70% train, 15% validation, remaining files (about 15%) test.

    The file listing is sorted before shuffling so the split depends only on
    the file names and the seed, not on the order in which the filesystem
    happens to return directory entries.

    If the first class already has files in its training folder, the dataset
    is considered organized: the current per-split counts are printed and
    nothing is copied. Classes whose source folder is missing or contains no
    images are skipped with a warning.

    Returns:
        None. Results are reported via print().
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    split_dirs = {
        'train': TRAIN_DIR,
        'validation': VALIDATION_DIR,
        'test': TEST_DIR,
    }

    # Check whether the dataset was already organized (prevents re-splitting).
    first_class = list(CLASS_FOLDERS.keys())[0]
    first_train_dir = f'{TRAIN_DIR}/{first_class}'
    if os.path.exists(first_train_dir) and len(os.listdir(first_train_dir)) > 0:
        print("‚ö†Ô∏è  Dataset already organized! Skipping to avoid re-splitting.")
        print(f"   Delete {LOCAL_DATA_PATH}/ if you want to re-organize.\n")

        # Show existing split counts
        print("Current dataset splits:")
        for split, split_dir in split_dirs.items():
            total = 0
            for class_name in CLASS_FOLDERS:
                class_dir = f'{split_dir}/{class_name}'
                if os.path.exists(class_dir):
                    count = len(os.listdir(class_dir))
                    total += count
                    print(f"   {split}/{class_name}: {count} images")
            print(f"   {split.upper()} TOTAL: {total} images\n")
        return

    print("="*70)
    print("ORGANIZING MULTI-CLASS DATASET")
    print("="*70 + "\n")

    # Create directory structure (every class gets a folder in every split,
    # even when its source folder turns out to be missing).
    for split_dir in split_dirs.values():
        for class_name in CLASS_FOLDERS:
            os.makedirs(f'{split_dir}/{class_name}', exist_ok=True)

    # Process each class
    split_summary = {split: 0 for split in split_dirs}

    for class_name, source_dir in CLASS_FOLDERS.items():
        if not os.path.exists(source_dir):
            print(f"‚ö†Ô∏è  Skipping {class_name} - folder not found")
            continue

        print(f"Processing {class_name}...")

        # Sort first: os.listdir() order is arbitrary, and a seeded shuffle is
        # only reproducible when it starts from a fixed order.
        all_files = sorted(
            f for f in os.listdir(source_dir)
            if f.lower().endswith(image_extensions)
        )

        if not all_files:
            print(f"   ‚ö†Ô∏è  No images found in {class_name}")
            continue

        # Shuffle for random split (seed re-applied per class for reproducibility)
        np.random.seed(42)
        np.random.shuffle(all_files)

        # Calculate split indices; the test split takes whatever remains.
        total = len(all_files)
        train_count = int(total * 0.70)
        val_count = int(total * 0.15)

        train_files = all_files[:train_count]
        val_files = all_files[train_count:train_count + val_count]
        test_files = all_files[train_count + val_count:]

        # Copy files to respective directories and update the running totals.
        files_by_split = {
            'train': train_files,
            'validation': val_files,
            'test': test_files,
        }
        for split, files in files_by_split.items():
            target_dir = f'{split_dirs[split]}/{class_name}'
            for file_name in files:
                shutil.copy(f'{source_dir}/{file_name}', f'{target_dir}/{file_name}')
            split_summary[split] += len(files)

        print(f"   ‚úì {class_name}: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test")

    # Print summary
    print("\n" + "="*70)
    print("DATASET ORGANIZATION COMPLETE")
    print("="*70)
    print(f"Training set:    {split_summary['train']} images")
    print(f"Validation set:  {split_summary['validation']} images")
    print(f"Test set:        {split_summary['test']} images")
    print(f"Total:           {sum(split_summary.values())} images")
    print("="*70 + "\n")

# Run dataset extraction and organization.
# The function itself returns early (printing the existing counts) when the
# first class already has files in its training folder.
extract_and_organize_dataset()

# ============================================================================
# VISUALIZE SAMPLE IMAGES
# ============================================================================

def visualize_samples():
    """
    Display sample images from the training set to verify data quality.

    Draws a grid with one row per class in CLASS_FOLDERS and up to three
    images per row, read from ``TRAIN_DIR/<class_name>``. The first image of
    each row carries the class name as its title. Grid cells with no image
    (class folder missing, or fewer than three files) are left blank rather
    than shown as empty framed axes.

    The figure is saved as 'sample_images.png' in the current working
    directory and then shown.

    Returns:
        None.
    """
    print("Visualizing sample images from training set...\n")

    class_names = list(CLASS_FOLDERS.keys())
    num_classes = len(class_names)
    samples_per_class = 3

    # squeeze=False always returns a 2-D array of axes, so a single-class
    # dataset needs no special indexing.
    fig, axes = plt.subplots(num_classes, samples_per_class,
                             figsize=(12, num_classes * 3), squeeze=False)
    fig.suptitle('Sample Images from Training Set', fontsize=16, y=0.995)

    # Hide every axis up front so unused cells do not render as empty boxes.
    for ax in axes.ravel():
        ax.axis('off')

    for row_axes, class_name in zip(axes, class_names):
        class_dir = f'{TRAIN_DIR}/{class_name}'

        if not os.path.exists(class_dir):
            continue

        # Sorted so the same samples are shown on every run.
        images = sorted(os.listdir(class_dir))[:samples_per_class]

        for j, (ax, img_name) in enumerate(zip(row_axes, images)):
            img = plt.imread(f'{class_dir}/{img_name}')
            ax.imshow(img)

            if j == 0:
                # Format class name for display
                display_name = class_name.replace('_', ' ').title()
                ax.set_title(display_name, fontsize=10, fontweight='bold', loc='left')

    plt.tight_layout()
    plt.savefig('sample_images.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("‚úì Sample visualization saved as 'sample_images.png'\n")

# Visualize samples: draws the per-class preview grid from the training split
# and saves it as 'sample_images.png'.
visualize_samples()

# Status messages only; printed unconditionally once the call above returns.
print("‚úì Dataset organization complete!")
print("‚úì Ready to configure model in next cell")


In [None]:
# ============================================================================
# CELL 4: Configure Hyperparameters & Prepare Data
# Purpose: Set training configuration and create data generators with augmentation
# Instructions:
#   1. Review hyperparameters (modify if needed)
#   2. Run this cell
#   3. Note the number of training/validation/test samples
# ============================================================================

# ============================================================================
# HYPERPARAMETER CONFIGURATION
# ============================================================================
# Input resolution fed to the network (square images).
IMG_HEIGHT = IMG_WIDTH = 224

# Optimisation settings.
BATCH_SIZE = 16
EPOCHS = 100
LEARNING_RATE = 0.001

# Class names (must match folder names and order)
CLASS_NAMES = [
    'ampalaya_bilog_female',
    'ampalaya_bilog_male',
    'patola_female',
    'patola_male',
    'upo_smooth_female',
    'upo_smooth_male',
    'not_flower',
]
NUM_CLASSES = len(CLASS_NAMES)

# Confidence threshold for "unknown" predictions (65% minimum confidence)
CONFIDENCE_THRESHOLD = 0.65

# Echo the configuration so it is recorded in the notebook output.
_rule = "=" * 70
print("\n".join([
    "\n" + _rule,
    "MODEL CONFIGURATION",
    _rule,
    f"Image Size: {IMG_HEIGHT}x{IMG_WIDTH}",
    f"Batch Size: {BATCH_SIZE}",
    f"Max Epochs: {EPOCHS}",
    f"Learning Rate: {LEARNING_RATE}",
    f"Number of Classes: {NUM_CLASSES}",
    f"Classes: {CLASS_NAMES}",
    f"Confidence Threshold: {CONFIDENCE_THRESHOLD * 100}%",
    _rule + "\n",
]))

# ============================================================================
# DATA AUGMENTATION AND PREPROCESSING
# ============================================================================

# Enhanced data augmentation (applied to the training split only)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.3,
    shear_range=0.2,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest',
    channel_shift_range=20
)

# Only rescaling for validation and test sets
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators - CATEGORICAL for multi-class.
#
# `classes=CLASS_NAMES` pins label index i to CLASS_NAMES[i]. Without it,
# flow_from_directory assigns indices in alphanumeric order of the
# sub-directory names, which puts 'not_flower' at index 2 instead of last and
# makes every later CLASS_NAMES[idx] lookup (class weights, classification
# report, confusion matrix, single-image prediction) report the wrong class.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    class_mode='categorical',
    shuffle=True,
    seed=42
)

validation_generator = val_test_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    class_mode='categorical',
    shuffle=False  # keep file order stable so predictions line up with .classes
)

test_generator = val_test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    class_mode='categorical',
    shuffle=False  # keep file order stable so predictions line up with .classes
)

# Fail fast if any generator's label mapping disagrees with CLASS_NAMES.
_expected_class_indices = {name: idx for idx, name in enumerate(CLASS_NAMES)}
for _split_name, _generator in (('train', train_generator),
                                ('validation', validation_generator),
                                ('test', test_generator)):
    if _generator.class_indices != _expected_class_indices:
        raise ValueError(
            f"{_split_name} generator class indices {_generator.class_indices} "
            f"do not match CLASS_NAMES order {CLASS_NAMES}"
        )

# Verify class indices
print("\nClass Indices (verify correct mapping):")
for class_name, idx in train_generator.class_indices.items():
    print(f"  {idx}: {class_name}")

print("\n" + "="*70)
print("DATA GENERATORS READY")
print("="*70)
print(f"Training samples:   {train_generator.samples}")
print(f"Validation samples: {validation_generator.samples}")
print(f"Test samples:       {test_generator.samples}")
# len(generator) is the number of batches per epoch, including the final
# partial batch (floor division would under-report it).
print(f"Steps per epoch:    {len(train_generator)}")
print("="*70 + "\n")

# Calculate class weights for imbalanced datasets
from sklearn.utils.class_weight import compute_class_weight

# Only classes that actually occur in the training labels get a weight. Key the
# dict by the real label index so a class with no training images cannot shift
# the weights of the classes after it.
_present_classes = np.unique(train_generator.classes)
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=_present_classes,
    y=train_generator.classes
)
class_weights = {
    int(idx): float(weight)
    for idx, weight in zip(_present_classes, class_weights_array)
}
print("Class Weights (for imbalanced data):")
for idx, weight in class_weights.items():
    print(f"  {CLASS_NAMES[idx]}: {weight:.3f}")

print("\n‚úì Hyperparameters configured!")
print("‚úì Data generators ready!")
print("‚úì Ready to build model in next cell")


In [None]:
# ============================================================================
# CELL 5: Build & Compile Model
# Purpose: Create the MobileNetV2 model for multi-class classification
# ============================================================================

def create_multiclass_model():
    """
    Build the gourd-flower classifier: an ImageNet-pretrained MobileNetV2
    backbone (top removed, frozen) followed by a small dense head.

    Head layout, in order: global average pooling, dropout 0.3, dense 128
    with ReLU, dropout 0.2, dense NUM_CLASSES with softmax.

    The softmax output has one unit per entry of the class list:
    - ampalaya_bilog_female
    - ampalaya_bilog_male
    - patola_female
    - patola_male
    - upo_smooth_female
    - upo_smooth_male
    - not_flower

    Returns:
        (model, base_model): the full Sequential model and the MobileNetV2
        backbone it wraps, so the caller can unfreeze the backbone later.
    """
    # MobileNetV2: lightweight backbone; classification top is dropped and
    # pre-trained ImageNet weights are loaded.
    backbone = tf.keras.applications.MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    )

    print("Using MobileNetV2 (Mobile-Optimized)")

    # Phase 1 of transfer learning: the backbone starts out frozen.
    backbone.trainable = False

    # Classification head stacked on top of the backbone features.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(NUM_CLASSES, activation='softmax'),
    ]

    classifier = keras.Sequential([backbone, *head_layers])

    return classifier, backbone

# Instantiate the network and print its layer summary.
model, base_model = create_multiclass_model()
model.summary()

# ============================================================================
# COMPILE MODEL FOR MULTI-CLASS
# ============================================================================

# Adam at the base learning rate; categorical cross-entropy matches the
# one-hot labels produced by the 'categorical' data generators.
_phase1_optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
_phase1_metrics = [
    'accuracy',
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]
model.compile(
    optimizer=_phase1_optimizer,
    loss='categorical_crossentropy',
    metrics=_phase1_metrics,
)

print("‚úì Model compiled for multi-class classification")

# ============================================================================
# CALLBACKS FOR TRAINING
# ============================================================================

# Checkpoints are written to the Colab VM's local disk.
checkpoint_dir = '/content/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Stop when validation loss has not improved for 15 epochs and roll the
# weights back to the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    verbose=1,
    restore_best_weights=True,
)

# Keep only the model with the highest validation accuracy on disk.
model_checkpoint = ModelCheckpoint(
    filepath=f'{checkpoint_dir}/best_model.h5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
)

# Multiply the learning rate by 0.2 after 5 epochs without validation-loss
# improvement, never going below 1e-7.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.2,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, model_checkpoint, reduce_lr]

print("‚úì Training callbacks configured")
print("\n‚úì Model is ready for training!")


In [None]:
# ============================================================================
# CELL 6: Train Model - Phase 1 (Frozen Base)
# Purpose: Train with frozen base layers (transfer learning)
# Expected time: 15-30 minutes
# ============================================================================

print("PHASE 1: Training with frozen base model")
print("="*70 + "\n")

# Phase 1: only the classification head learns; the backbone stays frozen.
# Runs for at most 20 epochs, with class weights to offset class imbalance.
_phase1_fit_options = dict(
    epochs=20,
    validation_data=validation_generator,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)
history_phase1 = model.fit(train_generator, **_phase1_fit_options)

print("\n‚úì Phase 1 training complete!")
print("‚úì Ready for Phase 2 fine-tuning in next cell")


In [None]:
# ============================================================================
# CELL 7: Fine-tune Model - Phase 2 & Evaluate
# Purpose: Unfreeze some layers and fine-tune, then evaluate on test set
# Expected time: 20-60 minutes
# ============================================================================

print("PHASE 2: Fine-tuning with unfrozen layers")
print("="*70 + "\n")

# Unfreeze the last 30 layers of the base model: mark the whole backbone
# trainable, then re-freeze everything except its final 30 layers.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

# Recompile with lower learning rate (a tenth of the phase-1 rate). The model
# must be recompiled for the trainable-flag changes to take effect.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE/10),
    loss='categorical_crossentropy',  # Multi-class
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
        keras.metrics.AUC(name='auc')
    ]
)

# Continue training.
# `initial_epoch` is the number of epochs already completed. history.epoch
# holds 0-based epoch indices, so its length (not its last element) is the
# correct resume point; using the last index would restart one epoch early
# and repeat an epoch number in the logs.
history_phase2 = model.fit(
    train_generator,
    epochs=EPOCHS,
    initial_epoch=len(history_phase1.epoch),
    validation_data=validation_generator,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1
)

print("\n‚úì Training complete!")

# ============================================================================
# EVALUATE MODEL ON TEST SET
# ============================================================================

print("\n" + "="*70)
print("EVALUATING MODEL ON TEST SET")
print("="*70 + "\n")

# Load best model from checkpoint (the file written by the ModelCheckpoint
# callback), rather than using the in-memory weights from the last epoch.
print("Loading best model from checkpoint...")
best_model = keras.models.load_model(f'{checkpoint_dir}/best_model.h5')
print("‚úì Best model loaded\n")

# Evaluate on test set; the values come back in compile order:
# loss, accuracy, precision, recall, auc.
test_loss, test_accuracy, test_precision, test_recall, test_auc = best_model.evaluate(
    test_generator,
    verbose=1
)

print(f"\nTest Results:")
print(f"  Loss: {test_loss:.4f}")
print(f"  Accuracy: {test_accuracy:.4f}")
print(f"  Precision: {test_precision:.4f}")
print(f"  Recall: {test_recall:.4f}")
print(f"  AUC: {test_auc:.4f}")

# Calculate F1 Score. Precision and recall can both be 0 (e.g. no prediction
# clears the metric threshold); report 0 instead of dividing by zero.
_pr_sum = test_precision + test_recall
f1_score = 2 * (test_precision * test_recall) / _pr_sum if _pr_sum > 0 else 0.0
print(f"  F1 Score: {f1_score:.4f}")

# Generate confusion matrix
print("\nüìä Generating confusion matrix...")
from sklearn.metrics import confusion_matrix, classification_report

# Get predictions. The test generator does not shuffle, so the prediction rows
# are in the same order as test_generator.classes.
test_generator.reset()
y_true = test_generator.classes
y_pred_probs = best_model.predict(test_generator, verbose=0)
y_pred = np.argmax(y_pred_probs, axis=1)  # Get class with highest probability

# Pin the label set to every class index. Otherwise a class that is absent
# from both y_true and y_pred shrinks the matrix, and classification_report
# raises because target_names no longer matches the number of labels found.
_label_ids = list(range(NUM_CLASSES))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=_label_ids)

# Display classification report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=_label_ids,
                            target_names=CLASS_NAMES, digits=4))

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=CLASS_NAMES,
            yticklabels=CLASS_NAMES)
plt.title('Multi-Class Confusion Matrix', fontsize=14, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig('/content/confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úì Confusion matrix saved as 'confusion_matrix.png'")

print("\n‚úì Evaluation complete!")
print("‚úì Ready to visualize results in next cell")


In [None]:
# ============================================================================
# CELL 8: Visualize & Test Predictions
# Purpose: Plot training history and test the model on sample images
# ============================================================================

# ============================================================================
# VISUALIZE TRAINING HISTORY
# ============================================================================

# The history objects only exist if both training cells ran in this session;
# after a disconnect they are gone, so plotting is skipped in that case.
if 'history_phase1' not in globals() or 'history_phase2' not in globals():
    print("‚ö†Ô∏è  Note: Training history variables not found (session was likely disconnected)")
    print("   Skipping training history plot, but will continue with model testing...")
    print("   The best model is already saved in the checkpoint!\n")
else:
    def plot_training_history(history1, history2):
        """
        Plot the combined phase-1 + phase-2 training curves.

        Draws accuracy and loss (training vs. validation) over both phases
        with a dashed line where fine-tuning begins, plus a third panel with
        phase-2 training precision/recall when those metrics were recorded.
        The figure is saved to '/content/training_history.png' and shown.
        """
        logs1, logs2 = history1.history, history2.history

        # Concatenate the per-epoch series of both phases.
        acc = logs1['accuracy'] + logs2['accuracy']
        val_acc = logs1['val_accuracy'] + logs2['val_accuracy']
        loss = logs1['loss'] + logs2['loss']
        val_loss = logs1['val_loss'] + logs2['val_loss']

        epochs_range = range(len(acc))

        plt.figure(figsize=(15, 5))

        # (subplot position, training series, validation series, metric label,
        #  legend location, x position where phase 2 starts)
        panels = (
            (1, acc, val_acc, 'Accuracy', 'lower right', len(logs1['accuracy'])),
            (2, loss, val_loss, 'Loss', 'upper right', len(logs1['loss'])),
        )
        for position, train_series, val_series, metric, legend_loc, phase2_start in panels:
            plt.subplot(1, 3, position)
            plt.plot(epochs_range, train_series, label=f'Training {metric}')
            plt.plot(epochs_range, val_series, label=f'Validation {metric}')
            plt.axvline(x=phase2_start, color='r',
                        linestyle='--', label='Fine-tuning Start')
            plt.legend(loc=legend_loc)
            plt.title(f'Training and Validation {metric}')
            plt.xlabel('Epoch')
            plt.ylabel(metric)

        # Additional metrics (phase 2 only)
        if 'precision' in logs2:
            precision = logs2['precision']
            recall = logs2['recall']

            plt.subplot(1, 3, 3)
            plt.plot(range(len(precision)), precision, label='Precision')
            plt.plot(range(len(recall)), recall, label='Recall')
            plt.legend(loc='lower right')
            plt.title('Precision and Recall')
            plt.xlabel('Epoch (Phase 2)')
            plt.ylabel('Score')

        plt.tight_layout()
        plt.savefig('/content/training_history.png', dpi=300, bbox_inches='tight')
        plt.show()

    plot_training_history(history_phase1, history_phase2)
    print("‚úì Training history plotted successfully!\n")

# ============================================================================
# TEST MODEL INFERENCE
# ============================================================================

print("\n" + "="*70)
print("TESTING MODEL INFERENCE")
print("="*70 + "\n")

# Load best model from checkpoint.
# The path is spelled out literally here (it equals checkpoint_dir +
# '/best_model.h5' as configured in the model-building cell), so this cell
# does not need the `checkpoint_dir` variable to exist.
print("Loading best model from checkpoint...")
best_model = keras.models.load_model('/content/checkpoints/best_model.h5')
print("‚úì Best model loaded successfully!\n")

def predict_image(model, image_path):
    """Classify a single image file with a multi-class model.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns
            one row of per-class scores for each image in the batch.
        image_path: Path of the image file to load.

    Returns:
        Tuple ``(class_name, confidence)`` where ``class_name`` is the entry
        of ``CLASS_NAMES`` at the highest-scoring index and ``confidence``
        is that score multiplied by 100.
    """
    # Load at the size given by IMG_HEIGHT/IMG_WIDTH
    pil_image = keras.preprocessing.image.load_img(
        image_path, target_size=(IMG_HEIGHT, IMG_WIDTH)
    )

    # Pixel array divided by 255, then a leading batch axis of length 1
    pixels = keras.preprocessing.image.img_to_array(pil_image) / 255.0
    batch = np.expand_dims(pixels, axis=0)

    # Only one image was submitted, so only the first row of scores is relevant
    scores = model.predict(batch, verbose=0)[0]
    top_idx = np.argmax(scores)

    return CLASS_NAMES[top_idx], scores[top_idx] * 100

# Spot-check predictions on up to two test images per class.
# The images are the first two image files in sorted filename order (not a
# random sample), so repeated runs of this cell check the same files.
print("Sample predictions:\n")

# Same extension set Keras' flow_from_directory scans for. Anything else that
# ends up in a class folder (e.g. .ipynb_checkpoints, .DS_Store) is ignored
# instead of being handed to load_img, which would raise.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

for class_name in CLASS_NAMES:
    class_dir = f'{TEST_DIR}/{class_name}'
    if not os.path.isdir(class_dir):
        # No test folder for this class: nothing to check
        continue

    images = [
        entry for entry in sorted(os.listdir(class_dir))
        if entry.lower().endswith(image_extensions)
        and os.path.isfile(f'{class_dir}/{entry}')
    ][:2]  # Get 2 images per class

    for img_name in images:
        img_path = f'{class_dir}/{img_name}'
        pred_class, confidence = predict_image(best_model, img_path)
        emoji = "‚úì" if pred_class == class_name else "‚úó"

        # Format class names for display
        true_display = class_name.replace('_', ' ').title()
        pred_display = pred_class.replace('_', ' ').title()

        print(f"{emoji} True: {true_display:25} | Predicted: {pred_display:25} ({confidence:.1f}%)")

print("\n‚úì Visualization and testing complete!")
print("‚úì Ready to export models in next cell")


In [None]:
# ============================================================================
# CELL 9: Export & Save to Google Drive
# Purpose: Convert model to TFLite format and save all files to Google Drive
# ============================================================================

# Cell banner: title, rule, blank line
print("CONVERTING MODEL TO TENSORFLOW LITE FORMAT", "="*70 + "\n", sep="\n")

# Export always starts from the checkpoint file on disk
print("Loading best model for export...")
best_model = keras.models.load_model(
    f'{checkpoint_dir}/best_model.h5'
)
print("‚úì Best model loaded for conversion\n")

# Keep a full Keras (.h5) copy alongside the TFLite artifact
best_model.save('/content/gourd_multiclass_classifier.h5')
print("‚úì Saved full model: gourd_multiclass_classifier.h5")

# Configure the converter for mobile deployment:
# default optimizations plus float16 as a supported type (smaller file)
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
converter.target_spec.supported_types = [tf.float16]
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Run the conversion; the result is written to disk below as raw bytes
tflite_model = converter.convert()

tflite_path = '/content/gourd_multiclass_classifier.tflite'
with open(tflite_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

print(f"‚úì TensorFlow Lite model saved: {tflite_path}")
print(f"  Model size: {os.path.getsize(tflite_path) / 1024**2:.2f} MB")

# ============================================================================
# CREATE MODEL METADATA FOR MOBILE APP
# ============================================================================

# Create metadata file with model information.
#
# The training-history objects only exist when the training cells ran in the
# current session. The visualization cell already tolerates their absence
# (it skips the history plot), so do the same here: record total_epochs as
# null instead of aborting the export with a NameError.
training_histories = [
    globals().get(name) for name in ('history_phase1', 'history_phase2')
]
if all(history is not None for history in training_histories):
    # Epochs actually run = phase 1 epochs + phase 2 (fine-tuning) epochs
    total_epochs = sum(
        len(history.history['accuracy']) for history in training_histories
    )
else:
    total_epochs = None
    print("‚ö† Training history not found in this session; "
          "total_epochs will be null in the metadata")

metadata = {
    "model_name": "Gourd Flower Multi-Class Classifier",
    "version": "3.0.0",
    "model_type": "multi-class",
    "created_at": datetime.now().isoformat(),
    "architecture": "MobileNetV2",
    "input_shape": [IMG_HEIGHT, IMG_WIDTH, 3],
    "num_classes": NUM_CLASSES,
    "class_labels": CLASS_NAMES,
    "confidence_threshold": CONFIDENCE_THRESHOLD,
    # Preprocessing a consumer of the model has to replicate
    # (predict_image in this notebook divides pixels by 255 the same way)
    "preprocessing": {
        "rescale": 1.0/255.0,
        "input_size": [IMG_HEIGHT, IMG_WIDTH]
    },
    # float() so the values are plain Python floats that json can serialize
    "metrics": {
        "test_accuracy": float(test_accuracy),
        "test_precision": float(test_precision),
        "test_recall": float(test_recall),
        "test_f1_score": float(f1_score),
        "test_auc": float(test_auc)
    },
    "training_info": {
        "total_epochs": total_epochs,
        "batch_size": BATCH_SIZE,
        "train_samples": train_generator.samples,
        "val_samples": validation_generator.samples,
        "test_samples": test_generator.samples
    },
    "usage_notes": {
        "description": "Multi-class classifier for gourd flowers with non-flower rejection",
        "varieties_supported": ["Ampalaya Bilog", "Patola"],
        "genders_supported": ["Female", "Male"],
        "rejection_class": "not_flower",
        "integration": "Works with Gemini AI for validation"
    }
}

# Save metadata
with open('/content/model_metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)

print("‚úì Model metadata saved: model_metadata.json")

# ============================================================================
# SAVE ALL FILES TO GOOGLE DRIVE
# ============================================================================

print("\n" + "="*70)
print("SAVING FILES TO GOOGLE DRIVE")
print("="*70 + "\n")

# Create output directory in Google Drive
MODEL_SAVE_PATH = '/content/drive/MyDrive/EGourd/Model_Versions'
base_version_name = f'V3_MultiClass_{datetime.now().strftime("%m-%d-%Y")}'

# os.makedirs below creates any missing parent directories, so if Drive is not
# mounted it would quietly build this tree on local disk instead. Warn about
# that up front; the copy still proceeds either way.
drive_root = '/content/drive/MyDrive'
if not os.path.isdir(drive_root):
    print(f"‚ö† {drive_root} not found - Google Drive may not be mounted; "
          "files will be written to local storage instead\n")

# Find next available number: <date>_1, <date>_2, ... until an unused folder name
counter = 1
version_name = f'{base_version_name}_{counter}'
output_dir = f'{MODEL_SAVE_PATH}/{version_name}'

while os.path.exists(output_dir):
    counter += 1
    version_name = f'{base_version_name}_{counter}'
    output_dir = f'{MODEL_SAVE_PATH}/{version_name}'

os.makedirs(output_dir, exist_ok=True)
print(f"Saving to: {output_dir}")
print(f"Version: {version_name}\n")

# Copy files to Google Drive: (source path, description, priority label)
files_to_save = [
    ('/content/gourd_multiclass_classifier.h5', 'Full Keras Model', '‚≠ê‚≠ê Backup/Retraining'),
    ('/content/gourd_multiclass_classifier.tflite', 'TensorFlow Lite Model', '‚≠ê‚≠ê‚≠ê FOR EXPO APP'),
    ('/content/model_metadata.json', 'Model Metadata', '‚≠ê‚≠ê‚≠ê FOR EXPO APP'),
    ('/content/training_history.png', 'Training History Plot', '‚≠ê Documentation'),
    ('/content/confusion_matrix.png', 'Confusion Matrix', '‚≠ê‚≠ê Documentation'),
    ('/content/sample_images.png', 'Sample Images', '‚≠ê Documentation'),
    (f'{checkpoint_dir}/best_model.h5', 'Best Model Checkpoint', '‚≠ê Backup')
]

print("File Purposes:")
print("‚≠ê‚≠ê‚≠ê = Essential for Expo app")
print("‚≠ê‚≠ê   = Good to keep for future training")
print("‚≠ê     = Optional documentation\n")

# Missing sources are skipped rather than treated as errors, but they are
# counted so the closing message does not claim more than was actually copied.
saved_count = 0
for source_path, description, priority in files_to_save:
    if os.path.exists(source_path):
        filename = os.path.basename(source_path)
        dest_path = f'{output_dir}/{filename}'
        shutil.copy(source_path, dest_path)
        size_mb = os.path.getsize(source_path) / (1024*1024)
        print(f"‚úì Saved: {description:30} ‚Üí {filename:35} ({size_mb:.2f} MB) {priority}")
        saved_count += 1
    else:
        print(f"‚ö† Skipped: {description} (not found)")

if saved_count == len(files_to_save):
    print("\n‚úì All files saved to Google Drive")
else:
    print(f"\n‚ö† Saved {saved_count} of {len(files_to_save)} files "
          "(see skipped items above)")

# ============================================================================
# SUMMARY AND NEXT STEPS
# ============================================================================

# Final human-readable recap of the run: metrics, capabilities, generated
# files, Drive location and mobile-integration steps. Output only.
print("\n" + "="*70)
print("TRAINING COMPLETE - V3 MULTI-CLASS MODEL SUMMARY")
print("="*70 + "\n")

# Test-set metrics, shown as percentages
print("üìä Model Performance:")
print(f"   ‚Ä¢ Accuracy: {test_accuracy*100:.2f}%")
print(f"   ‚Ä¢ Precision: {test_precision*100:.2f}%")
print(f"   ‚Ä¢ Recall: {test_recall*100:.2f}%")
print(f"   ‚Ä¢ F1 Score: {f1_score*100:.2f}%")

print("\nüéØ Model Capabilities:")
print(f"   ‚Ä¢ Classes: {NUM_CLASSES}")
print("   ‚Ä¢ Varieties: Ampalaya Bilog, Patola")
print("   ‚Ä¢ Genders: Male, Female")
print("   ‚Ä¢ Non-flower rejection: Yes (not_flower class)")
# ':g' keeps float artifacts out of the output (e.g. 0.55*100 would otherwise
# print as 55.00000000000001) and drops a trailing ".0"
print(f"   ‚Ä¢ Confidence threshold: {CONFIDENCE_THRESHOLD*100:g}%")

print("\nüì¶ Generated Files:")
print("   ‚Ä¢ gourd_multiclass_classifier.h5 (Full Keras model)")
print("   ‚Ä¢ gourd_multiclass_classifier.tflite ‚≠ê FOR EXPO")
print("   ‚Ä¢ model_metadata.json ‚≠ê FOR EXPO")
print("   ‚Ä¢ training_history.png, confusion_matrix.png, sample_images.png")

# version_name is the folder chosen when the files were copied to Drive
print("\nüìÅ Location in Google Drive:")
print(f"   MyDrive/EGourd/Model_Versions/{version_name}/")

print("\nüì± Next Steps for Mobile Integration:")
print("   1. Download from Google Drive:")
print("      ‚Ä¢ gourd_multiclass_classifier.tflite")
print("      ‚Ä¢ model_metadata.json")
print("   2. Place in: frontend/mobile-app/assets/models/")
print("   3. Update modelService.js to use multi-class model")
print("   4. Test with Gemini AI validation (already integrated!)")

print("\nüîÑ Integration with Gemini:")
print("   ‚úì TFLite model provides fast on-device inference")
print("   ‚úì Gemini validates predictions for accuracy")
print("   ‚úì Conflict resolution when models disagree")
# NOTE(review): this 65% is hard-coded and may drift from CONFIDENCE_THRESHOLD
# printed above - confirm whether it refers to the same setting.
print("   ‚úì Confidence thresholds: 65% minimum")

print("\n" + "="*70)
print("üéâ V3 MULTI-CLASS MODEL READY FOR DEPLOYMENT!")
print("="*70)
