# ===============================================================
# 📙 NOTEBOOK 3: Multi-Modal Model Training
# Train the Bi-LSTM model with hand, pose, and lip streams
# ===============================================================

In [None]:
# --- 1: Setup ---
!pip install tensorflow pandas numpy matplotlib scikit-learn

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import pickle
import os
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Report the TensorFlow version and the GPUs TensorFlow can see, so the
# runtime used for this training run is recorded in the cell output.
print(f"‚úÖ TensorFlow version: {tf.__version__}")
print(f"‚úÖ GPU available: {tf.config.list_physical_devices('GPU')}")

# Mount Drive
# NOTE(review): every path read or written in the cells visible here lives
# under /content/, not /content/drive/ — confirm whether this mount is still
# needed (or whether outputs were meant to be persisted to Drive).
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# --- 2: Load Prepared Data ---
def load_prepared_data(data_dir='/content/prepared_data',
                       label_encoder_path='/content/label_encoder.pkl'):
    """Load the prepared splits, dataset metadata and label encoder.

    Args:
        data_dir: Directory holding ``X_<split>.npy`` / ``y_<split>.npy`` for
            the ``train``, ``val`` and ``test`` splits, plus
            ``dataset_info.json``. Defaults to the location used by the
            data-preparation notebook.
        label_encoder_path: Path of the pickled label encoder.

    Returns:
        ``((X_train, y_train), (X_val, y_val), (X_test, y_test), info,
        label_encoder)`` where ``info`` is the parsed ``dataset_info.json``.

    Raises:
        FileNotFoundError: if any expected file is missing.
        ValueError: if a split has a different number of samples and labels.
        KeyError: if ``dataset_info.json`` has no ``num_classes`` entry.
    """
    splits = {}
    for split in ('train', 'val', 'test'):
        features = np.load(os.path.join(data_dir, f'X_{split}.npy'))
        labels = np.load(os.path.join(data_dir, f'y_{split}.npy'))
        # Catch a stale or partially regenerated export early instead of
        # failing deep inside model.fit().
        if len(features) != len(labels):
            raise ValueError(
                f"Split '{split}' has {len(features)} samples "
                f"but {len(labels)} labels"
            )
        splits[split] = (features, labels)

    # Load metadata (JSON is UTF-8 by specification).
    with open(os.path.join(data_dir, 'dataset_info.json'), 'r',
              encoding='utf-8') as f:
        info = json.load(f)

    # Load label encoder.
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only load encoders produced by your own pipeline.
    with open(label_encoder_path, 'rb') as f:
        label_encoder = pickle.load(f)

    print(f"üìä Data loaded:")
    print(f"   Train: {splits['train'][0].shape}")
    print(f"   Val:   {splits['val'][0].shape}")
    print(f"   Test:  {splits['test'][0].shape}")
    print(f"   Classes: {info['num_classes']}")

    return splits['train'], splits['val'], splits['test'], info, label_encoder

# Pull the three splits, the dataset metadata and the label encoder into
# notebook-level variables.
(X_train, y_train), (X_val, y_val), (X_test, y_test), info, label_encoder = load_prepared_data()

# Cast every feature array to float32 in one pass.
X_train, X_val, X_test = (
    features.astype(np.float32) for features in (X_train, X_val, X_test)
)

# One-hot encode the integer labels of each split; the vector width is the
# class count recorded in the dataset metadata.
num_classes = info['num_classes']
y_train_cat, y_val_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_val, y_test)
)

print(f"\n‚úÖ Labels converted to one-hot: {y_train_cat.shape}")


In [None]:
# --- 3: Define Multi-Modal Model Architecture ---
def create_multi_modal_model(sequence_length=60, 
                             hand_features=84,    # 21 points x 4 values
                             pose_features=100,   # 25 points x 4 values
                             lip_features=200,    # 50 points x 4 values
                             num_classes=10):
    """
    Build a three-stream classifier over a single concatenated feature tensor.

    The model takes one input of shape
    ``(sequence_length, hand_features + pose_features + lip_features)`` and
    slices its last axis into hand, pose and lip segments (in that order).
    Each segment is summarised by its own stream, the three summaries are
    concatenated, and a dense head produces a softmax over ``num_classes``.

    Note: no ``Bidirectional`` layer is used anywhere; the only recurrent
    layers are the two stacked LSTMs of the lip stream.

    Returns:
        An uncompiled Keras ``Model``.
    """

    # Column boundaries of each modality inside the concatenated feature axis.
    hand_end = hand_features
    pose_end = hand_features + pose_features
    total_features = pose_end + lip_features

    inputs = layers.Input(shape=(sequence_length, total_features))

    # Slice the shared input into per-modality views.
    hand_input = inputs[:, :, :hand_end]
    pose_input = inputs[:, :, hand_end:pose_end]
    lip_input = inputs[:, :, pose_end:]

    # --- Hand stream: two Conv1D blocks -> max-pool -> global average -> dense ---
    x = layers.Conv1D(64, 3, padding='same', activation='relu')(hand_input)
    x = layers.BatchNormalization()(x)
    x = layers.Conv1D(128, 3, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling1D(2)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(64, activation='relu')(x)
    hand_stream = layers.Dropout(0.3)(x)

    # --- Pose stream: one Conv1D block -> global average -> dense ---
    x = layers.Conv1D(32, 3, padding='same', activation='relu')(pose_input)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(32, activation='relu')(x)
    pose_stream = layers.Dropout(0.3)(x)

    # --- Lip stream: Conv1D block -> two stacked LSTMs (last state only) ---
    x = layers.Conv1D(64, 3, padding='same', activation='relu')(lip_input)
    x = layers.BatchNormalization()(x)
    x = layers.LSTM(64, return_sequences=True)(x)
    x = layers.LSTM(32)(x)
    lip_stream = layers.Dropout(0.3)(x)

    # --- Fusion: concatenate the three fixed-size summaries ---
    x = layers.Concatenate()([hand_stream, pose_stream, lip_stream])
    x = layers.Dense(128, activation='relu')(x)
    fused = layers.Dropout(0.3)(x)

    # Each stream has already collapsed the time axis, so no further temporal
    # layer is applied after fusion; the fused vector feeds the classifier.
    outputs = layers.Dense(num_classes, activation='softmax')(fused)

    return Model(inputs=inputs, outputs=outputs)

# Per-frame feature widths of each modality inside the concatenated input.
HAND_FEATURES = 84
POSE_FEATURES = 100
LIP_FEATURES = 200

# The model slices the feature axis at fixed offsets, so the prepared data
# must have exactly the expected width. Fail here with a clear message rather
# than later inside model.fit() with a shape error.
expected_features = HAND_FEATURES + POSE_FEATURES + LIP_FEATURES
if X_train.ndim != 3 or X_train.shape[2] != expected_features:
    raise ValueError(
        f"X_train has shape {X_train.shape}; expected "
        f"(samples, sequence_length, {expected_features}) "
        f"= hand {HAND_FEATURES} + pose {POSE_FEATURES} + lip {LIP_FEATURES}"
    )

# Create model
model = create_multi_modal_model(
    sequence_length=X_train.shape[1],
    hand_features=HAND_FEATURES,
    pose_features=POSE_FEATURES,
    lip_features=LIP_FEATURES,
    num_classes=num_classes
)

# Display model architecture
model.summary()


In [None]:
# --- 4: Alternative Model (Simpler) ---
def create_simpler_model(sequence_length=60, feature_dim=384, num_classes=10):
    """Build a single-stream Conv1D classifier over the full feature vector.

    Args:
        sequence_length: Number of time steps per sample.
        feature_dim: Number of features per time step.
        num_classes: Width of the softmax output.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """

    def conv(filters):
        # All convolutions share kernel size 3, 'same' padding and ReLU.
        return layers.Conv1D(filters, 3, padding='same', activation='relu')

    # Convolutional feature extractor: two 64-filter convs, pool, two
    # 128-filter convs, then collapse the time axis.
    feature_extractor = [
        layers.Input(shape=(sequence_length, feature_dim)),
        conv(64),
        layers.BatchNormalization(),
        conv(64),
        layers.MaxPooling1D(2),
        conv(128),
        layers.BatchNormalization(),
        conv(128),
        layers.GlobalAveragePooling1D(),
    ]

    # Dense classifier head with dropout after each hidden layer.
    classifier = [
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax'),
    ]

    return keras.Sequential(feature_extractor + classifier)

# Create simpler model (if multi-modal is too complex)
# model = create_simpler_model(
#     sequence_length=X_train.shape[1],
#     feature_dim=X_train.shape[2],
#     num_classes=num_classes
# )


In [None]:
# --- 5: Compile Model ---
def compile_model(model, learning_rate=0.001):
    """Attach optimizer, loss and metrics to ``model`` and return it.

    Uses Adam with the given learning rate, categorical cross-entropy
    (labels are expected one-hot), and reports plain accuracy plus top-3
    accuracy under the metric name ``top3_acc``.
    """
    top3_metric = keras.metrics.TopKCategoricalAccuracy(k=3, name='top3_acc')

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy', top3_metric],
    )

    print("‚úÖ Model compiled")
    return model

model = compile_model(model)


In [None]:
# --- 6: Callbacks ---
def create_callbacks():
    """Build the Keras callbacks used during training.

    Side effect: creates ``/content/models`` if it does not exist, because
    the checkpoint file is written there.

    Returns:
        list: checkpoint, early-stopping, learning-rate scheduler,
        TensorBoard and CSV-logger callbacks, in that order.
    """
    os.makedirs('/content/models', exist_ok=True)

    # Save the model only when validation accuracy reaches a new maximum.
    checkpoint = keras.callbacks.ModelCheckpoint(
        '/content/models/best_model.h5',
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1,
    )

    # Stop after 15 epochs without val_loss improvement and restore the
    # weights from the best epoch.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1,
    )

    # Halve the learning rate after 5 stagnant epochs, down to 1e-7.
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
    )

    # TensorBoard logs, with histograms computed every epoch.
    tensorboard = keras.callbacks.TensorBoard(
        log_dir='/content/logs',
        histogram_freq=1,
    )

    # Per-epoch metrics as CSV.
    csv_logger = keras.callbacks.CSVLogger('/content/training_log.csv')

    print("‚úÖ Callbacks created")
    return [checkpoint, early_stopping, lr_scheduler, tensorboard, csv_logger]

callbacks = create_callbacks()


In [None]:
# --- 7: Train Model ---
def train_model(model, X_train, y_train_cat, X_val, y_val_cat, epochs=100, batch_size=32,
                training_callbacks=None):
    """Fit ``model`` and save the final model to disk.

    Args:
        model: Compiled Keras model.
        X_train, y_train_cat: Training features and one-hot labels.
        X_val, y_val_cat: Validation features and one-hot labels.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        training_callbacks: Keras callbacks to use. When ``None`` the
            notebook-level ``callbacks`` list is used, which is what this
            function previously always did implicitly.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Note:
        The final model is written to ``/content/models/final_model.h5``;
        that directory must already exist.
    """
    if training_callbacks is None:
        # Backward-compatible default: the list built in the callbacks cell.
        training_callbacks = callbacks

    print("\nüöÄ Starting training...")
    print(f"   Epochs: {epochs}")
    print(f"   Batch size: {batch_size}")
    print(f"   Train samples: {len(X_train)}")
    print(f"   Val samples: {len(X_val)}")

    history = model.fit(
        X_train, y_train_cat,
        validation_data=(X_val, y_val_cat),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
        verbose=1
    )

    print("\n‚úÖ Training complete!")

    # Save final model
    model.save('/content/models/final_model.h5')
    print("üíæ Model saved to /content/models/")

    return history

# Run training with train_model's default epochs and batch size; the returned
# History object is consumed by the plotting and export cells further down.
history = train_model(model, X_train, y_train_cat, X_val, y_val_cat)


In [None]:
# --- 8: Plot Training History ---
def plot_training_history(history):
    """Plot train/validation curves and save them to a PNG.

    Draws loss and accuracy side by side, plus top-3 accuracy in a third
    panel when the ``top3_acc`` metric was recorded. The figure is saved to
    ``/content/training_history.png`` and then shown.
    """
    metrics = history.history
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    # (metric key, train label, val label, panel title, y-axis label)
    panels = [
        ('loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
        ('accuracy', 'Train Acc', 'Val Acc', 'Model Accuracy', 'Accuracy'),
    ]
    # The third panel is only drawn when top-3 accuracy was tracked;
    # otherwise its axes are left empty.
    if 'top3_acc' in metrics:
        panels.append(
            ('top3_acc', 'Train Top-3', 'Val Top-3', 'Top-3 Accuracy', 'Accuracy')
        )

    for ax, (key, train_label, val_label, title, y_label) in zip(axes, panels):
        ax.plot(metrics[key], label=train_label)
        ax.plot(metrics[f'val_{key}'], label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.savefig('/content/training_history.png', dpi=150)
    plt.show()

plot_training_history(history)


In [None]:
# --- 9: Evaluate on Test Set ---
def evaluate_model(model, X_test, y_test_cat, label_encoder):
    """Evaluate ``model`` on the test set and plot a confusion matrix.

    Args:
        model: Compiled Keras model whose ``evaluate`` returns exactly
            ``[loss, accuracy, top3_acc]``.
        X_test: Test features.
        y_test_cat: One-hot test labels.
        label_encoder: Fitted encoder; ``classes_`` supplies display names.

    Returns:
        ``(y_pred, y_true)`` as integer class-index arrays.

    Side effects:
        Prints metrics and a classification report, and saves the confusion
        matrix plot to ``/content/confusion_matrix.png``.
    """

    print("\nüìä TEST SET EVALUATION")
    print("="*50)

    # Loss and accuracy
    loss, accuracy, top3_acc = model.evaluate(X_test, y_test_cat, verbose=0)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Test Top-3 Accuracy: {top3_acc:.4f} ({top3_acc*100:.2f}%)")

    # Predictions
    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test_cat, axis=1)

    target_names = label_encoder.classes_
    # Pass the full label set explicitly. Without it, scikit-learn infers the
    # labels from the data, so a class absent from the test set makes
    # target_names the wrong length (ValueError) and shifts matrix rows.
    class_ids = np.arange(len(target_names))

    # Classification report
    print("\nüìã Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=class_ids,
        target_names=target_names,
        zero_division=0,  # classes with no support/predictions score 0
    ))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Plot only the first 10 classes, slicing the matrix so the cells match
    # the tick labels (previously the full matrix got 10 labels).
    shown = min(10, len(target_names))

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm[:shown, :shown], annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names[:shown],
                yticklabels=target_names[:shown])
    plt.title('Confusion Matrix (First 10 Classes)')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig('/content/confusion_matrix.png', dpi=150)
    plt.show()

    return y_pred, y_true

y_pred, y_true = evaluate_model(model, X_test, y_test_cat, label_encoder)


In [None]:
# --- 10: Per-Class Accuracy ---
def per_class_accuracy(y_true, y_pred, label_encoder):
    """Compute, print and plot the accuracy of each class.

    Args:
        y_true: Integer class indices of the ground truth (NumPy array).
        y_pred: Integer class indices predicted by the model (NumPy array).
        label_encoder: Fitted encoder; position ``i`` of ``classes_`` is the
            display name of class index ``i``.

    Returns:
        ``pandas.DataFrame`` with columns ``class``, ``samples`` and
        ``accuracy``, sorted by accuracy (descending). Classes with no
        samples in ``y_true`` are omitted; the frame is empty if none have.

    Side effects:
        Prints the table and saves a bar chart of the 15 best classes to
        ``/content/per_class_accuracy.png``.
    """

    from sklearn.metrics import accuracy_score
    import pandas as pd

    rows = []
    for class_id, class_name in enumerate(label_encoder.classes_):
        mask = (y_true == class_id)
        support = int(np.sum(mask))
        if support == 0:
            continue  # class not present in this evaluation set
        rows.append({
            'class': class_name,
            'samples': support,
            'accuracy': accuracy_score(y_true[mask], y_pred[mask]),
        })

    # Explicit columns keep sort_values working when no class had samples.
    df = pd.DataFrame(rows, columns=['class', 'samples', 'accuracy'])
    df = df.sort_values('accuracy', ascending=False)

    print("\nüìä Per-Class Accuracy:")
    print(df.to_string(index=False))

    # Plot
    plt.figure(figsize=(12, 8))
    top_classes = df.head(15)
    colors = ['green' if x > 0.8 else 'orange' if x > 0.6 else 'red'
              for x in top_classes['accuracy']]

    # Only add an ellipsis when a name is actually truncated.
    tick_labels = []
    for name in top_classes['class']:
        text = str(name)
        tick_labels.append(text if len(text) <= 30 else text[:30] + '...')

    plt.barh(range(len(top_classes)), top_classes['accuracy'].values, color=colors)
    plt.yticks(range(len(top_classes)), tick_labels)
    plt.xlabel('Accuracy')
    plt.title('Per-Class Accuracy (Top 15)')
    plt.xlim(0, 1)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('/content/per_class_accuracy.png', dpi=150)
    plt.show()

    return df

per_class_df = per_class_accuracy(y_true, y_pred, label_encoder)


In [None]:
# --- 11: Error Analysis ---
def error_analysis(y_true, y_pred, label_encoder, X_test):
    """Print a summary of the model's misclassifications.

    Reports the error count and rate, up to 10 example errors, and the 5
    most frequent (true -> predicted) confusions.

    Args:
        y_true: Integer class indices of the ground truth.
        y_pred: Integer class indices predicted by the model.
        label_encoder: Object whose ``inverse_transform`` maps a list of
            class indices to their labels.
        X_test: Unused; kept so existing calls keep working.

    Returns:
        None. Output is printed.
    """
    from collections import Counter

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Find errors
    error_indices = np.flatnonzero(y_true != y_pred)
    total = len(y_true)
    n_errors = len(error_indices)
    # Guard the rate so an empty evaluation set reports 0% instead of
    # raising ZeroDivisionError.
    error_rate = n_errors / total * 100 if total else 0.0

    print(f"\nüîç ERROR ANALYSIS")
    print(f"   Total errors: {n_errors}/{total} ({error_rate:.2f}%)")

    if n_errors == 0:
        return

    def label_of(class_id):
        # str() keeps the slicing below safe for non-string labels.
        return str(label_encoder.inverse_transform([class_id])[0])

    # Show some error examples
    print("\nüìù Example errors:")
    for idx in error_indices[:10]:
        true_label = label_of(y_true[idx])
        pred_label = label_of(y_pred[idx])
        print(f"   True: {true_label[:30]:30} ‚Üí Pred: {pred_label[:30]}")

    # Confusion pairs: count each (true, predicted) combination among errors.
    pair_counts = Counter(
        zip(y_true[error_indices].tolist(), y_pred[error_indices].tolist())
    )

    print("\nüîÑ Most confused pairs:")
    for (true, pred), count in pair_counts.most_common(5):
        true_name = label_of(true)[:20]
        pred_name = label_of(pred)[:20]
        print(f"   {true_name} ‚Üí {pred_name}: {count} times")

# Summarise the misclassifications in the test-set predictions returned by
# evaluate_model().
error_analysis(y_true, y_pred, label_encoder, X_test)


In [None]:
# --- 12: Convert to TFLite ---
def convert_to_tflite(model, quantize=True):
    """Convert a Keras model to a TFLite flatbuffer and write it to disk.

    Args:
        model: Keras model to convert.
        quantize: When true, enable the converter's default optimizations
            and list float16 as a supported type.

    Returns:
        Path of the written ``.tflite`` file
        (``/content/models/sentence_model.tflite``).

    After saving, the flatbuffer is loaded into an interpreter and its
    input/output tensor details are printed; no inference is run here.
    """
    tflite_path = '/content/models/sentence_model.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    if quantize:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_types = [tf.float16]
    model_bytes = converter.convert()

    with open(tflite_path, 'wb') as out_file:
        out_file.write(model_bytes)

    size_kb = len(model_bytes) / 1024
    print(f"\nüì± TFLite model saved: {tflite_path}")
    print(f"   Size: {size_kb:.2f} KB")

    # Load the converted model straight from memory to confirm it parses and
    # to report the tensor signature the mobile app will see.
    interpreter = tf.lite.Interpreter(model_content=model_bytes)
    interpreter.allocate_tensors()
    first_input = interpreter.get_input_details()[0]
    first_output = interpreter.get_output_details()[0]

    print(f"\nüîç TFLite Model Info:")
    print(f"   Input shape: {first_input['shape']}")
    print(f"   Output shape: {first_output['shape']}")
    print(f"   Input type: {first_input['dtype']}")

    return tflite_path

tflite_path = convert_to_tflite(model, quantize=True)


In [None]:
# --- 13: Test TFLite Inference ---
def test_tflite_inference(tflite_path, X_test, y_test, num_samples=5, encoder=None):
    """Run a few test samples through the TFLite model and print the results.

    Args:
        tflite_path: Path to the ``.tflite`` file.
        X_test: Test features; one sample at a time is fed to the model.
        y_test: Integer class indices matching ``X_test``.
        num_samples: Maximum number of samples to run.
        encoder: Label encoder used to turn class indices into names. When
            ``None`` the notebook-level ``label_encoder`` is used, which is
            what this function previously always did implicitly.

    Returns:
        None. Per-sample latency, true/predicted label and confidence are
        printed.
    """
    import time

    if encoder is None:
        # Backward-compatible default: the encoder loaded earlier in the notebook.
        encoder = label_encoder

    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    print("\nüß™ TESTING TFLITE INFERENCE")
    print("="*50)

    for i in range(min(num_samples, len(X_test))):
        # Keep the leading batch dimension: the model expects (1, seq, features).
        input_data = X_test[i:i+1].astype(np.float32)
        interpreter.set_tensor(input_index, input_data)

        # perf_counter is monotonic and high-resolution, unlike wall-clock
        # time.time(), so it is the right clock for short latencies.
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = (time.perf_counter() - start) * 1000

        output = interpreter.get_tensor(output_index)
        pred_class = np.argmax(output[0])

        print(f"\nSample {i+1}:")
        print(f"   Inference time: {inference_time:.2f} ms")
        print(f"   True: {encoder.inverse_transform([y_test[i]])[0][:30]}")
        print(f"   Pred: {encoder.inverse_transform([pred_class])[0][:30]}")
        print(f"   Confidence: {output[0][pred_class]:.4f}")

test_tflite_inference(tflite_path, X_test, y_test, encoder=label_encoder)


In [None]:
# --- 14: Save Model and Metadata for Flutter ---
def save_for_flutter():
    """Collect the files the Flutter app needs into /content/flutter_assets.

    Writes the TFLite model copy, a JSON index->label mapping, a plain-text
    class list and a model_info.json, then zips the folder to
    /content/flutter_model_files.zip.

    Reads the notebook-level variables ``label_encoder``, ``X_train``,
    ``num_classes`` and ``history``; takes no arguments and returns None.
    The ``!cp`` / ``!zip`` lines are IPython shell escapes, so this function
    only runs inside a notebook/IPython session.
    """
    
    os.makedirs('/content/flutter_assets', exist_ok=True)
    
    # Copy TFLite model
    !cp /content/models/sentence_model.tflite /content/flutter_assets/
    
    # Save label mapping
    # Keys are the class indices as strings; ensure_ascii=False keeps
    # non-ASCII label text readable in the UTF-8 file.
    with open('/content/flutter_assets/label_mapping.json', 'w', encoding='utf-8') as f:
        mapping = {str(i): label for i, label in enumerate(label_encoder.classes_)}
        json.dump(mapping, f, indent=2, ensure_ascii=False)
    
    # Save class list
    # One label per line, in class-index order.
    with open('/content/flutter_assets/class_names.txt', 'w', encoding='utf-8') as f:
        for label in label_encoder.classes_:
            f.write(f"{label}\n")
    
    # Save model info
    # NOTE(review): 'accuracy' is the validation accuracy of the LAST epoch.
    # With the EarlyStopping(restore_best_weights=True) callback defined in
    # this notebook, the exported weights may come from an earlier epoch —
    # confirm which number should be reported.
    # NOTE(review): the two normalization entries are Colab paths, and the
    # .npy files are not copied into flutter_assets by this function —
    # confirm how the app obtains the mean/std values.
    model_info = {
        'input_shape': X_train.shape[1:],
        'num_classes': num_classes,
        'sequence_length': X_train.shape[1],
        'features_per_frame': X_train.shape[2],
        'accuracy': float(history.history['val_accuracy'][-1]),
        'normalization_mean': '/content/normalization_mean.npy',
        'normalization_std': '/content/normalization_std.npy'
    }
    
    with open('/content/flutter_assets/model_info.json', 'w') as f:
        json.dump(model_info, f, indent=2)
    
    # Zip everything
    !zip -r /content/flutter_model_files.zip /content/flutter_assets/
    
    print("\n‚úÖ Flutter assets ready in /content/flutter_assets/")
    print("üì¶ Zipped to: /content/flutter_model_files.zip")

save_for_flutter()


In [None]:
# --- 15: Download All ---
# files.download triggers a browser download and is only available on Colab.
from google.colab import files

# Download all results
# Bundle the models folder, the Flutter assets, the CSV training log and the
# three saved plots into one archive (IPython shell escape; the trailing
# backslashes continue the single zip command across lines).
!zip -r /content/all_model_files.zip \
    /content/models \
    /content/flutter_assets \
    /content/training_log.csv \
    /content/training_history.png \
    /content/confusion_matrix.png \
    /content/per_class_accuracy.png

files.download('/content/all_model_files.zip')

print("\nüéâ NOTEBOOK 3 COMPLETE! Model ready for Flutter app.")
