# Loss Function Comparison with Advanced Model

This notebook compares standard Keras loss functions using the Advanced model and ModelRunner for all data handling and training operations.

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

# Add src to path
sys.path.append('../../src')

from model_runner import ModelRunner
from models.Advanced import model as base_advanced_model
from optimizers.Adam import optimizer as adam_optimizer

# Data paths
# All four paths are relative, so they resolve against the current working
# directory (NOTE(review): presumably the notebook's own folder - confirm).
# They are forwarded to ModelRunner inside train_with_loss_function.
TRAIN_PATH = "../../data/train_balanced.parquet"  # passed as path_train
VAL_PATH = "../../data/validation.parquet"  # passed as path_val
META_PATH = "../../data/meta_model.parquet"  # passed as path_meta_model
FEATURES_PATH = "../../data/features.json"  # passed as path_features

print("Setup complete!")

In [None]:
# Experiment configuration: the loss functions under comparison and the
# training settings shared by every run.

# Maps the label used in reports and plots to the identifier passed as
# `loss=` when the model is compiled.
LOSS_FUNCTIONS = dict(
    mae='mae',          # Mean Absolute Error
    mse='mse',          # Mean Squared Error
    huber='huber',      # Huber Loss (robust to outliers)
    logcosh='logcosh',  # Log-Cosh Loss
)

# Settings applied identically to each training run.
EPOCHS, BATCH_SIZE = 10, 64
SUBSET_FEATURES = "small"

# Echo the configuration so the notebook output records what was run.
print(f"Will test {len(LOSS_FUNCTIONS)} loss functions:")
print("\n".join(
    f"  - {label}: {identifier}"
    for label, identifier in LOSS_FUNCTIONS.items()
))
print(
    f"\nTraining config: {EPOCHS} epochs, "
    f"batch size {BATCH_SIZE}, "
    f"features: {SUBSET_FEATURES}"
)

In [None]:
def train_with_loss_function(loss_name, loss_function):
    """Train the Advanced model with one loss function via ModelRunner.

    Each call trains on its own copy of the model so that runs for
    different loss functions do not continue from each other's weights.

    Args:
        loss_name: Label for this run; used in console output and stored in
            the returned dict.
        loss_function: Value passed as ``loss=`` to ``model.compile``.

    Returns:
        A dict with the keys ``loss_name``, ``history``, ``final_loss``,
        ``final_val_loss``, ``final_mae``, ``final_val_mae``, ``model`` and
        ``runner``. The ``final_*`` values are the last entries of the
        corresponding series in ``history.history``.

    Raises:
        Any exception raised by compilation, ModelRunner construction or
        training propagates to the caller unchanged.
    """
    import tensorflow as tf
    from models.Advanced import model as create_advanced_model

    print(f"\n{'='*60}")
    print(f"Training with {loss_name.upper()} loss function")
    print(f"{'='*60}")

    # `create_advanced_model` is a single module-level object. Compiling and
    # fitting it directly would make every loss function keep training the
    # same weights, so later runs would start from earlier runs' results.
    # Cloning rebuilds the layers with newly initialised weights.
    model = create_advanced_model
    if isinstance(model, tf.keras.Model):
        try:
            model = tf.keras.models.clone_model(create_advanced_model)
        except (ValueError, NotImplementedError) as clone_error:
            # Some model types cannot be cloned; keep the previous behaviour
            # (shared instance) rather than failing the whole run.
            print(
                "⚠️  Could not clone the Advanced model "
                f"({clone_error}); reusing the shared instance, so weights "
                "carry over between runs."
            )
            model = create_advanced_model

    # Likewise give each run its own optimizer so optimizer state (step
    # count, moment estimates) is not shared across runs. Anything that is
    # not a Keras optimizer instance (e.g. a string identifier) is passed
    # through untouched.
    optimizer = adam_optimizer
    if isinstance(optimizer, tf.keras.optimizers.Optimizer):
        optimizer = type(optimizer).from_config(optimizer.get_config())

    # Compile with specified loss function; MAE and MSE are always tracked
    # so runs can be compared on a common metric regardless of the loss.
    model.compile(
        optimizer=optimizer,
        loss=loss_function,
        metrics=['mae', 'mse']
    )

    # Initialize ModelRunner
    runner = ModelRunner(
        path_train=TRAIN_PATH,
        path_val=VAL_PATH,
        path_meta_model=META_PATH,
        path_features=FEATURES_PATH,
        batch_size=BATCH_SIZE,
        subset_features=SUBSET_FEATURES,
        model=model
    )

    # Train model
    trained_model, history = runner.train(epochs=EPOCHS)

    # Get final metrics (last recorded value of each series)
    final_loss = history.history['loss'][-1]
    final_val_loss = history.history['val_loss'][-1]
    final_mae = history.history['mae'][-1]
    final_val_mae = history.history['val_mae'][-1]

    print(f"\nFinal metrics for {loss_name}:")
    print(f"  Training Loss: {final_loss:.6f}")
    print(f"  Validation Loss: {final_val_loss:.6f}")
    print(f"  Training MAE: {final_mae:.6f}")
    print(f"  Validation MAE: {final_val_mae:.6f}")

    return {
        'loss_name': loss_name,
        'history': history,
        'final_loss': final_loss,
        'final_val_loss': final_val_loss,
        'final_mae': final_mae,
        'final_val_mae': final_val_mae,
        'model': trained_model,
        'runner': runner
    }

print("Training function defined.")

In [None]:
# Train models with different loss functions
# Each run is best-effort: a failure is reported (with its traceback) and
# recorded as None so the remaining loss functions still get trained.
import traceback

results = {}  # loss name -> result dict from train_with_loss_function, or None on failure

print("Starting training with different loss functions...")
print("=" * 60)

for loss_name, loss_function in LOSS_FUNCTIONS.items():
    try:
        result = train_with_loss_function(loss_name, loss_function)
    except Exception as e:
        print(f"❌ {loss_name} training failed: {e}")
        # The one-line message alone often hides the exception type and
        # origin; print the full traceback so failures can be diagnosed.
        traceback.print_exc()
        results[loss_name] = None
    else:
        results[loss_name] = result
        print(f"✅ {loss_name} training completed successfully")

print(f"\n{'='*60}")
print("All training completed!")
print(f"Successful trainings: {sum(1 for r in results.values() if r is not None)}/{len(LOSS_FUNCTIONS)}")

In [None]:
# Tabulate the outcome of every run and pick the winner by validation MAE.
print("\n" + "="*80)
print("LOSS FUNCTION COMPARISON RESULTS")
print("="*80)

# Display column -> key in the per-run result dict.
_METRIC_COLUMNS = {
    'Final Train Loss': 'final_loss',
    'Final Val Loss': 'final_val_loss',
    'Final Train MAE': 'final_mae',
    'Final Val MAE': 'final_val_mae',
}

summary_data = []
for loss_name, result in results.items():
    row = {'Loss Function': loss_name.upper()}
    if result is None:
        # The run raised during training: mark every column as failed.
        row.update(dict.fromkeys(_METRIC_COLUMNS, 'FAILED'))
        row['Convergence'] = 'FAILED'
    else:
        for column, key in _METRIC_COLUMNS.items():
            row[column] = f"{result[key]:.6f}"
        # "Good" means the final validation loss is more than 10% below
        # the validation loss recorded after the first epoch.
        first_val_loss = result['history'].history['val_loss'][0]
        improved = result['final_val_loss'] < first_val_loss * 0.9
        row['Convergence'] = 'Good' if improved else 'Poor'
    summary_data.append(row)

# Render as a DataFrame for aligned console output.
summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))

# Keep only the runs that finished, then select the lowest validation MAE.
valid_results = {
    name: outcome for name, outcome in results.items() if outcome is not None
}
if not valid_results:
    print("\n❌ No successful training runs to compare")
else:
    best_loss = min(
        valid_results.items(),
        key=lambda entry: entry[1]['final_val_mae'],
    )
    print(f"\n🏆 Best performing loss function: {best_loss[0].upper()}")
    print(f"   Final Validation MAE: {best_loss[1]['final_val_mae']:.6f}")

In [None]:
# Draw a 2x2 grid of per-epoch curves, one line per successful run.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Loss Function Comparison - Training Curves', fontsize=16)

# (history key, panel title, y-axis label), listed in row-major grid order:
# top-left, top-right, bottom-left, bottom-right.
_PANELS = (
    ('loss', 'Training Loss Curves', 'Loss'),
    ('val_loss', 'Validation Loss Curves', 'Loss'),
    ('mae', 'Training MAE Curves', 'MAE'),
    ('val_mae', 'Validation MAE Curves', 'MAE'),
)

for panel_ax, (history_key, panel_title, y_label) in zip(axes.flat, _PANELS):
    for loss_name, result in results.items():
        if result is None:
            # Failed runs have no history to draw.
            continue
        series = result['history'].history[history_key]
        panel_ax.plot(series, label=loss_name.upper())
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Test predictions with the best performing model
# Loads the live dataset, selects the feature columns the best run was
# trained on, and reports summary statistics and plots for its predictions.
if valid_results:
    best_loss_name, best_result = best_loss
    best_runner = best_result['runner']

    print(f"\n{'='*60}")
    print(f"TESTING BEST MODEL ({best_loss_name.upper()})")
    print(f"{'='*60}")

    # Load test data
    try:
        live_data = pd.read_parquet("../../data/live.parquet")
        # Use the same feature subset the runner was configured with.
        feature_cols = best_runner.feature_set["feature_sets"][best_runner.subset_features]
        X_live = live_data[feature_cols].values.astype(np.float32)

        # Generate predictions
        print("Generating predictions on live data...")
        # Flatten to 1-D: if predict() returns a column vector of shape
        # (N, 1), predictions[i] would be a 1-element array and the ":.6f"
        # formatting below would raise TypeError. This is a no-op for
        # output that is already 1-D.
        # NOTE(review): assumes one prediction per row - confirm against
        # ModelRunner.predict.
        predictions = np.asarray(best_runner.predict(X_live)).ravel()

        print("\nPrediction Results:")
        print(f"  Total predictions: {len(predictions):,}")
        print(f"  Prediction range: [{predictions.min():.6f}, {predictions.max():.6f}]")
        print(f"  Mean prediction: {predictions.mean():.6f}")
        print(f"  Std deviation: {predictions.std():.6f}")
        # Rounded to 6 decimals so float noise does not inflate the count.
        print(f"  Unique values: {len(np.unique(np.round(predictions, 6))):,}")

        # Plot prediction distribution
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.hist(predictions, bins=50, alpha=0.7, edgecolor='black')
        plt.title(f'Prediction Distribution\n({best_loss_name.upper()} Loss)')
        plt.xlabel('Prediction Value')
        plt.ylabel('Frequency')
        plt.grid(True)

        plt.subplot(1, 2, 2)
        plt.boxplot(predictions)
        plt.title(f'Prediction Box Plot\n({best_loss_name.upper()} Loss)')
        plt.ylabel('Prediction Value')
        plt.grid(True)

        plt.tight_layout()
        plt.show()

        # Show sample predictions
        print("\nFirst 10 predictions:")
        for position, value in enumerate(predictions[:10], start=1):
            print(f"  {position}: {value:.6f}")

    except FileNotFoundError:
        print("⚠️  Live data file not found, skipping prediction test")
    except Exception as e:
        # Best-effort cell: report the problem instead of aborting the notebook.
        print(f"❌ Error during prediction testing: {e}")
else:
    print("\n❌ No valid models to test predictions")

In [None]:
# Final summary and recommendations
# Prints the experiment settings, ranks the successful runs by validation
# MAE, and closes with a status banner that reflects what actually happened.
print(f"\n{'='*80}")
print("FINAL SUMMARY AND RECOMMENDATIONS")
print(f"{'='*80}")

print("\n📊 EXPERIMENT OVERVIEW:")
print(f"   • Tested {len(LOSS_FUNCTIONS)} standard loss functions")
print("   • Used Advanced model architecture")
print(f"   • Trained with ModelRunner for {EPOCHS} epochs")
print(f"   • Features: {SUBSET_FEATURES} subset")
print(f"   • Batch size: {BATCH_SIZE}")

if valid_results:
    # Derive the winner here instead of relying on names bound in the
    # prediction cell, so this cell also works when that cell was skipped.
    best_loss_name, best_result = min(
        valid_results.items(), key=lambda x: x[1]['final_val_mae']
    )

    print("\n🏆 BEST PERFORMING LOSS FUNCTION:")
    print(f"   • {best_loss_name.upper()} with validation MAE: {best_result['final_val_mae']:.6f}")

    print("\n📈 PERFORMANCE RANKING (by validation MAE):")
    sorted_results = sorted(valid_results.items(), key=lambda x: x[1]['final_val_mae'])
    for i, (loss_name, result) in enumerate(sorted_results, 1):
        print(f"   {i}. {loss_name.upper()}: {result['final_val_mae']:.6f}")

    print("\n💡 RECOMMENDATIONS:")
    best_val_mae = best_result['final_val_mae']
    # The 0.1 / 0.2 cut-offs are fixed thresholds on validation MAE.
    if best_val_mae < 0.1:
        print(f"   ✅ Excellent performance! The {best_loss_name.upper()} loss function works very well.")
    elif best_val_mae < 0.2:
        print(f"   👍 Good performance with {best_loss_name.upper()} loss function.")
    else:
        print("   ⚠️  Performance could be improved. Consider:")
        print("      - Training for more epochs")
        print("      - Using a different model architecture")
        print("      - Feature engineering")

    print("\n🔧 TECHNICAL INSIGHTS:")
    loss_types = {
        'mae': 'Robust to outliers, linear penalty',
        'mse': 'Sensitive to outliers, quadratic penalty',
        'huber': 'Combines MAE and MSE benefits',
        'logcosh': 'Smooth approximation of MAE'
    }

    # Describe only the two best-ranked loss functions.
    for loss_name, _ in sorted_results[:2]:  # Top 2
        if loss_name in loss_types:
            print(f"   • {loss_name.upper()}: {loss_types[loss_name]}")
else:
    print("\n❌ No successful training runs completed.")
    print("   Check data paths and model compatibility.")

# Only claim a successful ModelRunner integration when at least one run
# actually trained; previously this was printed even if every run failed.
if valid_results:
    print("\n🎯 MODELRUNNER INTEGRATION:")
    print("   ✅ Successfully used ModelRunner for all data handling")
    print("   ✅ No manual data loading or preprocessing required")
    print("   ✅ Consistent training pipeline across all loss functions")
    print("   ✅ Built-in model export and validation capabilities")

# Choose the closing banner from the real outcome of the training runs.
failed_runs = [name for name, outcome in results.items() if outcome is None]
if valid_results and not failed_runs:
    closing_banner = "EXPERIMENT COMPLETED SUCCESSFULLY"
elif valid_results:
    closing_banner = f"EXPERIMENT COMPLETED WITH {len(failed_runs)} FAILED RUN(S)"
else:
    closing_banner = "EXPERIMENT FAILED: NO SUCCESSFUL TRAINING RUNS"

print(f"\n{'='*80}")
print(closing_banner)
print(f"{'='*80}")