# Notebook 2: CVAE Model Training Pipeline

This notebook provides an interactive walkthrough of the new CVAE-based model training process. It demonstrates the advanced architecture, which combines graph neural networks, temporal context modeling, and meta-learning capabilities. Training uses the enhanced pipeline from `src/training_pipeline.py` with a conservative configuration for stability.

### 1. Setup and Imports

In [ ]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import os
import sys
import joblib
from tqdm.notebook import tqdm # Use notebook-friendly tqdm
import random
import matplotlib.pyplot as plt

# Make the parent of the current working directory importable so that the
# `src.*` imports below resolve. Notebook cells are routinely re-executed, so
# only append when the entry is missing; otherwise every re-run would add
# another duplicate to sys.path.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

# Import from our project's new CVAE-based architecture
from src.config import CONFIG
from src.cvae_model import CVAEModel
from src.cvae_engine import CVAETrainingEngine
from src.cvae_data_loader import CVAEDataLoader
from src.feature_engineering import FeatureEngineer
from src.graph_encoder import GraphEncoder
from src.temporal_context import TemporalContextModel
from src.meta_learner import MetaLearner
from src.debug_utils import setup_debug_logging, log_debug_info

# Confirm the imports above succeeded and report which accelerator PyTorch sees.
print("Setup complete. New CVAE modules loaded.")
device_label = 'CUDA' if torch.cuda.is_available() else 'CPU'
print(f"Device available: {device_label}")
print("Using conservative training configuration for stability.")

### 2. Initialize Debug Logging and Load Data

The new architecture includes comprehensive debugging utilities to monitor training stability and catch potential issues early.

In [ ]:
# Start the notebook's debug logger and record that the run has begun.
debug_logger = setup_debug_logging("notebook_training")
log_debug_info("Starting CVAE training notebook", debug_logger)

# Column names applied to the raw CSV, which is read without a header row.
col_names = [
    # Draw identity
    'Draw', 'Date',
    # Drawn numbers
    'Winning_Num_1', 'Winning_Num_2', 'Winning_Num_3',
    'Winning_Num_4', 'Winning_Num_5', 'Winning_Num_6',
    'Extra_Num',
    # Per-draw summary columns
    'From_Last', 'Low', 'High', 'Odd', 'Even',
    '1-10', '11-20', '21-30', '31-40', '41-50',
    # Winners / prize pairs for divisions 1 through 7
    'Div_1_Winners', 'Div_1_Prize',
    'Div_2_Winners', 'Div_2_Prize',
    'Div_3_Winners', 'Div_3_Prize',
    'Div_4_Winners', 'Div_4_Prize',
    'Div_5_Winners', 'Div_5_Prize',
    'Div_6_Winners', 'Div_6_Prize',
    'Div_7_Winners', 'Div_7_Prize',
    'Turnover',
]

# The data path in CONFIG is resolved relative to the parent directory.
data_path = os.path.join('..', CONFIG["data_path"])

# Read the file, parse the dates, and order the draws chronologically.
# NOTE(review): skiprows=33 presumably skips a preamble in the raw file;
# confirm against the actual data file.
df = (
    pd.read_csv(data_path, header=None, skiprows=33, names=col_names)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(by='Date')
    .reset_index(drop=True)
)

print(f"Loaded {len(df)} historical draws")
print(f"Data range: {df['Date'].min()} to {df['Date'].max()}")

# Fit the feature engineer on the full, date-sorted history.
feature_engineer = FeatureEngineer()
feature_engineer.fit(df)
print(f"Feature engineer fitted. Feature dimension: {feature_engineer.get_feature_dim()}")

### 3. Initialize CVAE Architecture Components

The new architecture consists of multiple components working together: CVAE core, graph encoder, temporal context, and meta-learner.

In [ ]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build the training and validation loaders from the draw history.
# NOTE(review): negative sampling is said to happen inside CVAEDataLoader;
# it is not visible from this notebook.
cvae_data_loader = CVAEDataLoader(CONFIG)
train_loader, val_loader = cvae_data_loader.create_data_loaders(df, feature_engineer)

print("Created data loaders:")
print(f"  - Training batches: {len(train_loader)}")
print(f"  - Validation batches: {len(val_loader)}")
print(f"  - Batch size: {CONFIG['batch_size']}")

# The sub-models are constructed in a fixed order (graph, temporal, meta,
# then the CVAE that wraps them) and each is moved to `device` right after
# construction.

# Graph encoder over the lotto numbers.
graph_encoder = GraphEncoder(
    num_nodes=CONFIG['num_lotto_numbers'],
    input_dim=CONFIG['graph_input_dim'],
    hidden_dim=CONFIG['graph_hidden_dim'],
    output_dim=CONFIG['graph_output_dim'],
)
graph_encoder = graph_encoder.to(device)

# Temporal context model.
temporal_model = TemporalContextModel(
    input_dim=CONFIG['temporal_input_dim'],
    hidden_dim=CONFIG['temporal_hidden_dim'],
    output_dim=CONFIG['temporal_output_dim'],
)
temporal_model = temporal_model.to(device)

# Meta-learner sized for the configured number of ensemble scorers.
meta_learner = MetaLearner(
    input_dim=CONFIG['meta_input_dim'],
    hidden_dim=CONFIG['meta_hidden_dim'],
    num_scorers=CONFIG['num_ensemble_scorers'],
)
meta_learner = meta_learner.to(device)

# Main CVAE model, which receives the three sub-models built above.
cvae_model = CVAEModel(
    feature_dim=feature_engineer.get_feature_dim(),
    latent_dim=CONFIG['cvae_latent_dim'],
    hidden_dim=CONFIG['cvae_hidden_dim'],
    graph_encoder=graph_encoder,
    temporal_encoder=temporal_model,
    meta_learner=meta_learner,
)
cvae_model = cvae_model.to(device)

# Report the total parameter count and the key size settings.
total_params = sum(param.numel() for param in cvae_model.parameters())
print(f"CVAE model initialized with {total_params} parameters")
print(f"Conservative configuration: latent_dim={CONFIG['cvae_latent_dim']}, hidden_dim={CONFIG['cvae_hidden_dim']}")

### 4. Initialize Training Engine and Optimizer

The new training engine includes advanced features like mixed precision handling, gradient clipping, and error recovery.

In [ ]:
# The engine wraps the model together with the shared config, target device,
# and the notebook's debug logger.
training_engine = CVAETrainingEngine(
    model=cvae_model, config=CONFIG, device=device, logger=debug_logger
)

# AdamW over every model parameter; learning rate and weight decay come
# straight from CONFIG (the original note cites 5e-5 as the conservative
# learning rate; the actual value is whatever CONFIG holds).
optimizer = optim.AdamW(
    cvae_model.parameters(),
    lr=CONFIG['learning_rate'],
    weight_decay=CONFIG['weight_decay'],
)

# Echo the training-related settings, one per line.
print("Training engine initialized with:")
for label, key in (
    ("Learning rate", 'learning_rate'),
    ("Weight decay", 'weight_decay'),
    ("Gradient clipping", 'gradient_clip_norm'),
    ("Mixed precision", 'use_mixed_precision'),
    ("Conservative training", 'conservative_training'),
):
    print(f"  - {label}: {CONFIG[key]}")

# Per-epoch loss histories, filled by the training loop and plotted afterwards.
# Each name is bound to its own independent empty list.
train_losses, val_losses, reconstruction_losses, kl_losses, contrastive_losses = (
    [] for _ in range(5)
)

### 5. Run CVAE Training Loop

The training loop includes comprehensive monitoring, error handling, and stability checks. The multi-component loss combines reconstruction, KL-divergence, and contrastive terms.

In [ ]:
# Train for up to CONFIG['epochs'] epochs with early stopping on validation
# loss. The weights from the best-validation epoch are snapshotted in memory
# and restored once the loop ends, so that whatever is saved from `cvae_model`
# afterwards is the best model rather than the last (possibly degraded) one.
import copy

print(f"\nStarting CVAE training for {CONFIG['epochs']} epochs...")
print("=" * 60)

best_val_loss = float('inf')
best_epoch = None   # 1-based epoch number of the best validation loss so far
best_state = None   # deep copy of the model state_dict taken at that epoch
patience_counter = 0

for epoch in range(CONFIG['epochs']):
    print(f"\nEpoch {epoch+1}/{CONFIG['epochs']}")
    print("-" * 40)

    # Training phase
    train_metrics = training_engine.train_epoch(train_loader, optimizer, epoch)

    # Validation phase
    val_metrics = training_engine.validate_epoch(val_loader, epoch)

    # Track losses (only the training-side components are kept for plotting)
    train_losses.append(train_metrics['total_loss'])
    val_losses.append(val_metrics['total_loss'])
    reconstruction_losses.append(train_metrics['reconstruction_loss'])
    kl_losses.append(train_metrics['kl_loss'])
    contrastive_losses.append(train_metrics['contrastive_loss'])

    # Print metrics
    print(f"Train Loss: {train_metrics['total_loss']:.4f} "
          f"(Recon: {train_metrics['reconstruction_loss']:.4f}, "
          f"KL: {train_metrics['kl_loss']:.4f}, "
          f"Contrastive: {train_metrics['contrastive_loss']:.4f})")

    print(f"Val Loss: {val_metrics['total_loss']:.4f} "
          f"(Recon: {val_metrics['reconstruction_loss']:.4f}, "
          f"KL: {val_metrics['kl_loss']:.4f}, "
          f"Contrastive: {val_metrics['contrastive_loss']:.4f})")

    # Early stopping check
    if val_metrics['total_loss'] < best_val_loss:
        best_val_loss = val_metrics['total_loss']
        best_epoch = epoch + 1
        patience_counter = 0
        print("✓ New best validation loss - saving checkpoint")
        # state_dict() returns references to the live parameters, so a deep
        # copy is required; otherwise later epochs would overwrite the
        # snapshot in place.
        best_state = copy.deepcopy(cvae_model.state_dict())
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print(f"Early stopping triggered after {patience_counter} epochs without improvement")
            break

    # Memory cleanup
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\nTraining completed!")

# Roll the model back to its best-validation weights. best_state stays None
# when no epoch ran or the validation loss never compared below infinity
# (e.g. it was NaN throughout); in that case the model is left as-is.
if best_state is not None:
    cvae_model.load_state_dict(best_state)
    print(f"Restored model weights from best epoch {best_epoch}")

print(f"Best validation loss: {best_val_loss:.4f}")

### 6. Visualize Training Progress

Let's plot the training curves to understand how the different loss components evolved during training.

In [ ]:
# Plot the recorded loss histories on a 2x2 grid: total loss (training vs.
# validation) plus the three training-side components.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per subplot, in row-major order: (title, [(values, label, color), ...]).
panels = [
    ('Total Loss', [
        (train_losses, 'Training', 'blue'),
        (val_losses, 'Validation', 'red'),
    ]),
    ('Reconstruction Loss', [(reconstruction_losses, 'Reconstruction', 'green')]),
    ('KL Divergence Loss', [(kl_losses, 'KL Divergence', 'orange')]),
    ('Contrastive Loss', [(contrastive_losses, 'Contrastive', 'purple')]),
]

# axes.flat walks the grid as [0, 0], [0, 1], [1, 0], [1, 1], which matches
# the order of `panels`.
for ax, (title, curves) in zip(axes.flat, panels):
    for values, label, color in curves:
        ax.plot(values, label=label, color=color)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

print("Training visualization complete.")
# The histories are empty when no epoch completed (e.g. epochs set to 0);
# indexing [-1] would raise IndexError in that case.
if train_losses and val_losses:
    print(f"Final training loss: {train_losses[-1]:.4f}")
    print(f"Final validation loss: {val_losses[-1]:.4f}")
else:
    print("No completed epochs; final losses are unavailable.")

# Persist the model weights and the fitted feature engineer side by side
# under ../models, using the "conservative_" filename prefix.
print("\nSaving CVAE model and feature engineer...")

model_dir = os.path.join('..', 'models')
model_save_path, fe_save_path = (
    os.path.join(model_dir, filename)
    for filename in (
        'conservative_cvae_model.pth',
        'conservative_feature_engineer.pkl',
    )
)

# Create the target directory if needed; an existing one is not an error.
os.makedirs(model_dir, exist_ok=True)

# Only the state_dict is written for the model, not the module object.
# The feature engineer object is serialized whole via joblib.
torch.save(cvae_model.state_dict(), model_save_path)
joblib.dump(feature_engineer, fe_save_path)

print(f"CVAE model saved to: {model_save_path}")
print(f"Feature engineer saved to: {fe_save_path}")
print("Training notebook completed successfully!")