# Notebook 3: CVAE Inference and Evaluation

This notebook demonstrates inference and evaluation with the CVAE-based (conditional variational autoencoder) system. It loads the trained CVAE model together with its graph encoder, temporal-context model, and meta-learner, generates number combinations with the inference pipeline (CVAE sampling, meta-learned ensemble, and hybrid local search), and evaluates the model using reconstruction, latent-space, generative-quality, predictive, and ensemble metrics.

### 1. Setup and Imports

In [ ]:
import torch
import pandas as pd
import numpy as np
import joblib
from tqdm.notebook import tqdm
import random
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns

# Make the parent directory importable so the `src` package below resolves.
# NOTE(review): assumes the notebook's working directory sits one level below
# the project root — confirm for your launch setup.
project_root = os.path.abspath(os.pardir)
# Notebook cells are routinely re-executed; only register the path once so
# sys.path does not accumulate duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Import all necessary components from our new CVAE-based architecture
from src.config import CONFIG
from src.cvae_model import CVAEModel
from src.graph_encoder import GraphEncoder
from src.temporal_context import TemporalContextModel
from src.meta_learner import MetaLearner
from src.feature_engineering import FeatureEngineer
from src.inference_pipeline import CVAEInferencePipeline
from src.evaluation_pipeline import CVAEEvaluationPipeline
from src.visualization import create_inference_plots, create_evaluation_plots

print("Setup complete. New CVAE inference modules loaded.")
# Informational only: the device actually used is selected in a later cell.
print(f"Device available: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

### 2. Load CVAE Model and Components

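The new architecture consists of several components: the CVAE model, graph encoder, temporal context model, meta-learner, and feature engineer. The cell below locates the saved model artifacts (preferring the conservative pair when both of its files exist), selects the compute device, and loads the feature engineer and the historical draw data. The network components themselves are constructed, and their trained weights loaded, in the next section.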

In [ ]:
# Locate the trained artifacts. Two complete pairs (model weights + feature
# engineer) are accepted; the "conservative" pair is preferred when present.
conservative_model_path = os.path.join('..', 'models', 'conservative_cvae_model.pth')
conservative_fe_path = os.path.join('..', 'models', 'conservative_feature_engineer.pkl')

# Fallback to regular model paths
model_path = os.path.join('..', 'models', 'scoring_model.pth')
fe_path = os.path.join('..', 'models', 'feature_engineer.pkl')

# A pair is only usable when BOTH of its files exist.
if os.path.exists(conservative_model_path) and os.path.exists(conservative_fe_path):
    model_path, fe_path = conservative_model_path, conservative_fe_path
    print("Loading conservative CVAE model artifacts...")
elif os.path.exists(model_path) and os.path.exists(fe_path):
    print("Loading standard model artifacts...")
else:
    print("Model artifacts not found! Please run the training pipeline first.")
    # List every accepted file (not just the conservative pair) together with
    # its status, so a half-present pair is easy to spot.
    print("Expected files (either complete pair):")
    for expected_path in (conservative_model_path, conservative_fe_path,
                          model_path, fe_path):
        status = "found" if os.path.exists(expected_path) else "missing"
        print(f"  - {expected_path} [{status}]")
    raise FileNotFoundError("Model artifacts missing")

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load feature engineer
# SECURITY NOTE: joblib.load unpickles arbitrary Python objects. Only load
# artifacts produced by your own training run, never files from untrusted
# sources.
feature_engineer = joblib.load(fe_path)
print(f"Feature engineer loaded. Feature dimension: {feature_engineer.get_feature_dim()}")

# Column names applied to the CSV, which is read without a header row
# (header=None below).
col_names = [
    'Draw', 'Date', 'Winning_Num_1', 'Winning_Num_2', 'Winning_Num_3',
    'Winning_Num_4', 'Winning_Num_5', 'Winning_Num_6', 'Extra_Num',
    'From_Last', 'Low', 'High', 'Odd', 'Even', '1-10', '11-20', '21-30',
    '31-40', '41-50', 'Div_1_Winners', 'Div_1_Prize', 'Div_2_Winners',
    'Div_2_Prize', 'Div_3_Winners', 'Div_3_Prize', 'Div_4_Winners',
    'Div_4_Prize', 'Div_5_Winners', 'Div_5_Prize', 'Div_6_Winners',
    'Div_6_Prize', 'Div_7_Winners', 'Div_7_Prize', 'Turnover'
]

data_path = os.path.join('..', CONFIG["data_path"])
# NOTE(review): the first 33 rows of the file are skipped — presumably a
# preamble/header region; confirm against the actual data file.
df = pd.read_csv(data_path, header=None, skiprows=33, names=col_names)
# NOTE(review): no explicit date format is given, so pandas infers it —
# confirm the day/month order of the source file is parsed as intended.
df['Date'] = pd.to_datetime(df['Date'])
# Chronological order with a fresh 0..n-1 index; later cells slice by position.
df = df.sort_values(by='Date').reset_index(drop=True)

print(f"Historical data loaded: {len(df)} draws from {df['Date'].min()} to {df['Date'].max()}")

### 3. Initialize CVAE Architecture and Load Trained Weights

Recreate the complete CVAE architecture and load the trained model weights.

In [ ]:
# Rebuild every sub-network with the dimensions stored in CONFIG, then load
# the trained weights into the assembled CVAE model.

# Initialize graph encoder for number relationships
graph_encoder = GraphEncoder(
    num_nodes=CONFIG['num_lotto_numbers'],
    input_dim=CONFIG['graph_input_dim'],
    hidden_dim=CONFIG['graph_hidden_dim'],
    output_dim=CONFIG['graph_output_dim']
).to(device)

# Initialize temporal context model
temporal_model = TemporalContextModel(
    input_dim=CONFIG['temporal_input_dim'],
    hidden_dim=CONFIG['temporal_hidden_dim'],
    output_dim=CONFIG['temporal_output_dim']
).to(device)

# Initialize meta-learner for ensemble optimization
meta_learner = MetaLearner(
    input_dim=CONFIG['meta_input_dim'],
    hidden_dim=CONFIG['meta_hidden_dim'],
    num_scorers=CONFIG['num_ensemble_scorers']
).to(device)

# Initialize main CVAE model; the three sub-networks above are passed in as
# constructor arguments.
cvae_model = CVAEModel(
    feature_dim=feature_engineer.get_feature_dim(),
    latent_dim=CONFIG['cvae_latent_dim'],
    hidden_dim=CONFIG['cvae_hidden_dim'],
    graph_encoder=graph_encoder,
    temporal_encoder=temporal_model,
    meta_learner=meta_learner
).to(device)

# Load trained weights
# SECURITY NOTE: torch.load unpickles the checkpoint file. Only load
# checkpoints you trust; consider passing weights_only=True if the installed
# PyTorch version supports it.
# NOTE(review): assumes the file holds a bare state_dict (not a wrapper dict
# such as {'model_state_dict': ...}) — confirm against the training code.
try:
    cvae_model.load_state_dict(torch.load(model_path, map_location=device))
except Exception as e:
    print(f"Error loading CVAE model: {e}")
    # No legacy fallback exists, so do not claim one is being attempted;
    # surface the original error unchanged.
    print("No legacy-model fallback is implemented; re-raising the original error.")
    raise
else:
    # Switch to evaluation mode only once the weights are actually in place.
    cvae_model.eval()
    print(f"CVAE model loaded successfully from {model_path}")
    print(f"Model has {sum(p.numel() for p in cvae_model.parameters())} parameters")

print("CVAE model initialized and loaded for inference!")

### 4. Advanced CVAE Inference: Generate Number Combinations

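Use the CVAE inference pipeline to generate number combinations, each reported with a score and a confidence value. Note: in this notebook the inference code is located in the final cell, after the evaluation section that follows.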

In [ ]:
### 6. Advanced CVAE Model Evaluation

Evaluate the CVAE model's performance using sophisticated metrics including reconstruction quality, latent space coherence, and predictive performance.

# Build the evaluation pipeline around the already-loaded model and
# feature engineer.
evaluation_pipeline = CVAEEvaluationPipeline(
    cvae_model=cvae_model,
    feature_engineer=feature_engineer,
    config=CONFIG,
    device=device,
)

banner = "=" * 60
section_rule = "-" * 30

print("\n" + banner)
print("COMPREHENSIVE CVAE MODEL EVALUATION")
print(banner)

# The validation set is the trailing 15% of the rows in df.
train_size = int(len(df) * 0.85)
val_df = df.iloc[train_size:].reset_index(drop=True)

print(f"Evaluating on {len(val_df)} validation draws...")

# Single call that produces every scalar metric and series used below.
evaluation_results = evaluation_pipeline.evaluate_comprehensive(val_df)

# Text report layout: (section title, [(label, result key, format spec, suffix)]).
report_layout = [
    ("1. RECONSTRUCTION QUALITY", [
        ("Reconstruction Loss", 'reconstruction_loss', ".4f", ""),
        ("Reconstruction Accuracy", 'reconstruction_accuracy', ".2f", "%"),
        ("Feature Correlation", 'feature_correlation', ".3f", ""),
    ]),
    ("2. LATENT SPACE ANALYSIS", [
        ("KL Divergence", 'kl_divergence', ".4f", ""),
        ("Latent Space Utilization", 'latent_utilization', ".2f", "%"),
        ("Latent Consistency", 'latent_consistency', ".3f", ""),
    ]),
    ("3. GENERATIVE QUALITY", [
        ("Sample Diversity", 'sample_diversity', ".3f", ""),
        ("Historical Similarity", 'historical_similarity', ".3f", ""),
        ("Pattern Capture Score", 'pattern_capture', ".3f", ""),
    ]),
    ("4. PREDICTIVE PERFORMANCE", [
        ("Win Rate vs Random", 'win_rate', ".2f", "%"),
        ("Ranking Correlation", 'ranking_correlation', ".3f", ""),
        ("Confidence Calibration", 'confidence_calibration', ".3f", ""),
    ]),
    ("5. ENSEMBLE PERFORMANCE", [
        ("Meta-Learner Accuracy", 'meta_accuracy', ".2f", "%"),
        ("Ensemble Improvement", 'ensemble_improvement', ".2f", "%"),
        ("Weight Stability", 'weight_stability', ".3f", ""),
    ]),
]

for section_title, metric_rows in report_layout:
    print("\n" + section_title)
    print(section_rule)
    for label, result_key, spec, suffix in metric_rows:
        print(f"{label}: {evaluation_results[result_key]:{spec}}{suffix}")

# 2x3 dashboard; give each panel a descriptive name instead of indexing.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
(ax_recon, ax_latent, ax_winrate), (ax_calib, ax_weights, ax_compare) = axes

# Panel: reconstruction loss series
ax_recon.plot(evaluation_results['reconstruction_timeline'])
ax_recon.set_title('Reconstruction Quality Timeline')
ax_recon.set_xlabel('Draw Number')
ax_recon.set_ylabel('Reconstruction Loss')

# Panel: scatter of the first two columns of the latent samples
latent_samples = evaluation_results['latent_samples']
ax_latent.scatter(latent_samples[:, 0], latent_samples[:, 1], alpha=0.6)
ax_latent.set_title('Latent Space Visualization (2D)')
ax_latent.set_xlabel('Latent Dimension 1')
ax_latent.set_ylabel('Latent Dimension 2')

# Panel: win-rate series
ax_winrate.plot(evaluation_results['win_rate_progression'])
ax_winrate.set_title('Win Rate Progression')
ax_winrate.set_xlabel('Evaluation Step')
ax_winrate.set_ylabel('Win Rate %')

# Panel: calibration curve with the y = x diagonal drawn as a dashed red line
ax_calib.plot(evaluation_results['confidence_bins'], evaluation_results['accuracy_bins'])
ax_calib.plot([0, 1], [0, 1], 'r--', alpha=0.5)
ax_calib.set_title('Confidence Calibration')
ax_calib.set_xlabel('Confidence')
ax_calib.set_ylabel('Accuracy')

# Panel: one line per row of the transposed weight array
weights_evolution = evaluation_results['weights_evolution']
for scorer_number, scorer_weights in enumerate(weights_evolution.T, start=1):
    ax_weights.plot(scorer_weights, label=f'Scorer {scorer_number}')
ax_weights.set_title('Ensemble Weight Evolution')
ax_weights.set_xlabel('Evaluation Step')
ax_weights.set_ylabel('Weight')
ax_weights.legend()

# Panel: bar chart of the component comparison values
methods = ['CVAE', 'Graph Only', 'Temporal Only', 'Meta Only', 'Full Ensemble']
performances = evaluation_results['component_comparison']
ax_compare.bar(methods, performances)
ax_compare.set_title('Component Performance Comparison')
ax_compare.set_ylabel('Win Rate %')
ax_compare.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

print("\n" + banner)
print("EVALUATION SUMMARY")
print(banner)
print(f"Overall Model Performance: {evaluation_results['overall_score']:.2f}/100")
print(f"Recommendation: {evaluation_results['recommendation']}")
print(banner)

In [ ]:
# Build the inference pipeline around the already-loaded model and
# feature engineer.
inference_pipeline = CVAEInferencePipeline(
    cvae_model=cvae_model,
    feature_engineer=feature_engineer,
    config=CONFIG,
    device=device,
)

# The pipeline is fitted on the full historical frame before generating.
print("Fitting inference pipeline with historical data...")
inference_pipeline.fit(df)


def _announce_method(heading):
    """Print a blank line, the method heading, and a 40-character rule."""
    print("\n" + heading)
    print("-" * 40)


def _show_sets(generated):
    """Print each (combo, score, confidence) triple, numbered from 1."""
    for rank, (combo, score, confidence) in enumerate(generated, 1):
        print(f"Set {rank}: {combo} (Score: {score:.4f}, Confidence: {confidence:.3f})")


generation_banner = "=" * 60
print("\n" + generation_banner)
print("GENERATING NUMBER COMBINATIONS")
print(generation_banner)

# Method 1: pure CVAE sampling
_announce_method("1. CVAE Latent Space Sampling")
cvae_sets = inference_pipeline.generate_sets_cvae_sampling(
    num_sets=5,
    num_samples=CONFIG['inference_samples'],
)
_show_sets(cvae_sets)

# Method 2: meta-learned ensemble with CVAE guidance enabled
_announce_method("2. Meta-Learned Ensemble Generation")
ensemble_sets = inference_pipeline.generate_sets_meta_ensemble(
    num_sets=5,
    use_cvae_guidance=True,
)
_show_sets(ensemble_sets)

# Method 3: hybrid approach with local search
_announce_method("3. Hybrid CVAE + Local Search")
hybrid_sets = inference_pipeline.generate_sets_hybrid_search(
    num_sets=5,
    search_iterations=CONFIG['search_iterations'],
)
_show_sets(hybrid_sets)

# Keep all three result lists together, keyed by method name, for analysis.
all_generated_sets = {
    'CVAE Sampling': cvae_sets,
    'Meta Ensemble': ensemble_sets,
    'Hybrid Search': hybrid_sets,
}