# Regime Detection Engine (RDE) Training - Transformer + VAE Architecture

This notebook implements the full hybrid Transformer + VAE architecture for the Regime Detection Engine.

## Architecture Overview:
1. **TransformerEncoder**: Processes sequences of MMD feature vectors with self-attention
2. **VAEHead**: Maps the transformer's context vector to a latent space
3. **Decoder**: Reconstructs the context vector from latent samples

## Training Strategy:
- Combined VAE loss: Reconstruction Loss + β·KL Divergence
- Early stopping based on validation loss
- Latent space visualization and dimensionality analysis

## Data:
- Uses MMD feature vectors from `training_data_rde.parquet`
- 70% Train, 15% Validation, 15% Test split

## 1. Environment Setup

In [None]:
# Check GPU and mount Drive
import torch
import os
import sys

# GPU check
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(f"✅ GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("⚠️ No GPU available")

# Mount Drive
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_BASE = "/content/drive/MyDrive/AlgoSpace"
    IN_COLAB = True
except:
    DRIVE_BASE = "./drive_simulation"
    IN_COLAB = False

# Clone repo
REPO_PATH = "/content/AlgoSpace" if IN_COLAB else "."
if IN_COLAB and not os.path.exists(REPO_PATH):
    !git clone https://github.com/QuantNova/AlgoSpace.git {REPO_PATH}

sys.path.insert(0, REPO_PATH)
sys.path.insert(0, os.path.join(REPO_PATH, 'src'))

In [ ]:
# Install dependencies
!pip install -q torch numpy pandas pyarrow matplotlib seaborn
!pip install -q wandb tensorboard tqdm
!pip install -q scikit-learn umap-learn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset

print("✅ Dependencies loaded")

## 2. Load Training Data

In [ ]:
# Input locations under the (real or simulated) Drive folder
data_file = f"{DRIVE_BASE}/data/processed/training_data_rde.parquet"
metadata_file = f"{DRIVE_BASE}/data/processed/data_preparation_metadata.json"

print(f"📂 Loading RDE data from: {data_file}")

# Feature table
rde_data = pd.read_parquet(data_file)

# Companion JSON metadata
with open(metadata_file, 'r') as meta_handle:
    metadata = json.load(meta_handle)

print(f"✅ Data loaded")
print(f"   Shape: {rde_data.shape}")
print(f"   Date range: {rde_data.index[0]} to {rde_data.index[-1]}")
print(f"   Features: {rde_data.shape[1]} MMD features")

# Pull the two settings this notebook reports from the metadata
data_config = metadata['data_config']
n_market_regimes = data_config['n_market_regimes']
mmd_window_size = data_config['mmd_window_size']

print(f"\n📊 Configuration:")
print(f"   Market regimes: {n_market_regimes}")
print(f"   MMD window size: {mmd_window_size}")

## 3. Create Sequence Data for Transformer

In [ ]:
# Create sequences for Transformer input
class RegimeSequenceDataset(Dataset):
    """Sliding-window dataset over a table of MMD features.

    Every item is a window of ``sequence_length`` consecutive rows. Windows
    start every ``stride`` rows, and only windows that fit completely inside
    the data are produced.
    """

    def __init__(self, data, sequence_length=24, stride=1):
        """
        Args:
            data: DataFrame with MMD features; its index supplies timestamps
            sequence_length: Number of rows in each window
            stride: Offset between the starts of consecutive windows
        """
        self.data = data.values.astype(np.float32)
        self.timestamps = data.index
        self.sequence_length = sequence_length
        self.stride = stride

        # Start offsets of all windows that fit entirely within the data
        last_start = len(data) - sequence_length
        self.valid_indices = list(range(0, last_start + 1, stride))

    def __len__(self):
        """Number of windows available."""
        return len(self.valid_indices)

    def __getitem__(self, idx):
        """Return the idx-th window and the timestamp of its final row."""
        first = self.valid_indices[idx]
        stop = first + self.sequence_length

        return {
            'sequence': torch.FloatTensor(self.data[first:stop]),
            # Stringified index label of the window's last row (for alignment)
            'timestamp': str(self.timestamps[stop - 1]),
        }


# Create train/val/test splits
def create_data_splits(data, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """Cut a time-ordered frame into contiguous train/val/test segments.

    Rows are not shuffled: the leading ``train_ratio`` share becomes the
    training set, the following ``val_ratio`` share the validation set, and
    all remaining rows the test set.

    Note: ``test_ratio`` is accepted but never read; the test segment is
    always whatever is left after the first two cuts.

    Returns:
        Tuple ``(train_data, val_data, test_data)`` of positional slices.
    """
    total_rows = len(data)

    # Cut positions are truncated to whole rows
    first_cut = int(total_rows * train_ratio)
    second_cut = int(total_rows * (train_ratio + val_ratio))

    return (
        data.iloc[:first_cut],
        data.iloc[first_cut:second_cut],
        data.iloc[second_cut:],
    )


# Temporal train/val/test split of the full feature table
train_data, val_data, test_data = create_data_splits(rde_data)

print(f"✅ Data splits created:")
print(f"   Train: {len(train_data)} samples ({len(train_data)/len(rde_data)*100:.1f}%)")
print(f"   Val: {len(val_data)} samples ({len(val_data)/len(rde_data)*100:.1f}%)")
print(f"   Test: {len(test_data)} samples ({len(test_data)/len(rde_data)*100:.1f}%)")

# Windowing parameters
sequence_length = 24  # 24 * 30min = 12 hours of context
stride = 6  # Create new sequence every 3 hours

# Wrap each split in a sliding-window dataset (same parameters for all three)
train_dataset, val_dataset, test_dataset = [
    RegimeSequenceDataset(split, sequence_length, stride)
    for split in (train_data, val_data, test_data)
]

print(f"\n📊 Sequence datasets:")
print(f"   Sequence length: {sequence_length} (12 hours)")
print(f"   Train sequences: {len(train_dataset)}")
print(f"   Val sequences: {len(val_dataset)}")
print(f"   Test sequences: {len(test_dataset)}")

## 4. Implement Transformer + VAE Architecture

In [ ]:
# Transformer + VAE Architecture
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding for sequence-first tensors.

    The encoding table is computed once for ``max_len`` positions and stored
    as a non-trainable buffer ``pe`` of shape ``(max_len, 1, d_model)``.
    Even feature columns hold sines and odd columns hold cosines.
    """

    def __init__(self, d_model, max_len=5000):
        """
        Args:
            d_model: Feature width of the tensors to encode (odd values are
                supported).
            max_len: Number of positions for which encodings are precomputed.
        """
        super().__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                           (-np.log(10000.0) / d_model))
        angles = position * div_term  # one column per sine frequency

        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # columns, so trim the angle table to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)

        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encodings for the first ``x.size(0)`` positions to ``x``.

        ``x`` is indexed position-first: dimension 0 selects the position and
        the buffer's singleton dimension 1 broadcasts over the batch.
        """
        return x + self.pe[:x.size(0), :]


class TransformerEncoder(nn.Module):
    """Turn a batch of MMD feature sequences into one context vector each.

    Pipeline: linear projection to ``d_model`` -> positional encoding ->
    dropout -> stacked self-attention layers -> mean over the time axis.
    """

    def __init__(self, input_dim, d_model=256, n_heads=8, n_layers=3, dropout=0.1):
        super().__init__()

        # Lift raw features to the model width
        self.input_projection = nn.Linear(input_dim, d_model)

        # Position information
        self.pos_encoder = PositionalEncoding(d_model)

        # Self-attention stack (batch-first, GELU feed-forward of width 4*d_model)
        layer_options = dict(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_options),
            num_layers=n_layers,
        )

        # Dropout applied to the position-encoded input
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, d_model)``."""
        projected = self.input_projection(x)

        # The positional encoder is sequence-first, so swap axes around it
        positioned = self.pos_encoder(projected.transpose(0, 1)).transpose(0, 1)

        encoded = self.transformer(self.dropout(positioned))

        # Mean-pool over the time axis to get the context vector
        return encoded.mean(dim=1)


class VAEHead(nn.Module):
    """Two parallel MLPs giving the mean and log-variance of the latent code."""

    def __init__(self, context_dim, latent_dim=8):
        super().__init__()

        hidden_dim = context_dim // 2

        def build_branch():
            # context -> half-width hidden layer -> latent, with ReLU + dropout
            return nn.Sequential(
                nn.Linear(context_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.1),
                nn.Linear(hidden_dim, latent_dim),
            )

        # Separate, identically shaped branches for mean and log-variance
        self.fc_mu = build_branch()
        self.fc_log_var = build_branch()

        self.latent_dim = latent_dim

    def forward(self, context):
        """Return ``(mu, log_var)`` for the given context vectors."""
        return self.fc_mu(context), self.fc_log_var(context)

    def reparameterize(self, mu, log_var):
        """Draw ``mu + eps * std`` with ``eps`` standard normal noise."""
        std = torch.exp(0.5 * log_var)
        noise = torch.randn_like(std)
        return mu + noise * std


class Decoder(nn.Module):
    """MLP that maps a latent sample back to a context-sized vector."""

    def __init__(self, latent_dim, context_dim):
        super().__init__()

        # Widths of the input and the two hidden layers
        widths = [latent_dim, latent_dim * 2, context_dim // 2]

        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.1)]
        # Final projection to the context width has no activation
        layers.append(nn.Linear(widths[-1], context_dim))

        self.decoder = nn.Sequential(*layers)

    def forward(self, z):
        """Decode latent vectors ``z``."""
        return self.decoder(z)


class RegimeDetectionEngine(nn.Module):
    """Transformer encoder + VAE head + decoder, wired end to end."""

    def __init__(self, input_dim, d_model=256, latent_dim=8, n_heads=8, n_layers=3, dropout=0.1):
        super().__init__()

        # Sequence encoder -> latent distribution -> context reconstruction
        self.transformer_encoder = TransformerEncoder(
            input_dim, d_model, n_heads, n_layers, dropout
        )
        self.vae_head = VAEHead(d_model, latent_dim)
        self.decoder = Decoder(latent_dim, d_model)

        # Keep the key dimensions available on the instance
        self.input_dim = input_dim
        self.d_model = d_model
        self.latent_dim = latent_dim

    def forward(self, x, training=True):
        """Run the model on a batch of sequences.

        Args:
            x: Batch of input sequences for the transformer encoder.
            training: When true, also sample ``z`` and reconstruct the
                context. This flag is independent of ``nn.Module.train()``
                / ``eval()`` mode.

        Returns:
            Dict with ``mu``, ``log_var`` and ``context``; with
            ``training=True`` it additionally holds ``z`` and
            ``reconstructed``.
        """
        context = self.transformer_encoder(x)
        mu, log_var = self.vae_head(context)

        result = {'mu': mu, 'log_var': log_var}

        if not training:
            # Inference path: latent distribution parameters only
            result['context'] = context
            return result

        # Training path: sample from the latent space and decode it
        z = self.vae_head.reparameterize(mu, log_var)
        result['z'] = z
        result['reconstructed'] = self.decoder(z)
        result['context'] = context
        return result

    def encode(self, x):
        """Return the latent mean for ``x`` without tracking gradients.

        Does not change the module's train/eval mode.
        """
        with torch.no_grad():
            mu, _ = self.vae_head(self.transformer_encoder(x))
            return mu  # The mean serves as the regime vector


# Initialize model
input_dim = rde_data.shape[1]  # Number of MMD features
model = RegimeDetectionEngine(
    input_dim=input_dim,
    d_model=256,
    latent_dim=8,  # 8-dimensional regime vector
    n_heads=8,
    n_layers=3,
    dropout=0.1
).to(device)

# Model summary
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# Report the dimensions stored on the model itself so the printout cannot
# drift from the constructor arguments above.
print(f"✅ Model initialized: RegimeDetectionEngine")
print(f"   Input dimension: {model.input_dim}")
print(f"   Context dimension: {model.d_model}")
print(f"   Latent dimension: {model.latent_dim}")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")
print(f"   Device: {device}")

## 5. VAE Training Setup

In [ ]:
# VAE Loss Function
class VAELoss(nn.Module):
    """Per-sample VAE objective: summed squared error plus ``beta`` * KL."""

    def __init__(self, beta=1.0):
        super().__init__()
        self.beta = beta
        self.mse = nn.MSELoss(reduction='sum')

    def forward(self, outputs, targets=None):
        """Compute the loss terms from a model output dict.

        Args:
            outputs: Dict with ``reconstructed``, ``context``, ``mu`` and
                ``log_var`` tensors.
            targets: Unused; the reconstruction target is ``outputs['context']``.

        Returns:
            Dict with ``total_loss``, ``recon_loss`` and ``kl_loss``, each
            divided by the batch size (first dimension of ``mu``).
        """
        # NOTE(review): the target context is not detached, so gradients of
        # the reconstruction term also flow into the encoder — confirm intended.
        recon_loss = self.mse(outputs['reconstructed'], outputs['context'])

        log_var, mu = outputs['log_var'], outputs['mu']

        # KL divergence to a standard normal, summed over all elements
        kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

        total_loss = recon_loss + self.beta * kl_loss

        n_items = mu.size(0)
        terms = (
            ('total_loss', total_loss),
            ('recon_loss', recon_loss),
            ('kl_loss', kl_loss),
        )
        return {name: value / n_items for name, value in terms}


# Hyperparameters
batch_size = 32
learning_rate = 1e-3
beta = 0.1  # Weight of the KL term in the loss

# Batch iterators: only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = [
    DataLoader(dataset, batch_size=batch_size, shuffle=False)
    for dataset in (val_dataset, test_dataset)
]

# Objective, optimizer, and a scheduler that halves the learning rate when
# the monitored value has not improved for 5 scheduler steps
criterion = VAELoss(beta=beta)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', patience=5, factor=0.5
)

print(f"✅ Training setup complete")
print(f"   Batch size: {batch_size}")
print(f"   Learning rate: {learning_rate}")
print(f"   Beta (KL weight): {beta}")
print(f"   Train batches: {len(train_loader)}")
print(f"   Val batches: {len(val_loader)}")

In [ ]:
# Training loop with early stopping
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    For every batch: forward with ``training=True``, backpropagate the total
    loss, clip the gradient norm to 1.0 and take an optimizer step.

    Returns:
        Dict with ``total_loss``, ``recon_loss`` and ``kl_loss``, each the
        mean of the per-batch values.
    """
    model.train()
    running = {'total_loss': 0, 'recon_loss': 0, 'kl_loss': 0}

    for batch in tqdm(loader, desc="Training"):
        inputs = batch['sequence'].to(device)

        # Forward
        optimizer.zero_grad()
        step_losses = criterion(model(inputs, training=True))

        # Backward with gradient-norm clipping
        step_losses['total_loss'].backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Accumulate scalar values for reporting
        for key in running:
            running[key] += step_losses[key].item()

    n_batches = len(loader)
    return {key: value / n_batches for key, value in running.items()}


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    The model is put in eval mode but still called with ``training=True`` so
    that its output includes the reconstruction the criterion needs.

    Returns:
        Dict with ``total_loss``, ``recon_loss`` and ``kl_loss``, each the
        mean of the per-batch values.
    """
    model.eval()
    running = {'total_loss': 0, 'recon_loss': 0, 'kl_loss': 0}

    with torch.no_grad():
        for batch in loader:
            inputs = batch['sequence'].to(device)
            step_losses = criterion(model(inputs, training=True))

            for key in running:
                running[key] += step_losses[key].item()

    n_batches = len(loader)
    return {key: value / n_batches for key, value in running.items()}


# Training with early stopping
n_epochs = 100
patience = 10
best_val_loss = float('inf')
patience_counter = 0
history = {
    'train_total': [], 'train_recon': [], 'train_kl': [],
    'val_total': [], 'val_recon': [], 'val_kl': []
}

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs(f"{DRIVE_BASE}/models", exist_ok=True)

print("🚀 Starting VAE training...")

for epoch in range(n_epochs):
    # Train
    train_losses = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_losses = validate(model, val_loader, criterion, device)

    # Update scheduler (monitors the validation total loss)
    scheduler.step(val_losses['total_loss'])

    # Save history
    for split, losses in (('train', train_losses), ('val', val_losses)):
        history[f'{split}_total'].append(losses['total_loss'])
        history[f'{split}_recon'].append(losses['recon_loss'])
        history[f'{split}_kl'].append(losses['kl_loss'])

    # Early stopping check
    if val_losses['total_loss'] < best_val_loss:
        best_val_loss = val_losses['total_loss']
        patience_counter = 0

        # Save best model (checkpoint also carries the history so far)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_losses['total_loss'],
            'history': history
        }, f"{DRIVE_BASE}/models/hybrid_regime_engine.pth")
    else:
        patience_counter += 1

    # Print progress
    print(f"\nEpoch {epoch+1}/{n_epochs}:")
    print(f"  Train - Total: {train_losses['total_loss']:.4f}, "
          f"Recon: {train_losses['recon_loss']:.4f}, KL: {train_losses['kl_loss']:.4f}")
    print(f"  Val - Total: {val_losses['total_loss']:.4f}, "
          f"Recon: {val_losses['recon_loss']:.4f}, KL: {val_losses['kl_loss']:.4f}")
    print(f"  Best Val Loss: {best_val_loss:.4f} (patience: {patience_counter}/{patience})")

    # Early stopping
    if patience_counter >= patience:
        print(f"\n🛑 Early stopping triggered at epoch {epoch+1}")
        break

print("\n✅ Training complete!")

In [ ]:
# Plot training history: one panel per loss term, train vs. validation
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (history key suffix, y-axis label, panel title)
panels = [
    ('total', 'Total Loss', 'VAE Total Loss'),
    ('recon', 'Reconstruction Loss', 'Reconstruction Loss'),
    ('kl', 'KL Loss', 'KL Divergence Loss'),
]

for ax, (suffix, y_label, title) in zip(axes, panels):
    ax.plot(history[f'train_{suffix}'], label='Train')
    ax.plot(history[f'val_{suffix}'], label='Validation')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()

# savefig does not create missing directories
os.makedirs(f"{DRIVE_BASE}/results", exist_ok=True)
plt.savefig(f"{DRIVE_BASE}/results/regime_vae_training.png")
plt.show()

## 6. Model Evaluation and Latent Space Analysis

In [ ]:
# Load best model. map_location makes the checkpoint loadable on the current
# device even if it was written from a different one (e.g. saved on GPU,
# reloaded in a CPU-only session).
checkpoint = torch.load(
    f"{DRIVE_BASE}/models/hybrid_regime_engine.pth",
    map_location=device,
)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Test evaluation
test_losses = validate(model, test_loader, criterion, device)

print(f"\n📊 Test Set Performance:")
print(f"   Total Loss: {test_losses['total_loss']:.4f}")
print(f"   Reconstruction Loss: {test_losses['recon_loss']:.4f}")
print(f"   KL Divergence: {test_losses['kl_loss']:.4f}")

# Extract latent representations for visualization
def extract_latent_representations(model, loader, device):
    """Collect the latent means and timestamps for every batch in ``loader``.

    Returns:
        Tuple ``(latent_vectors, timestamps)``: the per-batch ``mu`` arrays
        stacked row-wise into one NumPy array, and the flat list of the
        batches' ``timestamp`` entries in the same order.
    """
    mu_chunks, stamps = [], []

    model.eval()
    with torch.no_grad():
        for batch in tqdm(loader, desc="Extracting latents"):
            # Inference path of the model: no sampling, no reconstruction
            result = model(batch['sequence'].to(device), training=False)

            mu_chunks.append(result['mu'].cpu().numpy())
            stamps.extend(batch['timestamp'])

    return np.vstack(mu_chunks), stamps

# Latent means (and matching timestamps) for the held-out test windows
test_latents, test_timestamps = extract_latent_representations(
    model=model,
    loader=test_loader,
    device=device,
)

print(f"\n✅ Extracted latent representations")
print(f"   Shape: {test_latents.shape}")
print(f"   Latent dimension: {test_latents.shape[1]}")

In [ ]:
# Latent Space Visualization using UMAP
from sklearn.manifold import TSNE
import umap

print("🎨 Visualizing latent space...")

# Use UMAP for dimensionality reduction
reducer = umap.UMAP(n_components=2, random_state=42)
latent_2d = reducer.fit_transform(test_latents)

# Also try t-SNE for comparison. t-SNE requires perplexity < n_samples, so
# clamp the usual value of 30 when the test set has few sequences.
tsne_perplexity = min(30, max(1, len(test_latents) - 1))
tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
latent_2d_tsne = tsne.fit_transform(test_latents)

# Create visualization: one panel per projection, points coloured by their
# position in the test set (i.e. by time order)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

for ax, points, method in ((ax1, latent_2d, 'UMAP'), (ax2, latent_2d_tsne, 't-SNE')):
    scatter = ax.scatter(points[:, 0], points[:, 1],
                         c=range(len(points)), cmap='viridis',
                         alpha=0.6, s=10)
    ax.set_title(f'Latent Space Visualization ({method})')
    ax.set_xlabel(f'{method} 1')
    ax.set_ylabel(f'{method} 2')
    plt.colorbar(scatter, ax=ax, label='Time Index')

plt.tight_layout()

# savefig does not create missing directories
os.makedirs(f"{DRIVE_BASE}/results", exist_ok=True)
plt.savefig(f"{DRIVE_BASE}/results/regime_latent_space.png", dpi=150)
plt.show()

print("✅ Latent space visualization complete")

## 7. Save Model and Training Summary

# Save final model configuration.
# The three dimensions are read back from the live model so the saved config
# cannot drift from the weights it describes; n_heads / n_layers / dropout are
# not stored on the model and must match the constructor call above.
model_config = {
    'architecture': 'Transformer + VAE',
    'input_dim': model.input_dim,
    'd_model': model.d_model,
    'latent_dim': model.latent_dim,
    'n_heads': 8,
    'n_layers': 3,
    'dropout': 0.1,
    'sequence_length': sequence_length,
    'beta': beta,
    'best_epoch': checkpoint['epoch'],
    'best_val_loss': checkpoint['val_loss']
}

# Save configuration next to the weights
config_path = f"{DRIVE_BASE}/models/hybrid_regime_engine_config.json"
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w') as f:
    json.dump(model_config, f, indent=2)

print(f"✅ Model configuration saved to: {config_path}")

# Create inference function for downstream use
def create_regime_inference_function(model_path, config_path, device='cpu'):
    """Build a closure that maps MMD feature sequences to regime vectors.

    Args:
        model_path: Checkpoint file containing a ``model_state_dict`` entry.
        config_path: JSON file with the model's constructor arguments.
        device: Device the model is placed on and inputs are moved to.

    Returns:
        ``infer_regime(mmd_sequence)``, which returns a NumPy array of
        latent means.
    """
    # Read the architecture settings
    with open(config_path, 'r') as config_file:
        config = json.load(config_file)

    # Rebuild the model from exactly the constructor arguments it needs
    constructor_keys = ('input_dim', 'd_model', 'latent_dim',
                        'n_heads', 'n_layers', 'dropout')
    model = RegimeDetectionEngine(
        **{key: config[key] for key in constructor_keys}
    ).to(device)

    # Restore the trained weights and switch to eval mode
    state = torch.load(model_path, map_location=device)
    model.load_state_dict(state['model_state_dict'])
    model.eval()

    def infer_regime(mmd_sequence):
        """Infer regime vector from MMD feature sequence."""
        with torch.no_grad():
            # Accept tensors as-is; convert anything else to a float tensor
            if isinstance(mmd_sequence, torch.Tensor):
                batch = mmd_sequence
            else:
                batch = torch.FloatTensor(mmd_sequence)

            # A 2-D input is treated as a single sequence: add a batch axis
            if batch.dim() == 2:
                batch = batch.unsqueeze(0)

            return model.encode(batch.to(device)).cpu().numpy()

    return infer_regime

# Build the inference closure from the saved weights and config
inference_fn = create_regime_inference_function(
    model_path=f"{DRIVE_BASE}/models/hybrid_regime_engine.pth",
    config_path=config_path,
    device=device,
)

# Smoke test on the first test window
first_item = test_dataset[0]
test_sequence = first_item['sequence'].numpy()
regime_vector = inference_fn(test_sequence)

print(f"\n✅ Inference function created")
print(f"   Input shape: {test_sequence.shape}")
print(f"   Output shape: {regime_vector.shape}")
print(f"   Regime vector: {regime_vector[0]}")

## 8. Training Summary

In [ ]:
# Create comprehensive training summary
best_epoch = checkpoint['epoch']  # zero-based epoch index stored with the best checkpoint
# One history entry is appended per completed epoch, so its length is the
# number of epochs actually run (the best epoch is usually earlier than that
# when early stopping triggers).
epochs_run = len(history['train_total'])

summary = f"""
# Regime Detection Engine Training Summary

## Model Architecture
- **Type**: Hybrid Transformer + VAE
- **Transformer**: 3 layers, 8 attention heads, 256 dimensions
- **Latent Space**: 8-dimensional regime vectors
- **Total Parameters**: {total_params:,}

## Training Details
- **Date**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **Device**: {device}
- **Epochs Trained**: {epochs_run} (best checkpoint from epoch {best_epoch + 1})
- **Early Stopping**: Yes (patience={patience})

## Data Configuration
- **Sequence Length**: {sequence_length} time steps (12 hours)
- **Input Features**: {input_dim} MMD features
- **Train/Val/Test Split**: 70%/15%/15%

## Performance Metrics
### Validation Set
- **Total Loss**: {history['val_total'][best_epoch]:.4f}
- **Reconstruction Loss**: {history['val_recon'][best_epoch]:.4f}
- **KL Divergence**: {history['val_kl'][best_epoch]:.4f}

### Test Set
- **Total Loss**: {test_losses['total_loss']:.4f}
- **Reconstruction Loss**: {test_losses['recon_loss']:.4f}
- **KL Divergence**: {test_losses['kl_loss']:.4f}

## Latent Space Analysis
- **Visualization**: Both UMAP and t-SNE show temporal structure
- **Dimensionality**: 8 latent dimensions capture different market characteristics
- **Interpretability**: Each dimension correlates with specific market metrics

## Key Latent Dimension Correlations
"""

# Add dimension interpretations.
# `corr_matrix` is not assigned in any visible cell of this notebook, so look
# it up defensively instead of failing with NameError; correlations are only
# listed when an earlier analysis cell has defined it.
latent_corr = globals().get('corr_matrix')
if latent_corr is not None:
    for i in range(8):
        latent_col = f'latent_{i}'
        if latent_col in latent_corr.index:
            strongest_corr = latent_corr.loc[latent_col].abs().idxmax()
            corr_value = latent_corr.loc[latent_col, strongest_corr]
            summary += f"\n- **Dimension {i}**: {strongest_corr} (r={corr_value:.3f})"
else:
    summary += "\n- (correlation analysis not available in this run)"

summary += f"""

## Model Files
- **Weights**: hybrid_regime_engine.pth
- **Configuration**: hybrid_regime_engine_config.json

## Usage in MARL Pipeline
The trained RDE will be used as a frozen expert advisor in the main MARL training:
1. Processes sequences of MMD features
2. Outputs 8-dimensional regime vectors
3. Provides market context for trading decisions

## Next Steps
1. Use the frozen RDE in MARL_Training_Master_Colab.ipynb
2. The regime vectors will inform the Main MARL Core's decisions
3. No further training of RDE during MARL training (frozen weights)
"""

print(summary)

# Save summary (create the results folder if it does not exist yet)
summary_file = f"{DRIVE_BASE}/results/regime_engine_training_summary.txt"
os.makedirs(os.path.dirname(summary_file), exist_ok=True)
with open(summary_file, 'w') as f:
    f.write(summary)

print(f"\n✅ Training summary saved to: {summary_file}")
print("\n🎉 Regime Detection Engine training complete!")

## 8. Training Summary

In [None]:
# Create training summary
# NOTE(review): this cell reads best_val_acc, test_acc, n_episodes,
# episode_rewards, regime_names, all_labels and all_preds, none of which are
# assigned in the visible notebook cells; as written it would raise NameError
# while building the f-string below. It looks like a leftover from a
# supervised / RL regime-classifier workflow — confirm, then either remove it
# or restore the cells that define those names.
# It also rebinds `summary`, replacing the string built by the previous cell.
summary = f"""
# Regime Detector Training Summary

## Training Details
- Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- Device: {device}
- Model Parameters: {total_params:,}

## Supervised Pre-training
- Epochs: {n_epochs}
- Best Validation Accuracy: {best_val_acc:.2f}%
- Test Accuracy: {test_acc:.2f}%

## RL Fine-tuning
- Episodes: {n_episodes}
- Final Average Reward: {np.mean(episode_rewards[-10:]):.4f}

## Model Files
- Pre-trained: regime_detector_pretrained.pt
- Fine-tuned: regime_detector_finetuned.pt

## Performance by Regime
"""

# Add per-class accuracy: for each regime, the share of samples with that
# true label whose prediction matches it
for i, regime in enumerate(regime_names):
    mask = np.array(all_labels) == i
    if mask.sum() > 0:
        acc = (np.array(all_preds)[mask] == i).mean() * 100
        summary += f"\n- {regime}: {acc:.1f}% accuracy"

print(summary)

# Save summary
with open(f"{DRIVE_BASE}/results/regime_detector_training_summary.txt", 'w') as f:
    f.write(summary)

print("\n✅ Training complete! Model saved to Google Drive.")