# Regime Detector Agent Training - Google Colab

This notebook trains the Regime Detector agent individually before multi-agent training.

## Agent Overview:
The Regime Detector classifies each market state into one of four regimes (low-volatility/trending, high-volatility/choppy, mean-reverting, crisis) using:
- Correlation matrices
- Volatility patterns
- Market microstructure signals

## Training Strategy:
- Pre-train on heuristic regime labels derived from historical volatility and return autocorrelation
- Fine-tune with reinforcement learning
- Optimize for regime detection accuracy

## 1. Environment Setup

In [None]:
# Check GPU and mount Drive
import torch
import os
import sys

# GPU check
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(f"✅ GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("⚠️ No GPU available")

# Mount Drive
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_BASE = "/content/drive/MyDrive/AlgoSpace"
    IN_COLAB = True
except:
    DRIVE_BASE = "./drive_simulation"
    IN_COLAB = False

# Clone repo
REPO_PATH = "/content/AlgoSpace" if IN_COLAB else "."
if IN_COLAB and not os.path.exists(REPO_PATH):
    !git clone https://github.com/QuantNova/AlgoSpace.git {REPO_PATH}

sys.path.insert(0, REPO_PATH)
sys.path.insert(0, os.path.join(REPO_PATH, 'src'))

In [None]:
# Install dependencies
!pip install -q torch numpy pandas h5py matplotlib seaborn
!pip install -q wandb tensorboard tqdm
!pip install -q scikit-learn

import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
from tqdm import tqdm

print("✅ Dependencies loaded")

## 2. Load Training Data

In [None]:
# Read the regime-detector arrays, their dates, the correlation matrices and
# the file-level metadata from the preprocessed HDF5 bundle.
data_file = f"{DRIVE_BASE}/data/processed/marl_training_data_20231201.h5"  # Update with your file

print(f"📂 Loading data from: {data_file}")

detector_arrays, detector_dates, corr_arrays = {}, {}, {}

with h5py.File(data_file, 'r') as f:
    for split in ('train', 'val', 'test'):
        # `[:]` materialises the dataset as an in-memory array before the file closes
        detector_arrays[split] = f[f'regime_detector/{split}/data'][:]
        raw_dates = f[f'regime_detector/{split}/dates'][:]
        # Dates are decoded from UTF-8 bytes into Python strings
        detector_dates[split] = [raw.decode('utf-8') for raw in raw_dates]
        corr_arrays[split] = f[f'correlation_matrices/{split}/data'][:]

    # Copy the attributes of the 'metadata' object into a plain dict
    metadata_attrs = f['metadata'].attrs
    metadata = {key: metadata_attrs[key] for key in metadata_attrs}

train_data, val_data, test_data = (detector_arrays[s] for s in ('train', 'val', 'test'))
train_dates, val_dates, test_dates = (detector_dates[s] for s in ('train', 'val', 'test'))
corr_train, corr_val, corr_test = (corr_arrays[s] for s in ('train', 'val', 'test'))

print("✅ Data loaded")
for split_title, split_array in (('Train', train_data), ('Val', val_data), ('Test', test_data)):
    print(f"   {split_title}: {split_array.shape}")
print(f"   Correlation matrices: {corr_train.shape}")

## 3. Generate Regime Labels

In [None]:
# Generate regime labels based on market characteristics
def generate_regime_labels(data, lookback=20, low_vol=0.15, high_vol=0.30, crisis_vol=0.50):
    """Assign a heuristic regime label to every sample of ``data``.

    Regimes:
    0: Low volatility / Trending
    1: High volatility / Choppy
    2: Mean reverting
    3: Crisis / Extreme volatility

    For each sample the standard deviation of feature 0 across the asset axis
    is scaled by sqrt(252) and compared with the thresholds:

    * below ``low_vol``                      -> 0
    * above ``crisis_vol``                   -> 3
    * above ``high_vol`` (up to crisis_vol)  -> 1
    * otherwise ("medium" volatility)        -> 2 when the lag-1 autocorrelation
      of the asset-averaged feature 0 over the previous ``lookback`` samples is
      below -0.1, else 1. Samples with fewer than ``lookback`` predecessors, or
      with an undefined (NaN) autocorrelation, get 1.

    Args:
        data: array indexed as ``data[sample, asset, feature]``. Feature 0 is
            assumed to hold returns (the notebook's stated assumption;
            TODO confirm against the preprocessing pipeline).
        lookback: number of preceding samples used for the autocorrelation.
        low_vol: upper bound of the low-volatility regime.
        high_vol: lower bound of the high-volatility regime.
        crisis_vol: lower bound of the crisis regime.

    Returns:
        1-D ``int64`` array with one label per sample. An empty input yields an
        empty integer array, so the result can always be passed to
        ``np.bincount`` or converted to a long tensor.
    """
    annualization = np.sqrt(252)
    labels = []

    for i in range(len(data)):
        # Cross-asset dispersion of feature 0 for this sample, annualised
        returns = data[i, :, 0]
        volatility = np.std(returns) * annualization

        if volatility < low_vol:
            label = 0
        elif volatility > high_vol:
            label = 3 if volatility > crisis_vol else 1
        elif i >= lookback:
            # Medium volatility: look for mean reversion in the recent history
            recent_returns = data[i - lookback:i, :, 0].mean(axis=1)
            # A constant window has zero variance and yields NaN; silence the
            # floating-point warning and treat NaN explicitly as "not mean reverting".
            with np.errstate(divide='ignore', invalid='ignore'):
                autocorr = np.corrcoef(recent_returns[:-1], recent_returns[1:])[0, 1]
            label = 2 if (not np.isnan(autocorr)) and autocorr < -0.1 else 1
        else:
            # Not enough history for the autocorrelation check yet
            label = 1

        labels.append(label)

    # Explicit dtype: np.array([]) would otherwise default to float64
    return np.array(labels, dtype=np.int64)

# Label every split with the heuristic regime classifier
train_labels = generate_regime_labels(train_data)
val_labels = generate_regime_labels(val_data)
test_labels = generate_regime_labels(test_data)

# Display names, indexed by regime id
regime_names = ['Low Vol/Trend', 'High Vol/Choppy', 'Mean Reverting', 'Crisis']

# One bar chart of regime counts per split, side by side
fig, axes = plt.subplots(1, 3, figsize=(10, 6))
labelled_splits = (('Train', train_labels), ('Val', val_labels), ('Test', test_labels))

for ax, (split_name, split_labels) in zip(axes, labelled_splits):
    # minlength keeps four bars even when a regime never occurs in the split
    regime_counts = np.bincount(split_labels, minlength=4)
    ax.bar(range(4), regime_counts)
    ax.set_title(f'{split_name} Set')
    ax.set_xlabel('Regime')
    ax.set_ylabel('Count')
    ax.set_xticks(range(4))
    ax.set_xticklabels(regime_names, rotation=45)

fig.tight_layout()
plt.show()

## 4. Initialize Regime Detector Model

In [None]:
# Import Regime Detector
from agents.regime_detector import RegimeDetector
import yaml

# Read the shared training configuration from the repository
config_path = os.path.join(REPO_PATH, 'config/training_config.yaml')
with open(config_path, 'r') as config_file:
    config = yaml.safe_load(config_file)

# Section of the config that belongs to this agent
regime_config = config['agents']['regime_detector']

# Override the input/output sizes so they match the loaded data
regime_config.update(
    n_features=train_data.shape[-1],
    n_assets=train_data.shape[1],
    n_regimes=4,  # Number of regime classes
)

# Build the model on the selected device
model = RegimeDetector(regime_config).to(device)

# Parameter counts: (size, is_trainable) for every parameter tensor
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, is_trainable in param_sizes if is_trainable)

print("✅ Model initialized")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")
print(f"   Device: {device}")

## 5. Supervised Pre-training

In [None]:
# Training setup
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


def make_regime_dataset(features, corr_matrices, labels):
    """Bundle features, correlation matrices and labels into a TensorDataset.

    Uses ``torch.tensor(..., dtype=...)`` instead of the legacy
    ``torch.FloatTensor`` / ``torch.LongTensor`` constructors, so the target
    dtype is explicit: float32 for both inputs, int64 for the labels.
    """
    return TensorDataset(
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(corr_matrices, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )


# Create datasets
train_dataset = make_regime_dataset(train_data, corr_train, train_labels)
val_dataset = make_regime_dataset(val_data, corr_val, val_labels)

# Create dataloaders. The training loader shuffles with its own seeded
# generator so the batch order is reproducible when this cell is re-run.
batch_size = 32
SHUFFLE_SEED = 42
shuffle_generator = torch.Generator().manual_seed(SHUFFLE_SEED)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                          generator=shuffle_generator)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Loss and optimizer; the scheduler is stepped with the validation loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

print("✅ Training setup complete")
print(f"   Batch size: {batch_size}")
print(f"   Train batches: {len(train_loader)}")
print(f"   Val batches: {len(val_loader)}")

In [None]:
# Training loop
def _regime_scores(output):
    """Return per-class scores that are valid input for a logit-based loss.

    Prefers ``output['regime_logits']`` when the model provides it. Otherwise
    falls back to ``output['regime_probabilities']`` and converts them to
    log-probabilities: a loss such as ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so feeding raw probabilities would normalise twice and
    distort the loss, while log-probabilities reproduce the true negative
    log-likelihood. The arg-max (and therefore the accuracy) is unchanged.
    """
    if 'regime_logits' in output:
        return output['regime_logits']
    probabilities = output['regime_probabilities']
    # clamp avoids log(0) = -inf for classes with zero probability
    return torch.log(probabilities.clamp_min(1e-8))


def _epoch_metrics(total_loss, n_batches, correct, total):
    """Turn running sums into (mean batch loss, accuracy in percent).

    Returns 0.0 for a metric whose denominator is zero (empty loader) instead
    of raising ZeroDivisionError.
    """
    avg_loss = total_loss / n_batches if n_batches else 0.0
    accuracy = 100 * correct / total if total else 0.0
    return avg_loss, accuracy


def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: module called with a dict holding 'features' and
            'correlation_matrix'; must return a mapping containing
            'regime_logits' or 'regime_probabilities'.
        loader: iterable of ``(features, corr_matrix, labels)`` batches.
        criterion: loss taking ``(scores, labels)``.
        optimizer: optimizer over the model parameters.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: loss averaged over batches and accuracy in percent.
    """
    model.train()
    total_loss, correct, total, n_batches = 0.0, 0, 0, 0

    for features, corr_matrix, labels in tqdm(loader, desc="Training"):
        features = features.to(device)
        corr_matrix = corr_matrix.to(device)
        labels = labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        market_state = {'features': features, 'correlation_matrix': corr_matrix}
        regime_logits = _regime_scores(model(market_state))
        loss = criterion(regime_logits, labels)

        # Backward pass, with the gradient norm clipped to 1.0
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Statistics
        total_loss += loss.item()
        n_batches += 1
        _, predicted = torch.max(regime_logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    return _epoch_metrics(total_loss, n_batches, correct, total)


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Takes the same ``model``, ``loader``, ``criterion`` and ``device`` as
    ``train_epoch`` (no optimizer) and puts the model in eval mode.

    Returns:
        ``(avg_loss, accuracy)``: loss averaged over batches and accuracy in percent.
    """
    model.eval()
    total_loss, correct, total, n_batches = 0.0, 0, 0, 0

    with torch.no_grad():
        for features, corr_matrix, labels in loader:
            features = features.to(device)
            corr_matrix = corr_matrix.to(device)
            labels = labels.to(device)

            market_state = {'features': features, 'correlation_matrix': corr_matrix}
            regime_logits = _regime_scores(model(market_state))
            loss = criterion(regime_logits, labels)

            total_loss += loss.item()
            n_batches += 1
            _, predicted = torch.max(regime_logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return _epoch_metrics(total_loss, n_batches, correct, total)

# Supervised pre-training: train, validate, adapt the learning rate and keep
# the checkpoint with the best validation accuracy.
n_epochs = 30
best_val_acc = 0
history = {metric: [] for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

print("🚀 Starting supervised pre-training...")

for epoch in range(n_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # The plateau scheduler watches the validation loss
    scheduler.step(val_loss)

    # Record this epoch's metrics
    epoch_metrics = {
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
    }
    for metric, value in epoch_metrics.items():
        history[metric].append(value)

    # Overwrite the checkpoint only when validation accuracy strictly improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
            'config': regime_config
        }
        torch.save(best_checkpoint, f"{DRIVE_BASE}/models/regime_detector_pretrained.pt")

    print(
        f"Epoch {epoch+1}/{n_epochs}:",
        f"  Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%",
        f"  Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%",
        f"  Best Val Acc: {best_val_acc:.2f}%",
        "",
        sep="\n",
    )

In [None]:
# Plot the recorded loss and accuracy curves side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# (history key suffix, y-axis label, panel title) for each panel
panel_specs = (
    ('loss', 'Loss', 'Training Loss'),
    ('acc', 'Accuracy (%)', 'Regime Detection Accuracy'),
)

for ax, (metric, y_label, panel_title) in zip((ax1, ax2), panel_specs):
    ax.plot(history[f'train_{metric}'], label='Train')
    ax.plot(history[f'val_{metric}'], label='Validation')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig(f"{DRIVE_BASE}/results/regime_detector_training.png")
plt.show()

## 6. Model Evaluation

In [None]:
# Load best model.
# map_location remaps the stored tensors onto the current device, so the load
# also works when the checkpoint was written on a different device
# (e.g. saved on GPU, reloaded on a CPU-only runtime).
checkpoint_path = f"{DRIVE_BASE}/models/regime_detector_pretrained.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Test evaluation
test_dataset = TensorDataset(
    torch.FloatTensor(test_data),
    torch.FloatTensor(corr_test),
    torch.LongTensor(test_labels)
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Evaluate with the same loss as during training
test_loss, test_acc = validate(model, test_loader, criterion, device)

print(f"\n📊 Test Set Performance:")
print(f"   Loss: {test_loss:.4f}")
print(f"   Accuracy: {test_acc:.2f}%")

# Confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

# Gather the predicted and true regime of every test sample
all_preds, all_labels = [], []

# Which output entry holds the class scores depends on whether the model
# exposes a `regime_classifier` attribute.
score_key = 'regime_logits' if hasattr(model, 'regime_classifier') else 'regime_probabilities'

with torch.no_grad():
    for batch_features, batch_corr, batch_labels in test_loader:
        market_state = {
            'features': batch_features.to(device),
            'correlation_matrix': batch_corr.to(device),
        }
        output = model(market_state)

        # Index of the highest-scoring regime per sample
        predicted = torch.max(output[score_key], 1)[1]

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_labels.numpy())

# Plot confusion matrix.
# Passing the full list of regime ids pins the matrix to one row/column per
# entry of regime_names; without it, a regime that never appears in the test
# labels or predictions is dropped and the tick labels no longer line up.
regime_ids = list(range(len(regime_names)))
cm = confusion_matrix(all_labels, all_preds, labels=regime_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=regime_names, yticklabels=regime_names)
plt.title('Regime Detection Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig(f"{DRIVE_BASE}/results/regime_detector_confusion_matrix.png")
plt.show()

# Classification report.
# `labels` keeps target_names aligned with the class ids (a mismatch raises),
# and zero_division=0 reports 0 for regimes without samples or predictions
# instead of emitting undefined-metric warnings.
print("\n📋 Classification Report:")
print(classification_report(all_labels, all_preds, labels=regime_ids,
                            target_names=regime_names, zero_division=0))

## 7. Reinforcement Learning Fine-tuning

In [None]:
# RL fine-tuning setup
from training.environment import MultiAgentTradingEnv
from training.rewards import RegimeDetectionReward

# Single-agent configuration of the multi-agent trading environment
env_config = dict(
    n_agents=1,
    episode_length=100,
    initial_capital=100000,
    transaction_cost=0.001,
)

env = MultiAgentTradingEnv(env_config)
# NOTE(review): reward_fn is created here but not referenced by the cells visible in this notebook
reward_fn = RegimeDetectionReward()

# PPO setup for fine-tuning
class RegimePPO:
    """Minimal PPO-style updater for a model that outputs regime logits.

    The wrapped model is called with a state dict and must return a mapping
    with 'regime_logits'; an optional 'value' entry is used as the critic
    estimate. Each call to ``update`` performs a single clipped-surrogate
    gradient step on the whole batch it is given.
    """

    def __init__(self, model, lr=3e-4):
        """Store the model and create an Adam optimizer over its parameters."""
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.gamma = 0.99            # discount factor
        self.eps_clip = 0.2          # clipping range of the probability ratio
        self.value_loss_coef = 0.5   # weight of the value loss in the total loss
        self.entropy_coef = 0.01     # weight of the entropy bonus

    def compute_returns(self, rewards, dones, values, next_value):
        """Compute discounted returns for one trajectory.

        Args:
            rewards: per-step rewards.
            dones: per-step termination flags; a true flag stops bootstrapping
                across that step.
            values: unused; kept so existing callers keep working.
            next_value: value estimate for the state after the last step.

        Returns:
            1-D float32 tensor with one return per step. The dtype is fixed so
            it matches float32 model outputs even when the rewards are numpy
            float64 scalars.
        """
        running = float(next_value)
        reversed_returns = []

        # Walk backwards and append (O(n)); reverse once at the end instead of
        # inserting at the front of the list on every step.
        for step in range(len(rewards) - 1, -1, -1):
            running = float(rewards[step]) + self.gamma * running * (1.0 - float(dones[step]))
            reversed_returns.append(running)

        reversed_returns.reverse()
        return torch.tensor(reversed_returns, dtype=torch.float32)

    def update(self, states, actions, returns, old_log_probs):
        """Perform one PPO gradient step.

        Args:
            states: model input (passed to the model unchanged).
            actions: sampled action indices, one per step.
            returns: discounted returns, one per step.
            old_log_probs: log-probabilities of ``actions`` under the policy
                that collected the data.

        Returns:
            ``(total_loss, policy_loss, value_loss)`` as Python floats.
        """
        # Forward pass
        output = self.model(states)

        # New log probs and entropy under the current policy
        dist = torch.distributions.Categorical(logits=output['regime_logits'])
        new_log_probs = dist.log_prob(actions)
        entropy = dist.entropy().mean()

        # Critic estimate; zeros when the model has no value head. A value head
        # shaped (N, 1) is reshaped to match `returns`, otherwise
        # `returns - values` would broadcast to an (N, N) matrix.
        values = output.get('value')
        if values is None:
            values = torch.zeros_like(returns)
        else:
            values = values.reshape(returns.shape)

        # PPO clipped surrogate; the behaviour-policy log probs are constants
        ratio = torch.exp(new_log_probs - old_log_probs.detach())
        advantages = returns - values.detach()

        surr1 = ratio * advantages
        surr2 = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * advantages

        policy_loss = -torch.min(surr1, surr2).mean()
        value_loss = nn.MSELoss()(values, returns)

        loss = policy_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy

        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
        self.optimizer.step()

        return loss.item(), policy_loss.item(), value_loss.item()

# Wrap the pre-trained detector in the PPO updater, with an explicit
# fine-tuning learning rate instead of the class default
finetune_lr = 1e-4
ppo = RegimePPO(model, lr=finetune_lr)

print("✅ RL fine-tuning setup complete")

In [None]:
# RL fine-tuning loop (simplified): collect one episode with the current
# policy, then take a single PPO step on that trajectory.
# NOTE(review): no cell visible here switches the model back to train mode
# after the evaluation cell's model.eval(); confirm that is intended for fine-tuning.
# NOTE(review): the model is called with only 'features' here, whereas the
# supervised cells also pass 'correlation_matrix'; verify the model accepts this.
n_episodes = 100
episode_rewards = []

print("🚀 Starting RL fine-tuning...")

for episode in range(n_episodes):
    # Reset environment
    obs = env.reset()
    done = False
    episode_reward = 0
    
    # Collect trajectory
    states, actions, rewards, log_probs, dones = [], [], [], [], []
    
    while not done:
        # Sample an action from the current policy (no gradients during rollout)
        with torch.no_grad():
            state_tensor = torch.FloatTensor(obs).unsqueeze(0).to(device)
            output = model({'features': state_tensor})
            
            dist = torch.distributions.Categorical(logits=output['regime_logits'])
            action = dist.sample()
            log_prob = dist.log_prob(action)
        
        # Step environment
        next_obs, reward, done, info = env.step(action.item())
        
        # Store trajectory
        states.append(obs)
        actions.append(action)
        rewards.append(reward)
        log_probs.append(log_prob)
        dones.append(done)
        
        episode_reward += reward
        obs = next_obs
    
    # Update model
    if len(states) > 0:
        # Stack the observations into one array first: building a tensor
        # directly from a Python list of arrays converts element by element
        # and is much slower.
        states_tensor = torch.as_tensor(
            np.asarray(states, dtype=np.float32), dtype=torch.float32
        ).to(device)
        actions_tensor = torch.cat(actions)
        log_probs_tensor = torch.cat(log_probs)
        
        # Compute returns (no bootstrap value after the final step)
        returns = ppo.compute_returns(rewards, dones, 
                                     torch.zeros(len(rewards)), 0)
        
        # Update
        loss, p_loss, v_loss = ppo.update(
            {'features': states_tensor},
            actions_tensor,
            returns.to(device),
            log_probs_tensor
        )
    
    episode_rewards.append(episode_reward)
    
    # Report the mean reward of the last (up to) ten episodes every tenth episode
    if episode % 10 == 0:
        avg_reward = np.mean(episode_rewards[-10:])
        print(f"Episode {episode}: Avg Reward = {avg_reward:.4f}")

# Persist the fine-tuned weights together with the last episode index, the
# mean reward of the last ten episodes and the agent configuration
finetuned_checkpoint = {
    'model_state_dict': model.state_dict(),
    'episode': episode,
    'avg_reward': np.mean(episode_rewards[-10:]),
    'config': regime_config
}
finetuned_path = f"{DRIVE_BASE}/models/regime_detector_finetuned.pt"
torch.save(finetuned_checkpoint, finetuned_path)

print("\n✅ RL fine-tuning complete!")

## 8. Training Summary

In [None]:
# Assemble a Markdown-style report of the whole run
summary = f"""
# Regime Detector Training Summary

## Training Details
- Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- Device: {device}
- Model Parameters: {total_params:,}

## Supervised Pre-training
- Epochs: {n_epochs}
- Best Validation Accuracy: {best_val_acc:.2f}%
- Test Accuracy: {test_acc:.2f}%

## RL Fine-tuning
- Episodes: {n_episodes}
- Final Average Reward: {np.mean(episode_rewards[-10:]):.4f}

## Model Files
- Pre-trained: regime_detector_pretrained.pt
- Fine-tuned: regime_detector_finetuned.pt

## Performance by Regime
"""

# Per-regime accuracy on the test set; regimes without test samples are skipped
true_regimes = np.array(all_labels)
predicted_regimes = np.array(all_preds)
regime_lines = []
for regime_id, regime in enumerate(regime_names):
    in_regime = true_regimes == regime_id
    if in_regime.sum() > 0:
        acc = (predicted_regimes[in_regime] == regime_id).mean() * 100
        regime_lines.append(f"\n- {regime}: {acc:.1f}% accuracy")
summary += "".join(regime_lines)

print(summary)

# Write the report next to the figures
summary_path = f"{DRIVE_BASE}/results/regime_detector_training_summary.txt"
with open(summary_path, 'w') as summary_file:
    summary_file.write(summary)

print("\n✅ Training complete! Model saved to Google Drive.")