# M-RMS (Multi-Agent Risk Management System) Training

This notebook trains the ensemble of risk-management agents by optimizing the Sortino ratio.
It covers the position-sizing, stop-loss, and profit-target agents.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

# Add src to path
sys.path.append('../src')
from agents.mrms.models import RiskManagementEnsemble
from data.market_data import MarketDataHandler

In [None]:
# Configuration
# Hyperparameters and runtime settings shared by the cells below.
config = {
    'input_dim': 40,        # forwarded to RiskManagementEnsemble(input_dim=...)
    'hidden_dim': 128,      # forwarded to RiskManagementEnsemble(hidden_dim=...)
    'learning_rate': 2e-4,  # Adam step size
    'batch_size': 64,       # used by both the training and validation loaders
    'epochs': 150,          # number of passes of the training loop
}
# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

device = torch.device(config['device'])
print(f"Training on device: {config['device']}")

In [None]:
# Initialize M-RMS ensemble
# Build the network from the configured layer sizes, then move it to `device`.
model = RiskManagementEnsemble(
    input_dim=config['input_dim'],
    hidden_dim=config['hidden_dim'],
)
model = model.to(device)

# A single Adam optimizer updates every parameter of the ensemble.
optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

parameter_count = sum(tensor.numel() for tensor in model.parameters())
print(f"Model parameters: {parameter_count:,}")

In [None]:
# Sortino Ratio Loss Function
def sortino_ratio_loss(returns, target_return=0):
    """Return the negated Sortino ratio of ``returns``.

    The ratio is the mean excess return over ``target_return`` divided by the
    downside deviation (root mean square of the below-target shortfalls).
    The sign is flipped so that minimising the loss maximises the ratio.

    Args:
        returns: Tensor of realised returns.
        target_return: Minimum acceptable return; defaults to 0.

    Returns:
        Scalar tensor equal to ``-sortino``.
    """
    excess = returns - target_return
    below_target = returns < target_return

    # Only below-target returns count as downside risk; the rest contribute zero.
    shortfall = torch.where(below_target, excess, torch.zeros_like(returns))

    mean_excess = excess.mean()
    # 1e-8 keeps the square root differentiable when there is no downside at all.
    downside_dev = torch.sqrt(shortfall.pow(2).mean() + 1e-8)

    # 1e-6 guards the division against a vanishing downside deviation.
    return -(mean_excess / (downside_dev + 1e-6))

def risk_consistency_loss(position_sizes, stop_losses, profit_targets):
    """Penalise inconsistent risk sizing and off-target risk-reward ratios.

    Two terms are combined:

    * the sample variance of ``position_sizes * stop_losses`` (risk taken per
      trade), which is zero when every trade risks the same amount;
    * the mean absolute distance of ``profit_targets / stop_losses`` from a
      2:1 risk-reward ratio, weighted by 0.1.

    Args:
        position_sizes: Tensor of position sizes.
        stop_losses: Tensor of stop-loss distances, same shape.
        profit_targets: Tensor of profit-target distances, same shape.

    Returns:
        Scalar tensor ``variance_penalty + 0.1 * ratio_penalty``.
    """
    # Position size should correlate with stop loss distance
    risk_per_trade = position_sizes * stop_losses
    if risk_per_trade.numel() > 1:
        consistency_penalty = torch.var(risk_per_trade)
    else:
        # The sample variance is undefined for fewer than two observations
        # (torch.var returns NaN), which would poison the loss and every
        # gradient. A lone trade cannot be inconsistent, so contribute zero.
        consistency_penalty = risk_per_trade.new_zeros(())

    # Risk-reward ratio should be reasonable
    risk_reward_ratio = profit_targets / (stop_losses + 1e-6)  # 1e-6 avoids /0
    ratio_penalty = torch.mean(torch.abs(risk_reward_ratio - 2.0))  # Target 2:1 ratio

    return consistency_penalty + 0.1 * ratio_penalty

def train_epoch(model, dataloader, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and return mean losses.

    For every batch the raw returns are scaled by the predicted position
    size, clamped to ``[-stop_loss, profit_target]`` and scored with
    ``sortino_ratio_loss``; ``0.1 * risk_consistency_loss`` is added before
    back-propagation.

    Args:
        model: Module whose forward pass returns a mapping with
            ``'position_size'``, ``'stop_loss'`` and ``'profit_target'``
            tensors (assumed to hold one value per sample).
        dataloader: Iterable of ``(state_data, return_data)`` batches. It
            does not need to support ``len()``.
        optimizer: Optimizer that steps the model's parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        dict with the per-batch means under ``'loss'``, ``'sortino_loss'``
        and ``'consistency_loss'``. All three are ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    total_sortino = 0.0
    total_consistency = 0.0
    num_batches = 0

    for state_data, return_data in dataloader:
        state_data = state_data.to(device)
        # Flatten so returns and model outputs line up element-wise instead of
        # silently broadcasting (B, 1) against (B,) into a (B, B) matrix.
        return_data = return_data.to(device).reshape(-1)

        optimizer.zero_grad()

        outputs = model(state_data)

        # reshape(-1) rather than squeeze(): squeeze() would also drop the
        # batch dimension when a batch happens to contain a single sample.
        position_sizes = outputs['position_size'].reshape(-1)
        stop_losses = outputs['stop_loss'].reshape(-1)
        profit_targets = outputs['profit_target'].reshape(-1)

        # Simulate risk-adjusted returns
        adjusted_returns = return_data * position_sizes

        # Apply stop loss and profit target logic: losses are cut at the stop
        # and gains are capped at the target.
        clamped_returns = torch.clamp(adjusted_returns,
                                      min=-stop_losses,
                                      max=profit_targets)

        # Loss components
        sortino_loss = sortino_ratio_loss(clamped_returns)
        consistency_loss = risk_consistency_loss(position_sizes, stop_losses, profit_targets)

        loss = sortino_loss + 0.1 * consistency_loss

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_sortino += sortino_loss.item()
        total_consistency += consistency_loss.item()
        num_batches += 1

    # Count batches instead of calling len(dataloader): this works for loaders
    # without a length and avoids a ZeroDivisionError on an empty one.
    denominator = max(num_batches, 1)
    return {
        'loss': total_loss / denominator,
        'sortino_loss': total_sortino / denominator,
        'consistency_loss': total_consistency / denominator
    }

In [None]:
# Generate synthetic training data
def create_risk_training_data(n_samples=5000, input_dim=40):
    """Create synthetic state and return data for M-RMS training.

    Each state vector holds nine named features (volatility, trend,
    momentum, cash ratio, current position, unrealized PnL, RSI scaled to
    [0, 1], MACD, Bollinger-band position) followed by
    ``input_dim - 9`` Gaussian filler features. The default width of 40
    matches ``config['input_dim']``; the previous hard-coded 32 filler
    features produced 41 columns.

    Args:
        n_samples: Number of (state, return) pairs to draw.
        input_dim: Total number of features per state; must be at least 9.

    Returns:
        Tuple ``(states, returns)`` of float32 tensors with shapes
        ``(n_samples, input_dim)`` and ``(n_samples,)``.

    Raises:
        ValueError: If ``input_dim`` is smaller than the nine named features.
    """
    n_named_features = 9
    if input_dim < n_named_features:
        raise ValueError(
            f"input_dim must be at least {n_named_features}, got {input_dim}"
        )

    # Market regime features
    volatility = np.random.exponential(0.02, size=n_samples)  # Market volatility
    trend = np.random.normal(0, 0.001, size=n_samples)  # Market trend
    momentum = np.random.normal(0, 0.01, size=n_samples)  # Price momentum

    # Portfolio state
    cash_ratio = np.random.uniform(0.1, 0.9, size=n_samples)
    current_position = np.random.uniform(-0.5, 0.5, size=n_samples)
    unrealized_pnl = np.random.normal(0, 0.02, size=n_samples)

    # Technical indicators
    rsi = np.random.uniform(20, 80, size=n_samples)
    macd = np.random.normal(0, 0.001, size=n_samples)
    bb_position = np.random.uniform(-1, 1, size=n_samples)  # Bollinger band position

    # Filler features padding the state up to input_dim columns
    additional_features = np.random.randn(n_samples, input_dim - n_named_features) * 0.1

    # column_stack turns each 1-D feature into a column and appends the
    # 2-D filler block unchanged, giving an (n_samples, input_dim) matrix.
    states = np.column_stack([
        volatility, trend, momentum, cash_ratio, current_position,
        unrealized_pnl, rsi / 100, macd, bb_position, additional_features,
    ])

    # Returns are zero-mean noise whose scale is each sample's own volatility.
    base_returns = np.random.normal(0, volatility)

    # Convert from contiguous float32 arrays; building a tensor from a Python
    # list of ndarrays is much slower.
    return (
        torch.from_numpy(states.astype(np.float32)),
        torch.from_numpy(base_returns.astype(np.float32)),
    )

# Create datasets
# The training and validation sets are drawn independently from the generator.
train_states, train_returns = create_risk_training_data(n_samples=4000)
val_states, val_returns = create_risk_training_data(n_samples=1000)

train_dataset = TensorDataset(train_states, train_returns)
val_dataset = TensorDataset(val_states, val_returns)

# Only the training batches are reshuffled each epoch; validation order is fixed.
loader_batch_size = config['batch_size']
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=loader_batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=loader_batch_size)

print(f"Training states shape: {train_states.shape}")
print(f"Training returns shape: {train_returns.shape}")

In [None]:
# Training loop
# Runs config['epochs'] epochs, tracks the loss histories used by the plots
# below, and checkpoints the weights with the lowest validation loss.
train_losses = []
val_losses = []
sortino_losses = []
best_val_loss = float('inf')

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save instead of failing mid-run.
checkpoint_path = '../models/mrms_best.pth'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for epoch in range(config['epochs']):
    # Train
    train_metrics = train_epoch(model, train_loader, optimizer, device)
    train_losses.append(train_metrics['loss'])
    sortino_losses.append(train_metrics['sortino_loss'])

    # Validate
    # NOTE(review): the validation loss is the Sortino loss of the unclamped,
    # position-scaled returns only. It omits the stop-loss/profit-target clamp
    # and the consistency term used in training, so the two curves are not
    # directly comparable. Confirm this is intended.
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for state_data, return_data in val_loader:
            state_data = state_data.to(device)
            return_data = return_data.to(device)

            outputs = model(state_data)
            position_sizes = outputs['position_size'].squeeze()
            adjusted_returns = return_data * position_sizes

            loss = sortino_ratio_loss(adjusted_returns)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    val_losses.append(val_loss)

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), checkpoint_path)

    # Print progress
    if epoch % 10 == 0:
        print(f"Epoch {epoch:3d}: Train Loss: {train_metrics['loss']:.6f}, "
              f"Val Loss: {val_loss:.6f}, "
              f"Sortino: {train_metrics['sortino_loss']:.6f}, "
              f"Consistency: {train_metrics['consistency_loss']:.6f}")

print(f"\nTraining completed! Best validation loss: {best_val_loss:.6f}")

In [None]:
# Plot training curves and analyze outputs
# Layout: a 2x3 grid with loss curves in panels 1-2, output histograms in
# panels 3-5 and the stop-loss / profit-target scatter in panel 6.
plt.figure(figsize=(15, 10))

# Training curves
loss_axes = plt.subplot(2, 3, 1)
loss_axes.plot(train_losses, label='Training Loss')
loss_axes.plot(val_losses, label='Validation Loss')
loss_axes.set_xlabel('Epoch')
loss_axes.set_ylabel('Loss')
loss_axes.legend()
loss_axes.set_title('M-RMS Training Progress')

sortino_axes = plt.subplot(2, 3, 2)
sortino_axes.plot(sortino_losses, label='Sortino Loss', color='orange')
sortino_axes.set_xlabel('Epoch')
sortino_axes.set_ylabel('Sortino Loss')
sortino_axes.set_title('Sortino Ratio Optimization')

# Analyze model outputs on the first 100 validation states (no gradients).
model.eval()
with torch.no_grad():
    sample_states = val_states[:100].to(device)
    outputs = model(sample_states)

    position_sizes = outputs['position_size'].cpu().numpy().flatten()
    stop_losses = outputs['stop_loss'].cpu().numpy().flatten()
    profit_targets = outputs['profit_target'].cpu().numpy().flatten()

# Plot distributions: one histogram per model output, sharing bins and alpha.
histogram_panels = (
    (3, position_sizes, {'label': 'Position Sizes'},
     'Position Size', 'Position Size Distribution'),
    (4, stop_losses, {'color': 'red', 'label': 'Stop Losses'},
     'Stop Loss %', 'Stop Loss Distribution'),
    (5, profit_targets, {'color': 'green', 'label': 'Profit Targets'},
     'Profit Target %', 'Profit Target Distribution'),
)
for panel_index, panel_values, panel_style, panel_xlabel, panel_title in histogram_panels:
    plt.subplot(2, 3, panel_index)
    plt.hist(panel_values, bins=20, alpha=0.7, **panel_style)
    plt.xlabel(panel_xlabel)
    plt.ylabel('Frequency')
    plt.title(panel_title)
    plt.legend()

# Scatter of stop loss against profit target, coloured by their ratio.
plt.subplot(2, 3, 6)
risk_reward = profit_targets / (stop_losses + 1e-6)  # 1e-6 avoids division by zero
plt.scatter(stop_losses, profit_targets, alpha=0.6, c=risk_reward, cmap='viridis')
plt.xlabel('Stop Loss %')
plt.ylabel('Profit Target %')
plt.title('Risk-Reward Relationship')
plt.colorbar(label='Risk-Reward Ratio')

plt.tight_layout()
plt.show()

# Performance summary
print("\n📊 M-RMS Training Summary:")
print(f"Position Size Range: {position_sizes.min():.3f} to {position_sizes.max():.3f}")
print(f"Stop Loss Range: {stop_losses.min():.3f} to {stop_losses.max():.3f}")
print(f"Profit Target Range: {profit_targets.min():.3f} to {profit_targets.max():.3f}")
print(f"Average Risk-Reward Ratio: {risk_reward.mean():.2f}")

print("\n✅ M-RMS Training Complete!")
print("Model saved to: ../models/mrms_best.pth")
print("Next: Train Main MARL Core")