# 06 - RL Ensemble Backtest

This notebook combines RL-based decisions with ensemble signals to backtest the full TG4 strategy.

**Key Features:**
- RL agent with target allocation rewards (BTC 40%, XRP 30%, RLUSD 20%)
- Ensemble signal combination (LSTM + Arb + Rebalance)
- RLUSD premium detection for ecosystem demand signals
- Portfolio alignment tracking

In [None]:
import sys
sys.path.insert(0, '../src')

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from data_fetcher import DataFetcher
from portfolio import Portfolio
from models.rl_agent import TradingEnv, train_rl_agent, load_rl_agent
from ensemble import Ensemble
from strategies.ripple_momentum_lstm import generate_ripple_signals, check_rlusd_premium

## 1. Fetch Market Data

In [None]:
# Download candles for every pair of interest. Pairs whose result comes back
# empty are left out, so `data` can end up with fewer entries than `symbols`.
# NOTE(review): the fetch_ohlcv arguments are presumably
# (exchange, pair, timeframe, candle limit) - confirm against DataFetcher.
fetcher = DataFetcher()

symbols = ['XRP/USDT', 'BTC/USDT', 'RLUSD/USDT']
data = {}

for pair in symbols:
    print(f"Fetching {pair}...")
    candles = fetcher.fetch_ohlcv('kraken', pair, '1h', 2000)
    if candles.empty:
        continue
    data[pair] = candles
    print(f"  {len(candles)} candles, latest: ${candles['close'].iloc[-1]:.4f}")

print(f"\nLoaded {len(data)} symbols")

## 2. Initialize Trading Environment

In [None]:
# Holdings the backtest starts from, keyed by asset symbol.
initial_balance = dict(USDT=1000.0, XRP=500.0)

# Environment built over the fetched market data and the starting holdings.
env = TradingEnv(data, initial_balance)

# Report the environment's spaces, episode length and target weights.
print("Environment initialized")
print(f"Action space: {env.action_space}")
print(f"Observation space: {env.observation_space}")
print(f"Max steps: {env.max_steps}")
print(f"\nTarget allocation:")
for name, share in env.targets.items():
    # Targets are stored as fractions; show them as whole percentages.
    print(f"  {name}: {share*100:.0f}%")

## 3. Train RL Agent (if not already trained)

In [None]:
import os

# Checkpoint file checked on disk. load_rl_agent is handed the same location
# without the ".zip" suffix.
model_path = '../src/models/rl_ppo_agent.zip'

if not os.path.exists(model_path):
    # No checkpoint yet: train from scratch on the fetched data.
    # NOTE(review): whether train_rl_agent also saves the model to
    # model_path is not visible here - confirm.
    print("No trained model found. Training new model...")
    print("This may take 5-10 minutes...")
    model = train_rl_agent(data, timesteps=100000)
    print("Training complete!")
else:
    print("Loading existing model...")
    model = load_rl_agent('../src/models/rl_ppo_agent')
    print("Model loaded!")

## 4. Backtest RL Agent

In [None]:
def backtest_rl_agent(model, data, initial_balance):
    """Replay a trained policy over historical data and record its performance.

    A fresh ``TradingEnv`` is built from ``data`` and ``initial_balance`` and
    stepped with the model's deterministic actions until the episode ends,
    either by termination or by truncation.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns ``(action, state)``.
        data: Market data, passed unchanged to ``TradingEnv``.
        initial_balance: Starting holdings, passed unchanged to ``TradingEnv``.

    Returns:
        dict with the keys:
            ``values``: portfolio USD value after each step.
            ``allocations``: per-step ``{asset: weight}`` for BTC, XRP,
                RLUSD and USDT.
            ``actions``: integer action chosen at each step.
            ``alignment``: per-step overlap with ``env.targets``, computed as
                the sum of ``min(actual_weight, target_weight)`` per target.
            ``final_portfolio``: the environment's portfolio after the last
                step.
    """
    tracked_assets = ('BTC', 'XRP', 'RLUSD', 'USDT')

    env = TradingEnv(data, initial_balance)
    obs, _ = env.reset()

    portfolio_values = []
    allocations = []
    actions_taken = []
    alignment_scores = []

    done = False
    step = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        action = int(action)
        actions_taken.append(action)

        obs, _reward, terminated, truncated, _info = env.step(action)
        # The episode is over on either flag. Looping on `terminated` alone
        # never exits when the env ends the episode via truncation.
        done = bool(terminated) or bool(truncated)

        # Track portfolio value
        prices = env._current_prices()
        total_value = env.portfolio.get_total_usd(prices)
        portfolio_values.append(total_value)

        # Track allocation; assets without a quoted price are valued at 1.0
        alloc = {}
        for asset in tracked_assets:
            val = env.portfolio.balances.get(asset, 0) * prices.get(asset, 1.0)
            alloc[asset] = val / total_value if total_value > 0 else 0
        allocations.append(alloc)

        # Alignment: overlap between actual and target weights
        score = sum(min(alloc.get(a, 0), env.targets.get(a, 0)) for a in env.targets)
        alignment_scores.append(score)

        step += 1
        if step % 200 == 0:
            print(f"Step {step}: Value=${total_value:.2f}, Alignment={score:.3f}")

    return {
        'values': portfolio_values,
        'allocations': allocations,
        'actions': actions_taken,
        'alignment': alignment_scores,
        'final_portfolio': env.portfolio
    }

# Replay the trained policy over the fetched data and report the episode length.
print("Running RL backtest...")
results = backtest_rl_agent(model, data, initial_balance)
n_steps = len(results['values'])
print(f"\nBacktest complete: {n_steps} steps")

## 5. Analyze Results

In [None]:
values = np.array(results['values'])

# Approximate starting value: the initial USDT plus the initial XRP priced at
# candle 60. Read from initial_balance so the figure stays in sync with the
# balances the backtest actually started from.
# NOTE(review): index 60 is presumably the environment's warm-up offset - confirm.
initial_value = (
    initial_balance.get('USDT', 0.0)
    + initial_balance.get('XRP', 0.0) * data['XRP/USDT']['close'].iloc[60]
)  # Approx initial

# Calculate metrics
total_return = (values[-1] / initial_value - 1) * 100

# Drawdown is measured against the highest value seen *so far*. Using the
# global maximum would count losses that happened before the peak was reached.
running_peak = np.maximum.accumulate(values)
drawdowns = (running_peak - values) / running_peak
max_drawdown = drawdowns.max() * 100

# Per-step returns (the candles are fetched at '1h'), annualised with
# 252 * 24 periods per year.
# NOTE(review): crypto trades around the clock, so 365 * 24 may be the
# intended factor - confirm before comparing Sharpe ratios across reports.
returns = np.diff(values) / values[:-1]
sharpe = np.sqrt(252 * 24) * returns.mean() / (returns.std() + 1e-8)

print("="*50)
print("RL AGENT BACKTEST RESULTS")
print("="*50)
print(f"Initial Value:    ${initial_value:.2f}")
print(f"Final Value:      ${values[-1]:.2f}")
print(f"Total Return:     {total_return:.2f}%")
print(f"Max Drawdown:     {max_drawdown:.2f}%")
print(f"Sharpe Ratio:     {sharpe:.2f}")
print(f"Avg Alignment:    {np.mean(results['alignment']):.3f}")
print("="*50)

In [None]:
# Three stacked panels: portfolio value, asset mix, and alignment score.
fig, axes = plt.subplots(3, 1, figsize=(14, 10))
value_ax, alloc_ax, align_ax = axes

# Panel 1: portfolio value with the approximate starting value as reference.
value_ax.plot(values, 'b-', linewidth=1)
value_ax.axhline(y=initial_value, color='gray', linestyle='--', label='Initial')
value_ax.set_title('Portfolio Value Over Time')
value_ax.set_ylabel('USD Value')
value_ax.legend()
value_ax.grid(True, alpha=0.3)

# Panel 2: per-step weights stacked in a fixed asset order.
alloc_df = pd.DataFrame(results['allocations'])
stack_order = ['BTC', 'XRP', 'RLUSD', 'USDT']
alloc_ax.stackplot(
    range(len(alloc_df)),
    *[alloc_df[asset] for asset in stack_order],
    labels=stack_order,
    alpha=0.8,
)
# Reference lines at cumulative weights 0.4 / 0.7 / 0.9, i.e. where the
# stack boundaries fall for a 40% / 30% / 20% split of the first three assets.
for boundary in (0.4, 0.7, 0.9):
    alloc_ax.axhline(y=boundary, color='black', linestyle='--', alpha=0.5)
alloc_ax.set_title('Asset Allocation Over Time')
alloc_ax.set_ylabel('Weight')
alloc_ax.legend(loc='upper right')
alloc_ax.set_ylim(0, 1)

# Panel 3: alignment score with the 1.0 line marked as a perfect match.
align_ax.plot(results['alignment'], 'g-', linewidth=1)
align_ax.axhline(y=1.0, color='green', linestyle='--', label='Perfect (1.0)')
align_ax.set_title('Alignment Score (vs Target Allocation)')
align_ax.set_ylabel('Score')
align_ax.set_xlabel('Step')
align_ax.legend()
align_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../notebooks/rl_backtest_results.png', dpi=150)
plt.show()

## 6. Action Distribution

In [None]:
# Human-readable label for each discrete action id used in this notebook.
action_names = {
    0: 'BTC Buy',
    1: 'BTC Hold',
    2: 'BTC Sell',
    3: 'XRP Buy',
    4: 'XRP Hold',
    5: 'XRP Sell',
    6: 'RLUSD Buy',
    7: 'RLUSD Hold',
    8: 'RLUSD Sell',
}

# How often each action was chosen, ordered by action id.
actions = results['actions']
action_counts = pd.Series(actions).value_counts().sort_index()
n_actions_taken = len(actions)

print("\nAction Distribution:")
print("-"*40)
for action_id, count in action_counts.items():
    pct = count / n_actions_taken * 100
    print(f"{action_names[action_id]:15} : {count:5d} ({pct:5.1f}%)")

# Bar chart over all nine action ids; actions never taken are drawn as zero.
bar_labels = [action_names[i] for i in range(9)]
bar_heights = [action_counts.get(i, 0) for i in range(9)]
# One colour per Buy / Hold / Sell slot, repeated for each of the three assets.
colors = ['#ff6b6b', '#ffd93d', '#6bcb77'] * 3

plt.figure(figsize=(12, 5))
bars = plt.bar(bar_labels, bar_heights, color=colors)
plt.title('RL Agent Action Distribution')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 7. Compare with Ensemble Strategy

In [None]:
# Baseline for comparison: value the *untraded* starting portfolio at each
# candle and record the ensemble's signal alongside it.
# NOTE(review): no trade is executed in this loop, so `ensemble_values` is the
# mark-to-market value of initial_balance unless Ensemble.get_signal mutates
# the portfolio - confirm.
portfolio = Portfolio(initial_balance)
ensemble = Ensemble(data, portfolio)

ensemble_values = []
ensemble_signals = []

# Walk the same window as the RL backtest, starting at candle 60 and capped by
# the length of the XRP series.
first_candle = 60
last_candle = min(len(data['XRP/USDT']), first_candle + len(results['values']))

for i in range(first_candle, last_candle):
    # Stablecoins default to 1.0; any pair with a candle at index i overrides
    # the price of its base asset with that candle's close.
    prices = {'USDT': 1.0, 'USDC': 1.0, 'RLUSD': 1.0}
    prices.update({
        pair.split('/')[0]: frame['close'].iloc[i]
        for pair, frame in data.items()
        if i < len(frame)
    })

    ensemble_values.append(portfolio.get_total_usd(prices))

    # Signal is only recorded, not acted upon.
    # NOTE(review): get_signal is not given the step index, so it may return
    # the same signal on every iteration - confirm against Ensemble.
    signal = ensemble.get_signal('XRP/USDT')
    ensemble_signals.append(signal['action'])

print(f"Ensemble simulation: {len(ensemble_values)} steps")
print(f"Buy signals: {ensemble_signals.count('long_xrp')}")
print(f"Sell signals: {ensemble_signals.count('reduce_xrp')}")
print(f"Hold signals: {ensemble_signals.count('hold')}")

In [None]:
# Plot the RL agent against the untraded baseline, both rebased to 100 at
# their first recorded value.
fig, ax = plt.subplots(figsize=(14, 6))

rl_series = np.array(results['values'])
rl_norm = 100 * rl_series / results['values'][0]

if ensemble_values:
    bh_norm = 100 * np.array(ensemble_values) / ensemble_values[0]
else:
    # Nothing simulated: fall back to a single flat point at the baseline.
    bh_norm = [100]

ax.plot(rl_norm, 'b-', linewidth=2, label='RL Agent')
# Trim the baseline so it never extends past the RL series.
ax.plot(bh_norm[:len(rl_norm)], 'gray', linewidth=1, linestyle='--', label='Buy & Hold')
ax.axhline(y=100, color='black', linestyle=':', alpha=0.5)

ax.set_title('RL Agent vs Buy & Hold Performance')
ax.set_xlabel('Step')
ax.set_ylabel('Normalized Value (100 = start)')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Final Holdings Analysis

In [None]:
# Value the portfolio left at the end of the RL backtest using the latest
# closes for BTC and XRP; stablecoins are priced at 1.0.
final_portfolio = results['final_portfolio']
final_prices = {'USDT': 1.0, 'USDC': 1.0, 'RLUSD': 1.0}
final_prices['BTC'] = data['BTC/USDT']['close'].iloc[-1]
final_prices['XRP'] = data['XRP/USDT']['close'].iloc[-1]

print("\n" + "="*50)
print("FINAL HOLDINGS")
print("="*50)

# USD value of every balance above the dust threshold; assets without a known
# price are valued at 1.0. Built once and reused by the table and the pie.
holdings = {
    asset: amount * final_prices.get(asset, 1.0)
    for asset, amount in final_portfolio.balances.items()
    if amount > 0.0001
}

total = 0
for asset, value in holdings.items():
    amount = final_portfolio.balances[asset]
    total += value
    print(f"{asset:8}: {amount:12.4f} (${value:,.2f})")

print("-"*50)
print(f"{'TOTAL':8}: ${total:,.2f}")
print("="*50)

# Allocation pie chart
plt.figure(figsize=(8, 8))
plt.pie(holdings.values(), labels=holdings.keys(), autopct='%1.1f%%',
        colors=['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#ff66b3'])
plt.title('Final Portfolio Allocation')
plt.show()

## 9. RLUSD Premium Detection Test

In [None]:
# Summarise how often RLUSD closed away from 1.0 and run the premium detector.
if 'RLUSD/USDT' not in data:
    print("RLUSD/USDT data not available")
else:
    rlusd_prices = data['RLUSD/USDT']['close']
    n_candles = len(rlusd_prices)

    # Closes more than 0.001 above / below 1.0 count as premium / discount.
    premium_periods = (rlusd_prices > 1.001).sum()
    discount_periods = (rlusd_prices < 0.999).sum()

    print("RLUSD Price Analysis")
    print("-"*40)
    print(f"Current Price:   ${rlusd_prices.iloc[-1]:.4f}")
    print(f"Mean Price:      ${rlusd_prices.mean():.4f}")
    print(f"Std Dev:         ${rlusd_prices.std():.4f}")
    print(f"Premium periods: {premium_periods} ({premium_periods/n_candles*100:.1f}%)")
    print(f"Discount periods:{discount_periods} ({discount_periods/n_candles*100:.1f}%)")

    # Test premium detection with the same 1.001 threshold used above.
    is_premium = check_rlusd_premium(data, threshold=1.001)
    status = 'PREMIUM' if is_premium else 'Normal'
    print(f"\nCurrent premium status: {status}")

## Summary

Key findings from this backtest:
1. RL agent learns to balance accumulation with profit-taking
2. Alignment score shows how well we're tracking target allocation
3. RLUSD premium detection can signal ecosystem demand
4. Combination of RL + Ensemble provides robust signals