In [None]:
# Imports
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yaml

# Add project root to path so the `src.*` imports below can be resolved.
# NOTE(review): this uses the parent of the current working directory, so it
# assumes the notebook is launched from a direct subdirectory of the project
# root (e.g. `notebooks/`) — confirm for your setup.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

from src.models.finrl_agent import VolatilityFinRLAgent, FinRLEnsembleAgent
from src.envs.volatility_env import VolatilityTradingEnv
from src.utils.logging_utils import get_logger

# Setup: plotting defaults and a logger for this notebook
# Use seaborn's "darkgrid" style for the figures below.
sns.set_style('darkgrid')
# Default figure size as (width, height); individual cells override it where needed.
plt.rcParams['figure.figsize'] = (14, 6)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Logger from the project's logging helper, keyed by `__name__`.
logger = get_logger(__name__)

## 1. Prepare Data

In [None]:
# Synthetic daily market data, for demonstration purposes only.
# In production, replace this cell with a loader for actual market data.

n_days = 1000
dates = pd.date_range("2020-01-01", periods=n_days, freq="D")

# Seed NumPy's global RNG. Every draw below consumes from that single stream,
# so the order of the random calls determines the generated values.
np.random.seed(42)

# Close price: exponentiated cumulative sum of Gaussian shocks scaled by 0.02.
price = 100 * np.exp((np.random.randn(n_days) * 0.02).cumsum())

# Start from the deterministic columns, then attach the noisy ones.
# Keyword arguments are evaluated left to right, which fixes the draw order:
# high, low, volume, volatility, bid_ask_spread, order_imbalance.
data = pd.DataFrame({"date": dates, "close": price}).assign(
    high=price * (1 + np.abs(np.random.randn(n_days) * 0.01)),
    low=price * (1 - np.abs(np.random.randn(n_days) * 0.01)),
    volume=np.random.randint(1000000, 10000000, n_days),
    volatility=np.abs(np.random.randn(n_days) * 0.2 + 0.15),
    bid_ask_spread=np.abs(np.random.randn(n_days) * 0.002 + 0.001),
    order_imbalance=np.random.randn(n_days) * 0.1,
)

print(f"Data shape: {data.shape}")
data.head()

In [None]:
# Visualize data: three stacked panels on a single figure
fig, axes = plt.subplots(3, 1, figsize=(14, 10))
price_ax, volatility_ax, micro_ax = axes

# Panel 1 - close price, with the low-to-high band shaded behind it
price_ax.plot(data["date"], data["close"], label="Close Price")
price_ax.fill_between(data["date"], data["low"], data["high"], alpha=0.3)
price_ax.set(title="Price History", ylabel="Price")

# Panel 2 - volatility series
volatility_ax.plot(data["date"], data["volatility"], color="orange", label="Volatility")
volatility_ax.set(title="Volatility Over Time", ylabel="Volatility")

# Panel 3 - both microstructure series drawn on one shared axis
for column, label in (
    ("bid_ask_spread", "Bid-Ask Spread"),
    ("order_imbalance", "Order Imbalance"),
):
    micro_ax.plot(data["date"], data[column], label=label, alpha=0.7)
micro_ax.set(title="Microstructure Signals", ylabel="Value", xlabel="Date")

# Every panel gets a legend and a grid
for ax in axes:
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

## 2. Create Trading Environment

In [None]:
# Create environment
# Note: Adjust parameters based on your actual environment implementation
# The environment is built on the full synthetic `data` frame, and this single
# `env` instance is what the training and evaluation cells below operate on.

env = VolatilityTradingEnv(
    data=data,
    initial_balance=100000,  # starting balance handed to the environment
    # Add other environment-specific parameters
)

# Show the spaces exposed by the environment before any agent is built on it.
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

## 3. Train Single FinRL Agent

In [None]:
# Train PPO agent
# `model_kwargs` is handed to the project's VolatilityFinRLAgent wrapper;
# presumably it forwards these to the underlying PPO implementation —
# verify in src/models/finrl_agent.py.
ppo_agent = VolatilityFinRLAgent(
    env=env,
    model_name="ppo",
    model_kwargs={
        "learning_rate": 3e-4,
        "n_steps": 2048,
        "batch_size": 64,
        "gamma": 0.99,
    },
)

print("Training PPO agent...")
# NOTE(review): `tb_log_name` looks like a TensorBoard run name — confirm
# where (and whether) the wrapper writes those logs.
ppo_agent.train(total_timesteps=50000, tb_log_name="finrl_ppo_demo")

In [None]:
# Evaluate PPO agent
def evaluate_agent(agent, env, n_episodes=5, deterministic=True):
    """Run complete episodes with ``agent`` on ``env`` and summarise the returns.

    Args:
        agent: Object exposing ``predict(obs, deterministic=...)`` that returns
            an ``(action, state)`` pair; only the action is used.
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.
        n_episodes: Number of episodes to roll out. Must be at least 1.
        deterministic: Forwarded to ``agent.predict``. Defaults to ``True``,
            which is what every existing call relies on.

    Returns:
        Tuple ``(mean, std)`` of the per-episode total reward, as computed by
        ``np.mean`` and ``np.std``.

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1.
    """
    if n_episodes < 1:
        # Without this guard np.mean/np.std of an empty list would quietly
        # return NaN (plus a RuntimeWarning) and the caller would print "nan".
        raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")

    episode_rewards = []

    for _ in range(n_episodes):
        obs, _ = env.reset()
        done = False
        episode_reward = 0

        # An episode ends as soon as the environment reports either flag.
        while not done:
            action, _ = agent.predict(obs, deterministic=deterministic)
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            episode_reward += reward

        episode_rewards.append(episode_reward)

    return np.mean(episode_rewards), np.std(episode_rewards)


# NOTE(review): the agent is scored on the same `env` instance it was
# constructed with (and presumably trained on), so this is an in-sample
# figure rather than a held-out estimate.
mean_reward, std_reward = evaluate_agent(ppo_agent, env)
print(f"PPO Performance: {mean_reward:.2f} ± {std_reward:.2f}")

## 4. Train Ensemble of Agents

In [None]:
# Create and train ensemble
# NOTE(review): if the project wrapper builds Stable-Baselines3 models, "sac"
# only supports continuous (Box) action spaces — confirm it is compatible with
# the action space this environment exposes.
ensemble = FinRLEnsembleAgent(
    env=env,
    model_names=["ppo", "a2c", "sac"],
)

print("Training ensemble (this may take a while)...")
# Presumably the timestep budget applies to each model separately — verify in
# FinRLEnsembleAgent.train_all.
ensemble.train_all(total_timesteps=30000)

In [None]:
# Compare ensemble with single agents
results = {}

# Score every member of the ensemble over 5 episodes and report as we go
for name, agent in ensemble.agents.items():
    mean, std = evaluate_agent(agent, env, n_episodes=5)
    results[name] = {"mean": mean, "std": std}
    print(f"{name.upper()}: {mean:.2f} ± {std:.2f}")

# Split the collected statistics into parallel lists for plotting
models = list(results)
means = [stats["mean"] for stats in results.values()]
stds = [stats["std"] for stats in results.values()]

# Bar chart of mean episode reward; error bars show the standard deviation
comparison_fig, comparison_ax = plt.subplots(figsize=(10, 6))
comparison_ax.bar(models, means, yerr=stds, capsize=5, alpha=0.7)
comparison_ax.set(
    title="FinRL Agents Performance Comparison",
    xlabel="Agent",
    ylabel="Mean Episode Reward",
)
comparison_ax.grid(True, axis="y")
plt.show()

## 5. Save and Load Models

In [None]:
# Create the output directory up front so this cell also runs on a fresh
# checkout. Whether the project's save helpers create missing parent
# directories is not visible from this notebook, so do not rely on it.
# The path is relative to the current working directory, like the ones below.
Path("../models").mkdir(parents=True, exist_ok=True)

# Save single agent
ppo_agent.save("../models/finrl_ppo_demo")
print("PPO agent saved")

# Save ensemble
ensemble.save_all("../models/finrl_ensemble_demo")
print("Ensemble saved")

In [None]:
# Load agent for inference
# A fresh wrapper is constructed first; `load` presumably restores the saved
# model into it in place (its return value is not used) — verify in
# src/models/finrl_agent.py.
loaded_agent = VolatilityFinRLAgent(
    env=env,
    model_name="ppo",
)
loaded_agent.load("../models/finrl_ppo_demo")

# Test loaded agent
# Note: this overwrites `mean_reward` / `std_reward` from the earlier PPO evaluation.
mean_reward, std_reward = evaluate_agent(loaded_agent, env, n_episodes=3)
print(f"Loaded agent performance: {mean_reward:.2f} ± {std_reward:.2f}")

## 6. Advanced: Hyperparameter Tuning with Optuna

FinRL works well with hyperparameter optimization libraries like Optuna.

In [None]:
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler


def objective(trial):
    """Score a single Optuna trial for PPO hyperparameter tuning.

    Samples ``learning_rate``, ``batch_size``, ``n_steps`` and ``gamma`` from
    ``trial``, trains a new PPO ``VolatilityFinRLAgent`` on the notebook-level
    ``env`` for 20000 timesteps, and evaluates it over 3 episodes.

    Note:
        No intermediate values are reported through ``trial.report``, so a
        pruner attached to the study has nothing to act on.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean episode reward of the trained agent.
    """
    # Search space. Dict entries are evaluated top to bottom, so the suggest_*
    # calls happen in the order: learning_rate, batch_size, n_steps, gamma.
    sampled_kwargs = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128, 256]),
        "n_steps": trial.suggest_categorical("n_steps", [512, 1024, 2048]),
        "gamma": trial.suggest_float("gamma", 0.9, 0.9999),
    }

    # Build and train a candidate agent with the sampled configuration
    candidate = VolatilityFinRLAgent(
        env=env,
        model_name="ppo",
        model_kwargs=sampled_kwargs,
    )
    candidate.train(total_timesteps=20000)

    # Only the mean reward is used as the objective value
    score, _ = evaluate_agent(candidate, env, n_episodes=3)
    return score


# Run optimization (uncomment to execute)
# study = optuna.create_study(direction="maximize", sampler=TPESampler(), pruner=MedianPruner())
# study.optimize(objective, n_trials=20)

# print("Best hyperparameters:", study.best_params)
# print("Best score:", study.best_value)

## Summary

In this notebook, we:
1. ✅ Integrated FinRL with the volatility trading environment
2. ✅ Trained multiple RL algorithms (PPO, A2C, SAC)
3. ✅ Created an ensemble of agents
4. ✅ Saved and loaded models
5. ✅ Demonstrated hyperparameter tuning setup

## Next Steps
- Use real market data instead of synthetic data
- Implement a proper train/validation/test split
- Add more sophisticated evaluation metrics
- Integrate with backtesting framework
- Deploy best model for live trading