# Validate RL Environment

This notebook validates the reinforcement learning environment for trading.

In [None]:
import glob
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make the project's `src` directory importable (path is relative to the working directory)
sys.path.append('../src')

# Plot defaults used by every figure in this notebook
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

## 1. Import RL Environment

First, let's import the RL environment.

In [None]:
# Import RL environment
from rl.env import TradingEnv, MultiAssetTradingEnv

## 2. Load Data

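Let's load the data for the RL environment. The loader tries the batch feature files first, then the processed training data, then the raw tick files, and falls back to synthetic data if none of them can be read.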

In [None]:
# Candidate data sources, tried in priority order. A pattern may contain shell-style
# wildcards; these are expanded with `glob` because `pd.read_parquet` treats its
# path argument literally and does not expand `*` itself.
DATA_SOURCES = [
    ("batch features", "../data/features/batch/technical/*.parquet"),
    ("processed data", "../data/processed/training_data.parquet"),
    ("raw data", "../data/raw/ticks/*/*.parquet"),
]

# Starting price of each synthetic symbol's random walk
DUMMY_START_PRICES = {"AAPL": 150.0, "MSFT": 250.0, "GOOGL": 2000.0}


def read_parquet_glob(pattern):
    """Read every parquet file matching ``pattern`` into one DataFrame.

    Args:
        pattern: A file path, optionally containing glob wildcards.

    Returns:
        The single matching file as read by ``pd.read_parquet``, or the
        row-wise concatenation (with a fresh integer index) of all matching
        files, taken in sorted path order.

    Raises:
        FileNotFoundError: If no file matches ``pattern``.
    """
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f"No parquet files match '{pattern}'")
    if len(paths) == 1:
        return pd.read_parquet(paths[0])
    return pd.concat([pd.read_parquet(path) for path in paths], ignore_index=True)


def make_dummy_data(n_samples=100, seed=42):
    """Build a reproducible synthetic OHLCV frame for the symbols in DUMMY_START_PRICES.

    Each symbol's close follows a random walk with 1% per-step noise, starting
    from its configured price. ``open``/``high``/``low`` are derived so that
    ``low <= min(open, close)`` and ``high >= max(open, close)``. ``ma_5`` and
    ``rsi_14`` are pure noise placeholders, not real indicators.

    Args:
        n_samples: Number of daily rows per symbol (at least one row is produced).
        seed: Seed passed to ``np.random.seed``; note this reseeds NumPy's
            global generator.

    Returns:
        DataFrame with columns symbol, timestamp, close, open, high, low,
        volume, ma_5, rsi_14.
    """
    np.random.seed(seed)
    rows = []

    for symbol, start_price in DUMMY_START_PRICES.items():
        # Random-walk closes: each step moves by N(0, 1) percent of the last price
        closes = [start_price]
        for _ in range(n_samples - 1):
            last = closes[-1]
            closes.append(last + np.random.normal(0, 1) * last * 0.01)

        for day, close in enumerate(closes):
            # Draw order (open, high, low, volume, ma_5, rsi_14) is kept stable
            # so a given seed always yields the same frame.
            open_ = close * (1 - np.random.random() * 0.01)
            high = close * (1 + np.random.random() * 0.01)
            # Anchor the low below both open and close so every candle is valid
            low = min(open_, close) * (1 - np.random.random() * 0.01)
            rows.append({
                "symbol": symbol,
                "timestamp": pd.Timestamp("2023-01-01") + pd.Timedelta(days=day),
                "close": close,
                "open": open_,
                "high": high,
                "low": low,
                "volume": np.random.randint(1000, 10000),
                "ma_5": np.random.random() * 0.1,
                "rsi_14": np.random.random() * 100,
            })

    return pd.DataFrame(rows)


# Use the first source that loads; report why the others were skipped
df = None
for source_name, pattern in DATA_SOURCES:
    try:
        df = read_parquet_glob(pattern)
    except Exception as exc:  # best-effort: any failure falls through to the next source
        print(f"Could not load {source_name} ({pattern}): {exc}")
        continue
    print(f"Loaded data from {source_name}.")
    break

if df is None:
    print("No data found. Creating dummy data.")
    df = make_dummy_data()

In [None]:
# Preview the first five rows to sanity-check the loaded columns and values
df.head()

In [None]:
# Column dtypes
print("Data types:")
print(df.dtypes)

# Null count per column
print("\nMissing values:")
print(df.isnull().sum())

# Distinct tickers present in the data
print("\nUnique symbols:")
print(df["symbol"].unique())

# Time span covered, reported only when a timestamp column is available
has_timestamp = "timestamp" in df.columns
if has_timestamp:
    print("\nDate range:")
    print(f"Start: {df['timestamp'].min()}")
    print(f"End: {df['timestamp'].max()}")

## 3. Initialize RL Environment

Now, let's initialize the RL environment.

In [None]:
# Preferred model inputs
feature_cols = ["ma_5", "rsi_14"]

# If any preferred feature is absent, report the first one that is missing and
# fall back to every numeric column that is not a raw price/volume field.
first_missing = next((name for name in feature_cols if name not in df.columns), None)
if first_missing is not None:
    print(f"Feature column '{first_missing}' not found in DataFrame. Available columns: {df.columns.tolist()}")
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    price_volume_cols = ("open", "high", "low", "close", "volume")
    feature_cols = [name for name in numeric_cols if name not in price_volume_cols]
    print(f"Using alternative feature columns: {feature_cols}")

# Build the single-environment instance used by the following sections
env = TradingEnv(
    df=df,
    feature_cols=feature_cols,
    initial_capital=100_000,
    transaction_cost=0.001,
)

# Report the spaces the environment exposes
print("Environment initialized.")
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

## 4. Test Environment Reset

Let's test the environment reset function.

In [None]:
# Reset the environment and keep the returned observation.
# NOTE(review): the result is used as a single array-like (`obs.shape` below), so this
# assumes `reset()` returns only the observation rather than an `(obs, info)` pair — confirm.
obs = env.reset()

# Show the shape and contents of the initial observation
print(f"Observation shape: {obs.shape}")
print(f"Observation: {obs}")

# Show the environment's bookkeeping attributes right after the reset
print(f"Current step: {env.current_step}")
print(f"Cash: ${env.cash:.2f}")
print(f"Positions: {env.positions}")

## 5. Test Environment Step

Now, let's test the environment step function.

In [None]:
# Sample one action from the action space and apply it.
# The result of `step()` is unpacked as a 4-tuple: (observation, reward, done, info).
action = env.action_space.sample()
obs, reward, done, info = env.step(action)

# Show what the step returned
print(f"Action: {action}")
print(f"Reward: {reward:.6f}")
print(f"Done: {done}")
print(f"Info: {info}")

# Show the environment's bookkeeping attributes after the step, for comparison with the reset output
print(f"Current step: {env.current_step}")
print(f"Cash: ${env.cash:.2f}")
print(f"Positions: {env.positions}")

## 6. Run a Complete Episode

Let's run a complete episode with random actions.

In [None]:
# Start a fresh episode
obs = env.reset()

# Episode bookkeeping
done, total_reward, step_count = False, 0, 0

# Random policy rollout, capped at 100 steps
while True:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)

    total_reward += reward
    step_count += 1

    env.render()

    # The step cap is checked before the termination flag, so the message is
    # printed even when the episode happens to end on the capped step.
    if step_count >= 100:
        print("Maximum steps reached.")
        break
    if done:
        break

# Episode summary
print(f"Episode completed after {step_count} steps.")
print(f"Total reward: {total_reward:.2f}")
print(f"Final portfolio value: ${info['portfolio_value']:.2f}")

## 7. Analyze Portfolio History

Let's analyze the portfolio history from the episode.

In [None]:
# Fetch the portfolio history recorded by the environment.
# Later cells read its "portfolio_value" and "step" columns.
history_df = env.get_portfolio_history()

# Preview the first five rows of the history
history_df.head()

In [None]:
# Draw the environment's built-in portfolio chart and keep the returned object in `fig`
# (presumably a Matplotlib figure — confirm against the environment implementation)
fig = env.plot_portfolio_history()
plt.show()

In [None]:
# Annualisation factor.
# NOTE(review): assumes each environment step corresponds to one trading day — confirm.
TRADING_DAYS_PER_YEAR = 252

# Overall performance between the first and last recorded portfolio values
initial_value = history_df["portfolio_value"].iloc[0]
final_value = history_df["portfolio_value"].iloc[-1]
total_return = (final_value / initial_value - 1) * 100

# Step-over-step returns; the first row has no predecessor and is therefore NaN
history_df["daily_return"] = history_df["portfolio_value"].pct_change()

# N recorded values span N - 1 return periods, so the growth from the first to
# the last row is annualised over N - 1 periods (not N).
n_periods = len(history_df) - 1

if n_periods > 0:
    annualized_return = ((final_value / initial_value) ** (TRADING_DAYS_PER_YEAR / n_periods) - 1) * 100
    annualized_volatility = history_df["daily_return"].std() * np.sqrt(TRADING_DAYS_PER_YEAR) * 100
    # Return-to-volatility ratio with no risk-free adjustment. Reported as 0 when the
    # volatility is zero or undefined (NaN, e.g. when there is only one return).
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility > 0 else 0
else:
    # A single observation carries no return information
    annualized_return = 0
    annualized_volatility = 0
    sharpe_ratio = 0

# Report
print("Portfolio Statistics:")
print(f"Initial Value: ${initial_value:.2f}")
print(f"Final Value: ${final_value:.2f}")
print(f"Total Return: {total_return:.2f}%")
print(f"Annualized Return: {annualized_return:.2f}%")
print(f"Annualized Volatility: {annualized_volatility:.2f}%")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

## 8. Test Multi-Asset Environment

Now, let's test the multi-asset environment.

In [None]:
# Build the multi-asset environment with the same data, features, starting capital
# and transaction cost that were passed to the single environment above
multi_env = MultiAssetTradingEnv(
    df=df,
    feature_cols=feature_cols,
    initial_capital=100_000,
    transaction_cost=0.001
)

# Report the spaces the multi-asset environment exposes
print("Multi-asset environment initialized.")
print(f"Observation space: {multi_env.observation_space}")
print(f"Action space: {multi_env.action_space}")

In [None]:
# Start a fresh multi-asset episode
obs = multi_env.reset()

# Episode bookkeeping
done, total_reward, step_count = False, 0, 0

# Random policy rollout, capped at 10 steps to keep the output short
while True:
    action = multi_env.action_space.sample()
    obs, reward, done, info = multi_env.step(action)

    total_reward += reward
    step_count += 1

    multi_env.render()

    # The step cap is checked before the termination flag, so the message is
    # printed even when the episode happens to end on the capped step.
    if step_count >= 10:
        print("Maximum steps reached.")
        break
    if done:
        break

# Episode summary
print(f"Episode completed after {step_count} steps.")
print(f"Total reward: {total_reward:.2f}")
print(f"Final portfolio value: ${info['portfolio_value']:.2f}")

In [None]:
# Fetch the portfolio history recorded by the multi-asset environment
multi_history_df = multi_env.get_portfolio_history()

# Draw the multi-asset environment's built-in portfolio chart
fig = multi_env.plot_portfolio_history()
plt.show()

## 9. Implement a Simple Trading Strategy

Let's implement a simple trading strategy and test it in the environment.

In [None]:
# Define a simple RSI threshold strategy
def simple_strategy(env, obs, oversold=30, overbought=70):
    """
    A simple trading strategy based on RSI.

    For every symbol the RSI value is read from the observation and mapped to
    a per-symbol signal: 1 (buy) when RSI is below ``oversold``, 2 (sell) when
    it is above ``overbought``, otherwise 0 (hold). The per-symbol signals are
    packed into one integer as base-3 digits, with symbol ``i`` at digit ``i``.

    Args:
        env: Trading environment; its ``symbols`` and ``feature_cols``
            attributes are read.
        obs: Current observation, indexed by integer position.
        oversold: RSI level below which a symbol is bought.
        overbought: RSI level above which a symbol is sold.

    Returns:
        Action to take, as a base-3 encoded integer.
    """
    n_features = len(env.feature_cols)

    # The position of the first RSI-like feature is the same for every symbol,
    # so it is looked up once instead of once per symbol.
    rsi_offset = next(
        (j for j, col in enumerate(env.feature_cols) if "rsi" in col.lower()),
        None,
    )

    action = 0
    for i, symbol in enumerate(env.symbols):
        if rsi_offset is not None:
            # NOTE(review): assumes the observation stores each symbol's features
            # contiguously, in `env.symbols` order — confirm against the environment.
            rsi = obs[rsi_offset + i * n_features]
        else:
            # No RSI feature available: fall back to a random value in [0, 100)
            rsi = np.random.random() * 100

        if rsi < oversold:  # Oversold, buy
            signal = 1
        elif rsi > overbought:  # Overbought, sell
            signal = 2
        else:  # Neutral, hold
            signal = 0

        # Symbol i occupies the i-th base-3 digit of the encoded action
        action += signal * (3 ** i)

    return action

In [None]:
# Start a fresh episode
obs = env.reset()

# Episode bookkeeping
done, total_reward, step_count = False, 0, 0

# Rollout driven by the RSI rule, capped at 100 steps
while True:
    action = simple_strategy(env, obs)
    obs, reward, done, info = env.step(action)

    total_reward += reward
    step_count += 1

    env.render()

    # The step cap is checked before the termination flag, so the message is
    # printed even when the episode happens to end on the capped step.
    if step_count >= 100:
        print("Maximum steps reached.")
        break
    if done:
        break

# Episode summary
print(f"Episode completed after {step_count} steps.")
print(f"Total reward: {total_reward:.2f}")
print(f"Final portfolio value: ${info['portfolio_value']:.2f}")

In [None]:
# Fetch the portfolio history after the simple-strategy episode.
# NOTE(review): `env` was reset before that episode; presumably the history then only
# covers the latest episode — confirm against the environment implementation.
strategy_history_df = env.get_portfolio_history()

# Draw the environment's built-in portfolio chart for the strategy run
fig = env.plot_portfolio_history()
plt.show()

In [None]:
# Overlay the portfolio value of both runs on one chart
plt.figure(figsize=(12, 6))

for run_label, run_history in (
    ("Random Strategy", history_df),
    ("Simple Strategy", strategy_history_df),
):
    plt.plot(run_history["step"], run_history["portfolio_value"], label=run_label)

# Titles, axis labels, legend and grid
plt.title("Portfolio Value Comparison")
plt.xlabel("Step")
plt.ylabel("Portfolio Value ($)")
plt.legend()
plt.grid(True)
plt.show()

## 10. Summary

In this notebook, we have validated the reinforcement learning environment for trading. We have:

1. Initialized the RL environment
2. Tested the environment reset and step functions
3. Run a complete episode with random actions
4. Analyzed the portfolio history
5. Tested the multi-asset environment
6. Implemented a simple trading strategy

The RL environment provides a flexible framework for developing and testing trading strategies using reinforcement learning.