# Environment Validation for DRL Portfolio

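This notebook validates the custom Gymnasium environment (PortfolioGym-v0): reset/step behavior, a random-policy trajectory, reward sensitivity to different allocations, weight-constraint enforcement, and the drawdown circuit breaker.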

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this installs a blanket "ignore" filter, so every warning
# category (including deprecation and runtime warnings) is hidden for the
# rest of the session. Consider narrowing it to specific categories while
# validating the environment.
warnings.filterwarnings('ignore')

from data_acquisition import DataAcquisition
from portfolio_env import PortfolioEnv

# Plot defaults applied to every figure created below.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Load Data and Create Environment

In [None]:
# Load data
data_acq = DataAcquisition('config.yaml')
dataset = data_acq.fetch_full_dataset()

# Create environment with training data
env = PortfolioEnv(
    prices=dataset['train']['prices'],
    returns=dataset['train']['returns']
)

# Seed the action-space sampler so that the `env.action_space.sample()` calls
# in the cells below produce the same action sequence on every
# Restart & Run All. This only fixes the sampled actions; any randomness
# inside the environment itself is not affected by this call.
SEED = 42
env.action_space.seed(SEED)

print(f"Action space: {env.action_space}")
print(f"Observation space: {env.observation_space}")
print(f"State dimension: {env.state_dim}")

## 2. Test Reset and Step

In [None]:
# Reset the environment and inspect what it reports for the initial state.
obs, info = env.reset()

initial_weights = env.current_weights
print(f"Initial observation shape: {obs.shape}")
print(f"Initial weights: {initial_weights}")
print(f"Weights sum to: {np.sum(initial_weights):.6f}")

In [None]:
# Take a single step with a randomly sampled action and report the outcome.
action = env.action_space.sample()
print(f"\nRandom action (raw): {action}")

obs, reward, terminated, truncated, info = env.step(action)

print("\nAfter step:")
print(f"  Reward: {reward:.6f}")

# (display label, key in the `info` dict returned by step)
info_fields = (
    ("Portfolio value", "portfolio_value"),
    ("Portfolio return", "portfolio_return"),
    ("Volatility", "volatility"),
    ("Cost", "cost"),
)
for field_label, field_key in info_fields:
    print(f"  {field_label}: {info[field_key]:.6f}")

weights_after_step = env.current_weights
print(f"  New weights: {weights_after_step}")
print(f"  Weights sum to: {np.sum(weights_after_step):.6f}")

## 3. Random Policy Trajectory

In [None]:
# Roll out a random policy until the episode ends or the step cap is reached.
max_steps = 100
obs, _ = env.reset()
step_count = 0
done = False

while step_count < max_steps:
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    step_count += 1
    done = terminated or truncated
    if done:
        # The environment signalled the end of the episode before the cap.
        break

print(f"Episode completed in {step_count} steps")
print(f"Final portfolio value: ${env.portfolio_value:.4f}")

In [None]:
# Two stacked panels: portfolio value on top, per-asset weights below.
fig, (ax_value, ax_weights) = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))
title_style = {'fontsize': 14, 'fontweight': 'bold'}

# Top panel: value history recorded by the environment.
ax_value.plot(env.portfolio_history, linewidth=2)
ax_value.set_title('Random Policy - Portfolio Value', **title_style)
ax_value.set_xlabel('Step', fontsize=12)
ax_value.set_ylabel('Portfolio Value', fontsize=12)

# Bottom panel: one line per column of the training price table.
weights_array = np.array(env.weights_history)
asset_symbols = dataset['train']['prices'].columns
for col_idx, symbol in enumerate(asset_symbols):
    ax_weights.plot(weights_array[:, col_idx], label=symbol, linewidth=2)

ax_weights.set_title('Random Policy - Weight Evolution', **title_style)
ax_weights.set_xlabel('Step', fontsize=12)
ax_weights.set_ylabel('Weight', fontsize=12)
ax_weights.legend(fontsize=10)

for panel in (ax_value, ax_weights):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Reward Function Sensitivity

In [None]:
# Apply a few hand-picked allocations from a fresh reset and compare rewards.
# NOTE(review): the vectors are hard-coded for four assets; the SPY/AGG hints
# assume a particular column order in the price data — confirm against config.
scenarios = [
    ('Equal', np.array([0.25, 0.25, 0.25, 0.25])),        # Equal weight
    ('Concentrated', np.array([0.4, 0.4, 0.1, 0.1])),     # Concentrated
    ('Aggressive', np.array([0.6, 0.2, 0.1, 0.1])),       # Aggressive SPY
    ('Conservative', np.array([0.0, 0.6, 0.2, 0.2])),     # Conservative AGG
]

action_labels = [name for name, _weights in scenarios]
test_actions = [weights for _name, weights in scenarios]

# (display label, key in the `info` dict returned by step)
reported_fields = (
    ("Return", "portfolio_return"),
    ("Volatility", "volatility"),
    ("Cost", "cost"),
)

for label, action in scenarios:
    # Each scenario starts from a freshly reset environment.
    env.reset()
    obs, reward, _, _, info = env.step(action)

    print(f"\n{label}:")
    print(f"  Action: {action}")
    print(f"  Reward: {reward:.6f}")
    for field_label, field_key in reported_fields:
        print(f"  {field_label}: {info[field_key]:.6f}")

## 5. Constraint Validation

In [None]:
# Test constraint enforcement
#
# Each check steps the environment with random actions and counts violations.
# The success line is printed only when no violation was observed; otherwise
# a failure summary is printed, so a WARNING is never followed by a "✓".
print("Testing constraint enforcement:\n")

# Test 1: Weights sum to 1.0
env.reset()
sum_violations = 0
for _ in range(10):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    weight_sum = np.sum(env.current_weights)

    if not np.isclose(weight_sum, 1.0, atol=1e-6):
        sum_violations += 1
        print(f"WARNING: Weights sum to {weight_sum:.8f}")

    # Do not keep stepping an episode that has already ended.
    if terminated or truncated:
        env.reset()

if sum_violations == 0:
    print("✓ All weights sum to 1.0")
else:
    print(f"✗ {sum_violations} of 10 steps had weights that do not sum to 1.0")

# Test 2: Weights within bounds
min_bound = env.min_weight
max_bound = env.max_weight

env.reset()
bound_violations = 0
for _ in range(10):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)

    # Small tolerance so floating-point round-off is not reported as a breach.
    below_min = np.any(env.current_weights < min_bound - 1e-6)
    above_max = np.any(env.current_weights > max_bound + 1e-6)
    if below_min or above_max:
        bound_violations += 1
        print(f"WARNING: Weights out of bounds: {env.current_weights}")

    # Do not keep stepping an episode that has already ended.
    if terminated or truncated:
        env.reset()

if bound_violations == 0:
    print(f"✓ All weights within [{min_bound}, {max_bound}]")
else:
    print(f"✗ {bound_violations} of 10 steps had weights outside [{min_bound}, {max_bound}]")

## 6. Circuit Breaker Test

In [None]:
# Step with random actions for up to 200 steps and watch for the circuit breaker.
# This cell treats `truncated` from env.step as the circuit-breaker signal.
print("Testing circuit breaker (15% drawdown threshold):\n")

env.reset()
circuit_breaker_triggered = False
max_drawdown = 0

for step in range(200):
    # Plain random sample from the action space.
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)

    # Track the largest drawdown reported so far.
    drawdown_now = info['drawdown']
    if drawdown_now > max_drawdown:
        max_drawdown = drawdown_now

    if truncated:
        circuit_breaker_triggered = True
        print(f"Circuit breaker triggered at step {step}")
        print(f"Drawdown: {drawdown_now:.2%}")
        break
    elif terminated:
        # Episode ended without the truncation signal.
        break

if not circuit_breaker_triggered:
    print(f"Circuit breaker not triggered. Max drawdown: {max_drawdown:.2%}")