In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

import sys
sys.path.append("/Users/pnl1f276/code/bennystu/trend_surgeon/Trend-Surgeon-Time-Series/ts_boilerplate")

from gym_anytrading.envs.flexible_env import FlexibleTradingEnv
from dataprep import build_feature_dataset, get_X_y

In [2]:
# Build the feature table used by every later cell. Per the captured output
# below, this call also prints a data-integrity report and reports writing
# the markdown feature documentation.
df = build_feature_dataset()

--- Data Integrity Report ---
Rows: 2,891, Columns: 33
Index sorted: True
Index unique: True
Rows that are entirely NaN: 0
Inf values: 0

Top columns with NaNs:
  (None)

--- Data Integrity Report ---
Rows: 2,891, Columns: 33
Index sorted: True
Index unique: True
Rows that are entirely NaN: 0
Inf values: 0

Top columns with NaNs:
  (None)

Markdown feature documentation written to: ../docs/feature_documentation.md


In [3]:
# Preview the first rows to eyeball the column layout before slicing features.
df.head()

Unnamed: 0_level_0,PPH_Open,PPH_High,PPH_Low,PPH_Close,PPH_Volume,XPH_Open,XPH_High,XPH_Low,XPH_Close,XPH_Volume,...,XPH_Ratio_PPH_t-1,day_of_week,day_of_month,month,quarter,is_holiday_adjacent,days_to_cpi,days_since_cpi,days_to_nfp,days_since_nfp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-02-04,53.450001,53.700001,53.369999,53.68,117000,44.595001,44.82,44.215,44.490002,180200,...,0.833804,1,4,2,1,0,9,24,32,2
2014-02-05,53.799999,53.959999,53.450001,53.84,40400,44.43,44.509998,43.535,44.299999,184400,...,0.8288,2,5,2,1,0,8,25,31,3
2014-02-06,53.959999,54.09,53.790001,53.959999,24200,44.610001,44.775002,43.970001,44.195,76800,...,0.822808,3,6,2,1,0,7,26,30,4
2014-02-07,54.200001,54.959999,54.139999,54.959999,36100,44.419998,45.349998,44.244999,45.349998,186800,...,0.819033,4,7,2,1,0,6,27,29,5
2014-02-10,55.029999,55.349998,54.900002,55.349998,173000,45.424999,45.959999,45.185001,45.959999,127200,...,0.825146,0,10,2,1,0,3,30,26,8


In [4]:
# Every column except the traded price series is handed to the env as a signal.
feature_cols = df.columns.drop("target_close")

# Both arrays are cast to float32 before being passed to the environment.
prices = df["target_close"].to_numpy().astype(np.float32)
signal_features = df.loc[:, feature_cols].to_numpy().astype(np.float32)

In [None]:
# ============================================
# TRAIN / TEST SPLIT
# ============================================
# Chronological split: the leading `train_ratio` share of rows is used for
# training and the remaining rows are held out for the final evaluation.
window_size, train_ratio = 30, 0.8

total_len = len(df)
train_end = int(train_ratio * total_len)

# Training range starts at `window_size` rather than 0
# (presumably so the first observation has a full lookback window -
# confirm against FlexibleTradingEnv).
train_frame_bound = (window_size, train_end)

# Test range covers everything from the end of training to the last row.
test_frame_bound = (train_end, total_len)

print(f"Total samples: {total_len}")
print(f"Train bounds: {train_frame_bound} ({train_end - window_size} steps)")
print(f"Test bounds:  {test_frame_bound} ({total_len - train_end} steps)")

In [None]:
# ============================================
# ENVIRONMENT FACTORY FUNCTIONS
# ============================================
# Note: The updated FlexibleTradingEnv now includes:
# - Position in observation (agent knows if it's long/short)
# - Realistic double fees when flipping LONG↔SHORT
# - Optional short_borrow_cost for realistic shorting

def _build_trading_env(frame_bound):
    """Construct a FlexibleTradingEnv over ``frame_bound``.

    Single source of truth for the env configuration so the train and test
    environments cannot drift apart (e.g. different fees).

    Args:
        frame_bound: ``(start, end)`` tuple forwarded to the env unchanged.

    Returns:
        A new FlexibleTradingEnv built from the notebook-level ``df``,
        ``prices``, ``signal_features`` and ``window_size``.
    """
    return FlexibleTradingEnv(
        df=df,
        prices=prices,
        signal_features=signal_features,
        window_size=window_size,
        frame_bound=frame_bound,
        fee=0.0005,              # 0.05% per trade
        holding_cost=0.00001,    # small per-step cost
        short_borrow_cost=0.0,   # set if you want to penalize shorting
        include_position_in_obs=True,  # agent knows its position
        render_mode=None,
    )


def make_train_env():
    """Return a zero-argument factory that builds the training environment.

    The factory form is what ``DummyVecEnv([...])`` is given elsewhere in this
    notebook. ``train_frame_bound`` is looked up when the factory is *called*,
    not when it is created.
    """
    def _init():
        return _build_trading_env(train_frame_bound)
    return _init


def make_test_env():
    """Return a zero-argument factory that builds the held-out test environment.

    Identical to the training factory except that it uses
    ``test_frame_bound``, again resolved when the factory is called.
    """
    def _init():
        return _build_trading_env(test_frame_bound)
    return _init

In [None]:
# ============================================
# CREATE ENVIRONMENTS WITH PROPER VECNORMALIZE
# ============================================

# Training environment: observations and rewards are both normalised, and the
# reward clip bound is set to infinity.
train_env = VecNormalize(
    DummyVecEnv([make_train_env()]),
    norm_obs=True,
    norm_reward=True,
    clip_reward=np.inf,
)

# Environment handed to EvalCallback. It is built on TRAIN data as well, is
# created with training=False, and leaves rewards un-normalised. Its
# normalisation statistics are synced from `train_env` later on.
eval_callback_env = VecNormalize(
    DummyVecEnv([make_train_env()]),
    training=False,
    norm_obs=True,
    norm_reward=False,
)

In [8]:
# Fixed seed so that a fresh "Restart Kernel -> Run All" reproduces the same
# training run; change it deliberately to study seed sensitivity.
SEED = 42

# PPO agent with an MLP policy trained on the normalised training env.
model = PPO(
    policy="MlpPolicy",
    env=train_env,
    learning_rate=3e-4,
    n_steps=512,
    batch_size=128,
    gamma=0.99,
    gae_lambda=0.95,
    ent_coef=0.01,
    seed=SEED,
    verbose=1,
    tensorboard_log="./ppo_logs/"
)


Using cpu device


In [None]:
# ============================================
# CALLBACKS WITH PROPER NORMALIZATION SYNC
# ============================================
from stable_baselines3.common.callbacks import BaseCallback

class SyncNormCallback(BaseCallback):
    """Point the eval env's normalisation statistics at the training env's.

    On every ``_on_step`` call the eval env's ``obs_rms`` and ``ret_rms``
    attributes are rebound to the training env's objects (shared by
    reference, not copied).
    """

    def __init__(self, train_env, eval_env, verbose=0):
        super().__init__(verbose)
        self.train_env, self.eval_env = train_env, eval_env

    def _on_step(self):
        # Rebind observation stats first, then return stats. Intended to run
        # before EvalCallback so evaluation sees the current statistics.
        for stat_name in ("obs_rms", "ret_rms"):
            setattr(self.eval_env, stat_name, getattr(self.train_env, stat_name))
        return True

# Keeps eval_callback_env's normalisation statistics tied to train_env.
sync_callback = SyncNormCallback(train_env=train_env, eval_env=eval_callback_env)

# Periodic deterministic evaluation (every 5000 callback steps) with the
# best model and the evaluation logs written to disk.
eval_callback = EvalCallback(
    eval_env=eval_callback_env,
    eval_freq=5000,
    deterministic=True,
    render=False,
    best_model_save_path="./ppo_best_model/",
    log_path="./ppo_eval_logs/",
)

# Periodic snapshots of the model (every 10000 callback steps).
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path="./ppo_checkpoints/",
    name_prefix="ppo_trading_model",
)

In [None]:
# The sync callback is listed first, matching its stated purpose of
# refreshing the eval env's statistics before EvalCallback runs.
training_callbacks = [sync_callback, eval_callback, checkpoint_callback]
model.learn(total_timesteps=200_000, callback=training_callbacks)

# Persist the policy and the normalisation statistics; later cells load both.
model.save("ppo_trading_final")
train_env.save("vec_normalize.pkl")

In [None]:
# ============================================
# QUICK SANITY CHECK ON TRAIN DATA
# ============================================
# Rebuild the training env, restore the saved normalisation statistics, then
# freeze them and turn reward normalisation off.
sanity_env = VecNormalize.load("vec_normalize.pkl", DummyVecEnv([make_train_env()]))
sanity_env.training = False
sanity_env.norm_reward = False

model = PPO.load("ppo_trading_final")

# Step the deterministic policy until the env reports the episode is over,
# recording the env-reported "total_profit" after every step.
profits = []
obs = sanity_env.reset()
while True:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = sanity_env.step(action)
    profits.append(info[0]["total_profit"])
    if done:
        break

plt.plot(profits)
plt.title("Agent Performance (Train Data - Sanity Check)")
plt.xlabel("Step")
plt.ylabel("Cumulative Log Return")
plt.grid()
plt.show()

In [None]:
def _rollout_episode(model, vec_env, base_env, all_prices, fee):
    """Play one deterministic episode and track the resulting equity curves.

    Returns:
        tuple: ``(ticks, positions, equity_gross, equity_net)``, four lists of
        equal length holding one entry for the post-reset state plus one
        entry per environment step.
    """
    obs = vec_env.reset()

    # Read the tick only after reset() so it is the first bar of this episode.
    ticks = [base_env._current_tick]
    positions = [0]        # NOTE(review): assumes the env starts flat - confirm
    equity_gross = [1.0]   # portfolio value ignoring fees, starting at 1.0
    equity_net = [1.0]     # portfolio value after fees, starting at 1.0

    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, _reward, dones, infos = vec_env.step(action)
        done = bool(dones[0])
        step_info = infos[0]

        tick = step_info["tick"]
        pos = step_info["position"]
        held = positions[-1]

        # The position held going INTO this step earns the price move.
        # Exponent +1 (long) gives the price ratio, -1 (short) its inverse,
        # 0 (flat) leaves equity unchanged.
        price_ratio = all_prices[tick] / all_prices[ticks[-1]]
        growth = price_ratio ** held
        gross = equity_gross[-1] * growth
        net = equity_net[-1] * growth

        if pos != held:
            # Entering or exiting costs one fee; a direct long<->short flip
            # closes one position and opens another, so it costs two.
            legs = 1 if (held == 0 or pos == 0) else 2
            net *= 1 - legs * fee

        ticks.append(tick)
        positions.append(pos)
        equity_gross.append(gross)
        equity_net.append(net)

    return ticks, positions, equity_gross, equity_net


def _trade_returns(positions_arr, ticks, all_prices):
    """Compute one simple return per long/short holding period.

    Flat periods are not trades and are skipped. A position that is still
    open when the episode ends is marked to the final price so it is counted.

    Returns:
        tuple: ``(trade_returns, change_idx)`` where ``trade_returns`` is a
        float array and ``change_idx`` holds the step indices at which the
        position changed.
    """
    change_idx = np.flatnonzero(np.diff(positions_arr)) + 1
    final_idx = len(positions_arr) - 1

    # Segment boundaries: every position change, plus the last step so the
    # final holding period is closed out as well.
    boundaries = change_idx.tolist()
    if not boundaries or boundaries[-1] != final_idx:
        boundaries.append(final_idx)

    trade_returns = []
    for start_idx, end_idx in zip(boundaries, boundaries[1:]):
        held = positions_arr[start_idx]
        entry_price = all_prices[ticks[start_idx]]
        exit_price = all_prices[ticks[end_idx]]
        if held == 1:       # long
            trade_returns.append(exit_price / entry_price - 1)
        elif held == -1:    # short
            trade_returns.append(entry_price / exit_price - 1)
        # held == 0: flat period, not a trade

    return np.array(trade_returns, dtype=float), change_idx


def _episode_stats(ticks, positions, equity_gross, equity_net, all_prices,
                   periods_per_year=252):
    """Derive benchmarks, risk metrics and trade statistics for one episode.

    ``periods_per_year`` is the annualisation factor for Sharpe and Calmar
    (252 assumes one step per trading day).
    """
    positions_arr = np.array(positions)

    # Index prices by the ticks actually visited so every series below has
    # exactly one value per recorded step.
    price_path = all_prices[np.asarray(ticks, dtype=int)]
    buy_hold = price_path / price_path[0]
    short_hold = price_path[0] / price_path   # inverse of buy & hold

    trade_returns, trade_indices = _trade_returns(positions_arr, ticks, all_prices)
    winning_trades = trade_returns[trade_returns > 0]
    losing_trades = trade_returns[trade_returns < 0]

    equity_arr = np.array(equity_net)
    log_returns = np.diff(np.log(equity_arr + 1e-12))

    total_return = (equity_arr[-1] / equity_arr[0]) - 1
    total_return_gross = (equity_gross[-1] / equity_gross[0]) - 1
    sharpe = np.mean(log_returns) / (np.std(log_returns) + 1e-8) * np.sqrt(periods_per_year)

    running_max = np.maximum.accumulate(equity_arr)
    drawdowns = (running_max - equity_arr) / running_max
    max_dd = np.max(drawdowns)

    calmar = (
        (total_return * periods_per_year / len(log_returns)) / (max_dd + 1e-8)
        if max_dd > 0 else 0
    )

    n_trades = len(trade_returns)
    loss_sum = np.sum(losing_trades)
    n_steps = len(positions_arr)

    return {
        "total_return": total_return,
        "total_return_gross": total_return_gross,
        "sharpe": sharpe,
        "max_drawdown": max_dd,
        "calmar": calmar,
        "n_trades": n_trades,
        "win_rate": len(winning_trades) / n_trades * 100 if n_trades > 0 else 0,
        "avg_win": np.mean(winning_trades) * 100 if len(winning_trades) > 0 else 0,
        "avg_loss": np.mean(losing_trades) * 100 if len(losing_trades) > 0 else 0,
        # No losing trades -> infinite profit factor.
        "profit_factor": abs(np.sum(winning_trades) / loss_sum) if loss_sum != 0 else np.inf,
        "long_pct": np.sum(positions_arr == 1) / n_steps * 100,
        "short_pct": np.sum(positions_arr == -1) / n_steps * 100,
        "flat_pct": np.sum(positions_arr == 0) / n_steps * 100,
        "price_path": price_path,
        "buy_hold": buy_hold,
        "short_hold": short_hold,
        "drawdowns": drawdowns,
        "trade_returns": trade_returns,
        "trade_indices": trade_indices,
        "positions_arr": positions_arr,
        "equity_net": equity_net,
        "equity_gross": equity_gross,
    }


def _print_episode_report(ep, stats):
    """Print the text summary (returns, risk, trades, position mix) for one episode."""
    rule = "─" * 60
    buy_hold_return = stats["buy_hold"][-1] - 1
    short_hold_return = stats["short_hold"][-1] - 1

    print(f"\n{rule}")
    print(f"EPISODE {ep+1} RESULTS")
    print(rule)

    print("\n📊 RETURNS")
    print(f"   Total Return (net):    {stats['total_return']*100:+.2f}%")
    print(f"   Total Return (gross):  {stats['total_return_gross']*100:+.2f}%")
    print(f"   Buy & Hold Return:     {buy_hold_return*100:+.2f}%")
    print(f"   Short & Hold Return:   {short_hold_return*100:+.2f}%")
    print(f"   Outperformance vs B&H: {(stats['total_return'] - buy_hold_return)*100:+.2f}%")

    print("\n📈 RISK METRICS")
    print(f"   Sharpe Ratio (ann.):   {stats['sharpe']:.3f}")
    print(f"   Max Drawdown:          {stats['max_drawdown']*100:.2f}%")
    print(f"   Calmar Ratio:          {stats['calmar']:.3f}")

    print("\n🔄 TRADE ANALYSIS")
    print(f"   Total Trades:          {stats['n_trades']}")
    print(f"   Win Rate:              {stats['win_rate']:.1f}%")
    print(f"   Avg Winning Trade:     {stats['avg_win']:+.2f}%")
    print(f"   Avg Losing Trade:      {stats['avg_loss']:+.2f}%")
    print(f"   Profit Factor:         {stats['profit_factor']:.2f}")

    # One bar character per 5 percentage points.
    print("\n⚖️  POSITION DISTRIBUTION")
    print(f"   Long:  {stats['long_pct']:5.1f}%  {'█' * int(stats['long_pct']/5)}")
    print(f"   Short: {stats['short_pct']:5.1f}%  {'█' * int(stats['short_pct']/5)}")
    print(f"   Flat:  {stats['flat_pct']:5.1f}%  {'█' * int(stats['flat_pct']/5)}")


def _plot_episode(stats):
    """Draw the equity/drawdown/position figure and the trade-return figure."""
    price_path = stats["price_path"]
    positions_arr = stats["positions_arr"]
    trade_indices = stats["trade_indices"]
    trade_returns = stats["trade_returns"]
    drawdowns = stats["drawdowns"]
    max_dd = stats["max_drawdown"]
    steps = np.arange(len(price_path))

    fig, axes = plt.subplots(3, 1, figsize=(14, 12))

    # Panel 1: price (left axis) against equity curves and benchmarks (right axis)
    ax_price = axes[0]
    ax_equity = ax_price.twinx()

    ax_price.plot(steps, price_path, color="blue", alpha=0.4, linewidth=1, label="Price")
    ax_price.set_ylabel("Price", color="blue")
    ax_price.tick_params(axis="y", labelcolor="blue")

    ax_equity.plot(steps, stats["equity_net"], color="green", linewidth=2, label="PPO (net)")
    ax_equity.plot(steps, stats["equity_gross"], color="lightgreen", linewidth=1,
                   linestyle="--", label="PPO (gross)", alpha=0.7)
    ax_equity.plot(steps, stats["buy_hold"], color="gray", linewidth=1.5,
                   linestyle="--", label="Buy & Hold")
    ax_equity.plot(steps, stats["short_hold"], color="red", linewidth=1,
                   linestyle=":", alpha=0.5, label="Short & Hold")
    ax_equity.axhline(y=1.0, color="black", linestyle="-", alpha=0.3)
    ax_equity.set_ylabel("Equity", color="green")
    ax_equity.tick_params(axis="y", labelcolor="green")
    ax_equity.legend(loc="upper left")

    ax_price.set_title("Equity Curves vs Benchmarks", fontsize=12, fontweight="bold")
    ax_price.grid(alpha=0.3)

    # Panel 2: underwater curve
    ax_dd = axes[1]
    ax_dd.fill_between(steps, 0, -drawdowns * 100, color="red", alpha=0.3)
    ax_dd.plot(steps, -drawdowns * 100, color="red", linewidth=1)
    ax_dd.axhline(y=-max_dd * 100, color="darkred", linestyle="--",
                  label=f"Max DD: {max_dd*100:.1f}%")
    ax_dd.set_ylabel("Drawdown (%)")
    ax_dd.set_title("Underwater Curve (Drawdown)", fontsize=12, fontweight="bold")
    ax_dd.legend(loc="lower left")
    ax_dd.grid(alpha=0.3)

    # Panel 3: price with the held position shaded and position changes marked
    ax_pos = axes[2]

    # Shade one span per run of constant position instead of one per step.
    run_starts = np.concatenate(([0], trade_indices))
    run_ends = np.concatenate((trade_indices, [len(positions_arr) - 1]))
    for run_start, run_end in zip(run_starts, run_ends):
        held = positions_arr[int(run_start)]
        if run_end <= run_start:
            continue
        if held == 1:
            ax_pos.axvspan(run_start, run_end, alpha=0.3, color="green")
        elif held == -1:
            ax_pos.axvspan(run_start, run_end, alpha=0.3, color="red")

    ax_pos.plot(steps, price_path, color="blue", linewidth=1, alpha=0.7)

    # Exits to flat get their own neutral marker rather than the short marker.
    marker_style = {1: ("^", "green"), -1: ("v", "red")}
    for idx in trade_indices:
        marker, colour = marker_style.get(int(positions_arr[idx]), ("o", "gray"))
        ax_pos.scatter(idx, price_path[idx], marker=marker, color=colour, s=50, zorder=5)

    ax_pos.set_ylabel("Price")
    ax_pos.set_xlabel("Step")
    ax_pos.set_title("Position & Trade Markers (▲=Long, ▼=Short, ●=Flat)",
                     fontsize=12, fontweight="bold")
    ax_pos.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Second figure: distribution and compounding of per-trade returns
    if len(trade_returns) > 0:
        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

        ax_hist = axes[0]
        ax_hist.hist(trade_returns * 100, bins=30, color="steelblue", edgecolor="white", alpha=0.7)
        ax_hist.axvline(x=0, color="black", linestyle="--")
        ax_hist.axvline(x=np.mean(trade_returns) * 100, color="green", linestyle="-",
                        label=f"Mean: {np.mean(trade_returns)*100:.2f}%")
        ax_hist.set_xlabel("Trade Return (%)")
        ax_hist.set_ylabel("Frequency")
        ax_hist.set_title("Trade Return Distribution", fontsize=12, fontweight="bold")
        ax_hist.legend()
        ax_hist.grid(alpha=0.3)

        ax_cum = axes[1]
        ax_cum.plot(np.cumprod(1 + trade_returns), color="green", linewidth=2)
        ax_cum.axhline(y=1.0, color="black", linestyle="--", alpha=0.5)
        ax_cum.set_xlabel("Trade #")
        ax_cum.set_ylabel("Cumulative Return")
        ax_cum.set_title("Cumulative Returns by Trade", fontsize=12, fontweight="bold")
        ax_cum.grid(alpha=0.3)

        plt.tight_layout()
        plt.show()


def evaluate_agent(model, vec_env, episodes=1, fee=0.0005, plot=True):
    """
    Comprehensive evaluation for a trading env using SB3 + VecNormalize.

    For each episode the policy is rolled out deterministically, an equity
    curve is rebuilt from the env's prices and reported positions (gross and
    net of ``fee``), and a text report is printed. With ``plot=True`` the
    equity / drawdown / position figure and the trade-return figure are shown.

    Trade statistics count one trade per long or short holding period; flat
    periods are not trades, and a position still open at episode end is
    marked to the final price.

    Args:
        model: Object with ``predict(obs, deterministic=True)``.
        vec_env: Wrapped vectorised env; the raw env is reached through
            ``vec_env.venv.envs[0].unwrapped`` and must expose ``prices``,
            ``_start_tick`` and ``_current_tick``. Each step's info dict must
            contain ``"tick"`` and ``"position"``.
        episodes: Number of episodes to run (must be >= 1).
        fee: Proportional cost charged per position entry or exit; a direct
            long<->short flip is charged twice.
        plot: Set to False to skip all matplotlib output.

    Returns:
        dict: ``total_return``, ``sharpe``, ``max_drawdown``, ``win_rate``,
        ``profit_factor``, ``n_trades``, ``equity_curve`` and ``positions``
        for the LAST episode run.

    Raises:
        ValueError: If ``episodes`` is smaller than 1.
    """
    if episodes < 1:
        raise ValueError("episodes must be >= 1")

    # Get raw (true) underlying environment
    base_env = vec_env.venv.envs[0].unwrapped
    all_prices = base_env.prices.astype(float)
    start_tick = base_env._start_tick

    print("\n" + "="*60)
    print("EVALUATION STARTED")
    print("="*60)
    print(f"Environment starts at tick {start_tick}")

    for ep in range(episodes):
        ticks, positions, equity_gross, equity_net = _rollout_episode(
            model, vec_env, base_env, all_prices, fee
        )
        stats = _episode_stats(ticks, positions, equity_gross, equity_net, all_prices)
        _print_episode_report(ep, stats)
        if plot:
            _plot_episode(stats)

    print("\n" + "="*60)
    print("EVALUATION COMPLETE")
    print("="*60 + "\n")

    return {
        "total_return": stats["total_return"],
        "sharpe": stats["sharpe"],
        "max_drawdown": stats["max_drawdown"],
        "win_rate": stats["win_rate"],
        "profit_factor": stats["profit_factor"],
        "n_trades": stats["n_trades"],
        "equity_curve": equity_net,
        "positions": positions
    }

In [None]:
# ============================================
# FINAL EVALUATION ON HELD-OUT TEST DATA
# ============================================
print("=" * 50)
print("EVALUATING ON UNSEEN TEST DATA")
print("=" * 50)

# Create test environment with TEST frame bounds
test_env = DummyVecEnv([make_test_env()])

# Load normalization statistics from training, then freeze them and turn
# reward normalisation off.
test_env = VecNormalize.load("vec_normalize.pkl", test_env)
test_env.training = False
test_env.norm_reward = False

model = PPO.load("ppo_trading_final")

# Keep the result in a variable: leaving the call as the cell's last
# expression would dump the whole dict, including the full equity curve and
# position lists, as cell output.
test_results = evaluate_agent(model, test_env, episodes=1)

# Display only the scalar metrics.
{
    name: value
    for name, value in test_results.items()
    if name not in ("equity_curve", "positions")
}