In [1]:
import os
import torch
import numpy as np
import pickle
import shutil
from Env.market_env import FTPEnv
from Env.recorder import StrategyRecorder
from models.model import TradingPolicy
from Env.benchmarks import FOICPolicy, GLFTPolicy
from models.GateUnits import SGU1, SGU2
from pipeline.agent_trainer import load_signals_bundle

In this notebook, we run backtests for four strategies:

- `DRL MM Agent with adversarial training (DRL1)`
- `DRL MM Agent (DRL2)`
- `GLFT`
- `FOIC`

all evaluated on the same out-of-sample (OOS) data.

In [2]:
def run_drl_backtest(symbol, method_name, weight_path, bundle, phi, fee_rate, train_stats,
                     tick_size=0.001, action_scale=5.0, inventory_scale=2.0):
    """Replay a trained ``TradingPolicy`` over a signal bundle and save the step log.

    Args:
        symbol: Instrument code; only used to build the output path.
        method_name: Sub-directory under ``output/<symbol>/`` for this run
            (the notebook uses "drl" and "arl").
        weight_path: Path to the policy ``state_dict`` checkpoint.
        bundle: Sequence of seven per-step series
            ``(s1, s2, mid, ask, bid, b_max, s_min)``, all indexed by step ``t``.
        phi: Forwarded to ``FTPEnv(phi=...)`` and embedded in the output filename.
        fee_rate: Forwarded to ``FTPEnv(fee_rate=...)``.
        train_stats: Mapping with keys ``'s1_m'``, ``'s1_s'``, ``'s2_m'``,
            ``'s2_s'`` used to standardise the two signals.
        tick_size: Forwarded to ``FTPEnv(tick_size=...)``. Defaults to the
            previously hard-coded 0.001.
        action_scale: Multiplier applied to the raw policy output before it is
            rounded to integers. Defaults to the previously hard-coded 5.0.
        inventory_scale: Divisor applied to ``env.inventory`` to form the third
            state feature. Defaults to the previously hard-coded 2.0.

    Returns:
        The frame produced by ``StrategyRecorder.to_dataframe()`` (also written
        to ``output/<symbol>/<method_name>/backtest_<phi>.parquet``), or ``None``
        when ``weight_path`` does not exist.
    """
    s1, s2, mid, ask, bid, b_max, s_min = bundle

    # Fail fast: verify the checkpoint exists before building model or environment.
    if not os.path.exists(weight_path):
        print(f"Warning: Weights not found at {weight_path}")
        return None

    policy = TradingPolicy()
    # map_location="cpu" lets a checkpoint saved on a GPU be deserialised on a
    # CPU-only machine; the policy and its inputs below live on the CPU anyway.
    state_dict = torch.load(weight_path, map_location="cpu", weights_only=True)
    policy.load_state_dict(state_dict)
    policy.eval()

    env = FTPEnv(phi=phi, tick_size=tick_size, fee_rate=fee_rate)
    recorder = StrategyRecorder()

    # Loop-invariant normalisation constants.
    s1_mean, s1_std = train_stats['s1_m'], train_stats['s1_s']
    s2_mean, s2_std = train_stats['s2_m'], train_stats['s2_s']

    with torch.no_grad():
        for t in range(len(mid)):
            # State = (standardised s1, standardised s2, scaled inventory), batch of 1.
            n_s = torch.tensor([[(s1[t] - s1_mean) / s1_std,
                                 (s2[t] - s2_mean) / s2_std,
                                 env.inventory / inventory_scale]], dtype=torch.float32)

            # Call the module itself (not .forward) so registered hooks run.
            raw_act = policy(n_s).squeeze().cpu().numpy()
            scaled_act = np.round(raw_act * action_scale).astype(int)

            reward, info = env.step(scaled_act, mid[t], ask[t], bid[t], b_max[t], s_min[t])

            record_data = {
                'step': t,
                'mid': mid[t],
                'ask': ask[t],
                'bid': bid[t],
                'off_a': scaled_act[0],
                'off_b': scaled_act[1],
                'action': scaled_act,
                'reward': reward,
                'inventory': env.inventory,
                'cash': env.cash,
                'fee_paid': info.get('fee_paid', 0.0),
                's1_pred': s1[t],
                's2_pred': s2[t]
            }
            # Values reported by the environment win over the defaults above
            # whenever the keys collide.
            record_data.update(info)
            recorder.data.append(record_data)

    df = recorder.to_dataframe()
    save_path = f"output/{symbol}/{method_name}/backtest_{phi}.parquet"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    df.to_parquet(save_path, index=False)
    print(f"Success: {method_name} backtest completed.")
    return df

def run_benchmark_and_save(strategy_name, policy, bundle, phi_param, fee_rate, save_dir, train_stats,
                           tick_size=0.001):
    """Replay an inventory-driven benchmark policy over a bundle and save the step log.

    Args:
        strategy_name: Benchmark label. The value ``'glft'`` selects the
            price-to-tick conversion branch; any other value uses the policy's
            offsets directly.
        policy: Object exposing ``get_action(inventory)`` that returns a pair of
            offsets (ask side first, bid side second).
        bundle: Sequence of seven per-step series
            ``(s1, s2, mid, ask, bid, b_max, s_min)``, all indexed by step ``t``.
        phi_param: Forwarded to ``FTPEnv(phi=...)`` and embedded in the output filename.
        fee_rate: Forwarded to ``FTPEnv(fee_rate=...)``.
        save_dir: Directory that receives ``backtest_<phi_param>.parquet``;
            created if missing.
        train_stats: Unused by this function; kept so the signature stays
            parallel to ``run_drl_backtest``.
        tick_size: Single source for the tick size used by both the environment
            and the GLFT conversion. Defaults to the previously hard-coded 0.001.

    Returns:
        The frame produced by ``StrategyRecorder.to_dataframe()``.
    """
    s1, s2, mid, ask, bid, b_max, s_min = bundle
    env = FTPEnv(phi=phi_param, tick_size=tick_size, fee_rate=fee_rate)
    recorder = StrategyRecorder()

    for t in range(len(mid)):
        raw_offsets = policy.get_action(env.inventory)

        if strategy_name == 'glft':
            # The offsets are treated as price distances from the quote midpoint.
            # Convert each to a whole number of ticks relative to best ask/bid.
            mid_p = (ask[t] + bid[t]) / 2.0
            off_a = ((mid_p + raw_offsets[0]) - ask[t]) / tick_size
            off_b = (bid[t] - (mid_p - raw_offsets[1])) / tick_size
            action = np.round([off_a, off_b]).astype(int)
        else:
            action = np.round(raw_offsets).astype(int)

        reward, info = env.step(action, mid[t], ask[t], bid[t], b_max[t], s_min[t])

        record_data = {
            'step': t, 'mid': mid[t], 'ask': ask[t], 'bid': bid[t],
            'off_a': action[0], 'off_b': action[1],
            'action': action, 'reward': reward, 'inventory': env.inventory, 'cash': env.cash,
            'fee_paid': info.get('fee_paid', 0.0),
            's1_pred': s1[t], 's2_pred': s2[t]
        }
        # Values reported by the environment win over the defaults above
        # whenever the keys collide.
        record_data.update(info)
        recorder.data.append(record_data)

    df = recorder.to_dataframe()
    save_path = f"{save_dir}/backtest_{phi_param}.parquet"
    os.makedirs(save_dir, exist_ok=True)
    df.to_parquet(save_path, index=False)
    print(f"Success: {strategy_name} benchmark completed.")
    return df

In [3]:
# --- Backtest configuration ---------------------------------------------------
# Instrument code; selects the checkpoint, data and output sub-folders.
symbol = "510300"

# (first_day, last_day) of the SGU training window, as integers that look like
# YYYYMMDD dates; both values appear in the SGU checkpoint filenames.
sgu_train_range = (20240401, 20240528)

phi_val = 1e-4   # handed to FTPEnv as phi; also part of the agent checkpoint / output names
fee_rate = 0.0   # handed to FTPEnv as fee_rate

# Checkpoints live under ./checkpoints/<symbol>, resolved against the working directory.
base_path = os.path.abspath("checkpoints")
checkpoint_dir = os.path.join(base_path, symbol)

In [4]:
# Restore the two signal models (SGU1, SGU2) and the SGU2 scaler from checkpoint_dir.
# All three artefacts share the "<start>_<end>" suffix taken from sgu_train_range.
_sgu_tag = f"{sgu_train_range[0]}_{sgu_train_range[1]}"
s1_raw_path = os.path.join(checkpoint_dir, f"sgu1_{_sgu_tag}.json")
s2_path = os.path.join(checkpoint_dir, f"sgu2_{_sgu_tag}.pth")
scaler_path = os.path.join(checkpoint_dir, f"sgu2_scaler_{_sgu_tag}.pkl")

# SGU1 is loaded from a throw-away copy placed in the working directory.
# NOTE(review): the reason for loading from a copy instead of s1_raw_path is not
# visible in this notebook - confirm before simplifying.
m1 = SGU1()
tmp_s1_path = "tmp_sgu1_model.json"
shutil.copy2(s1_raw_path, tmp_s1_path)
try:
    m1.load(tmp_s1_path)
    print(">>> SGU1 loaded successfully.")
finally:
    # Remove the copy whether or not the load succeeded.
    if os.path.exists(tmp_s1_path):
        os.remove(tmp_s1_path)

m2 = SGU2(input_size=1, hidden_size=10)
m2.load(s2_path)
print(">>> SGU2 (LSTM) loaded successfully.")

# NOTE(review): pickle.load can execute arbitrary code; only point scaler_path
# at files produced by this project's own training pipeline.
with open(scaler_path, 'rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)

# Days after the end of the SGU training window form the S3 evaluation pool.
s3_start_date = sgu_train_range[1]
print(f">>> Scaler loaded. S3 evaluation starts after: {s3_start_date}")

>>> SGU1 loaded successfully.
SGU2 model loaded from d:\UW\Course\2026 WINTER\522_trade_sys\replication\checkpoints\510300\sgu2_20240401_20240528.pth and set to eval mode.
>>> SGU2 (LSTM) loaded successfully.
>>> Scaler loaded. S3 evaluation starts after: 20240528


In [5]:
# Collect the days available as snapshot files; the first 8 characters of each
# parquet filename are used as the date key.
snap_dir = f'data/{symbol}/snap'
all_dates = sorted(
    fname[:8] for fname in os.listdir(snap_dir) if fname.endswith('.parquet')
)

# The S3 pool is every day strictly after the SGU training window.
s3_dates = [day for day in all_dates if int(day) > s3_start_date]
n_s3 = len(s3_dates)
if n_s3 < 5:
    raise ValueError(f"Insufficient S3 data after {s3_start_date}.")

# Chronological cut points at 70% and 85% of the pool. Days before train_idx
# form the train subset and days from val_idx onward form the test subset;
# the slice in between is not used in this cell.
train_idx, val_idx = int(n_s3 * 0.70), int(n_s3 * 0.85)
train_dates, test_dates = s3_dates[:train_idx], s3_dates[val_idx:]

print(f">>> S3 Data Pool: {n_s3} days | Train-subset: {len(train_dates)} | Test-subset: {len(test_dates)}")

# The train bundle is only needed for normalisation statistics; the test bundle
# is what the backtests replay.
print("Bundling S3-Train data...")
train_bundle = load_signals_bundle(symbol, train_dates, m1, m2, scaler)
print("Bundling S3-Test data (Blind Test)...")
s3_bundle = load_signals_bundle(symbol, test_dates, m1, m2, scaler)

>>> S3 Data Pool: 22 days | Train-subset: 15 | Test-subset: 4
Bundling S3-Train data...


                                                                 

Bundling S3-Test data (Blind Test)...


                                                               

In [6]:
# Normalisation statistics come from the S3-train bundle only, so the test
# bundle is standardised with constants it did not contribute to.
s1_t = train_bundle[0]
s2_t = train_bundle[1]

train_stats = {}
for _sig_name, _sig_values in (("s1", s1_t), ("s2", s2_t)):
    train_stats[f"{_sig_name}_m"] = np.mean(_sig_values)
    # The small epsilon keeps the later division by the std safe for a constant signal.
    train_stats[f"{_sig_name}_s"] = np.std(_sig_values) + 1e-9

print(f">>> Normalization stats locked: {train_stats}")

>>> Normalization stats locked: {'s1_m': 2.0911791, 's1_s': 0.3246540139184723, 's2_m': 0.019486755, 's2_s': 0.4570208797918091}


In [7]:
# Checkpoints of the two learned agents: trained without / with the adversary.
path_drl = os.path.join(checkpoint_dir, f"without_adv/agent_best_val_{phi_val}.pth")
path_arl = os.path.join(checkpoint_dir, f"with_adv/agent_best_val_{phi_val}.pth")

# Learned agents: plain DRL first, then the adversarially trained one.
for _method, _weights in (("drl", path_drl), ("arl", path_arl)):
    run_drl_backtest(symbol, _method, _weights, s3_bundle, phi_val, fee_rate, train_stats)

# GLFT benchmark
glft_p = GLFTPolicy(gamma=0.001, kappa=100, A=0.1, sigma=0.01)
run_benchmark_and_save(
    strategy_name="glft",
    policy=glft_p,
    bundle=s3_bundle,
    phi_param=phi_val,
    fee_rate=fee_rate,
    save_dir=f"output/{symbol}/glft",
    train_stats=train_stats,
)

# FOIC benchmark
foic_p = FOICPolicy(offset_a=0, offset_b=0)
run_benchmark_and_save(
    strategy_name="foic",
    policy=foic_p,
    bundle=s3_bundle,
    phi_param=phi_val,
    fee_rate=fee_rate,
    save_dir=f"output/{symbol}/foic",
    train_stats=train_stats,
)

print("\n>>> All backtests completed. Parquet datasets generated in 'output/' directory.")

Success: drl backtest completed.
Success: arl backtest completed.
Success: glft benchmark completed.
Success: foic benchmark completed.

>>> All backtests completed. Parquet datasets generated in 'output/' directory.
