In [1]:
# Cell 0 — optional installs
import sys
!{sys.executable} -m pip install --upgrade pip setuptools wheel
!{sys.executable} -m pip install --quiet numpy pandas matplotlib seaborn pillow ffmpeg-python MetaTrader5 stable-baselines3 gymnasium==0.29.1 torch




In [2]:
# Cell 1 — imports & config
import os, glob, json
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize

DATA_DIR = os.path.join("data", "multiasset")
MODEL_DIR = os.path.join("models", "multiasset")
MODEL_FILE = os.path.join(MODEL_DIR, "ppo_multiasset.zip")
VEC_FILE = os.path.join(MODEL_DIR, "vec_normalize.pkl")
EMBED_FILE = os.path.join(MODEL_DIR, "asset_embeddings.npy")
ASSET_MAP_FILE = os.path.join(DATA_DIR, "asset_to_idx.csv")

WINDOW = 50
STARTING_BALANCE = 10_000.0
RISK_PER_TRADE = 0.01
LEVERAGE = 100


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [3]:
# Cell 2 — load datasets and embeddings
def load_normalized_datasets_quick(data_dir=DATA_DIR, window=WINDOW):
    csv_files = sorted(glob.glob(os.path.join(data_dir, "*_normalized.csv")))
    datasets = {}
    for p in csv_files:
        safe = os.path.basename(p).replace("_normalized.csv","")
        df = pd.read_csv(p, index_col=0, parse_dates=True)
        df = df[['o_pc','h_pc','l_pc','c_pc','v_pc','Close_raw']].dropna()
        if len(df) > window:
            datasets[safe] = df
    return datasets

datasets = load_normalized_datasets_quick(DATA_DIR, WINDOW)
embeddings = np.load(EMBED_FILE) if os.path.exists(EMBED_FILE) else np.zeros((len(datasets), EMBED_DIM))
asset_map = {s:i for i,s in enumerate(datasets.keys())}
print("Loaded", len(datasets), "datasets")


Loaded 16 datasets


In [4]:
# Cell 3 — run backtest using the env
from stable_baselines3 import PPO

def run_single_episode(model, datasets, embeddings, asset_map, asset_to_symbol=None, window=WINDOW):
    env = MultiAssetEnv(datasets, asset_map, embeddings, asset_to_symbol=asset_to_symbol, window=window)
    obs, _ = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        act = extract_action_scalar(action)
        obs, reward, done, truncated, info = env.step(act)
    return env  # env.trades, env.balance, env.prices etc.

def run_backtests(model_file=MODEL_FILE, n_runs=5, asset_to_symbol=None):
    model = PPO.load(model_file)
    runs = {}
    for i in range(n_runs):
        env = run_single_episode(model, datasets, embeddings, asset_map, asset_to_symbol, WINDOW)
        runs[f"run{i}"] = {"final_balance": float(env.balance), "trades": env.trades, "prices": env.prices}
        print(f"Run {i}: final_balance={env.balance:.2f} trades={len(env.trades)}")
    return runs

# Example:
# model = PPO.load(MODEL_FILE)
# runs = run_backtests(MODEL_FILE, n_runs=5)


In [5]:
# Cell 4 — metrics helpers
def compute_max_drawdown(equity):
    arr = np.array(equity)
    peaks = np.maximum.accumulate(arr)
    dd = (peaks - arr) / peaks
    return float(np.nanmax(dd)) if arr.size>0 else 0.0

def summarize_runs(runs, starting_balance=STARTING_BALANCE):
    rows = []
    for k,v in runs.items():
        trades = v['trades']
        final = v['final_balance']
        returns = (final/starting_balance)-1.0
        pnl_list = [t.get('pnl',0.0) for t in trades if 'pnl' in t]
        equity = [starting_balance] + list(np.cumsum(pnl_list) + starting_balance) if pnl_list else [starting_balance, final]
        max_dd = compute_max_drawdown(equity)
        rows.append({"run":k, "final_balance":final, "return":returns, "n_trades":len(trades), "max_drawdown":max_dd})
    return pd.DataFrame(rows).sort_values("final_balance", ascending=False)


In [6]:
# Cell 5 — plotting and export
def plot_equity_from_trades(trades, starting_balance=STARTING_BALANCE):
    pnl = [t.get('pnl',0.0) for t in trades if 'pnl' in t]
    equity = [starting_balance]
    for p in pnl:
        equity.append(equity[-1] + p)
    plt.figure(figsize=(10,4))
    plt.plot(equity)
    plt.title("Equity curve")
    plt.xlabel("Trade #")
    plt.ylabel("Balance")
    plt.grid(True)
    plt.show()

def export_trades(runs, out_csv=os.path.join(MODEL_DIR,"backtest_trades.csv")):
    all_trades = []
    for run, v in runs.items():
        for t in v['trades']:
            row = t.copy()
            row['run'] = run
            all_trades.append(row)
    if not all_trades:
        print("No trades to export.")
        return None
    df = pd.DataFrame(all_trades)
    df.to_csv(out_csv, index=False)
    print("Saved trades to", out_csv)
    return out_csv


In [9]:
# Cell 6 — execute backtests & save
model = PPO.load(MODEL_FILE)
runs = run_backtests(MODEL_FILE, n_runs=5, asset_to_symbol={s:s for s in datasets.keys()})
summary = summarize_runs(runs)
display(summary)
export_trades(runs)
with open(os.path.join(MODEL_DIR,"backtest_summary.json"), "w") as fh:
    json.dump({"summary": summary.to_dict(orient="records"), "timestamp": datetime.utcnow().isoformat()}, fh, indent=2)
print("Saved summary")


NameError: name 'extract_action_scalar' is not defined

In [None]:
# Cell 7 — save animation of last run
last_env = run_single_episode(PPO.load(MODEL_FILE), datasets, embeddings, asset_map)
anim_path = os.path.join(MODEL_DIR, "sample_rollout.mp4")
last_env.render(animate=True, save_path=anim_path, fps=12)
print("Saved animation:", anim_path)
