In [1]:
# Install required packages (run once). If your environment already has them, you can skip.
import sys
!{sys.executable} -m pip install --upgrade pip setuptools wheel
!{sys.executable} -m pip install --quiet numpy pandas matplotlib pillow ffmpeg-python
!{sys.executable} -m pip install --quiet "stable-baselines3==2.3.0" "gymnasium==0.29.1" "shimmy==0.2.1" torch tensorboard




In [2]:
# Imports
import os
import glob
import json
import time
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import gymnasium as gym
from gymnasium import spaces

# stable-baselines3 + wrappers
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# optional: stable-baselines3 env checker
try:
    from stable_baselines3.common.env_checker import check_env as sb3_check_env
except Exception:
    sb3_check_env = None


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [3]:
# Configuration (edit to taste)
DATA_DIR = os.path.join("data", "multiasset")   # where Notebook 01 saved *_normalized.csv and asset_to_idx.csv
MODEL_DIR = os.path.join("models", "multiasset")
os.makedirs(MODEL_DIR, exist_ok=True)

WINDOW = 50
TRAIN_EPISODES = 50
TOTAL_TIMESTEPS = TRAIN_EPISODES * 10_000  # used for model.learn()
N_ENVS = 4
EMBED_DIM = 8

EMBED_FILE = os.path.join(MODEL_DIR, "asset_embeddings.npy")
ASSET_MAP_FILE = os.path.join(DATA_DIR, "asset_to_idx.csv")

MODEL_FILE = os.path.join(MODEL_DIR, "ppo_multiasset.zip")
VEC_FILE = os.path.join(MODEL_DIR, "vec_normalize.pkl")
METRICS_FILE = os.path.join(MODEL_DIR, "eval_metrics.json")

# PPO params (change as needed)
PPO_PARAMS = dict(
    policy="MlpPolicy",
    verbose=1,
    learning_rate=3e-4,
    batch_size=256,
    n_epochs=10,
    ent_coef=0.01
)


In [4]:
# Load normalized CSVs created by Notebook 01
def load_normalized_datasets(data_dir, window=WINDOW):
    csv_files = sorted(glob.glob(os.path.join(data_dir, "*_normalized.csv")))
    if not csv_files:
        raise FileNotFoundError(f"No normalized CSVs found in {data_dir}. Run Notebook 01 first.")
    datasets = {}
    for p in csv_files:
        safe = os.path.basename(p).replace("_normalized.csv","")
        df = pd.read_csv(p, index_col=0, parse_dates=True)
        # required columns
        required = ['o_pc','h_pc','l_pc','c_pc','v_pc','Close_raw']
        if not all(c in df.columns for c in required):
            # try auto-convert if raw fields exist
            if all(c in df.columns for c in ['open','high','low','close','volume']):
                tmp = df.copy()
                tmp['o_pc'] = tmp['open'].pct_change()
                tmp['h_pc'] = tmp['high'].pct_change()
                tmp['l_pc'] = tmp['low'].pct_change()
                tmp['c_pc'] = tmp['close'].pct_change()
                tmp['v_pc'] = tmp['volume'].pct_change()
                tmp['Close_raw'] = tmp['close']
                tmp = tmp.dropna()
                df = tmp[required]
            else:
                raise ValueError(f"{p} missing required columns and cannot be auto-converted.")
        else:
            df = df[required].dropna()
        if len(df) > window:
            datasets[safe] = df
    if not datasets:
        raise ValueError("No datasets passed the minimum length requirement.")
    print(f"Loaded {len(datasets)} datasets from {data_dir}")
    return datasets

# Example:
datasets = load_normalized_datasets(DATA_DIR, WINDOW)


Loaded 16 datasets from data\multiasset


In [5]:
def load_asset_map_and_embeddings(datasets, asset_map_file=ASSET_MAP_FILE, embed_file=EMBED_FILE, embed_dim=EMBED_DIM):
    """
    Ensures asset->index ordering matches `datasets` keys.
    If asset_map_file exists it will be used, otherwise it creates one from datasets order.
    Loads embeddings if present and matches order; otherwise creates random embeddings matching order and saves them.
    Returns (embeddings ndarray shape (n_assets, embed_dim), asset_to_idx dict)
    """
    # use datasets order as canonical order
    safe_names = list(datasets.keys())
    # try to load existing map; prefer file only if keys match dataset keys
    asset_to_idx = {s:i for i,s in enumerate(safe_names)}
    if os.path.exists(asset_map_file):
        try:
            df_map = pd.read_csv(asset_map_file, index_col=0, squeeze=True)
            # df_map may be a series mapping safe->idx
            loaded_map = df_map.to_dict() if hasattr(df_map, "to_dict") else dict(df_map)
            # compare sets
            if set(loaded_map.keys()) == set(safe_names):
                asset_to_idx = {k:int(loaded_map[k]) for k in safe_names}
            else:
                # override file to match current datasets order
                pd.Series({s:i for i,s in enumerate(safe_names)}).to_csv(asset_map_file)
                asset_to_idx = {s:i for i,s in enumerate(safe_names)}
        except Exception:
            pd.Series({s:i for i,s in enumerate(safe_names)}).to_csv(asset_map_file)
            asset_to_idx = {s:i for i,s in enumerate(safe_names)}
    else:
        # create the file
        pd.Series({s:i for i,s in enumerate(safe_names)}).to_csv(asset_map_file)
        asset_to_idx = {s:i for i,s in enumerate(safe_names)}

    n_assets = len(asset_to_idx)
    # load embeddings if present and compatible
    if os.path.exists(embed_file):
        emb = np.load(embed_file)
        if emb.shape[0] != n_assets or emb.shape[1] < 1:
            # mismatch → recreate aligned embeddings
            emb = np.random.randn(n_assets, embed_dim).astype(np.float32)
            np.save(embed_file, emb)
            print("Embedding file mismatch; recreated embeddings to match dataset order.")
    else:
        emb = np.random.randn(n_assets, embed_dim).astype(np.float32)
        np.save(embed_file, emb)
        print(f"Created new embeddings: {embed_file}")

    # ensure dtype
    emb = emb.astype(np.float32)
    print(f"Loaded embeddings for {n_assets} assets (dim={emb.shape[1]})")
    return emb, asset_to_idx

# Example:
embeddings, asset_map = load_asset_map_and_embeddings(datasets)


Loaded embeddings for 16 assets (dim=8)


import gym
import numpy as np
from gym import spaces

class MultiAssetEnv(gym.Env):
    """
    MultiAssetEnv:
    - datasets: dict safe_name -> dataframe with columns [o_pc,h_pc,l_pc,c_pc,v_pc,Close_raw]
    - asset_to_idx: dict safe_name->index
    - embeddings: ndarray (n_assets, embed_dim)
    Observation: (window, 5 + 1 + embed_dim)  -> features, balance, embedding per timestep
    Action space: Discrete(3): 0 hold, 1 buy, 2 sell
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, datasets, asset_to_idx, embeddings, window=WINDOW, initial_balance=10_000, risk=0.02, leverage=100):
        super().__init__()
        self.datasets = datasets
        self.asset_to_idx = asset_to_idx
        self.embeddings = embeddings
        self.window = window
        self.initial_balance = initial_balance
        self.risk = risk
        self.leverage = leverage

        self.safe_names = list(self.datasets.keys())
        self.n_assets = len(self.safe_names)
        self.asset_idx = 0
        self.current_safe = self.safe_names[self.asset_idx]
        self.data = self.datasets[self.current_safe]
        self.ptr = self.window
        self.balance = self.initial_balance
        self.position = 0

        self.embed_dim = self.embeddings.shape[1] if self.embeddings is not None else 0
        n_features = 5
        obs_dim = n_features + 1 + self.embed_dim
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.window, obs_dim), dtype=np.float32)
        self.action_space = spaces.Discrete(3)

        # RNG
        self.np_random = None
        self.seed(None)

        # trackers for render (per symbol)
        self.prices = {s:[] for s in self.safe_names}
        self.actions = {s:[] for s in self.safe_names}

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.seed(seed)
        self.asset_idx = int(self.np_random.integers(0, self.n_assets))
        self.current_safe = self.safe_names[self.asset_idx]
        self.data = self.datasets[self.current_safe]
        self.ptr = self.window
        self.balance = self.initial_balance
        self.position = 0

        # initialize trackers with the initial window close prices
        init_prices = list(self.data['Close_raw'].iloc[self.ptr-self.window:self.ptr].values)
        self.prices[self.current_safe] = init_prices.copy()
        self.actions[self.current_safe] = [0]*len(init_prices)
        return self._get_obs(), {}

    def _get_obs(self):
        window_data = self.data.iloc[self.ptr-self.window:self.ptr]
        feat = window_data[['o_pc','h_pc','l_pc','c_pc','v_pc']].values.astype(np.float32)
        balance_col = np.full((self.window,1), float(self.balance)/float(self.initial_balance), dtype=np.float32)
        emb = np.zeros((self.window, self.embed_dim), dtype=np.float32)
        idx = self.asset_idx
        if self.embeddings is not None and idx < self.embeddings.shape[0]:
            emb = np.tile(self.embeddings[idx].reshape(1,-1).astype(np.float32), (self.window, 1))
        obs = np.concatenate([feat, balance_col, emb], axis=1)
        return obs

    def step(self, action):
        prev_close = float(self.data['Close_raw'].iloc[self.ptr-1])
        new_close = float(self.data['Close_raw'].iloc[self.ptr])

        if action == 1:
            self.position = 1
        elif action == 2:
            self.position = -1
        else:
            self.position = 0

        pnl = (new_close - prev_close)/prev_close * self.position * self.leverage
        self.balance *= (1.0 + pnl * self.risk)
        reward = float(pnl)

        # record
        safe = self.current_safe
        self.prices[safe].append(new_close)
        self.actions[safe].append(int(action))

        self.ptr += 1
        done = self.ptr >= len(self.data)
        info = {'balance': self.balance, 'asset': safe}
        return self._get_obs(), reward, done, False, info

    def render(self, mode='human', animate=False, interval=200, save_path=None, fps=10):
        import matplotlib.animation as animation
        assets_with_data = [s for s in self.safe_names if len(self.prices.get(s,[]))>0]
        if not assets_with_data:
            print("⚠️ Nothing recorded to render yet.")
            return
        n = len(assets_with_data)
        fig, axes = plt.subplots(n,1, figsize=(10,4*n), sharex=True)
        if n==1:
            axes=[axes]
        if not animate:
            for i,s in enumerate(assets_with_data):
                prices = np.array(self.prices[s])
                acts = np.array(self.actions[s])
                steps = np.arange(len(prices))
                axes[i].plot(steps, prices, label=f"{s} price")
                axes[i].scatter(steps[acts==1], prices[acts==1], marker='^', color='green', label='Buy')
                axes[i].scatter(steps[acts==2], prices[acts==2], marker='v', color='red', label='Sell')
                axes[i].legend(); axes[i].grid(True)
            plt.tight_layout()
            if save_path and save_path.lower().endswith(('.png','.jpg','.pdf')):
                plt.savefig(save_path, dpi=200); print("Saved static render to", save_path)
            plt.show()
            return

        # Animated rendering
        lines, buys, sells = [], [], []
        for ax, s in zip(axes, assets_with_data):
            line, = ax.plot([],[],lw=2)
            buy_sc = ax.scatter([],[], marker='^', color='green')
            sell_sc = ax.scatter([],[], marker='v', color='red')
            lines.append(line); buys.append(buy_sc); sells.append(sell_sc)
            arr = np.array(self.prices[s])
            ax.set_xlim(0, max(1,len(arr)))
            ax.set_ylim(np.min(arr)*0.98, np.max(arr)*1.02)
            ax.set_title(s)
            ax.grid(True)

        def update(frame):
            artists=[]
            for i,s in enumerate(assets_with_data):
                pr = np.array(self.prices[s]); ac = np.array(self.actions[s])
                if frame>len(pr): f = len(pr)
                else: f = frame
                x = np.arange(f); y = pr[:f]
                lines[i].set_data(x,y); artists.append(lines[i])
                buys_idx = x[ac[:f]==1] if f>0 else []
                sells_idx = x[ac[:f]==2] if f>0 else []
                if len(buys_idx)>0:
                    buys[i].set_offsets(np.c_[buys_idx, pr[:f][ac[:f]==1]])
                else:
                    buys[i].set_offsets([])
                if len(sells_idx)>0:
                    sells[i].set_offsets(np.c_[sells_idx, pr[:f][ac[:f]==2]])
                else:
                    sells[i].set_offsets([])
                artists += [buys[i], sells[i]]
            return artists

        frames = len(self.prices[assets_with_data[0]])
        ani = animation.FuncAnimation(fig, update, frames=frames, interval=interval, blit=True, repeat=False)
        if save_path:
            ext = os.path.splitext(save_path)[1].lower()
            if ext=='.mp4':
                ani.save(save_path, writer='ffmpeg', fps=fps)
            elif ext=='.gif':
                ani.save(save_path, writer='pillow', fps=fps)
            print("Saved animation to", save_path)
        plt.show()


In [None]:
class MultiAssetEnv(gym.Env):
    def __init__(
        self,
        datasets,
        asset_map,
        embeddings,
        window=50,
        initial_balance=10000,
        risk=0.02,
        leverage=100
    ):
        super().__init__()

        self.datasets = datasets
        self.asset_map = asset_map              # NEW
        self.embeddings = embeddings            # NEW
        self.window = window
        self.initial_balance = initial_balance
        self.risk = risk
        self.leverage = leverage

        self.asset_idx = 0
        self.data = self.datasets[list(self.datasets.keys())[self.asset_idx]]
        self.ptr = self.window
        self.balance = self.initial_balance
        self.position = 0

        # Add embedding_dim to observations
        emb_dim = self.embeddings.shape[1]

        n_features = 5 + emb_dim    # price features + embedding
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(window, n_features),
            dtype=np.float32
        )

        self.action_space = spaces.Discrete(3)


In [7]:
def make_env_factory(datasets, asset_to_idx, embeddings, window=WINDOW):
    def _init():
        return MultiAssetEnv(datasets, asset_to_idx, embeddings, window=window)
    return _init


In [8]:
def train_and_save(datasets, asset_map, embeddings, model_dir=MODEL_DIR, total_timesteps=TOTAL_TIMESTEPS, n_envs=N_ENVS):
    os.makedirs(model_dir, exist_ok=True)
    env_fns = [make_env_factory(datasets, asset_map, embeddings, WINDOW) for _ in range(n_envs)]
    vec_env = DummyVecEnv(env_fns)
    vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False, clip_obs=10.)

    model = PPO(**PPO_PARAMS, env=vec_env, tensorboard_log=os.path.join(model_dir,"tensorboard"))
    print("Starting training for", total_timesteps, "timesteps...")
    model.learn(total_timesteps=total_timesteps)
    model.save(MODEL_FILE)
    vec_env.save(VEC_FILE)
    print("Saved model ->", MODEL_FILE)
    return model, vec_env


In [9]:
def evaluate_model(model_path, datasets, n_episodes=10, window=WINDOW):
    model = PPO.load(model_path)

    # Dummy embeddings in case actual ones were not saved
    emb_dummy = np.zeros((len(datasets), EMBED_DIM), dtype=np.float32)
    asset_map = {s: i for i, s in enumerate(datasets.keys())}

    env = MultiAssetEnv(
        datasets=datasets,
        asset_map=asset_map,
        embeddings=emb_dummy,
        window=window
    )

    rewards = []

    for ep in range(n_episodes):
        obs, _ = env.reset()
        done = False
        ep_r = 0.0

        while not done:
            action, _ = model.predict(obs, deterministic=True)

            # --- FIXED ACTION CONVERSION ---
            if isinstance(action, (np.integer, int)):
                act = int(action)
            else:
                act = int(np.array(action).flatten()[0])
            # --------------------------------

            obs, reward, done, truncated, info = env.step(act)
            ep_r += float(reward)

        rewards.append(ep_r)

    metrics = {
        "n_episodes": n_episodes,
        "mean_reward": float(np.mean(rewards)),
        "std_reward": float(np.std(rewards)),
        "total_reward": float(np.sum(rewards)),
        "timestamp": datetime.utcnow().isoformat()
    }

    with open(METRICS_FILE, "w") as fh:
        json.dump(metrics, fh, indent=2)

    print("Evaluation metrics:", metrics)
    return metrics


In [10]:
# Main pipeline
datasets = load_normalized_datasets(DATA_DIR, WINDOW)
embeddings, asset_map = load_asset_map_and_embeddings(datasets)

# Train
model, vec_env = train_and_save(datasets, asset_map, embeddings, MODEL_DIR, TOTAL_TIMESTEPS, N_ENVS)

# Evaluate
metrics = evaluate_model(MODEL_FILE, datasets, n_episodes=10, window=WINDOW)
print("Done. Metrics:", metrics)


Loaded 16 datasets from data\multiasset
Loaded embeddings for 16 assets (dim=8)
Using cpu device




Starting training for 500000 timesteps...
Logging to models\multiasset\tensorboard\PPO_2
-----------------------------
| time/              |      |
|    fps             | 506  |
|    iterations      | 1    |
|    time_elapsed    | 16   |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 432         |
|    iterations           | 2           |
|    time_elapsed         | 37          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.009282675 |
|    clip_fraction        | 0.0696      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.052509964 |
|    learning_rate        | 0.0003      |
|    loss                 | 0.594       |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0112     |
|    value_loss           | 1

TypeError: MultiAssetEnv.__init__() got an unexpected keyword argument 'asset_map'

In [None]:
# Visualize a single rollout and save animation/mp4
model = PPO.load(MODEL_FILE)
env = MultiAssetEnv(datasets, asset_map, embeddings, window=WINDOW)
obs, _ = env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    act = int(action) if isinstance(action,(np.integer,int)) else int(action[0])
    obs, reward, done, truncated, info = env.step(act)

# Save animation to file
out_path = os.path.join(MODEL_DIR, "last_run.mp4")
env.render(animate=True, save_path=out_path, fps=12)


In [None]:
# Visualize a single rollout and save animation/mp4
model = PPO.load(MODEL_FILE)
env = MultiAssetEnv(datasets, asset_map, embeddings, window=WINDOW)

obs, _ = env.reset()
done = False

while not done:
    action, _ = model.predict(obs, deterministic=True)
    action = int(action) if isinstance(action, (int, np.integer)) else int(action[0])
    obs, reward, done, truncated, info = env.step(action)

# Save the animated trading chart
animation_path = os.path.join(MODEL_DIR, "sample_rollout.mp4")
env.render(animate=True, save_path=animation_path)

print("Saved rollout animation:", animation_path)


In [None]:
# Export summary of training session
summary = {
    "window": WINDOW,
    "train_episodes": TRAIN_EPISODES,
    "total_timesteps": TOTAL_TIMESTEPS,
    "n_assets": len(datasets),
    "embedding_dimension": EMBED_DIM,
    "model_file": MODEL_FILE,
    "vec_file": VEC_FILE,
    "metrics_file": METRICS_FILE,
    "rollout_animation": os.path.join(MODEL_DIR, "sample_rollout.mp4"),
    "timestamp": datetime.utcnow().isoformat()
}

SUMMARY_FILE = os.path.join(MODEL_DIR, "run_summary.json")
with open(SUMMARY_FILE, "w") as f:
    json.dump(summary, f, indent=2)

print("=== TRAINING COMPLETE ===")
print("Summary saved to:", SUMMARY_FILE)
print("\nArtifacts generated:")
for k, v in summary.items():
    print(f"{k}: {v}")
