In [32]:
import os
import datetime as dt
from typing import List, Optional

import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn

import timesfm
from peft import LoraConfig, get_peft_model

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env

# Plotting and backtesting. Convention for this notebook: never set explicit plot colors.
import matplotlib.pyplot as plt
import vectorbt as vbt

In [33]:

# =====================================================================
# Configuration
# =====================================================================
# Central experiment settings; later cells look values up by key.
CONFIG = dict(
    tickers=["AAPL", "MSFT", "NVDA", "GOOGL", "AMZN"],
    macro=["^GSPC", "^VIX"],
    start="2015-01-01",
    end=dt.date.today().isoformat(),  # evaluated when this cell runs
    window=256,
    horizon=1,
    train_ratio=0.8,
    device="cuda" if torch.cuda.is_available() else "cpu",
    lora_r=8,
    ppo_steps=200_000,
    seed=42,
)

# Output folders, created if they do not exist yet.
DATA_DIR, MODEL_DIR, PLOT_DIR, LOG_DIR = "./data", "./models", "./plots", "./logs"
for d in (DATA_DIR, MODEL_DIR, PLOT_DIR, LOG_DIR):
    os.makedirs(d, exist_ok=True)

# Seed NumPy and PyTorch; CUDA generators are seeded only when the GPU is in use.
np.random.seed(CONFIG["seed"])
torch.manual_seed(CONFIG["seed"])
if CONFIG["device"] == "cuda":
    torch.cuda.manual_seed_all(CONFIG["seed"])

In [34]:

# =====================================================================
# 1. Data Download & Pre‑processing
# =====================================================================

def download_prices(tickers: List[str], start: str, end: str) -> pd.DataFrame:
    """Download closing prices for ``tickers`` and return them as one table.

    Args:
        tickers: Symbols passed to ``yf.download``; also used, in this order,
            as the column labels of the result.
        start: Start date forwarded to ``yf.download``.
        end: End date forwarded to ``yf.download``.

    Returns:
        A frame with one ``Close`` column per symbol (columns index named
        ``"ticker"``), resampled to a daily frequency with forward fill.
    """
    raw = yf.download(
        tickers,
        start=start,
        end=end,
        group_by="ticker",
        progress=False,
        auto_adjust=True,
    )

    # Pick the "Close" series out of each per-ticker column group.
    close_by_symbol = {}
    for symbol in tickers:
        close_by_symbol[symbol] = raw[symbol]["Close"]

    closes = pd.concat(close_by_symbol, axis=1)
    closes.columns = pd.Index(tickers, name="ticker")

    # Upsample to one row per calendar day, carrying the previous row forward.
    return closes.resample("1D").ffill()

print("Downloading data …")
price_df = download_prices(
    tickers=CONFIG["tickers"] + CONFIG["macro"],
    start=CONFIG["start"],
    end=CONFIG["end"],
)
# Keep a copy of the downloaded table on disk.
price_df.to_csv(f"{DATA_DIR}/prices.csv")

# Chronological split: the leading `train_ratio` share of rows is for training,
# the remaining rows are held out for validation.
split_idx = int(CONFIG["train_ratio"] * len(price_df))
train_df, val_df = price_df.iloc[:split_idx], price_df.iloc[split_idx:]


Downloading data …


In [35]:

# =====================================================================
# 2. Sliding-Window Dataset
# =====================================================================

class MultiAssetDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over a multi-column price table.

    Sample ``i`` pairs the ``window`` consecutive rows starting at row ``i``
    (all columns, as ``float32``) with the value of ``target_ticker`` located
    ``horizon`` rows after the last row of that window.

    Args:
        df: Table with one column per series; rows are consecutive time steps.
        window: Number of rows in each input slice.
        horizon: How many rows past the end of the window the target is read.
        target_ticker: Column label of the series to predict.

    Raises:
        KeyError: If ``target_ticker`` is not a column of ``df``.
    """

    def __init__(self, df: pd.DataFrame, window: int, horizon: int, target_ticker: str):
        self.df = df
        self.window = window
        self.horizon = horizon
        self.target_idx = df.columns.get_loc(target_ticker)

    def __len__(self) -> int:
        # The last valid start index i satisfies
        #   i + window + horizon - 1 == len(df) - 1,
        # which gives len(df) - window - horizon + 1 samples. Clamp at zero so
        # that a frame that is too short yields an empty dataset instead of a
        # negative length (which makes ``len()`` raise ValueError).
        return max(0, len(self.df) - self.window - self.horizon + 1)

    def __getitem__(self, idx):
        """Return ``(window_slice, future_price)`` for sample ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            # Without this check an out-of-range index would silently return
            # a truncated or empty window.
            raise IndexError(f"sample index out of range for dataset of length {n_samples}")

        window_end = idx + self.window
        window_slice = self.df.iloc[idx:window_end].values.astype(np.float32)
        future_price = np.float32(
            self.df.iloc[window_end + self.horizon - 1, self.target_idx]
        )
        return window_slice, future_price


In [36]:

# =====================================================================
# 3. Model Definition (Encoder → TimesFM → Head)
# =====================================================================

class Encoder(nn.Module):
    """Two-layer 1-D convolutional feature extractor.

    Both convolutions use kernel size 3 with padding 1, so the length of the
    convolved axis is preserved; each convolution is followed by a ReLU.

    Args:
        n_channels: Size of the last axis of the input (input channels).
        d_model: Number of output channels of both convolutions.
    """

    def __init__(self, n_channels: int, d_model: int):
        super().__init__()
        conv_in = nn.Conv1d(n_channels, d_model, kernel_size=3, padding=1)
        conv_hidden = nn.Conv1d(d_model, d_model, kernel_size=3, padding=1)
        self.net = nn.Sequential(conv_in, nn.ReLU(), conv_hidden, nn.ReLU())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` and return a tensor whose last axis has size ``d_model``."""
        # Conv1d convolves over the last axis, so axes 1 and 2 are swapped on
        # the way in and swapped back on the way out.
        channels_first = x.permute(0, 2, 1)
        return self.net(channels_first).permute(0, 2, 1)

class TimesFMWrapper(nn.Module):
    """Chain a convolutional ``Encoder``, a TimesFM core and a linear head.

    Args:
        n_channels: Number of input channels handed to ``Encoder``.
        d_model: Encoder output width and input width of the linear head.

    NOTE(review): the following depend on third-party behaviour that is not
    visible in this file and should be confirmed before relying on the class:
      * ``forward`` calls ``self.core(h)``; whether a ``timesfm.TimesFm``
        object can be called on a tensor is not shown here.
      * ``per_core_batch_size`` is filled from ``CONFIG["lora_r"]``, i.e. the
        same value used as the LoRA rank; this looks unintended.
      * ``target_modules=["linear"]`` is applied to a bare ``nn.Linear``;
        whether PEFT matches any module there is unverified. If it raises
        ``ValueError`` the head stays a plain linear layer.
    """

    def __init__(self, n_channels: int, d_model: int = 512):
        super().__init__()
        self.encoder = Encoder(n_channels, d_model)
        # Build the TimesFM object from the published checkpoint; backend and
        # forecast horizon follow the global CONFIG.
        self.core = timesfm.TimesFm(
            hparams=timesfm.TimesFmHparams(
                backend="gpu" if CONFIG["device"] == "cuda" else "cpu",
                # NOTE(review): batch size is taken from the LoRA rank setting - confirm.
                per_core_batch_size=CONFIG["lora_r"],
                horizon_len=CONFIG["horizon"],
            ),
            checkpoint=timesfm.TimesFmCheckpoint(
                huggingface_repo_id="google/timesfm-1.0-200m-pytorch",
            ),
        )
        # Freeze the TimesFM core. This only happens when the core is an
        # nn.Module; otherwise nothing is frozen (and nothing is registered as
        # a submodule parameter either).
        if isinstance(self.core, nn.Module):
            for p in self.core.parameters():
                p.requires_grad = False

        self.head = nn.Linear(d_model, 1)
        # Try to wrap the head with a LoRA adapter (alpha = 2 * rank). A
        # ValueError from the attempt is reported and the plain linear head
        # created above is kept.
        try:
            lora_cfg = LoraConfig(
                r=CONFIG["lora_r"],
                lora_alpha=CONFIG["lora_r"] * 2,
                target_modules=["linear"],
                bias="none",
            )
            self.head = get_peft_model(self.head, lora_cfg)
        except ValueError as e:
            print("[WARN] LoRA adaptation skipped:", e)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run encoder, core and head; return the head output with its last axis squeezed."""
        h = self.encoder(x)
        # NOTE(review): assumes the core is callable and returns a 3-D tensor
        # indexable as [batch, position, feature] - TODO confirm.
        y_hat = self.core(h)
        # Only the last position along axis 1 is fed to the head.
        out = self.head(y_hat[:, -1, :])
        return out.squeeze(-1)

In [37]:

# =====================================================================
# 4. Gym Environment with Price-Delta Reward
# =====================================================================

class StockForecastEnv(gym.Env):
    """Gymnasium environment that rewards directional return forecasts.

    Each observation is a ``(window, n_columns)`` slice of the price table.
    The action is a single number that is squashed with ``tanh`` and scaled to
    ``[-0.05, 0.05]``. The reward is that value multiplied by the log return
    of the target column between the last observed row and the row
    ``horizon`` steps later, so it is positive when the signs agree.

    Args:
        df: Price table, one column per series; converted to ``float32``.
        target: Column label of the series whose return is scored.
        window: Rows per observation. Defaults to ``CONFIG["window"]``.
        horizon: Look-ahead (in rows) used for the reward. Defaults to
            ``CONFIG["horizon"]``.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
        ValueError: If ``window`` or ``horizon`` is smaller than 1, or if
            ``df`` has fewer than ``window + horizon`` rows (not even one
            step could be scored).
    """

    metadata = {"render_modes": ["human"]}

    def __init__(self, df: pd.DataFrame, target: str,
                 window: Optional[int] = None, horizon: Optional[int] = None):
        super().__init__()
        self.window = CONFIG["window"] if window is None else int(window)
        self.horizon = CONFIG["horizon"] if horizon is None else int(horizon)
        if self.window < 1 or self.horizon < 1:
            raise ValueError("window and horizon must both be >= 1")
        if len(df) < self.window + self.horizon:
            # Fail early with a clear message instead of returning a
            # wrong-shaped observation from reset() or an IndexError in step().
            raise ValueError(
                f"need at least window + horizon = {self.window + self.horizon} rows, "
                f"got {len(df)}"
            )

        self.df = df.values.astype(np.float32)
        self.target_idx = df.columns.get_loc(target)
        self.current_step = 0
        self.action_space = gym.spaces.Box(low=-10.0, high=10.0, shape=(1,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.window, df.shape[1]), dtype=np.float32
        )

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Rewind to the first window and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_step = 0
        obs = self.df[:self.window]
        return obs, {}

    def step(self, action: np.ndarray):
        """Score one forecast and advance the window by one row.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``terminated`` is
            set once no further full window plus horizon fits in the data, in
            which case ``obs`` is all zeros. ``truncated`` is always False.

        Raises:
            IndexError: If called again after the episode has terminated.
        """
        pred_pct = float(np.tanh(np.asarray(action).item()) * 0.05)

        idx_now = self.current_step + self.window - 1
        # Use the configured horizon; the termination test below already
        # accounts for it, so both stay consistent for horizon > 1.
        idx_next = idx_now + self.horizon
        if idx_next >= len(self.df):
            raise IndexError("step() called past the end of the data; call reset() first")

        price_now = self.df[idx_now, self.target_idx]
        price_next = self.df[idx_next, self.target_idx]
        # The small epsilon keeps the logarithm finite if the price ratio is 0.
        log_ret = float(np.log(price_next / price_now + 1e-8))
        reward = pred_pct * log_ret

        self.current_step += 1
        done = (self.current_step + self.window + self.horizon) >= len(self.df)
        if not done:
            obs = self.df[self.current_step: self.current_step + self.window]
        else:
            obs = np.zeros((self.window, self.df.shape[1]), dtype=np.float32)
        info = {"log_return": log_ret, "pred_pct": pred_pct}
        return obs, reward, done, False, info


In [38]:

# =====================================================================
# 5. Train PPO
# =====================================================================

def train_ppo(target: str):
    """Train a PPO agent on ``StockForecastEnv`` for one ticker and save it.

    The environment is built from the global ``train_df``. The trained model
    is written to ``{MODEL_DIR}/ppo_{target}``; nothing is returned.

    Args:
        target: Column label in ``train_df`` whose returns are forecast.
    """
    def _make_env():
        # make_vec_env wraps every environment in a Monitor itself (writing
        # to monitor_dir), so wrapping here as well would double-wrap it.
        return StockForecastEnv(train_df, target)

    vec_env = make_vec_env(_make_env, n_envs=1, seed=CONFIG["seed"], monitor_dir=LOG_DIR)
    try:
        model = PPO(
            "MlpPolicy", vec_env,
            verbose=1,
            tensorboard_log="./ppo_tb",
            learning_rate=1e-4,
            n_steps=4096,
            policy_kwargs=dict(net_arch=[256, 256]),
            gae_lambda=0.9,
            seed=CONFIG["seed"],  # seed the agent too, not only the env
            device=CONFIG["device"],
        )
        model.learn(total_timesteps=CONFIG["ppo_steps"])
        model.save(f"{MODEL_DIR}/ppo_{target}")
    finally:
        # Release the environment even if training or saving fails.
        vec_env.close()


# Train on the first configured equity. `ticker` is defined here so the cell
# runs on a fresh kernel instead of relying on a variable left over from an
# earlier session.
ticker = CONFIG["tickers"][0]
print(f"Training PPO for {ticker} …")
train_ppo(ticker)


Training PPO for AAPL …
Using cuda device
Logging to ./ppo_tb/PPO_5




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.81e+03 |
|    ep_rew_mean     | 0.0456   |
| time/              |          |
|    fps             | 847      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 4096     |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.81e+03      |
|    ep_rew_mean          | 0.0367        |
| time/                   |               |
|    fps                  | 584           |
|    iterations           | 2             |
|    time_elapsed         | 14            |
|    total_timesteps      | 8192          |
| train/                  |               |
|    approx_kl            | 0.00014681811 |
|    clip_fraction        | 0.00703       |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | 0.256         |
