In [1]:
# --- Imports ---
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor
from env import TradingEnv
from extractor import CNNLSTMPolicy
from pathlib import Path
import pandas as pd

# --- Config ---
# meta.csv is the dataset's metadata table; meta_root is the folder that holds it.
meta_path = Path("../dataset/meta.csv").resolve()
meta_root = meta_path.parent.resolve()
meta_df = pd.read_csv(meta_path, parse_dates=["timestamp"])

# Train/Test split
# Positional split (no shuffling): the leading 80% of rows go to training and
# the remaining rows to testing. Each part gets a fresh 0-based index.
split_ratio = 0.8
split_idx = int(split_ratio * len(meta_df))
train_df, test_df = (
    part.reset_index(drop=True)
    for part in (meta_df.iloc[:split_idx], meta_df.iloc[split_idx:])
)

print(f"[INFO] Training on {len(train_df)} samples, testing on {len(test_df)}")

# --- Env builders ---
# Settings shared by the training and testing environments. Keeping them in a
# single place guarantees both splits are simulated with identical parameters.
_ENV_KWARGS = dict(
    n_steps=8,
    image_size=(128, 128),
    num_actions=3,
    starting_balance=100_000,
    leverage=1,
    risk_per_trade=1,
)


def _make_env_factory(get_df):
    """Return a zero-argument callable that constructs a ``TradingEnv``.

    Args:
        get_df: Zero-argument callable returning the metadata frame for the
            split. It is only invoked when the environment is actually built,
            so the frame is looked up at construction time rather than when
            the factory is created.

    Returns:
        A function suitable for passing to ``DummyVecEnv([...])``.
    """
    def _init():
        return TradingEnv(meta_df=get_df(), root_dir=meta_root, **_ENV_KWARGS)
    return _init


def make_train_env():
    """Return a factory that builds a ``TradingEnv`` over ``train_df``."""
    return _make_env_factory(lambda: train_df)


def make_test_env():
    """Return a factory that builds a ``TradingEnv`` over ``test_df``."""
    return _make_env_factory(lambda: test_df)

# --- Env setup ---
# Training environment: one TradingEnv inside a DummyVecEnv, wrapped by VecMonitor.
train_env = VecMonitor(DummyVecEnv([make_train_env()]))

# Test environment: vectorised the same way, but without the monitor wrapper.
test_env = DummyVecEnv([make_test_env()])

# --- Model path ---
# Archive name without the ".zip" extension.
base_model_path = "ppo-cnnlstm-trading"

# --- Load or initialize model ---
# Build a fresh PPO agent unless "<base_model_path>.zip" already exists,
# in which case that archive is loaded and attached to the training env.
if not os.path.exists(f"{base_model_path}.zip"):
    print("[🆕 INITIALIZING NEW MODEL]")
    model = PPO(
        policy=CNNLSTMPolicy,
        env=train_env,
        learning_rate=1e-4,
        n_steps=256,
        batch_size=64,
        tensorboard_log="./logs",
        verbose=1,
        device="cuda",
    )
else:
    print(f"[📂 LOADING EXISTING MODEL from {base_model_path}]")
    model = PPO.load(base_model_path, env=train_env, device="cuda")

# --- Training loop ---
# Train in fixed-size chunks and write a checkpoint after every chunk.
total_target_steps = 5_000_000
chunk_size = 100_000

# A model restored from disk already carries its own step counter, so only the
# remainder up to the target is scheduled (ceil division; 0 if already reached).
remaining_steps = max(total_target_steps - model.num_timesteps, 0)
chunks = -(-remaining_steps // chunk_size)

for i in range(chunks):
    print(f"\n[🔁 TRAINING CHUNK {i+1}/{chunks}]")

    # reset_num_timesteps=False keeps model.num_timesteps accumulating across
    # successive learn() calls instead of restarting from zero each chunk.
    model.learn(total_timesteps=chunk_size, reset_num_timesteps=False)

    # Name the checkpoint after the model's real step counter rather than the
    # loop index, so a resumed run does not overwrite earlier checkpoints
    # under misleading names.
    steps_so_far = model.num_timesteps
    chunk_model_path = f"{base_model_path}-{steps_so_far//1000}k"
    model.save(chunk_model_path)
    print(f"[✅ Saved checkpoint: {chunk_model_path}.zip]")

    # Also refresh "<base_model_path>.zip": that is the file the load-or-init
    # step looks for, so an interrupted run can resume from the latest chunk.
    model.save(base_model_path)

# NOTE: the evaluation that follows runs once, after the final training chunk.
# Final evaluation: roll the trained policy through the test environment once.
print("[🚀 TESTING STARTED]")
obs = test_env.reset()
total_rewards, episode_reward = [], 0

# Step budget: one environment step per row of the test split.
for _step in range(len(test_df)):
    # Deterministic (greedy) action; the recurrent-state return value is unused.
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = test_env.step(action)
    # The vectorised env holds a single environment, hence index 0.
    episode_reward += reward[0]
    # print(f"Step {_step}: Action={action[0]}, Info={info}")  # show chosen action + env info

    if done[0]:
        total_rewards.append(episode_reward)
        print(f"Episode reward: {episode_reward:.2f}")
        episode_reward = 0
        # No explicit reset here: a VecEnv resets a finished environment
        # automatically, and `obs` already holds the first observation of the
        # next episode. Resetting again would discard it and reset twice.

print("\n[📊 TESTING COMPLETE]")
if total_rewards:
    print(f"Average Reward: {sum(total_rewards)/len(total_rewards):.2f}")
    print(f"Max Reward: {max(total_rewards):.2f}")
    print(f"Min Reward: {min(total_rewards):.2f}")
else:
    print("No episodes completed during testing.")




Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:env:[ENV INIT] Loaded 45117 rows with 5 TFs, history=8 steps.
INFO:env:[ENV INIT] Loaded 11280 rows with 5 TFs, history=8 steps.


[INFO] Training on 45117 samples, testing on 11280
[🆕 INITIALIZING NEW MODEL]
Using cuda device

[🔁 TRAINING CHUNK 1/50]
Logging to ./logs/PPO_0
----------------------------
| time/              |     |
|    fps             | 39  |
|    iterations      | 1   |
|    time_elapsed    | 6   |
|    total_timesteps | 256 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 20            |
|    iterations           | 2             |
|    time_elapsed         | 24            |
|    total_timesteps      | 512           |
| train/                  |               |
|    approx_kl            | 1.9724015e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.1          |
|    explained_variance   | 6.59e-05      |
|    learning_rate        | 0.0001        |
|    loss                 | 4.69e+04      |
|    n_updates            | 10      

KeyboardInterrupt: 