# In [None]:
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

from trading_env import TradingEnv
from custom_policy import CustomYOLOPolicy

# --- Read the metadata table from disk ---
# The "timestamp" column is parsed into datetimes while loading.
meta_df = pd.read_csv(
    "dataset/meta.csv",
    parse_dates=["timestamp"],
)

# --- Environment factory ---
def make_env():
    """Return a new ``TradingEnv`` built from the module-level ``meta_df``."""
    env = TradingEnv(meta_df)
    return env

# Hand the factory to make_vec_env so training runs on a vectorized
# environment; a single copy is enough here.
vec_env = make_vec_env(env_id=make_env, n_envs=1)

# --- Build the PPO agent ---
# The policy class and the environment are PPO's first two parameters; the
# remaining settings are passed by keyword.
model = PPO(
    CustomYOLOPolicy,
    vec_env,
    learning_rate=3e-4,
    n_steps=2048,              # rollout steps collected per env before each update
    batch_size=64,             # minibatch size used during the update
    ent_coef=0.01,             # entropy coefficient in the loss
    tensorboard_log="./logs",  # TensorBoard event files are written here
    verbose=1,
)

# --- Run training for 100k environment steps ---
model.learn(100_000)

# --- Persist the trained agent and report completion ---
model.save(path="ppo-yolo-trading")
print("✅ Training complete. Model saved to: ppo-yolo-trading")
