In [1]:
import torch

# Sanity check: confirm PyTorch can see a CUDA device before the training cell
# (which requests device="cuda") is run.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)   # should be True
if cuda_ok:
    print(torch.cuda.get_device_name(0))  # should show your GPU model
else:
    # get_device_name() raises when no CUDA device is usable; report the
    # problem instead of leaving the cell in an error state.
    print("CUDA is not available - check the driver and the PyTorch build.")



True
NVIDIA GeForce RTX 4080


In [None]:
from pathlib import Path
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback

from environment.trading_environment import TradingEnv
from models.yolo_extractor import YOLOCNNExtractor

# --- Load data ---
# Metadata table handed to every TradingEnv instance built below; the
# "timestamp" column is parsed into datetimes while reading.
METADATA_CSV = "./data/dataset/metadata.csv"
meta_df = pd.read_csv(METADATA_CSV, parse_dates=["timestamp"])

# --- Factory functions ---
def make_train_env():
    """Return a fresh TradingEnv over `meta_df`, wrapped in a Monitor.

    The Monitor writes its episode log under ./monitor_logs/.
    Takes no arguments so it can be passed directly to DummyVecEnv as an
    environment factory.
    """
    return Monitor(TradingEnv(meta_df), "./monitor_logs/")

def make_eval_env():
    """Return a fresh TradingEnv over `meta_df`, wrapped in a Monitor.

    The Monitor writes its episode log under ./eval_monitor_logs/, keeping
    evaluation episodes separate from the training log.

    NOTE(review): this uses the same `meta_df` as make_train_env(), so the
    evaluation runs on the training data - confirm that is intended.
    """
    return Monitor(TradingEnv(meta_df), "./eval_monitor_logs/")


# --- Build envs ---
# Training env: VecNormalize keeps running statistics of observations and
# returns and updates them while PPO collects rollouts.
train_env = DummyVecEnv([make_train_env])
train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.)

# Evaluation env: observations are normalized the same way, but the wrapper
# must not learn from evaluation episodes and rewards stay un-normalized.
#   training=False    -> running statistics are frozen during evaluation
#   norm_reward=False -> reward normalization is not needed at test time
# EvalCallback copies the training env's statistics into this wrapper before
# each evaluation, so both envs normalize observations identically.
eval_env = DummyVecEnv([make_eval_env])
eval_env = VecNormalize(
    eval_env,
    training=False,
    norm_obs=True,
    norm_reward=False,
    clip_obs=10.,
)

# --- PPO setup ---
# Fixed seed so repeated runs start from the same RNG state (policy
# initialisation, action sampling, env seeding). GPU kernels may still
# introduce small non-determinism.
SEED = 42

# Custom feature extractor configuration forwarded to the policy.
# NOTE(review): features_dim is presumably the size of the extractor's output
# vector - confirm against YOLOCNNExtractor.
policy_kwargs = dict(
    features_extractor_class=YOLOCNNExtractor,
    features_extractor_kwargs=dict(
        model_path="./models/best.pt",
        features_dim=512,
    ),
)

model = PPO(
    "MultiInputPolicy",
    train_env,
    n_steps=128,          # rollout length per env between policy updates
    batch_size=32,        # minibatch size; divides n_steps * n_envs (128) evenly
    n_epochs=4,           # optimisation passes over each rollout
    learning_rate=3e-4,
    policy_kwargs=policy_kwargs,
    verbose=1,
    seed=SEED,
    device="cuda",
    tensorboard_log="./tensorboard_logs/"
)

# --- Eval callback ---
# Periodically evaluates the current policy on eval_env, logs the results and
# keeps the best-scoring model on disk.
EVAL_FREQ = 10_000        # every N steps
N_EVAL_EPISODES = 1       # episodes run per evaluation

eval_callback = EvalCallback(
    eval_env=eval_env,
    n_eval_episodes=N_EVAL_EPISODES,
    eval_freq=EVAL_FREQ,
    log_path="./logs/",
    best_model_save_path="./models/rl/",
    deterministic=True,   # evaluate with deterministic actions
    render=False,
)

# --- Train ---
# Run PPO for the full step budget; eval_callback fires during training.
TOTAL_TIMESTEPS = 200_000
model.learn(callback=eval_callback, total_timesteps=TOTAL_TIMESTEPS)

# --- Save model + normalization stats ---
# The VecNormalize statistics are saved next to the policy: the saved model
# expects observations normalized with these exact statistics at load time.
FINAL_MODEL_PATH = "./models/rl/final_model"
VECNORMALIZE_PATH = "./models/rl/vecnormalize.pkl"
model.save(FINAL_MODEL_PATH)
train_env.save(VECNORMALIZE_PATH)



Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Using cuda device
Logging to ./tensorboard_logs/PPO_5
