In [None]:
import numpy as np

import pandas as pd
# Hourly BTC/USD candles; the same file ships in the GitHub repo under
# examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"

# Load with the "date" column parsed as the index, then order chronologically,
# discard incomplete rows and discard fully duplicated rows.
# The trailing .copy() hands back an independent frame so that later column
# assignments operate on an object that owns its data.
df = (
    pd.read_csv(url, parse_dates=["date"], index_col="date")
    .sort_index()
    .dropna()
    .drop_duplicates()
    .copy()
)

In [None]:
# df is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"
close = df["close"]
volume = df["Volume USD"]

# feature_close : ( close[t] - close[t-1] ) / close[t-1]
df["feature_close"] = close.pct_change()

# feature_open / feature_high / feature_low : <price>[t] / close[t]
for price_col in ("open", "high", "low"):
    df[f"feature_{price_col}"] = df[price_col] / close

# feature_volume : volume[t] / max(volume[t-7*24+1 : t+1])
# (the rolling window holds the current row plus the 7*24 - 1 rows before it)
df["feature_volume"] = volume / volume.rolling(window=7 * 24).max()

# Clean again: the first rows have no previous close / no full rolling window.
df.dropna(inplace=True)
# Each step, the environment will return 5 inputs : "feature_close", "feature_open", "feature_high", "feature_low", "feature_volume"

In [None]:
import gymnasium as gym
import environments

#from environments import TradingEnv

In [None]:
import gym_trading_env

In [None]:
#import gymnasium as gym
#import gym_trading_env
#env = gym.make('MultiDatasetTradingEnv',
#    dataset_dir = 'preprocessed_data/*.pkl',
#    positions=[-1, 0, 1],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
#)

In [None]:
# Run 10 episodes
#for _ in range(10):
  # At every episode, the env will pick a new dataset.
#  done, truncated = False, False
#  observation, info = env.reset()
#  while not done and not truncated:
#      position_index = env.action_space.sample() # Pick random position index
#      observation, reward, done, truncated, info = env.step(position_index)

In [None]:
from environments import TradingEnv

# Settings for the training environment, gathered in one place and then
# unpacked into the TradingEnv constructor.
env_config = dict(
    name="BTCUSD",
    df=df,  # Dataset carrying the custom "feature_*" columns
    # Sign convention: negative = SHORT, 0 = OUT, positive = LONG.
    # The exposure used here is +/-0.8 rather than +/-1.
    positions=[-0.8, 0.0, 0.8],
    trading_fees=0.01 / 100,  # 0.01% per buy/sell (Binance fees)
    borrow_interest_rate=0.0003 / 100,  # 0.0003% per timestep (one timestep = 1h here)
)
env = TradingEnv(**env_config)

In [None]:
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import VecEnv

In [None]:
# Policy identifier handed to the PPO constructor when the agent is built.
policy = "MlpPolicy"

In [None]:
# Baseline agent with library-default hyperparameters.
# NOTE(review): `model` is rebound to a tuned PPO instance before training,
# so this baseline is never trained as the notebook stands.
model = PPO(policy=policy, env=env, verbose=1)

In [None]:
# Tuned PPO agent. Experiment with different hyperparameters based on your
# problem and environment.
model = PPO(
    policy,  # reuse the notebook-level policy name instead of repeating the literal
    env,
    verbose=1,
    learning_rate=0.001,  # Adjust as needed
    batch_size=32,  # Adjust based on memory and learning speed
    gamma=0.99,  # Discount factor
    seed=42,  # Fix the pseudo-random generators so training runs are repeatable
)


In [None]:
# Number of timesteps the agent is trained for.
total_timesteps = 200_000
model.learn(total_timesteps=total_timesteps)

In [None]:
# Path under which the trained agent is persisted.
checkpoint_path = "my_trading_agent"
model.save(checkpoint_path)

In [None]:
# Second, fresh environment used to roll out the trained agent.
# NOTE(review): it is built from the same `df` the agent was trained on.
eval_env_config = dict(
    name="BTCUSD",
    df=df,  # Dataset carrying the custom "feature_*" columns
    # Sign convention: negative = SHORT, 0 = OUT, positive = LONG.
    # The exposure used here is +/-0.8 rather than +/-1.
    positions=[-0.8, 0, 0.8],
    trading_fees=0.01 / 100,  # 0.01% per buy/sell (Binance fees)
    borrow_interest_rate=0.0003 / 100,  # 0.0003% per timestep (one timestep = 1h here)
)
env2 = TradingEnv(**eval_env_config)

In [None]:
# Run one evaluation episode until it terminates or is truncated.
done, truncated = False, False
obs, info = env2.reset()
while not (done or truncated):
    # deterministic=True makes the agent return its deterministic action
    # instead of sampling from the policy, so the evaluation rollout does not
    # vary from run to run because of action sampling.
    # The second return value is bound to `_states` rather than `_` so that
    # IPython's last-output variable is left untouched.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = env2.step(action)
    # ... (evaluate further)

In [None]:
# Once the episode has ended, write its logs out for rendering.
render_dir = "render_logs"
env2.unwrapped.save_for_render(dir=render_dir)