Dependencies

In [28]:
import gymnasium as gym
import gym_anytrading
import pandas as pd
import numpy as np
import os

from stable_baselines3 import PPO
from matplotlib import pyplot as plt
from gym_anytrading.envs import StocksEnv
from stable_baselines3.common.vec_env import DummyVecEnv
from sklearn.preprocessing import MinMaxScaler, StandardScaler

Data Preparation

In [50]:
header_names = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
df = pd.read_csv('eth_usdt_5m.csv', header=None, names=header_names)

df['Date'] = pd.to_datetime(df['Date'], unit='ms')
df.set_index('Date', inplace=True)
df = df.sort_index(ascending=True)

df['Day_Of_Week'] = df.index.day_of_week
df['Time_of_Day'] = df.index.astype('int64') // 10**9

df['Close_pct_change'] = df['Close'].pct_change() * 100

df['Typical_Price'] = (df['High'] + df['Low'] + df['Close']) / 3
df['Volume_Weighted_Price'] = df['Typical_Price'] * df['Volume']
df['Cumulative_Volume_Weighted_Price'] = df['Volume_Weighted_Price'].cumsum()
df['Cumulative_Volume'] = df['Volume'].cumsum()
df['VWAP'] = df['Cumulative_Volume_Weighted_Price'] / df['Cumulative_Volume']

df = df[['Day_Of_Week','Time_of_Day', 'Close', 'Volume', 'Close_pct_change', 'VWAP']]

df.head(10)

Unnamed: 0_level_0,Day_Of_Week,Time_of_Day,Close,Volume,Close_pct_change,VWAP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-03-15 00:00:00,0,1615766400,1858.45,1822.14,,1853.75
2021-03-15 00:05:00,0,1615766700,1863.0,779.2,0.244828,1856.101373
2021-03-15 00:10:00,0,1615767000,1863.9,199.95,0.048309,1856.660401
2021-03-15 00:15:00,0,1615767300,1872.05,214.51,0.437255,1857.540472
2021-03-15 00:20:00,0,1615767600,1878.3,264.05,0.333859,1859.023905
2021-03-15 00:25:00,0,1615767900,1878.45,188.07,0.007986,1860.106332
2021-03-15 00:30:00,0,1615768200,1876.9,183.79,-0.082515,1860.942327
2021-03-15 00:35:00,0,1615768500,1878.85,81.6,0.103895,1861.324997
2021-03-15 00:40:00,0,1615768800,1880.15,175.66,0.069191,1862.133501
2021-03-15 00:45:00,0,1615769100,1879.4,95.8,-0.03989,1862.591194


In [51]:
header_names = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
btc_df = pd.read_csv('btc_usdt_5m.csv', header=None, names=header_names)
btc_df = btc_df[['Date', 'Close']]
btc_df['Date'] = pd.to_datetime(btc_df['Date'], unit='ms')
btc_df.set_index('Date', inplace=True)
btc_df = btc_df.sort_index(ascending=True)
btc_df['btc_close_pct_change'] = btc_df['Close'].pct_change() * 100
btc_df.head(5)

Unnamed: 0_level_0,Close,btc_close_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-14 00:45:00,61360.0,
2021-03-14 00:50:00,61434.5,0.121415
2021-03-14 00:55:00,61386.5,-0.078132
2021-03-14 01:00:00,61119.0,-0.435764
2021-03-14 01:05:00,60958.0,-0.263421


In [52]:
df['btc_close_pct_change'] = btc_df['btc_close_pct_change']
df['btc_close_pct_change'] = df['btc_close_pct_change'].fillna(method='ffill')
df=df.dropna()
df.head(5)

Unnamed: 0_level_0,Day_Of_Week,Time_of_Day,Close,Volume,Close_pct_change,VWAP,btc_close_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-03-15 00:05:00,0,1615766700,1863.0,779.2,0.244828,1856.101373,0.187133
2021-03-15 00:10:00,0,1615767000,1863.9,199.95,0.048309,1856.660401,0.133778
2021-03-15 00:15:00,0,1615767300,1872.05,214.51,0.437255,1857.540472,0.452051
2021-03-15 00:20:00,0,1615767600,1878.3,264.05,0.333859,1859.023905,0.185695
2021-03-15 00:25:00,0,1615767900,1878.45,188.07,0.007986,1860.106332,0.134421


In [53]:
hourly_df = df.resample('1h').agg({'Close': 'last'})
hourly_df.head(5)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2021-03-15 00:00:00,1874.65
2021-03-15 01:00:00,1887.3
2021-03-15 02:00:00,1885.2
2021-03-15 03:00:00,1886.5
2021-03-15 04:00:00,1864.05


In [55]:
hourly_df['hourly_close_pct_change'] = hourly_df['Close'].pct_change() * 100
hourly_df.head(5)

Unnamed: 0_level_0,Close,hourly_close_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-15 00:00:00,1874.65,
2021-03-15 01:00:00,1887.3,0.674793
2021-03-15 02:00:00,1885.2,-0.11127
2021-03-15 03:00:00,1886.5,0.068958
2021-03-15 04:00:00,1864.05,-1.190034


In [56]:
df['hourly_close_pct_change'] = hourly_df['hourly_close_pct_change']
df['hourly_close_pct_change'] = df['hourly_close_pct_change'].fillna(method='ffill')
df=df.dropna()
df.head(5)

Unnamed: 0_level_0,Day_Of_Week,Time_of_Day,Close,Volume,Close_pct_change,VWAP,btc_close_pct_change,hourly_close_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-03-15 01:00:00,0,1615770000,1880.0,314.39,0.285387,1864.964786,0.125822,0.674793
2021-03-15 01:05:00,0,1615770300,1873.5,261.52,-0.345745,1865.49275,-0.214466,0.674793
2021-03-15 01:10:00,0,1615770600,1877.75,347.76,0.226848,1866.160428,0.391235,0.674793
2021-03-15 01:15:00,0,1615770900,1880.8,148.73,0.162428,1866.497642,0.064394,0.674793
2021-03-15 01:20:00,0,1615771200,1887.2,198.39,0.340281,1867.123454,0.407846,0.674793


In [61]:
scaled_columns = ['Day_Of_Week',
                'Time_of_Day',
                'Close', 
                'Volume',
                'Close_pct_change',
                'VWAP',
                'btc_close_pct_change',
                'hourly_close_pct_change']
scaler = MinMaxScaler()
df[scaled_columns] = scaler.fit_transform(df[scaled_columns])

In [58]:
df.dtypes

Day_Of_Week                float64
Time_of_Day                float64
Close                      float64
Volume                     float64
Close_pct_change           float64
VWAP                       float64
btc_close_pct_change       float64
hourly_close_pct_change    float64
dtype: object

In [63]:
def add_signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'Close'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['Day_Of_Week',
                                    'Time_of_Day',
                                    'Close', 
                                    'Volume',
                                    'Close_pct_change',
                                    'VWAP',
                                    'btc_close_pct_change',
                                    'hourly_close_pct_change']].to_numpy()[start:end]
    return prices, signal_features                                

Custom Environment

In [72]:
class MyCustomEnv(StocksEnv):
    _process_data = add_signals

env2 = MyCustomEnv(df=df, window_size = 24, frame_bound=(24,258300))

In [73]:
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

Tensorboard Log

In [75]:
log_path = os.path.join('model', 'Log')

Model Training

In [76]:
model = PPO('MlpPolicy',
        env,
        learning_rate=5e-5,
        gamma=0.95,
        ent_coef=0.03,
        target_kl=0.01,
        verbose=1,
        tensorboard_log=log_path) 
model.learn(total_timesteps=100000000)

Using cpu device
Logging to model\Log\PPO_4
-----------------------------
| time/              |      |
|    fps             | 691  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 473           |
|    iterations           | 2             |
|    time_elapsed         | 8             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 0.00090570224 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.693        |
|    explained_variance   | -0.0889       |
|    learning_rate        | 5e-05         |
|    loss                 | -0.0225       |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000268     |
|    value_loss           | 0.00053       

  shares = (self._total_profit * (1 - self.trade_fee_ask_percent)) / last_trade_price


----------------------------------------
| time/                   |            |
|    fps                  | 500        |
|    iterations           | 318        |
|    time_elapsed         | 1302       |
|    total_timesteps      | 651264     |
| train/                  |            |
|    approx_kl            | 0.01289156 |
|    clip_fraction        | 0.0517     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.666     |
|    explained_variance   | 0.118      |
|    learning_rate        | 5e-05      |
|    loss                 | -0.0278    |
|    n_updates            | 3040       |
|    policy_gradient_loss | -0.00533   |
|    value_loss           | 1.3e-05    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 499         |
|    iterations           | 319         |
|    time_elapsed         | 1306        |
|    total_timesteps      | 653312      |
| train/  

In [None]:
training_log_path = os.path.join(log_path)

In [None]:
!tensorboard --logdir={training_log_path}