In [1]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv, ForexEnv


# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

# Technicals
from finta import TA

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


# Project Modules
from collections import deque
from config import LOG_PATH, MODEL_PATH
import cryptomanager as cm
from datetime import datetime
from IPython.display import clear_output
import os
import time
from trader import trader_agent

# The class that will do the work of trading
trade_me = trader_agent()

1


In [2]:
# Initial DataFrame to train on
df = cm.historical_data_df(days_delta=10000)
df

start date:  1995-06-20 00:00:00


Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
0,2020-04-08 08:09,7314.0,7314.0,7314.0,7314.0,0.0001,1.0,7314.000000
1,2020-04-14 06:31,7101.5,7101.5,7101.5,7101.5,0.0001,1.0,7101.500000
2,2020-04-14 07:55,7100.0,7100.0,7100.0,7100.0,0.0001,1.0,7100.000000
3,2020-04-22 23:29,7118.5,7118.5,7118.5,7118.5,0.0001,1.0,7118.500000
4,2020-04-28 12:28,7748.5,7748.5,7748.5,7748.5,0.0003,1.0,7748.500000
...,...,...,...,...,...,...,...,...
582693,2022-11-05 13:59,21277.0,21277.0,21272.0,21272.0,0.0003,3.0,21275.333333
582694,2022-11-05 14:00,21272.0,21272.0,21271.0,21271.0,0.0003,3.0,21271.333333
582695,2022-11-05 14:01,21272.0,21286.0,21272.0,21286.0,3.6011,21.0,21278.584405
582696,2022-11-05 14:02,21289.0,21300.0,21289.0,21300.0,0.3317,6.0,21299.085620


In [3]:
# The range that we want to train on each interval
window_size = 100
lower_bound = len(df) - window_size
upper_bound = len(df)
df[lower_bound:upper_bound]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
582598,2022-11-05 12:13,21371.0,21371.0,21371.0,21371.0,0.0001,1.0,21371.000000
582599,2022-11-05 12:14,21371.0,21372.0,21369.0,21372.0,0.0108,7.0,21370.953704
582600,2022-11-05 12:15,21371.0,21372.0,21371.0,21371.0,0.0023,5.0,21371.478261
582601,2022-11-05 12:16,21372.0,21376.0,21372.0,21376.0,0.0051,7.0,21374.254902
582602,2022-11-05 12:17,21375.0,21375.0,21369.0,21369.0,0.0046,4.0,21372.652174
...,...,...,...,...,...,...,...,...
582693,2022-11-05 13:59,21277.0,21277.0,21272.0,21272.0,0.0003,3.0,21275.333333
582694,2022-11-05 14:00,21272.0,21272.0,21271.0,21271.0,0.0003,3.0,21271.333333
582695,2022-11-05 14:01,21272.0,21286.0,21272.0,21286.0,3.6011,21.0,21278.584405
582696,2022-11-05 14:02,21289.0,21300.0,21289.0,21300.0,0.3317,6.0,21299.085620


In [4]:
# Adding technicals to DataFrame

def add_columns_df(df):
    df['OBV'] = TA.OBV(df) # unsure if I want to use this yet or not
    df['EMA'] = TA.EMA(df, 200)
    df['RSI'] = TA.RSI(df)
    df  = df.join(TA.PIVOT_FIB(df))
    df = df.join(TA.MACD(df))
    df.fillna(0, inplace=True)
    return df
    
df = add_columns_df(df)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP,OBV,EMA,...,s1,s2,s3,s4,r1,r2,r3,r4,MACD,SIGNAL
0,2020-04-08 08:09,7314.0,7314.0,7314.0,7314.0,0.0001,1.0,7314.0,0.0,7314.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-04-14 06:31,7101.5,7101.5,7101.5,7101.5,0.0001,1.0,7101.5,-0.0001,7207.21875,...,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,-4.767628,-2.648682
2,2020-04-14 07:55,7100.0,7100.0,7100.0,7100.0,0.0001,1.0,7100.0,-0.0002,7171.121178,...,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,-6.12142,-4.071935
3,2020-04-22 23:29,7118.5,7118.5,7118.5,7118.5,0.0001,1.0,7118.5,-0.0001,7157.7679,...,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,-5.919194,-4.697701
4,2020-04-28 12:28,7748.5,7748.5,7748.5,7748.5,0.0003,1.0,7748.5,0.0002,7278.289003,...,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,19.384455,2.466196


In [5]:
# Adding metrics to Env

def add_signals(env, df = df):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'Low'].to_numpy()[start:end]
    keys= []
    for key in df.columns:
        keys.append(key)
    signal_features = env.df.loc[:, [key for key in keys[1:]]].to_numpy()[start:end]
    return prices, signal_features

In [6]:
class Cypto_Env(StocksEnv):
    _process_data = add_signals
    

In [7]:
# The initial training Env before we swtich to up-to-date Env

env2 = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [8]:
 
# create and train prediction model

model = A2C('MlpPolicy', env, verbose=1, tensorboard_log=LOG_PATH, device="cpu") 

name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

episodes = 5
step_count = 1000
for i in range (episodes):
    model = model.learn(total_timesteps=step_count, 
    progress_bar=True,
    ) 
    print(i)

Using cpu device
Logging to ./data\log\A2C_1


Output()

------------------------------------
| time/                 |          |
|    fps                | 127      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.689   |
|    explained_variance | 0.0122   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -1.61    |
|    value_loss         | 13.8     |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 208      |
|    iterations         | 200      |
|    time_elapsed       | 4        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.682   |
|    explained_variance | -15.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | -0.0366  |
|    value_loss         | 0.00795  |
------------------------------------


0
Logging to ./data\log\A2C_2


Output()

-------------------------------------
| time/                 |           |
|    fps                | 335       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.334    |
|    explained_variance | -6.65e+11 |
|    learning_rate      | 0.0007    |
|    n_updates          | 299       |
|    policy_loss        | 0.126     |
|    value_loss         | 0.0513    |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 298      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.41    |
|    explained_variance | -118     |
|    learning_rate      | 0.0007   |
|    n_updates          | 399      |
|    policy_loss        | -0.0134  |
|    value_loss         | 0.0079   |
------------------------------------


1
Logging to ./data\log\A2C_3


Output()

-------------------------------------
| time/                 |           |
|    fps                | 324       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.682    |
|    explained_variance | -3.74e+12 |
|    learning_rate      | 0.0007    |
|    n_updates          | 499       |
|    policy_loss        | -0.0885   |
|    value_loss         | 0.0185    |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 291      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.405   |
|    explained_variance | 0.00184  |
|    learning_rate      | 0.0007   |
|    n_updates          | 599      |
|    policy_loss        | -1.5     |
|    value_loss         | 7.27     |
------------------------------------


2
Logging to ./data\log\A2C_4


Output()

------------------------------------
| time/                 |          |
|    fps                | 350      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.318   |
|    explained_variance | -0.00787 |
|    learning_rate      | 0.0007   |
|    n_updates          | 699      |
|    policy_loss        | 0.582    |
|    value_loss         | 0.76     |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 311      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.248   |
|    explained_variance | -2.28    |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | -0.00326 |
|    value_loss         | 0.00219  |
------------------------------------


3
Logging to ./data\log\A2C_5


Output()

-------------------------------------
| time/                 |           |
|    fps                | 394       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.131    |
|    explained_variance | -1.02e+14 |
|    learning_rate      | 0.0007    |
|    n_updates          | 899       |
|    policy_loss        | 0.0078    |
|    value_loss         | 0.000184  |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 267      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.0737  |
|    explained_variance | -10.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 999      |
|    policy_loss        | 0.000187 |
|    value_loss         | 0.000392 |
------------------------------------


4


In [9]:
prev_min = 0
print(len(df)-lower_bound,len(df))

100 582698


In [11]:
past_trades = deque(maxlen=60)
cur_min = datetime.now().minute
prev_min = 0
test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
wait_time = 1
while True:
    try:
        cur_min = datetime.now().minute
        if cur_min > prev_min or cur_min == 1:
            cur_min = datetime.now().minute
            prev_min = cur_min + wait_time - 1
            
            df = cm.historical_data_df()

            df = add_columns_df(df)

            test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(window_size,upper_bound))
            
            def return_obs(df):
                start = len(df) - window_size
                end = len(df)
                keys= []
                for key in df.columns:
                    keys.append(key)
                signal_features = df.loc[:, [key for key in keys[1:]]].to_numpy()[start:end]
                return signal_features
            
            obs = return_obs(df)
            action = model.predict(obs)
            print("made a prediction")
            print(action)
            if action == 1 or action[0] == 1:
                trade_me.buy_position_at_market("BTC/USD")
                print("b") 
            if action == 0 or action[0] == 0:
                trade_me.sell_position_market("BTCUSD")
                print("s")
            past_trades.append([action, ("price: ", df["Low"].iloc[-1]), ("time: ", datetime.now())])
            for episode in range(episodes):
                model = model.learn(total_timesteps=step_count, 
                )
                # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
                clear_output()
                print("past trades: ", list(past_trades))
    except Exception as e:
        print(e)
        for episode in range(episodes):
            model = model.learn(total_timesteps=step_count,
             )
            # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
            print("past trades: ", list(past_trades))



past trades:  [[(array(1, dtype=int64), None), ('price: ', 21256.0), ('time: ', datetime.datetime(2022, 11, 5, 10, 25, 2, 558546))], [(array(1, dtype=int64), None), ('price: ', 21262.0), ('time: ', datetime.datetime(2022, 11, 5, 10, 27, 0, 949347))]]
made a prediction
(array(1, dtype=int64), None)
buying  BTC/USD
b
Logging to ./data\log\A2C_51
-------------------------------------
| time/                 |           |
|    fps                | 188       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.000719 |
|    explained_variance | nan       |
|    learning_rate      | 0.0007    |
|    n_updates          | 14099     |
|    policy_loss        | -5.68e-07 |
|    value_loss         | 7.03e-05  |
-------------------------------------


In [None]:
# Save the Model
model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))