In [2]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv, ForexEnv


# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

# Technicals
from finta import TA

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


# Project Modules
from collections import deque
from config import LOG_PATH, MODEL_PATH
import cryptomanager as cm
from datetime import datetime
from IPython.display import clear_output
import os
import time
from trader import trader_agent

# The class that will do the work of trading
trade_me = trader_agent()

1


In [3]:
# Initial DataFrame to train on
df = cm.historical_data_df(days_delta=10000)
df

1995-06-20 00:00:00


Unnamed: 0,date,Open,High,Low,Close
0,2020-04-08 08:09:00+00:00,7314.0,7314.0,7314.0,7314.0
1,2020-04-14 06:31:00+00:00,7101.5,7101.5,7101.5,7101.5
2,2020-04-14 07:55:00+00:00,7100.0,7100.0,7100.0,7100.0
3,2020-04-22 23:29:00+00:00,7118.5,7118.5,7118.5,7118.5
4,2020-04-28 12:28:00+00:00,7748.5,7748.5,7748.5,7748.5
...,...,...,...,...,...
582658,2022-11-05 13:19:00+00:00,21306.0,21306.0,21306.0,21306.0
582659,2022-11-05 13:20:00+00:00,21305.0,21306.0,21305.0,21306.0
582660,2022-11-05 13:21:00+00:00,21306.0,21306.0,21306.0,21306.0
582661,2022-11-05 13:22:00+00:00,21305.0,21305.0,21305.0,21305.0


In [4]:
# The range that we want to train on each interval
window_size = 100
lower_bound = len(df) - window_size
upper_bound = len(df)
df[lower_bound:upper_bound]

Unnamed: 0,date,Open,High,Low,Close
582563,2022-11-05 11:36:00+00:00,21378.0,21384.0,21378.0,21384.0
582564,2022-11-05 11:37:00+00:00,21382.0,21388.0,21382.0,21388.0
582565,2022-11-05 11:39:00+00:00,21387.0,21387.0,21386.0,21386.0
582566,2022-11-05 11:40:00+00:00,21387.0,21396.0,21387.0,21396.0
582567,2022-11-05 11:41:00+00:00,21401.0,21409.0,21401.0,21409.0
...,...,...,...,...,...
582658,2022-11-05 13:19:00+00:00,21306.0,21306.0,21306.0,21306.0
582659,2022-11-05 13:20:00+00:00,21305.0,21306.0,21305.0,21306.0
582660,2022-11-05 13:21:00+00:00,21306.0,21306.0,21306.0,21306.0
582661,2022-11-05 13:22:00+00:00,21305.0,21305.0,21305.0,21305.0


In [5]:
# Adding technicals to DataFrame

# df['OBV'] = TA.OBV(df) # unsure if I want to use this yet or not
df['EMA'] = TA.EMA(df, 200)
df['RSI'] = TA.RSI(df)
df.fillna(0, inplace=True)
df = df.join(TA.MACD(df))
df.head()

Unnamed: 0,date,Open,High,Low,Close,EMA,RSI,MACD,SIGNAL
0,2020-04-08 08:09:00+00:00,7314.0,7314.0,7314.0,7314.0,7314.0,0.0,0.0,0.0
1,2020-04-14 06:31:00+00:00,7101.5,7101.5,7101.5,7101.5,7207.21875,0.0,-4.767628,-2.648682
2,2020-04-14 07:55:00+00:00,7100.0,7100.0,7100.0,7100.0,7171.121178,0.0,-6.12142,-4.071935
3,2020-04-22 23:29:00+00:00,7118.5,7118.5,7118.5,7118.5,7157.7679,9.107921,-5.919194,-4.697701
4,2020-04-28 12:28:00+00:00,7748.5,7748.5,7748.5,7748.5,7278.289003,79.058101,19.384455,2.466196


In [6]:
# Adding metrics to Env

def add_signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'Low'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['Low', "Volume" ,'EMA',"MACD","SIGNAL", 'RSI']].to_numpy()[start:end]
    return prices, signal_features

In [7]:
class Cypto_Env(StocksEnv):
    _process_data = add_signals
    

In [8]:
# The initial training Env before we swtich to up-to-date Env

env2 = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [17]:
 
# create and train prediction model

model = A2C('MlpPolicy', env, verbose=1, tensorboard_log=LOG_PATH, device="cpu") 

name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

episodes = 5
step_count = 1000
for i in range (episodes):
    model = model.learn(total_timesteps=step_count, 
    progress_bar=True,
    tb_log_name=f"A2C-{name}"
    ) 
    print(i)

LiveError: Only one live display may be active at once

In [18]:
prev_min = 0
print(len(df)-lower_bound,len(df))

In [21]:
past_trades = deque(maxlen=60)
cur_min = datetime.now().minute
prev_min = 0
test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
wait_time = 1
while True:
    try:
        cur_min = datetime.now().minute
        if cur_min > prev_min or cur_min == 1:
            cur_min = datetime.now().minute
            prev_min = cur_min + wait_time - 1
            df = cm.historical_data_df()
            df = df.loc[-100:]
            df['EMA'] = TA.EMA(df, 200)
            df['RSI'] = TA.RSI(df)
            df.fillna(0, inplace=True)
            df = df.join(TA.MACD(df))

            test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(window_size,upper_bound))
            print(test_env._total_reward)
            def return_obs(df):
                start = len(df) - window_size
                end = len(df)
                signal_features = df.loc[:, ['Low','EMA',"MACD","SIGNAL", 'RSI']].to_numpy()[start:end]
                return signal_features
            
            obs = return_obs(df)
            action = model.predict(obs)
            print("made a prediction")
            print(action)
            if action == 1 or action[0] == 1:
                # trade_me.buy_position_at_market("BTC/USD")
                print("b") 
            if action == 0 or action[0] == 0:
                # trade_me.sell_position_market("BTCUSD")
                print("s")
            past_trades.append([action, ("price: ", df["Low"].iloc[-1]), ("time: ", datetime.now())])
            for episode in range(episodes):
                model = model.learn(total_timesteps=step_count, 
                tb_log_name= f"A2C-{str(time.time())}")
                model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
                clear_output()
                print("past trades: ", list(past_trades))
    except Exception as e:
        print(e)
        for episode in range(episodes):
            model = model.learn(total_timesteps=step_count, tb_log_name= f"A2C-{str(time.time())}")
            model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
            print("past trades: ", list(past_trades))



In [None]:
# Save the Model
model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))