In [1]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv, ForexEnv


# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

# Technicals
from finta import TA

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


# Project Modules
from collections import deque
from config import LOG_PATH, MODEL_PATH
import cryptomanager as cm
import assetpicker as ap
from datetime import datetime
from IPython.display import clear_output
import os
import time
from trader import trader_agent

# The class that will do the work of trading
trade_me = trader_agent()

In [2]:
# Initial DataFrame to train on
df = ap.historical_data_df(days_delta=10000)
df



start date:  1995-07-12 00:00:00


Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
0,2015-12-01 09:04,209.4300,209.43,209.43,209.43,470.0,2.0,209.430000
1,2015-12-01 09:05,209.3500,209.35,209.32,209.32,1160.0,3.0,209.322586
2,2015-12-01 09:06,209.4200,209.42,209.42,209.42,100.0,1.0,209.420000
3,2015-12-01 09:18,209.4400,209.44,209.44,209.44,1000.0,1.0,209.440000
4,2015-12-01 09:19,209.4300,209.43,209.42,209.43,800.0,4.0,209.427500
...,...,...,...,...,...,...,...,...
1396432,2022-11-25 21:55,402.2800,402.28,402.28,402.28,243.0,6.0,402.267675
1396433,2022-11-25 21:56,402.2801,402.30,402.20,402.20,3603.0,18.0,402.234926
1396434,2022-11-25 21:57,402.2000,402.20,402.00,402.20,5753.0,47.0,402.086326
1396435,2022-11-25 21:58,402.1000,402.10,402.10,402.10,550.0,13.0,402.100182


In [3]:
# The range that we want to train on each interval
window_size = 1 
lower_bound = window_size
upper_bound = len(df)
df[lower_bound:upper_bound]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
1,2015-12-01 09:05,209.3500,209.35,209.32,209.32,1160.0,3.0,209.322586
2,2015-12-01 09:06,209.4200,209.42,209.42,209.42,100.0,1.0,209.420000
3,2015-12-01 09:18,209.4400,209.44,209.44,209.44,1000.0,1.0,209.440000
4,2015-12-01 09:19,209.4300,209.43,209.42,209.43,800.0,4.0,209.427500
5,2015-12-01 09:20,209.4300,209.43,209.42,209.42,520.0,6.0,209.424615
...,...,...,...,...,...,...,...,...
1396432,2022-11-25 21:55,402.2800,402.28,402.28,402.28,243.0,6.0,402.267675
1396433,2022-11-25 21:56,402.2801,402.30,402.20,402.20,3603.0,18.0,402.234926
1396434,2022-11-25 21:57,402.2000,402.20,402.00,402.20,5753.0,47.0,402.086326
1396435,2022-11-25 21:58,402.1000,402.10,402.10,402.10,550.0,13.0,402.100182


In [4]:
# Adding technicals to DataFrame
def add_columns_df(df):
    df['OBV'] = TA.OBV(df) # unsure if I want to use this yet or not
    df['EMA'] = TA.EMA(df, 200)
    df['RSI'] = TA.RSI(df)
    df  = df.join(TA.PIVOT_FIB(df))
    df = df.join(TA.MACD(df))
    df.fillna(0, inplace=True)
    return df
    
df = add_columns_df(df)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP,OBV,EMA,...,s1,s2,s3,s4,r1,r2,r3,r4,MACD,SIGNAL
0,2015-12-01 09:04,209.43,209.43,209.43,209.43,470.0,2.0,209.43,0.0,209.43,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2015-12-01 09:05,209.35,209.35,209.32,209.32,1160.0,3.0,209.322586,-1160.0,209.374725,...,209.43,209.43,209.43,209.43,209.43,209.43,209.43,209.43,-0.002468,-0.001371
2,2015-12-01 09:06,209.42,209.42,209.42,209.42,100.0,1.0,209.42,-1060.0,209.389968,...,209.31854,209.31146,209.3,209.28854,209.34146,209.34854,209.36,209.37146,-4.4e-05,-0.000827
3,2015-12-01 09:18,209.44,209.44,209.44,209.44,1000.0,1.0,209.44,-60.0,209.402664,...,209.42,209.42,209.42,209.42,209.42,209.42,209.42,209.42,0.001782,5.7e-05
4,2015-12-01 09:19,209.43,209.43,209.42,209.43,800.0,4.0,209.4275,-860.0,209.408241,...,209.44,209.44,209.44,209.44,209.44,209.44,209.44,209.44,0.002339,0.000736


In [5]:
# Adding metrics to Env
def add_signals(env, df = df):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'Low'].to_numpy()[start:end]
    keys= []
    for key in df.columns:
        keys.append(key)
    signal_features = env.df.loc[:, [key for key in keys[1:]]].to_numpy()[start:end]
    return prices, signal_features

In [6]:
class Cypto_Env(StocksEnv):
    _process_data = add_signals
    def __init__(self, df, window_size, frame_bound):
        super().__init__(df, window_size, frame_bound)
        self.trade_fee_bid_percent = 0  # unit
        self.trade_fee_ask_percent = 0  # unit
    

In [7]:
# The initial training Env before we swtich to up-to-date Env
env2 = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [8]:
 
# create and train prediction model
episodes = 5
step_count = 1000

# model = A2C.load(path="./data/model/1667783635.2006977", env = env)
model = A2C('MlpPolicy', env, verbose=1, device="cuda") 

name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

for i in range (episodes):
    model = model.learn(total_timesteps=step_count, 
    ) 
    print(i)

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 676      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.653   |
|    explained_variance | -358     |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.00248  |
|    value_loss         | 0.000146 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 685      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.427   |
|    explained_variance | -50.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | -0.0253  |
|    value_loss      

In [9]:
prev_min = 0
print(len(df)-lower_bound,len(df))

# stop double selling
has_sold = True

1396436 1396437


In [10]:

df_temp = cm.historical_data_df()


df_temp = add_columns_df(df_temp)

df_temp = pd.concat([df, df_temp], ignore_index=True, sort=False)

df_temp.iloc[:,0]
past_trades = deque(maxlen=60)


In [14]:
cur_min = datetime.now().minute
prev_min = cur_min
test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
wait_time = window_size
min_elapsed = 0
def rebalance_df():
    df_temp = cm.historical_data_df()

    df_temp = add_columns_df(df_temp)
    df_temp = pd.concat([df, df_temp], ignore_index=True, sort=False)

    test_env = Cypto_Env(df=df_temp, window_size=window_size, frame_bound=(window_size,upper_bound))
    return df_temp

def return_obs(df):
    start = len(df) - window_size
    end = len(df)
    keys= []
    for key in df.columns:
        keys.append(key)
    signal_features = df.loc[:, [key for key in keys[1:]]].to_numpy()[start:end]
    return signal_features

while True:
    try:
        cur_min = datetime.now().minute
        if cur_min == 0:
            prev_min = 0
        if cur_min > prev_min:
            clear_output()

            min_elapsed += wait_time

            print("mins til model update: ", window_size - min_elapsed)
            cur_min = datetime.now().minute
            prev_min = cur_min + wait_time - 1
            isOpen = trade_me.isOpen
            if not isOpen:
                # trade_me.buy_position_at_market(ticker = "BTC/USD")
                print("market is closed, returning")
                continue

            df_temp = rebalance_df()
            
            obs = return_obs(df_temp)

            action, _ = model.predict(obs)
            print("made a prediction")
            print(action)
            print("Last was sold: ",has_sold)
            
            if action == 1:
                has_sold = False
                # if market is closed, buy BTCUSD
                trade_me.buy_position_at_market(ticker = "SPY")
                print("b") 
            elif ( action == 0 ):
                has_sold = True
                # if market is closed, sell BTCUSD
                trade_me.sell_position_limit(ticker = "SPY")
                print("s")
            past_trades.append(action)
            print("past trades: ", list(past_trades))
            #   print(df_temp.iloc[-window_size:])
            if min_elapsed >= window_size:
                min_elapsed = 0
            for episode in range(episodes):
                model.learn(total_timesteps=step_count, 
                )
                        # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
                print(episode +1)

    except Exception as e:
        print(e)   
        print("past trades: ", list(past_trades))
        # min_elapsed += wait_time
        if min_elapsed >= window_size:
            min_elapsed = 0
        for episode in range(episodes):
            model.learn(total_timesteps=step_count, 
            )

                # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
            print(episode +1)



mins til model update:  0
market is closed, returning
made a prediction
0
Last was sold:  True
Market is closed
SPY not sold; could be day trade.
s
past trades:  [array(1, dtype=int64), array(0, dtype=int64), array(0, dtype=int64)]
------------------------------------
| time/                 |          |
|    fps                | 708      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.121   |
|    explained_variance | -0.038   |
|    learning_rate      | 0.0007   |
|    n_updates          | 4099     |
|    policy_loss        | -0.0181  |
|    value_loss         | 0.000372 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 699      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |   

In [None]:
# Save the Model
model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
past_trades

In [None]:
def Crypto_USD() -> list:

    
    output = []
    cryptos = cm.get_cryptos_tradable()
    for crypto in cryptos:
        if crypto.split("/")[1] == "USD":
            output.append(crypto)
    return output

USD_crypto =  Crypto_USD()   
# USD_crypto.pop(Crypto_USD().index("BTC/USD"))
print(len(USD_crypto))


In [None]:
# get data frame for each crypto
df_list = []
for crypto in USD_crypto:
    try:
        df_temp = cm.historical_data_df(days_delta=0, crypto=[crypto])
        df_temp = add_columns_df(df)
        df_list.append(df_temp)
    except:
        continue



In [None]:
# create the environments
init_envs = []
for df_single in df_list:
    env2 = Cypto_Env(df=df_single, window_size=window_size, frame_bound=(lower_bound,upper_bound))
    env_maker = lambda: env2
    env = DummyVecEnv([env_maker])
    init_envs.append(env2)
    

In [None]:
print(init_envs)

In [None]:
# train model for each env
models = []
for env_single in init_envs: 
    model_temp = A2C('MlpPolicy', env, verbose=0, device="cuda") 

    name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

    episodes = 1
    step_count = 1000
    for i in range (episodes):
        model_temp = model_temp.learn(total_timesteps=step_count, 
        ) 
        print(i)
    models.append(model_temp)
    
    

In [None]:
print(models)

In [None]:
prev_min = 0
print(len(df)-lower_bound,len(df))

# stop double selling
has_sold = False

In [None]:
past_trades = deque(maxlen=60)
cur_min = datetime.now().minute
prev_min = 0
# test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
wait_time = 1
while True:
    try:
        cur_min = datetime.now().minute
        if cur_min > prev_min or cur_min == 1:
            cur_min = datetime.now().minute
            prev_min = cur_min + wait_time - 1
            for i, model in enumerate(models):
                print("made it this far, looking for df")
                clear_output()
                cur_crypto = USD_crypto[i]
                print(cur_crypto)
                df_temp = cm.historical_data_df(crypto=[cur_crypto])

                df_temp = add_columns_df(df_temp)

                df_temp = pd.concat([df_list[i], df_temp], ignore_index=True, sort=False)
                
                test_env = Cypto_Env(df=df_temp, window_size=window_size, frame_bound=(window_size,upper_bound))
                
                def return_obs(df):
                    start = len(df) - window_size
                    end = len(df)
                    keys= []
                    for key in df.columns:
                        keys.append(key)
                    signal_features = df.loc[:, [key for key in keys[1:]]].to_numpy()[start:end]
                    return signal_features
                
                # clear_output()
                obs = return_obs(df)
                action = model.predict(obs)
                print("made a prediction on ", cur_crypto)
                print(action)
                if action == 1 or action[0] == 1:
                    trade_me.buy_position_at_market(cur_crypto)
                    print("b") 
                    has_sold = False
                if not has_sold and( action == 0 or action[0] == 0):
                    trade_me.sell_position_market(cur_crypto)
                    print("s")
                    has_sold = True
                # past_trades.append([action, ("price: ", df["Low"].iloc[-1]), ("time: ", datetime.now())])
                # for episode in range(episodes):
                #     model = model.learn(total_timesteps=step_count, 
                #     )
                    # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
                    # print("past trades: ", list(past_trades))
    except Exception as e:
        print(e)
        # for episode in range(episodes):
        #     model = model.learn(total_timesteps=step_count,
        #      )
            # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
            # print("past trades: ", list(past_trades))

