In [1]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv, ForexEnv


# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

# Technicals
from finta import TA

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


# Project Modules
from collections import deque
from config import LOG_PATH, MODEL_PATH
import cryptomanager as cm
from datetime import datetime
from IPython.display import clear_output
import os
import time
from trader import trader_agent

# The class that will do the work of trading.
# A single shared agent instance; the live loop further down calls its
# buy/sell methods to place orders.
trade_me = trader_agent()

1


In [2]:
# Initial DataFrame to train on.
# NOTE(review): `days_delta=10000` looks like a look-back in days (the printed
# start date is roughly 10000 days in the past) -- confirm in cryptomanager.
df = cm.historical_data_df(days_delta=10000)
df

start date:  1995-06-20 00:00:00


Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
0,2020-04-08 08:09,7314.0,7314.0,7314.0,7314.0,0.0001,1.0,7314.000000
1,2020-04-14 06:31,7101.5,7101.5,7101.5,7101.5,0.0001,1.0,7101.500000
2,2020-04-14 07:55,7100.0,7100.0,7100.0,7100.0,0.0001,1.0,7100.000000
3,2020-04-22 23:29,7118.5,7118.5,7118.5,7118.5,0.0001,1.0,7118.500000
4,2020-04-28 12:28,7748.5,7748.5,7748.5,7748.5,0.0003,1.0,7748.500000
...,...,...,...,...,...,...,...,...
582797,2022-11-05 15:51,21313.0,21313.0,21305.0,21305.0,0.0012,3.0,21306.500000
582798,2022-11-05 15:52,21301.0,21301.0,21300.0,21300.0,0.0002,2.0,21300.500000
582799,2022-11-05 15:53,21305.0,21305.0,21305.0,21305.0,0.0003,3.0,21305.000000
582800,2022-11-05 15:54,21310.0,21310.0,21300.0,21300.0,0.0055,4.0,21303.072727


In [3]:
# Training window: the most recent `window_size` rows of the frame.
# `lower_bound` / `upper_bound` are the row positions delimiting that window.
window_size = 100
upper_bound = len(df)
lower_bound = upper_bound - window_size
df.iloc[lower_bound:upper_bound]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP
582702,2022-11-05 14:08,21301.0,21311.0,21301.0,21311.0,0.1019,7.0,21310.847890
582703,2022-11-05 14:09,21311.0,21320.0,21311.0,21320.0,0.5257,5.0,21319.043941
582704,2022-11-05 14:10,21325.0,21330.0,21325.0,21330.0,0.0023,4.0,21327.043478
582705,2022-11-05 14:11,21329.0,21329.0,21329.0,21329.0,0.0540,2.0,21329.000000
582706,2022-11-05 14:12,21326.0,21326.0,21326.0,21326.0,0.0830,1.0,21326.000000
...,...,...,...,...,...,...,...,...
582797,2022-11-05 15:51,21313.0,21313.0,21305.0,21305.0,0.0012,3.0,21306.500000
582798,2022-11-05 15:52,21301.0,21301.0,21300.0,21300.0,0.0002,2.0,21300.500000
582799,2022-11-05 15:53,21305.0,21305.0,21305.0,21305.0,0.0003,3.0,21305.000000
582800,2022-11-05 15:54,21310.0,21310.0,21300.0,21300.0,0.0055,4.0,21303.072727


In [4]:
# Adding technicals to DataFrame

def add_columns_df(df):
    """Return a copy of ``df`` extended with technical-indicator columns.

    Adds ``OBV``, a 200-period ``EMA`` and ``RSI`` as single columns, then
    joins the frames returned by ``TA.PIVOT_FIB`` and ``TA.MACD``. Any missing
    values in the result are replaced with 0.

    The input frame is not modified: the indicators are written to a copy, so
    the caller's ``df`` keeps its original columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Price frame in the layout the ``finta`` indicator functions accept.

    Returns
    -------
    pandas.DataFrame
        New frame holding the original columns plus the indicator columns.
    """
    # Work on a copy so the column assignments below never leak into the
    # frame owned by the caller.
    enriched = df.copy()
    enriched['OBV'] = TA.OBV(enriched)  # unsure if I want to use this yet or not
    enriched['EMA'] = TA.EMA(enriched, 200)
    enriched['RSI'] = TA.RSI(enriched)
    enriched = enriched.join(TA.PIVOT_FIB(enriched))
    enriched = enriched.join(TA.MACD(enriched))
    # Replace missing values with 0 so the feature matrix has no NaNs.
    return enriched.fillna(0)
    
# Rebind `df` to the frame with indicator columns and preview the first rows.
df = add_columns_df(df)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Trade_Count,VWAP,OBV,EMA,...,s1,s2,s3,s4,r1,r2,r3,r4,MACD,SIGNAL
0,2020-04-08 08:09,7314.0,7314.0,7314.0,7314.0,0.0001,1.0,7314.0,0.0,7314.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-04-14 06:31,7101.5,7101.5,7101.5,7101.5,0.0001,1.0,7101.5,-0.0001,7207.21875,...,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,7314.0,-4.767628,-2.648682
2,2020-04-14 07:55,7100.0,7100.0,7100.0,7100.0,0.0001,1.0,7100.0,-0.0002,7171.121178,...,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,7101.5,-6.12142,-4.071935
3,2020-04-22 23:29,7118.5,7118.5,7118.5,7118.5,0.0001,1.0,7118.5,-0.0001,7157.7679,...,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,7100.0,-5.919194,-4.697701
4,2020-04-28 12:28,7748.5,7748.5,7748.5,7748.5,0.0003,1.0,7748.5,0.0002,7278.289003,...,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,7118.5,19.384455,2.466196


In [5]:
# Adding metrics to Env

def add_signals(env, df=None):
    """Build the price series and the feature matrix for a trading env.

    Intended to be installed as the ``_process_data`` hook of an env class, in
    which case ``env`` is the env instance.

    Parameters
    ----------
    env : object
        Must expose ``df`` (a DataFrame with a ``'Low'`` column),
        ``frame_bound`` (a ``(start, end)`` pair) and ``window_size``.
    df : pandas.DataFrame, optional
        Frame whose column names select the features. Defaults to ``env.df``
        so the selected columns always match the env's own data, instead of a
        module-level frame captured when this function was defined.

    Returns
    -------
    tuple of numpy.ndarray
        ``(prices, signal_features)``, both sliced to the rows
        ``frame_bound[0] - window_size`` up to ``frame_bound[1]``.
        ``prices`` holds the ``'Low'`` column; ``signal_features`` holds every
        column except the first one.
    """
    if df is None:
        df = env.df

    # Start `window_size` rows before the frame so the first observation
    # already has a full window behind it.
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]

    prices = env.df.loc[:, 'Low'].to_numpy()[start:end]

    # The first column is skipped; every other column becomes a feature.
    feature_columns = list(df.columns)[1:]
    signal_features = env.df.loc[:, feature_columns].to_numpy()[start:end]
    return prices, signal_features

In [6]:
class Cypto_Env(StocksEnv):
    """StocksEnv subclass that uses ``add_signals`` as its ``_process_data`` hook."""
    # Assigned as a class attribute, so it is bound like a method and receives
    # the env instance as its first argument (`env`).
    _process_data = add_signals
    

In [7]:
# The initial training Env before we switch to the up-to-date Env

env2 = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
# DummyVecEnv is given a list of zero-argument callables; this one hands back
# the already-built env2.
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [8]:
 
# Create the A2C agent on the vectorised env and give it an initial round of
# training: `episodes` calls to learn(), each for `step_count` timesteps.

model = A2C('MlpPolicy', env, verbose=1, device="cpu")

# Timestamp label for this run.
# NOTE(review): the format orders the fields year-day-month -- confirm that is
# intended rather than year-month-day.
name = datetime.now().strftime("%Y-%d-%m-%H-%M")

episodes = 5
step_count = 1000
for i in range(episodes):
    model = model.learn(total_timesteps=step_count)
    print(i)

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 731      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.588   |
|    explained_variance | -0.25    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.312   |
|    value_loss         | 0.194    |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 740       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -0.681    |
|    explained_variance | -0.000467 |
|    learning_rate      | 0.0007    |
|    n_updates          | 199       |
|    policy_loss        | 3.94      |
|    valu

In [11]:
prev_min = 0
# Sanity check: rows from `lower_bound` to the end of `df`, and total rows.
# A negative first value means `lower_bound` was computed from a longer frame
# than the `df` currently bound in the kernel.
print(len(df)-lower_bound,len(df))

# stop double selling
has_sold = False

-582101 601


In [12]:
# Live trading loop: every `wait_time` minutes fetch fresh market data, ask the
# model for an action, place the matching order, then keep training the model.
# Runs until the kernel is interrupted.
past_trades = deque(maxlen=60)  # rolling log of the most recent decisions
wait_time = 1  # whole minutes between trading cycles
test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))


def return_obs(df):
    """Return the last `window_size` rows of `df` as a NumPy array.

    The first column is skipped, mirroring the feature selection done by
    `add_signals`.
    """
    start = len(df) - window_size
    return df.iloc[start:, 1:].to_numpy()


def retrain_model(model):
    """Train `model` for `episodes` rounds of `step_count` timesteps each.

    The trade log is printed after every round. Returns the model object
    handed back by the last `learn` call.
    """
    for _ in range(episodes):
        model = model.learn(total_timesteps=step_count)
        # model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
        print("past trades: ", list(past_trades))
    return model


# Scheduling uses minutes since the epoch rather than the minute-of-hour, so
# the hour rollover (59 -> 0) and wait_time > 1 both behave correctly.
next_cycle_minute = 0  # 0 -> the first cycle runs immediately
while True:
    epoch_minute = int(time.time() // 60)
    if epoch_minute < next_cycle_minute:
        # Not due yet: sleep instead of spinning at full CPU.
        time.sleep(1)
        continue
    # Schedule the next cycle before doing any work, so a failing cycle is
    # not retried until the next interval.
    next_cycle_minute = epoch_minute + wait_time

    try:
        df = cm.historical_data_df()
        df = add_columns_df(df)

        # Bound the env by the length of the frame just fetched, not by a
        # value computed from an earlier frame.
        # NOTE(review): nothing in this cell attaches `test_env` to `model`,
        # so retrain_model() keeps training on the env the model was created
        # with. If training on the fresh data is intended, the model needs to
        # be pointed at this env -- confirm.
        test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(window_size, len(df)))

        clear_output()
        obs = return_obs(df)
        # predict() returns an (action, state) pair; only the action is used.
        prediction = model.predict(obs)
        print("made a prediction")
        print(prediction)
        action = int(prediction[0])

        if action == 1:
            trade_me.buy_position_at_market("BTC/USD")
            print("b")
            has_sold = False
        elif action == 0 and not has_sold:
            # `has_sold` prevents selling again before the next buy.
            trade_me.sell_position_market("BTCUSD")
            print("s")
            has_sold = True

        past_trades.append([action, ("price: ", df["Low"].iloc[-1]), ("time: ", datetime.now())])
        model = retrain_model(model)
    except Exception as e:
        # Best effort: report the failure and keep training so the loop
        # survives transient data/broker errors.
        print(e)
        model = retrain_model(model)



made a prediction
(array(1, dtype=int64), None)
buying  BTC/USD
{"available":"0.97","balance":"0.976486076","code":40310000,"message":"insufficient balance for USD (requested: 1, available: 0.97)","symbol":"USD"}
-------------------------------------
| time/                 |           |
|    fps                | 586       |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.000224 |
|    explained_variance | -0.667    |
|    learning_rate      | 0.0007    |
|    n_updates          | 28099     |
|    policy_loss        | 5.3e-08   |
|    value_loss         | 7.87e-06  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 607       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |  

In [None]:
# Save the model under MODEL_PATH, using the current epoch time as file name
checkpoint_path = os.path.join(MODEL_PATH, f"{time.time()}.zip")
model.save(checkpoint_path)