In [None]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv, ForexEnv


# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

# Technicals
from finta import TA

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


# Project Modules
from collections import deque
from config import LOG_PATH, MODEL_PATH
import cryptomanager as cm
import assetpicker as ap
from datetime import datetime
from IPython.display import clear_output
import os
import time
from trader import trader_agent

# The class that will do the work of trading: a single shared agent from the
# project's `trader` module. The live loops further down place their buy and
# sell orders through this instance.
trade_me = trader_agent()

In [None]:
# Initial DataFrame to train on, requested from the project's `assetpicker`
# module with days_delta=10000 (presumably a look-back in days — confirm
# against assetpicker).
df = ap.historical_data_df(days_delta=10000)
# Bare last expression so the notebook renders the frame.
df



In [None]:
# The range that we want to train on each interval
# window_size is passed to Cypto_Env as its observation window; lower_bound and
# upper_bound are passed together as the env's frame_bound.
window_size = 5
lower_bound = window_size
# Fixed at the length of `df` as of this cell; later cells reuse this value even
# for frames that have had newer rows appended.
upper_bound = len(df)
# Preview of the rows inside the training range.
df[lower_bound:upper_bound]

In [None]:
# Adding technicals to DataFrame

def add_columns_df(df):
    """Return a copy of ``df`` enriched with technical-indicator columns.

    Added columns: ``OBV``, ``EMA`` (period 200), ``RSI``, plus whatever
    columns ``TA.PIVOT_FIB`` and ``TA.MACD`` return. Missing values in the
    result are replaced with 0.

    The input frame is left untouched. If the frame already carries indicator
    columns (for example because the cell was re-run on an enriched frame),
    they are recomputed and replaced rather than making ``join`` raise on
    overlapping column names.

    Args:
        df: price DataFrame in the layout the finta ``TA`` functions accept.

    Returns:
        A new DataFrame with the indicator columns appended.
    """
    enriched = df.copy()  # never write indicator columns into the caller's frame
    enriched['OBV'] = TA.OBV(enriched)  # unsure if I want to use this yet or not
    enriched['EMA'] = TA.EMA(enriched, 200)
    enriched['RSI'] = TA.RSI(enriched)
    # Multi-column indicators are joined in this order so the column layout is
    # stable: pivots first, then MACD.
    for indicator in (TA.PIVOT_FIB, TA.MACD):
        extra = indicator(enriched)
        # Drop stale copies first; DataFrame.join refuses overlapping names.
        stale = [col for col in extra.columns if col in enriched.columns]
        enriched = enriched.drop(columns=stale).join(extra)
    return enriched.fillna(0)
    
# Rebind `df` to the frame with the technical-indicator columns attached; every
# later cell works with this enriched frame.
df = add_columns_df(df)
# Preview the first rows, including the new columns.
df.head()

In [None]:
# Adding metrics to Env

def add_signals(env, df=None):
    """Build the price series and feature matrix for a trading environment.

    Installed on ``Cypto_Env`` as its ``_process_data`` hook, so ``env`` is the
    environment instance itself.

    Args:
        env: object exposing ``df``, ``frame_bound`` and ``window_size``.
        df: optional frame whose column names select the features. When
            omitted, the environment's own ``env.df`` columns are used, so the
            function no longer depends on a global frame existing at
            definition time.

    Returns:
        tuple: ``(prices, signal_features)`` as NumPy arrays. ``prices`` is the
        ``'Low'`` column; ``signal_features`` holds every selected column except
        the first. Both cover rows
        ``frame_bound[0] - window_size`` up to ``frame_bound[1]``.
    """
    frame = env.df
    columns = frame.columns if df is None else df.columns
    # Start one window early so the first observation has a full history.
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = frame.loc[:, 'Low'].to_numpy()[start:end]
    # The first column is skipped, exactly as before.
    signal_features = frame.loc[:, list(columns[1:])].to_numpy()[start:end]
    return prices, signal_features

In [None]:
class Cypto_Env(StocksEnv):
    """StocksEnv variant that takes its data from ``add_signals`` and charges no fees."""

    # Data hook: prices and features are produced by the notebook's add_signals.
    _process_data = add_signals

    def __init__(self, df, window_size, frame_bound):
        """Create the environment, then zero both trade-fee percentages."""
        super().__init__(df, window_size, frame_bound)
        # Bid is assigned first, then ask; both end up at 0 (fee-free trading).
        self.trade_fee_bid_percent = self.trade_fee_ask_percent = 0
    

In [None]:
# The initial training Env before we switch to the up-to-date Env

# Raw environment over the enriched historical frame, limited to the
# (lower_bound, upper_bound) range.
env2 = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
# DummyVecEnv takes a list of zero-argument factories; this one hands back the
# already-built env2.
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [None]:
 
# create and train prediction model

# Training schedule: `episodes` back-to-back learn() calls of `step_count`
# timesteps each. Both names are reused by the live trading loop further down.
episodes = 5
step_count = 1000

# model = A2C.load(path="./data/model/1667783635.2006977", env = env)
# Fresh A2C agent with an MLP policy, bound to the vectorised env created above.
model = A2C('MlpPolicy', env, verbose=1, device="cuda") 

# Timestamp label formatted year-day-month-hour-minute; nothing in the visible
# notebook reads `name` afterwards.
name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

# learn() returns the model itself, so rebinding keeps `model` pointing at the
# same agent; the loop index is printed as a progress marker.
for i in range (episodes):
    model = model.learn(total_timesteps=step_count, 
    ) 
    print(i)

In [None]:
# Minute marker used by the live loop's scheduling; the loop cell reassigns it
# before use.
prev_min = 0
# Sanity print: number of rows past lower_bound, and total rows in df.
print(len(df)-lower_bound,len(df))

# stop double selling
# Starts as True, i.e. the state "the last action was a sell".
has_sold = True

In [None]:

# One-off dry run of the data refresh the live loop performs on each tick:
# fetch the latest data with the crypto manager's default arguments.
df_temp = cm.historical_data_df()


# Attach the same technical-indicator columns the training frame has.
df_temp = add_columns_df(df_temp)

# Append the fresh rows below the historical frame, renumbering the index.
df_temp = pd.concat([df, df_temp], ignore_index=True, sort=False)

# Not the last expression of the cell, so this selection is computed and then
# discarded without being displayed.
df_temp.iloc[:,0]
# Rolling record of the most recent 60 actions taken by the live loop.
past_trades = deque(maxlen=60)


In [None]:
# Live BTC loop: on every `wait_time`-minute boundary fetch fresh data, ask the
# model for an action, place the matching order, then keep training the model.
# Runs until the cell is interrupted.
cur_min = datetime.now().minute
prev_min = cur_min  # last minute already handled, so the starting minute is skipped
test_env = Cypto_Env(df=df, window_size=window_size, frame_bound=(lower_bound,upper_bound))
wait_time = window_size
min_elapsed = 0


def return_obs(df):
    """Return the newest ``window_size`` rows of every column but the first.

    The result is a NumPy array used as the observation handed to
    ``model.predict``.
    """
    start = len(df) - window_size
    return df.loc[:, list(df.columns[1:])].to_numpy()[start:len(df)]


while True:
    cur_min = datetime.now().minute
    # Fire once per boundary minute. Comparing against the last handled minute
    # (rather than requiring cur_min to be greater) lets the :00 tick run too.
    if cur_min % wait_time == 0 and cur_min != prev_min:
        prev_min = cur_min
        try:
            clear_output()
            min_elapsed += wait_time
            print("mins til model update: ", window_size - min_elapsed)

            # Refresh: latest data + indicators, appended below the history.
            df_temp = cm.historical_data_df()
            df_temp = add_columns_df(df_temp)
            df_temp = pd.concat([df, df_temp], ignore_index=True, sort=False)

            # NOTE(review): test_env is rebuilt on every tick but nothing visible
            # in this notebook attaches it to `model`; learn() below keeps using
            # the env the model was created with. Confirm whether the model was
            # meant to train on the refreshed data.
            test_env = Cypto_Env(df=df_temp, window_size=window_size, frame_bound=(window_size,upper_bound))

            obs = return_obs(df_temp)
            action, _ = model.predict(obs)
            print("made a prediction")
            print(action)
            print("Last was sold: ",has_sold)
            # NOTE(review): the buy uses "BTC/USD" and the sell uses "BTCUSD";
            # confirm both spellings are what trader_agent expects.
            if action == 1:
                trade_me.buy_position_at_market(ticker = "BTC/USD")
                print("b")
                # Flip the flag only after the order call returned.
                has_sold = False
            elif action == 0 and not has_sold:
                # Gate on has_sold so a position is not sold twice in a row.
                trade_me.sell_position_limit(ticker = "BTCUSD")
                print("s")
                has_sold = True
            past_trades.append(action)
            print("past trades: ", list(past_trades))
        except Exception as e:
            # Best effort: report the failure and carry on with the next tick.
            print(e)
            print("past trades: ", list(past_trades))

        if min_elapsed >= window_size:
            min_elapsed = 0

        # Retraining runs after every tick, even when the trade step failed.
        # It has its own guard so a training error cannot stop the loop.
        try:
            for episode in range(episodes):
                model.learn(total_timesteps=step_count)
                print(episode +1)
        except Exception as e:
            print(e)

    # Poll the clock once per second instead of spinning a CPU core.
    time.sleep(1)



In [None]:
# Save the Model
# The file name is the current Unix timestamp plus ".zip", placed under the
# project's configured MODEL_PATH.
model.save(os.path.join(MODEL_PATH, str(time.time()) + ".zip"))
# Display the rolling record of recent actions.
past_trades

In [None]:
def Crypto_USD() -> list:
    """Return the tradable crypto symbols that are quoted in USD.

    Symbols come from ``cm.get_cryptos_tradable()``. A symbol is kept when the
    segment right after its first "/" is exactly "USD" (e.g. "BTC/USD").
    Symbols that contain no "/" are skipped rather than raising IndexError.

    Returns:
        list: matching symbols, in the order the crypto manager returned them.
    """
    output = []
    for crypto in cm.get_cryptos_tradable():
        parts = crypto.split("/")
        # len check guards symbols without a "/" separator.
        if len(parts) > 1 and parts[1] == "USD":
            output.append(crypto)
    return output

# Every tradable USD-quoted symbol; later cells build one frame, one
# environment and one model per entry.
USD_crypto =  Crypto_USD()   
# USD_crypto.pop(Crypto_USD().index("BTC/USD"))
# How many symbols will be processed.
print(len(USD_crypto))


In [None]:
# get data frame for each crypto
# Build one indicator-enriched frame per symbol. A symbol whose download or
# enrichment fails is reported and left out.
df_list = []
loaded_cryptos = []
for crypto in USD_crypto:
    try:
        df_temp = cm.historical_data_df(days_delta=0, crypto=[crypto])
        # Enrich the frame that was just fetched for this symbol (not the
        # notebook-level BTC frame `df`).
        df_temp = add_columns_df(df_temp)
    except Exception as exc:
        # Say which symbol was dropped instead of swallowing the error.
        print("skipping", crypto, "-", exc)
        continue
    df_list.append(df_temp)
    loaded_cryptos.append(crypto)

# Keep USD_crypto index-aligned with df_list: later cells look symbols up by
# position, so skipped symbols must not leave gaps.
USD_crypto = loaded_cryptos



In [None]:
# create the environments
# One raw Cypto_Env per frame in df_list, collected in init_envs.
init_envs = []
for df_single in df_list:
    # NOTE(review): upper_bound was computed as len(df) of the first training
    # frame, not from df_single — confirm that bound is intended for every asset.
    env2 = Cypto_Env(df=df_single, window_size=window_size, frame_bound=(lower_bound,upper_bound))
    env_maker = lambda: env2
    # The DummyVecEnv wrapper is rebound on every iteration, so only the last
    # one is still referenced by `env` after the loop.
    env = DummyVecEnv([env_maker])
    # The list stores the raw environment, not the vectorised wrapper.
    init_envs.append(env2)
    

In [None]:
# Show the list of environments collected in init_envs.
print(init_envs)

In [None]:
# train model for each env
# One A2C agent per environment in init_envs, each trained on its own
# environment; the trained agents are collected in `models` in the same order.
episodes = 1
step_count = 1000

models = []
for env_single in init_envs:
    # Wrap this iteration's environment. The default argument pins the current
    # env_single to the factory, so each model gets its own environment rather
    # than whichever wrapper the global `env` last pointed at.
    vec_env_single = DummyVecEnv([lambda bound_env=env_single: bound_env])
    model_temp = A2C('MlpPolicy', vec_env_single, verbose=0, device="cuda")

    # Timestamp label formatted year-day-month-hour-minute.
    name = str(datetime.now().strftime("%Y-%d-%m-%H-%M") )

    for i in range (episodes):
        model_temp = model_temp.learn(total_timesteps=step_count)
        print(i)
    models.append(model_temp)
    
    

In [None]:
# Show the list of trained per-asset models.
print(models)

In [None]:
# Minute marker used by the multi-asset live loop's scheduling.
prev_min = 0
# Sanity print: number of rows past lower_bound, and total rows in df.
print(len(df)-lower_bound,len(df))

# stop double selling
# Starts as False here, i.e. the state "nothing has been sold yet".
has_sold = False

In [None]:
# Live multi-asset loop: once per `wait_time`-minute boundary, refresh each
# asset's data, ask that asset's model for an action and place the order.
# Runs until the cell is interrupted.
past_trades = deque(maxlen=60)
cur_min = datetime.now().minute
prev_min = -1  # no minute handled yet, so the first pass trades immediately
wait_time = 1
# One "already sold" flag per ticker, seeded from the notebook-level has_sold,
# so selling one asset does not block selling another.
sold_by_ticker = {}


def return_obs(df):
    """Return the newest ``window_size`` rows of every column but the first.

    The result is a NumPy array used as the observation handed to
    ``predict``.
    """
    start = len(df) - window_size
    return df.loc[:, list(df.columns[1:])].to_numpy()[start:len(df)]


while True:
    cur_min = datetime.now().minute
    # Fire exactly once per boundary minute, including minute :00, and never
    # more than once inside the same minute.
    if cur_min % wait_time == 0 and cur_min != prev_min:
        prev_min = cur_min
        clear_output()
        # NOTE(review): this pairs models[i] with USD_crypto[i] and df_list[i];
        # verify the three lists were built in the same order with no gaps.
        # The loop variable is not called `model`, so the notebook-level BTC
        # model is left untouched.
        for i, asset_model in enumerate(models):
            # Each asset is guarded separately so one failure does not skip the
            # remaining assets for this tick.
            try:
                cur_crypto = USD_crypto[i]
                print(cur_crypto)
                df_temp = cm.historical_data_df(crypto=[cur_crypto])
                df_temp = add_columns_df(df_temp)
                df_temp = pd.concat([df_list[i], df_temp], ignore_index=True, sort=False)

                # NOTE(review): built on every pass but not used for prediction
                # or training in the visible code.
                test_env = Cypto_Env(df=df_temp, window_size=window_size, frame_bound=(window_size,upper_bound))

                # Observe this asset's refreshed frame, not the global BTC `df`.
                obs = return_obs(df_temp)
                # predict() returns (action, state); only the action is needed.
                action, _ = asset_model.predict(obs)
                print("made a prediction on ", cur_crypto)
                print(action)

                already_sold = sold_by_ticker.get(cur_crypto, has_sold)
                if action == 1:
                    trade_me.buy_position_at_market(cur_crypto)
                    print("b")
                    sold_by_ticker[cur_crypto] = False
                elif action == 0 and not already_sold:
                    trade_me.sell_position_market(cur_crypto)
                    print("s")
                    sold_by_ticker[cur_crypto] = True
            except Exception as e:
                print(e)

    # Poll the clock once per second instead of spinning a CPU core.
    time.sleep(1)

