In [None]:
#https://gym-trading-env.readthedocs.io/en/latest/rl_tutorial.html
#https://medium.com/@sthanikamsanthosh1994/custom-gym-environment-stock-trading-for-reinforcement-learning-stable-baseline3-629a489d462d

### Understand the action space
Positions
I have seen many environments that consider actions such as BUY, SELL. In my experience, it is a mistake to consider a reinforcement learning agent in the same way as a trader. Because, behind a trade, what really matter is the : position reached. In the environment, we label each position by a number : (example with pair BTC/USD)

1 : All of our portfolio is converted into BTC. (=BUY ALL)

0 : All of our portfolio is converted into USD. (=SELL ALL)

Now, we can imagine half position and other variants :

0.5 : 50% in BTC & 50% in USD

Even : 0.1 : 10% in BTC & 90% in USD ….

# ------------------------------------ Trading Bot --------------------------------------

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import logging #logger
#https://www.youtube.com/watch?v=SMZfgeHFFcA
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
#from network import DeepQNetwork
#from replay_memory import ReplayBuffer
#tf.get_logger().setLevel('ERROR')

import pandas as pd

In [2]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [3]:
import datetime

In [4]:
from env.environment import *
from env.portfolio import *
from utils.utils import *
from agent.agent import *



In [5]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [6]:
import psycopg2 as pg

In [7]:
from prettytable import PrettyTable

In [8]:
# »»»»»»»»»»»»»»»»» STOCKS ««««««««««««««««««««z

STOCKS={'amazon':'AMZN','apple':'AAPL','netflix':'NFLX','google':'GOOG','Accenture':'ACN',
       'alibaba':'BABA','Turtle Beach':'HEAR','Disney':'DIS',
       'LG Display':'LPL','microsoft':'MICS','sony':'SONY',
       'Cenovus Energy':'CVE','YPF':'YPF','SHELL':'SHEL','Petrobras':'PBR',
       'Coca-Cola':'KO','PespsiCo':'PEP','Unilever':'UL','Kimberly-Clark':'KMB',
       'Mondelez':'MDLZ'}

#STOCKS = {'amazon':'AMZN'}

# dir where parquete file will be created
stocks_dir = '/dataset/stocks_aux/'

#*****************************************************************

In [9]:
def turn_of_logger():
    logger=logging.getLogger()
    for handler in logger.handlers[:]:  #make a copy of the list
        logger.removeHandler(handler)
    return logger

In [10]:
strin_format='%(i)s, %(market_return)s,%(pr)s,%(Sharpe)s,%(score)s,%(avg_score)s,%(best_score)s,%(loss)s,%(n_steps)s,%(pr_val)s,%(score_val)s, %(avg_score_val)s'
def initLogging(filename, logger_name):
    logger=turn_of_logger()
    logger.setLevel(logging.DEBUG)
    formatter=logging.Formatter(fmt='%(asctime)s,%(message)s',datefmt='%Y-%m-%d,%H:%M:%S')
    #formatter=logging.Formatter(fmt='%(message)s')
    fh=logging.FileHandler(filename)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    return logger


    # sh=logging.StreamHandler(sys.stdout)
    # sh.setFormatter(formatter)
    # logger.addHandler(sh)


### ------------------------------- Reading and preparing data -----------------------------------------

In [115]:
df_training=pd.read_parquet("./get_data/data_for_model_v1.parquet")
df_training.shape

(17528, 55)

In [116]:
#check if index is sorted. if yes ==Ture
df_training.datetime.is_monotonic_increasing

True

In [117]:
df_training.head(5)

Unnamed: 0,ticker,datetime,date,hour,holidays,n_weekday,n_hour,open,high,low,...,source10_y,source9_y,source8_y,source7_y,source6_y,source5_y,source4_y,source3_y,source2_y,source1_y
0,AAPL,2022-04-06 04:00:00,2022-04-06,04:00:00,0,2,4,174.91,175.0,173.41,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,AAPL,2022-04-06 05:00:00,2022-04-06,05:00:00,0,2,5,173.72,174.1,173.66,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,AAPL,2022-04-06 06:00:00,2022-04-06,06:00:00,0,2,6,173.81,173.86,173.21,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,AAPL,2022-04-06 07:00:00,2022-04-06,07:00:00,0,2,7,173.33,173.35,172.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,AAPL,2022-04-06 08:00:00,2022-04-06,08:00:00,0,2,8,172.69,175.23,172.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [118]:
#df_training=df_training[(df_training.relevance_score>=0.5]

In [119]:
#df_training.loc[0:1,['volume']]=1

In [120]:
#Because I am not using all data, I can fill in with other value different from 0. The first value never will be 0.
df_training['volume'] = df_training['volume'].replace(0, np.nan).ffill()

In [121]:
#filter the dates where sentiment is not available

#Select Validation first
df_validation=df_training[(df_training.date>=datetime.date(2024,1,10))&(df_training.date<=datetime.date(2024,4,25))]
print("Validation: ",df_validation.shape)
#check if index is sorted. if yes ==Ture
print(df_validation.datetime.is_monotonic_increasing)
# selece Traning second -check to improve this
df_training=df_training[(df_training.date>=datetime.date(2023,8,1))&(df_training.date<datetime.date(2024,1,10))]
print("Training: ",df_training.shape)
#check if index is sorted. if yes ==Ture
print(df_training.datetime.is_monotonic_increasing)

Validation:  (2076, 55)
True
Training:  (3888, 55)
True


In [122]:
df_validation

Unnamed: 0,ticker,datetime,date,hour,holidays,n_weekday,n_hour,open,high,low,...,source10_y,source9_y,source8_y,source7_y,source6_y,source5_y,source4_y,source3_y,source2_y,source1_y
15452,AAPL,2024-01-10 00:00:00,2024-01-10,00:00:00,0,2,0,185.230000,185.480000,185.1500,...,0.0,0.0,0.005196,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15453,AAPL,2024-01-10 01:00:00,2024-01-10,01:00:00,0,2,1,185.230000,185.480000,185.1500,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15454,AAPL,2024-01-10 02:00:00,2024-01-10,02:00:00,0,2,2,185.230000,185.480000,185.1500,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15455,AAPL,2024-01-10 03:00:00,2024-01-10,03:00:00,0,2,3,185.230000,185.480000,185.1500,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15456,AAPL,2024-01-10 04:00:00,2024-01-10,04:00:00,0,2,4,184.880000,185.330000,184.5700,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17523,AAPL,2024-04-05 15:00:00,2024-04-05,15:00:00,0,4,15,169.889999,169.979996,169.5000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17524,AAPL,2024-04-05 16:00:00,2024-04-05,16:00:00,0,4,16,169.570000,170.970100,158.8039,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17525,AAPL,2024-04-05 17:00:00,2024-04-05,17:00:00,0,4,17,169.560000,169.700000,169.5300,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17526,AAPL,2024-04-05 18:00:00,2024-04-05,18:00:00,0,4,18,169.615000,169.680000,169.4500,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# ---------------- arrange columns-----------------------------

In [123]:
l1=list(df_training.columns)[2:55] #remove ticker, datetime 

In [124]:
df_training=df_training[l1]

In [125]:
df_validation=df_validation[l1]

In [126]:
 #---------------------------------------------------------------------

In [127]:
def _remove_linear_tendecy (x):
    return np.ediff1d(x, to_begin=1)

In [128]:
def _simple_net_return (x):
    """
        Fractional change between the current and a prior element.
        Computes the fractional change from the immediately previous row by default. 
        This is useful in comparing the fraction of change in a time series of elements.
    """
    result=x.pct_change()
    result.fillna(0,inplace=True)
    return result

In [129]:
def _log_return (x):
    result=np.log((x/x.shift(1))
    result.fillna(0,inplace=True)
    return (result)

In [90]:

 #---------------------------------------------------------------------

In [91]:
# #df_training is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"
for dataframe in ['df_training','df_validation']:
    #---------------------------------------------------------------------
    eval(dataframe)["feature_nweek"] = eval(dataframe)["n_weekday"]
    eval(dataframe)["feature_holiday"] =  eval(dataframe)["holidays"]
    eval(dataframe)["feature_nhour"] =  eval(dataframe)["n_hour"]
    
    #Create the feature : ( close[t] - close[t-1] )/ close[t-1]
    #df_training["diff_close"] = df_training['close'].diff()
    #eval(dataframe)["feature_close"] = ( eval(dataframe)["close"]) /  eval(dataframe)["close"].abs().max() 
    #eval(dataframe)["feature_close"] = _remove_linear_tendecy(eval(dataframe)["close"]) 
    #eval(dataframe)["feature_close"] = _simple_net_return(eval(dataframe)["close"]) 
    eval(dataframe)["feature_close"] = _log_return(eval(dataframe)["close"]) 
    
    # Create the feature : open[t] / close[t]
    eval(dataframe)["feature_open"] = _log_return(eval(dataframe)["open"]) 
    #eval(dataframe)["feature_open"] = ( eval(dataframe)["open"])/  eval(dataframe)["close"].abs().max() 
    
    # Create the feature : high[t] / close[t]
    eval(dataframe)["feature_high"] = _log_return(eval(dataframe)["high"]) 
    #eval(dataframe)["feature_high"] = ( eval(dataframe)["high"])/  eval(dataframe)["close"].abs().max() 
    
    # Create the feature : low[t] / close[t]
    eval(dataframe)["feature_low"] = _log_return(eval(dataframe)["low"]) 
    #eval(dataframe)["feature_low"]= ( eval(dataframe)["low"])/  eval(dataframe)["close"].abs().max() 
    
    #eval(dataframe)["feature_volume"] =  eval(dataframe)["volume"].apply(lambda x: np.log(x) if x!=0 else x)/  eval(dataframe)["close"].abs().max() 
    eval(dataframe)["feature_volume"] = _log_return(eval(dataframe)["volume"])     
    #eval(dataframe)["feature_volume"] =  eval(dataframe)["volume"]/  (eval(dataframe)["close"].abs().max()*1000000)
    #create columns for  relevance_score (x) and ticket_sentiment_score (y)
    # l1 is from 11 to 55 because I am selecting from score20_x to score1_y
    for column in l1[11:55]:
        column_name="feature_"+column
        eval(dataframe)[column_name]= eval(dataframe)[column] 
    #---------------------------------------------------------------------
    #eval(dataframe).dropna(inplace= True) 

In [85]:
feature_list=[ x for x in list(df_training.columns) if "feature_" in x] #find all features in columns
print(feature_list)

['feature_nweek', 'feature_holiday', 'feature_nhour', 'feature_close', 'feature_open', 'feature_high', 'feature_low', 'feature_volume', 'feature_source20_x', 'feature_source19_x', 'feature_source18_x', 'feature_source17_x', 'feature_source16_x', 'feature_source15_x', 'feature_source14_x', 'feature_source13_x', 'feature_source12_x', 'feature_source11_x', 'feature_source10_x', 'feature_source9_x', 'feature_source8_x', 'feature_source7_x', 'feature_source6_x', 'feature_source5_x', 'feature_source4_x', 'feature_source3_x', 'feature_source2_x', 'feature_source1_x', 'feature_source20_y', 'feature_source19_y', 'feature_source18_y', 'feature_source17_y', 'feature_source16_y', 'feature_source15_y', 'feature_source14_y', 'feature_source13_y', 'feature_source12_y', 'feature_source11_y', 'feature_source10_y', 'feature_source9_y', 'feature_source8_y', 'feature_source7_y', 'feature_source6_y', 'feature_source5_y', 'feature_source4_y', 'feature_source3_y', 'feature_source2_y', 'feature_source1_y', 'f

In [25]:

#---------------------------- check for Nulls & NAN _ relevance_score -------------------
l1=list(df_training.columns)[11:55]
serie_1=df_training[l1].isna().sum()
serie_2=df_training[l1].isnull().sum()
pd.concat( [serie_1, serie_2], join='outer',keys = ['Nan','Null'], axis=1)

Unnamed: 0,Nan,Null
source20_x,0,0
source19_x,0,0
source18_x,0,0
source17_x,0,0
source16_x,0,0
source15_x,0,0
source14_x,0,0
source13_x,0,0
source12_x,0,0
source11_x,0,0


In [26]:
#check validation notna in close columns
df_validation.feature_close.notna().sum()

2076

In [27]:
print("Training: ",df_training.shape)
print("Validation: ",df_validation.shape)

Training:  (3888, 99)
Validation:  (2076, 99)


In [28]:
#check if index is sorted. if yes ==Ture
df_training.index.is_monotonic_increasing

True

In [29]:
#Check folders to save output
import datetime
    
YEAR        = str(datetime.date.today().year)     # the current year
MONTH       = str(datetime.date.today().month)   # the current month
DATE        = str(datetime.date.today().day)      # the current day
HOUR        = str(datetime.datetime.now().hour)   # the current hour
MINUTE      = str(datetime.datetime.now().minute) # the current minute
#SECONDS     = datetime.datetime.now().second #the current second
string_folder=YEAR+MONTH+DATE+"_"+HOUR+MINUTE

In [30]:
string_folder

'2024416_1523'

In [31]:
PATH = './output/'+string_folder+'/'
if not os.path.exists(PATH):
    os.makedirs(PATH)
    # Creating folders for best DQN agents
    PATH_EVAL=PATH+'agent_best_eval_models'+'/'
    PATH_NEXT=PATH+'agent_best_next_models'+'/'
    os.makedirs(PATH_EVAL)
    os.makedirs(PATH_NEXT)
    

In [32]:

logger=initLogging(PATH+"output.log","output")
logger.info('i,market_return,pr,Sharpe,score,avg_score,best_score,loss,n_steps,pr_val,score_val, avg_score_val')

In [33]:
NUMBER_GAMES=200

In [34]:
if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution() #improve 
    #manage_memory()
    #----------------Training----------------
    env = TradingEnv(
        name= "AAPL",
        df = df_training, # Your dataset with your custom features
        positions = [0, 1], # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        portfolio_initial_value=100,
        initial_position=1,
        
        obs_columns=feature_list, #automatically set columns as input
        
        #trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        trading_fees =0,
        #borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
        borrow_interest_rate= 0,
        verbose=1
    )
    #----------------Validation----------------
    env_val = TradingEnv(
        name= "AAPL",
        df = df_validation, # Your dataset with your custom features
        positions = [0, 1], # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        portfolio_initial_value=100,
        initial_position=1,

        obs_columns=feature_list, #automatically set columns as input
        
        trading_fees =0,
        borrow_interest_rate= 0,
        verbose=1
    )
    
    best_score = -np.inf
    load_checkpoint = False
    record_agent = False
    n_games = NUMBER_GAMES
    # Epsilon should be in 0 because I need the agent's brain tries to predidct and it must not select action by random.
    agent_brain = Agent(gamma=0.99, epsilon=1, lr=0.001,
                  #input_dims=env.observation_space.shape,n_actions=env.action_space.n, 
                  input_dims=env.observation_space,n_actions=len(env.action_space),
                  mem_size=1000000, eps_min=0.01,
                  batch_size=64, replace=1000, eps_dec=1e-3, # batch_size=32 ATTENTION
                  n_neurons1=256,n_neurons2=256,
                  dir=PATH_EVAL, dir2=PATH_NEXT,
                  env_name='Trading_bot')
    n_steps = 0
   
    scores, eps_history, steps_array, losses,portfolios,scores_val,portfolios_val = [], [], [], [],[],[],[]    
    for i in range(n_games):
        done = False
        score = 0
        terminated = False
        truncated =False
        done=(terminated|truncated)
        #actual_obs=env.reset(seed=7)[0]# I had to do this because array is not two parameters due to new gym version
        actual_obs,info=env.reset(seed=7)
        while not done: #playing one game.
            action = agent_brain.choose_action(actual_obs,True)
            #print("env.step_action ",action)
            new_obs, reward, terminated,truncated,info = env.step(action)
            #----------------------------------------------------------------------
            #   New GYM version needs to create done from Terminadted and Truncated (after 200)
            done=(terminated|truncated)
            #-----------------------------------------------------------------------    
            score += reward

            #print(actual_obs)
            agent_brain.store_transition(actual_obs, action,reward, new_obs, done)
        
            #--------------------------------------
            #New observaction is tronsformed to actual_obs
            actual_obs = new_obs
            #-----------------------------------------
            #-----------------------------------------
            # start to train
            loss=agent_brain.learn()
            #------------------------------------------
            n_steps += 1
            
        #----------------------------- Validation ------------------
        score_val=0
        terminated_val = False
        truncated_val =False
        done_val=(terminated_val|truncated_val)
        actual_obs_val,info_val=env_val.reset(seed=357)
        while not done_val: #playing one game.
            action_val= agent_brain.choose_action(actual_obs_val,False)
            #print("env.step_action ",action)
            new_obs_val, reward_val, terminated_val,truncated_val,info_val = env_val.step(action_val)
            #----------------------------------------------------------------------
            #   New GYM version needs to create done from Terminadted and Truncated (after 200)
            done_val=(terminated_val|truncated_val)
            #-----------------------------------------------------------------------    
            score_val+=reward_val

        #----------------------------- Metrics ------------------
        
        eps_history.append(agent_brain.epsilon)
        scores.append(score)
        scores_val.append(score_val)
        losses.append(loss)
        #print("Score: ", score, " Loss: ", loss)
        steps_array.append(n_steps)
        
        pr=(100*(env.df.loc[env._idx,'portfolio_valuation'] / env.df.loc[0,'portfolio_valuation'] -1))
        portfolios.append(pr)
        
        pr_val=(100*(env_val.df.loc[env_val._idx,'portfolio_valuation'] / env_val.df.loc[0,'portfolio_valuation'] -1))
        portfolios_val.append(pr_val)
        
        #This is the average score. From the last 100 scores
        avg_score = np.mean(scores[-100:])
        avg_score_val = np.mean(scores_val[-100:])
        market_return=(100*(env.df.loc[env._idx,'close'] / env.df.loc[0,'close'] -1))
 
        #----------------------------sharpe value
        Rx=((env.df.iloc[-1,:]['portfolio_valuation']/env.df.iloc[0,:]['portfolio_valuation'])-1)*100
        rt=3
        Std=np.std(env.df['portfolio_valuation'])
        Sharpe=(Rx-rt)/Std
        #--------------------------------------------------------------------------------------
       
        print("-------------------------------------------------------------------------------------------------------")
        print('episode {} | MarketReturn {:.2f} | PortfolioReturn {:.2f} | Sharpe {:.2f} | score {:.1f} |  avg score {:.1f} | best score {:.1f} | loss {:.4f} | steps {}'\
              .format(i, market_return,pr,Sharpe,score, avg_score, best_score,loss, n_steps))
        print(' PortfolioReturn_val {:.2f} |  score_val {:.1f} | avg score_val {:.1f} '\
              .format(pr_val,score_val, avg_score_val))
        print("-------------------------------------------------------------------------------------------------------")
        logger.info(f'{i},{market_return},{pr},{Sharpe},{score},{avg_score},{best_score},{loss},{n_steps},{pr_val},{score_val}, {avg_score_val}')
        if score > best_score:
            if not load_checkpoint:
                agent_brain.save_models()
            best_score = score
        if i % 7 == 0:
            agent_brain.target_net.set_weights(agent_brain.policy_net.get_weights())
            print('Target_net has changed...')
        

[0, 1]
[0, 1]
No GPU, using /device:CPU:0.
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               12800     
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 dense_2 (Dense)             (None, 2)                 514       
                                                                 
Total params: 79,106
Trainable params: 79,106
Non-trainable params: 0
_________________________________________________________________
No GPU, using /device:CPU:0.
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               12800     
                        

  updates=self.state_updates,


-------------------------------------------------------------------------------------------------------
episode 0 | MarketReturn -5.53 | PortfolioReturn -12.68 | Sharpe -8.21 | score -12.7 |  avg score -12.7 | best score -inf | loss 1225.2273 | steps 3887
 PortfolioReturn_val 0.00 |  score_val 0.0 | avg score_val 0.0 
-------------------------------------------------------------------------------------------------------
... models saved successfully ...
Target_net has changed...
-------------------------------------------------------------------------------------------------------
episode 1 | MarketReturn -5.53 | PortfolioReturn -9.45 | Sharpe -3.71 | score -9.4 |  avg score -11.1 | best score -12.7 | loss 279.9337 | steps 7774
 PortfolioReturn_val -8.59 |  score_val -8.6 | avg score_val -4.3 
-------------------------------------------------------------------------------------------------------
... models saved successfully ...
---------------------------------------------------------

KeyboardInterrupt: 

In [None]:
    
x = [i+1 for i in range(len(scores))]
agent_brain.policy_net.save(PATH+'model_q_eval.h5')

In [None]:
linear_chart_score(steps_array,scores,PATH)

In [None]:
linear_chart_score(steps_array,scores_val,PATH)

In [None]:
linear_chart_loss(steps_array,losses,PATH)

In [None]:
linear_chart_portfolio(steps_array,portfolios,PATH)

In [None]:
env.df.to_csv(f"{PATH}df_{string_folder}.csv")

In [None]:
env.df.head(50)

In [None]:
env_val.df.head(50)