In [107]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import sqlite3
import datetime
from sklearn import preprocessing
from finrl.config import config
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.env.env_stocktrading_stoploss import StockTradingEnvStopLoss
from finrl.env.env_stocktrading_cashpenalty import StockTradingEnvCashpenalty
from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats, get_baseline, backtest_plot
# Disable the warnings
import warnings

warnings.filterwarnings('ignore')
# Date range (as ISO date strings) to load from the local price database.
start = '2012-12-31'
end = '2020-01-01'

# Load daily OHLCV rows for every ticker in config.DOW_30_TICKER from the
# local SQLite file "db3.db" (one table per ticker) and stack them into a
# single long-format frame with a 'tic' column identifying the ticker.
stock_list = config.DOW_30_TICKER
price_columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'tic']

db = sqlite3.connect("db3.db")
try:
    per_ticker_frames = []
    for stock in stock_list:
        # Table names cannot be bound as SQL parameters, so the ticker is
        # embedded as a double-quoted identifier (embedded quotes doubled).
        # The date bounds ARE bound as parameters: the previous version wrote
        # them in double quotes, which SQLite parses as identifiers first.
        table_name = stock.replace('"', '""')
        dataSet = pd.read_sql(
            con=db,
            sql=f'SELECT date, Open, High, Low, Close, Volume FROM "{table_name}" '
                'WHERE Date BETWEEN ? AND ?',
            params=(start, end))
        dataSet['tic'] = stock
        dataSet.columns = price_columns
        per_ticker_frames.append(dataSet)
finally:
    # Release the database handle even if one of the queries fails.
    db.close()

# Concatenate once instead of growing the frame inside the loop
# (DataFrame.append is quadratic here and was removed in pandas 2.0).
# ignore_index=True already yields a fresh 0..n-1 index.
if per_ticker_frames:
    MultiDataSet = pd.concat(per_ticker_frames, ignore_index=True)
else:
    MultiDataSet = pd.DataFrame(columns=price_columns)

# Alternative data source (disabled): download the same range from Yahoo.
# MultiDataSet = YahooDownloader(start_date = start,
#                      end_date = end,
#                      ticker_list = config.DOW_30_TICKER).fetch_data()

In [108]:
# Technical indicators computed by FeatureEngineer for the price data.
# The same list is passed to the trading environments as
# daily_information_cols.
# tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
add_TA = [
    'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'close_30_sma',
    'kdjk', 'kdjd', 'wr_30', 'wr_10', 'cci_10', 'rsi_10',
]
# information_cols = add_TA + tech_indicator_list

fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=add_TA,
    use_turbulence=False,
    user_defined_feature=False,
)
processeDataSet = fe.preprocess_data(MultiDataSet)

# Two extra per-row features, both expressed relative to the close price:
#   change         = (close - open) / close
#   daily_variance = (high - low) / close
processeDataSet['change'] = (
    (processeDataSet['close'] - processeDataSet['open']) / processeDataSet['close']
)
processeDataSet['daily_variance'] = (
    (processeDataSet['high'] - processeDataSet['low']) / processeDataSet['close']
)

Successfully added technical indicators


In [112]:
# Split the engineered data by date: 2013-2018 for training and 2019 for
# trading/backtesting. Both splits get a fresh 0..n-1 index.
# NOTE(review): whether data_split treats `end` as inclusive is not visible
# here -- confirm the two ranges do not share 2019-01-01.
train = data_split(processeDataSet, start='2013-01-01', end='2019-01-01')
train = train.reset_index(drop=True)

trade = data_split(processeDataSet, start='2019-01-01', end='2020-01-01')
trade = trade.reset_index(drop=True)

# Candidate subset of information columns.
info_col = [
    'change', 'daily_variance', 'close',
    'macd', 'rsi_30', 'cci_30',
    'kdjk', 'kdjd', 'wr_30',
]

# Optional standardisation step (disabled): fit a scaler on the training
# split only, then apply it to the trading split.
# feature_list = list(train.columns)
# feature_list.remove('date')
# feature_list.remove('tic')
# feature_list.remove('close')
# print(feature_list)

# data_normaliser = preprocessing.StandardScaler()
# train[feature_list] = data_normaliser.fit_transform(train[feature_list])
# trade[feature_list] = data_normaliser.transform(trade[feature_list])


In [110]:
# Display the training split for a visual sanity check of the engineered columns.
train

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,...,cci_30,close_30_sma,kdjk,kdjd,wr_30,wr_10,cci_10,rsi_10,change,daily_variance
0,2013-01-02,17.07,17.11,16.70,16.93,560518000.0,AAPL,0.011667,17.405391,15.934609,...,66.666667,16.670000,70.842399,59.747828,12.676056,12.676056,66.666667,100.000000,-0.008269,0.024217
1,2013-01-02,51.96,52.52,51.95,52.34,6091400.0,AXP,0.029391,53.537620,49.832380,...,66.666667,51.685000,75.260975,62.057295,6.949807,6.949807,66.666667,100.000000,0.007260,0.010890
2,2013-01-02,63.66,64.30,63.57,64.09,5036400.0,BA,0.031859,65.388183,61.371817,...,66.666667,63.380000,74.481812,61.611063,7.608696,7.608696,66.666667,100.000000,0.006709,0.011390
3,2013-01-02,72.19,73.35,72.19,72.62,11590000.0,CAT,0.067756,75.380925,66.839075,...,66.666667,71.110000,72.468788,60.766535,12.086093,12.086093,66.666667,100.000000,0.005921,0.015974
4,2013-01-02,15.75,15.92,15.67,15.92,40304500.0,CSCO,0.012115,16.413675,14.886325,...,66.666667,15.650000,72.063492,59.153439,0.000000,0.000000,66.666667,100.000000,0.010678,0.015704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45296,2018-12-31,130.04,130.79,128.61,130.30,7976000.0,V,-2.424976,143.914619,121.125381,...,-59.759916,133.437333,55.509465,39.862566,57.136860,27.537260,50.416733,48.559650,0.001995,0.016731
45297,2018-12-31,51.04,51.69,50.62,51.68,15331800.0,VZ,-0.736328,55.280891,48.811109,...,-69.827429,52.974000,55.018579,36.510777,58.362989,22.172949,61.780739,50.083547,0.012384,0.020704
45298,2018-12-31,64.39,64.56,63.64,64.19,4870900.0,WBA,-3.371809,84.692583,58.421417,...,-124.183353,73.439333,21.754425,16.556253,82.470703,72.448196,-39.673389,28.392513,-0.003116,0.014332
45299,2018-12-31,89.67,90.36,89.26,90.13,7005800.0,WMT,-1.489003,95.272863,83.169137,...,-14.350252,90.351333,69.629578,47.012399,55.241682,4.166667,108.712937,55.222442,0.005104,0.012205


In [114]:
# Training environment: cash-penalty trading env over the training split,
# with randomised episode starts (random_start=True).
train_env_kwargs = dict(
    initial_amount=1e6,
    hmax=5000,
    turbulence_threshold=None,
    currency='$',
    buy_cost_pct=3e-3,
    sell_cost_pct=3e-3,
    cash_penalty_proportion=0.2,
    cache_indicator_data=False,
    daily_information_cols=add_TA,
    print_verbosity=500,
    random_start=True,
)
e_train_gym = StockTradingEnvCashpenalty(df=train, **train_env_kwargs)

# get_sb_env() returns a pair; only the first element (the environment handed
# to the agent) is kept.
env_train, _ = e_train_gym.get_sb_env()

In [115]:
# Trading (evaluation) environment: same settings as the training env, but
# built on the trading split and with random_start=False.
trade_env_kwargs = dict(
    initial_amount=1e6,
    hmax=5000,
    turbulence_threshold=None,
    currency='$',
    buy_cost_pct=3e-3,
    sell_cost_pct=3e-3,
    cash_penalty_proportion=0.2,
    cache_indicator_data=False,
    daily_information_cols=add_TA,
    print_verbosity=500,
    random_start=False,
)
e_trade_gym = StockTradingEnvCashpenalty(df=trade, **trade_env_kwargs)

# get_sb_env() returns a pair; only the first element is kept.
env_trade, _ = e_trade_gym.get_sb_env()

In [117]:
# Build a PPO agent on the training environment and train it, evaluating on
# the trading environment every 500 timesteps.
# NOTE(review): the saved output of this cell ends in `KeyError: False`; the
# traceback is not preserved, so the root cause still needs to be found.
agent = DRLAgent(env=env_train)

# PPO hyper-parameters.
# NOTE(review): batch_size (1024) is larger than n_steps (256) -- confirm this
# is intended for the number of environments in use.
ppo_params = {
    'n_steps': 256,
    'ent_coef': 0.0,
    'learning_rate': 0.0002,
    'batch_size': 1024,
    'gamma': 0.99,
}

# Policy network: ten hidden layers of 1024 units each.
# Left disabled: custom "activation_fn" (e.g. torch.nn.ReLU / Softsign) and
# "squash_output" overrides.
policy_kwargs = {"net_arch": [1024] * 10}

model_ppo = agent.get_model(
    "ppo",
    model_kwargs=ppo_params,
    policy_kwargs=policy_kwargs,
    verbose=0,
)

# NOTE(review): the TensorBoard tag says "highlr" while learning_rate is
# 0.0002 -- confirm the tag still describes this run.
model_ppo.learn(
    total_timesteps=50000,
    eval_env=env_trade,
    eval_freq=500,
    log_interval=1,
    tb_log_name='env_cashpenalty_highlr',
    n_eval_episodes=1,
)

{'n_steps': 256, 'ent_coef': 0.0, 'learning_rate': 0.0002, 'batch_size': 1024, 'gamma': 0.99}
   2| 215|CASH SHORTAGE  |$2,851         |$1,054,583     |-0.07139% |5.45833%  |0.27%     
   3| 125|CASH SHORTAGE  |$9,333         |$1,043,074     |-0.12497% |4.30743%  |0.89%     
   4| 119|CASH SHORTAGE  |$36,800        |$977,676       |-0.15215% |-2.23241% |3.76%     
EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION
   1| 227|CASH SHORTAGE  |$2,598         |$1,076,816     |-0.05989% |7.68165%  |0.24%     
Eval num_timesteps=500, episode_reward=0.02 +/- 0.00
Episode length: 228.00 +/- 0.00
New best mean reward!
   5| 103|CASH SHORTAGE  |$28,641        |$1,051,795     |-0.12614% |5.17946%  |2.72%     
   6| 101|CASH SHORTAGE  |$28,584        |$987,727       |-0.17944% |-1.22729% |2.89%     
   7| 118|CASH SHORTAGE  |$21,474        |$1,016,345     |-0.14021% |1.63451%  |2.11%     


KeyError: False

In [101]:
# Run the trained model over the trading environment and summarise the
# resulting account-value series.
# NOTE(review): this cell's execution count (101) is older than the training
# cell's (117), so the output shown below may come from an earlier model --
# re-run after training.

# Override hmax on the trading env (it was constructed with hmax=5000).
e_trade_gym.hmax = 500

df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=model_ppo,
    environment=e_trade_gym,
)

print("==============Get Backtest Results===========")
perf_stats_all = backtest_stats(
    account_value=df_account_value,
    value_col_name='total_assets',
)

EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION
   1| 251|Last Date      |$904,986       |$1,001,886     |0.00075%  |0.18862%  |90.33%    
hit end!
Annual return          0.001894
Cumulative returns     0.001886
Annual volatility      0.007460
Sharpe ratio           0.258350
Calmar ratio           0.274877
Stability              0.085906
Max drawdown          -0.006889
Omega ratio            1.047096
Sortino ratio          0.340876
Skew                        NaN
Kurtosis                    NaN
Tail ratio             0.946872
Daily value at risk   -0.000932
dtype: float64


In [95]:
# Preview the first rows of the per-date actions returned by DRL_prediction.
df_actions.head()

Unnamed: 0,date,actions,transactions
0,2019-01-03,"[0.7214706, -0.26517645, 0.26131022, 0.2459640...","[1.8737050173367753, -0.0, 0.08304918158476986..."
1,2019-01-04,"[1.0, 1.0, -1.0, -0.9070668, -0.0899448, 0.742...","[2.8843680289176206, 1.1080784849474987, -0.08..."
2,2019-01-07,"[-0.64323676, -0.97596407, 0.25340176, -0.2420...","[-1.7793714649745784, -1.034815385750397, 0.07..."
3,2019-01-08,"[-1.0, 0.71171653, 0.40992773, 1.0, -0.8348412...","[-2.772448329380534, 0.7505595639186974, 0.128..."
4,2019-01-09,"[-1.0, 0.39127356, -1.0, 0.94297314, -0.896303...","[-0.206253251899283, 0.41062017205129975, -0.2..."


In [99]:
# List the columns of the account-value frame returned by DRL_prediction.
df_account_value.columns

Index(['cash', 'asset_value', 'total_assets', 'reward', 'date'], dtype='object')