In [13]:
import sys
sys.path.append("./finrl")
from env_stocktrading import StockTradingEnv
from preprocessors import data_split

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split

import pandas as pd
import pickle 

# Date boundaries for the training window and the trading (evaluation) window.
TRAIN_START_DATE, TRAIN_END_DATE = '2010-01-01', '2021-01-01'
TRADE_START_DATE, TRADE_END_DATE = '2021-01-02', '2023-03-24'

# Read the pickled DataFrame that the rest of the notebook works on.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle('dataset/processed.pkl')

In [14]:
# Names of the technical-indicator columns; the list is passed to the environment
# as `tech_indicator_list` and its length feeds into the state-space size.
INDICATORS = [
    'macd',
    'rsi_14',
    'rsi_21',
    'rsi_28',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
]

In [15]:
import psutil
import ray
# Replace Ray's private system-memory probe with a function returning psutil's
# reported total virtual-memory size.
# NOTE(review): presumably a workaround for Ray mis-detecting available memory on
# this host — confirm it is still required. It patches a private module
# (ray._private), which may change between Ray releases.
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env

from gymnasium.wrappers import EnvCompatibility

In [16]:
# Cut the dataset into the training period and the trading period,
# then report how many rows ended up in each.
train = data_split(df, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(df, TRADE_START_DATE, TRADE_END_DATE)
print(len(train), len(trade), sep="\n")

80301
16211


In [17]:
# Number of distinct tickers in the training split.
stock_dimension = len(train['tic'].unique())
# One scalar plus, for every ticker, two values and one value per indicator.
# NOTE(review): presumably cash + (price, holdings) per ticker — confirm against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 378


In [18]:
# Cost value of 0.001 per ticker for both buying and selling.
# NOTE: both names refer to the same list object.
sell_cost_list = [0.001] * stock_dimension
buy_cost_list = sell_cost_list
# Zero shares held for every ticker at the start.
num_stock_shares = [0 for _ in range(stock_dimension)]

# Keyword arguments shared by every StockTradingEnv built in this notebook.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [19]:
# Environment over the trading split, with the turbulence settings added on top
# of the shared keyword arguments.
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=70,
    risk_indicator_col='vix',
    **env_kwargs
)

In [20]:
def env_creator_trading(env_config):
    """Create the trading StockTradingEnv, wrapped in EnvCompatibility.

    Each constructor argument is taken from ``env_config`` when the key is
    present; otherwise the notebook-level default listed below is used.

    Args:
        env_config: mapping of optional overrides (may be empty).

    Returns:
        An ``EnvCompatibility`` wrapper around a new ``StockTradingEnv``.
    """
    fallbacks = {
        'df': trade,  # the trading split, not the training split
        'hmax': 100,
        'initial_amount': 1000000,
        'num_stock_shares': [0] * stock_dimension,
        'buy_cost_pct': buy_cost_list,
        'sell_cost_pct': sell_cost_list,
        'state_space': 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension,
        'stock_dim': stock_dimension,
        'tech_indicator_list': INDICATORS,
        'action_space': stock_dimension,
        'reward_scaling': 1e-4,
        # settings specific to the trading environment
        'turbulence_threshold': 70,
        'risk_indicator_col': 'vix',
    }
    # Only the keys listed above are forwarded; anything else in env_config is ignored.
    env_settings = {key: env_config.get(key, default) for key, default in fallbacks.items()}
    return EnvCompatibility(StockTradingEnv(**env_settings))

In [21]:
# NOTE(review): the config object printed further down lives in
# ray.rllib.algorithms.ppo, so `ray.rllib.agents` looks like a legacy import
# path — confirm it is still supported by the installed Ray version.
from ray.rllib.agents import ppo
# Shut down Ray before the trainer is built later in the notebook.
# NOTE(review): presumably to clear a runtime left over from an earlier run — confirm.
ray.shutdown()

In [25]:
# PPO configuration for evaluation, assembled as a single fluent chain.
config = (
    ppo.PPOConfig()
    .environment(
        env_config={'hmax':100, 'initial_amount':1000000}
    )
    # Training: called without arguments, so no training option is overridden here.
    .training()
    # Resources: CPU only, for rollout workers and trainer workers alike.
    .resources(
        num_gpus=0,
        num_cpus_per_worker=1,
        num_gpus_per_worker=0,
        num_trainer_workers=2,
        num_gpus_per_trainer_worker=0,
        num_cpus_per_trainer_worker=1,
    )
    # Framework: not set explicitly; add .framework(framework="torch") here to pick one.
    # Rollouts
    .rollouts(
        num_rollout_workers=2,
        num_envs_per_worker=1,
    )
    # Evaluation run, so exploration is switched off.
    .exploration(
        explore=False
    )
)
print(config)

<ray.rllib.algorithms.ppo.ppo.PPOConfig object at 0x7efe57f9ec10>


In [26]:
# Register the trading environment creator with Ray under the name
# "finrl_trading", then build the PPO trainer against that registered name.
register_env("finrl_trading", env_creator_trading)
trainer = ppo.PPOTrainer(
    config=config,
    env='finrl_trading',
)

[2m[36m(RolloutWorker pid=15356)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=15349)[0m   logger.warn("Casting input x to numpy array.")
2023-03-30 20:13:04,497	INFO trainable.py:172 -- Trainable.setup took 35.769 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [28]:
# Load the saved agent into the trainer from a fixed checkpoint path.
# Disabled alternative that derives the path from `total_episodes`:
#cwd_checkpoint = f"results/checkpoints/ppo_{total_episodes+1}/checkpoint_0000{total_episodes+1}"
#print(f"Restoring from checkpoint {total_episodes+1}")
cwd_checkpoint = 'model/checkpoint_001000'
trainer.restore(cwd_checkpoint)

2023-03-30 20:13:04,693	INFO trainable.py:791 -- Restored on 172.24.67.242 from checkpoint: model/checkpoint_001000
2023-03-30 20:13:04,694	INFO trainable.py:800 -- Current state after restoring: {'_iteration': 1000, '_timesteps_total': None, '_time_total': 11608.134165763855, '_episodes_total': 1438}


In [29]:
import time 
def DRL_prediction(model, environment):
    """Run one episode of ``model`` on ``environment`` and return its records.

    Args:
        model: agent exposing ``compute_single_action(state)``.
        environment: environment exposing ``df``, ``reset()``, ``step(action)``
            (returning ``(state, reward, done, info)``), ``save_asset_memory()``
            and ``save_action_memory()``.

    Returns:
        Tuple ``(account_memory, actions_memory)`` holding the values returned
        by ``environment.save_asset_memory()`` and
        ``environment.save_action_memory()``.
    """
    start = time.time()
    # The number of distinct index values bounds the episode length; compute it once.
    n_steps = len(environment.df.index.unique())
    account_memory = actions_memory = None
    captured = False

    state = environment.reset()
    for i in range(n_steps):
        action = model.compute_single_action(state)
        state, reward, done, _ = environment.step(action)
        # Snapshot right after step n_steps - 2. If the episode finishes before
        # any snapshot was taken, snapshot at that point instead so the return
        # values are always defined.
        if i == n_steps - 2 or (done and not captured):
            account_memory = environment.save_asset_memory()
            actions_memory = environment.save_action_memory()
            captured = True
        if done:
            break

    # Covers data with fewer than two distinct index values where the episode
    # never reported done, so neither condition above fired.
    if not captured:
        account_memory = environment.save_asset_memory()
        actions_memory = environment.save_action_memory()

    print("Test Finished!")
    print((time.time()-start),'s')
    return account_memory, actions_memory

In [30]:
# Run the restored trainer over the trading environment and keep both result tables.
df_account_value, df_actions = DRL_prediction(model=trainer, environment=e_trade_gym)

Test Finished!
2.372232437133789 s


In [31]:
# Show the last rows of the account-value table returned by DRL_prediction.
df_account_value.tail()

Unnamed: 0,date,account_value
554,2023-03-17,961399.797539
555,2023-03-20,973393.081511
556,2023-03-21,983439.355478
557,2023-03-22,973221.208904
558,2023-03-23,980131.726567


In [32]:
# Show the last 100 rows of the actions table (one column per ticker).
df_actions.tail(100)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-10-27,-52,0,95,58,100,-67,0,0,-23,-74,...,-6,79,0,-100,-11,0,-27,0,67,100
2022-10-28,0,-24,90,0,0,-33,0,0,0,0,...,-16,100,0,-100,0,0,0,-9,0,0
2022-10-31,85,-100,-36,-100,-100,0,100,2,0,-79,...,100,-100,0,-100,74,0,0,42,-22,77
2022-11-01,-29,100,-83,100,0,100,100,100,0,-100,...,100,0,0,86,0,0,0,1,-18,100
2022-11-02,0,-41,0,0,0,0,0,0,0,0,...,-100,0,0,0,0,0,0,4,-27,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-16,0,0,0,100,-31,0,2,-100,0,0,...,-76,0,0,25,0,0,-30,0,-23,-95
2023-03-17,0,0,-95,-100,0,0,100,0,0,-42,...,100,0,1,0,0,0,0,0,-1,100
2023-03-20,100,0,100,0,0,0,0,0,0,100,...,-100,0,-1,100,100,0,-100,0,100,-17
2023-03-21,0,0,-100,0,0,100,-40,0,100,-100,...,0,0,100,-3,0,0,-76,0,100,100


In [33]:
# Column-wise sum of the actions table, i.e. one total per ticker.
df_actions.sum()

AAPL    3446
AMGN       0
AXP        0
BA         0
CAT        0
CRM      100
CSCO     270
CVX        0
DIS        0
GS       132
HD         0
HON        0
IBM       16
INTC       0
JNJ        0
JPM       68
KO         0
MCD       16
MMM        0
MRK        0
MSFT       0
NKE      100
PG       122
TRV       23
UNH        0
V        625
VZ         1
WBA      311
WMT     1146
dtype: int64

In [34]:
# Write the actions table to CSV without its index.
# NOTE(review): the displayed table uses `date` as its index, so index=False
# leaves the dates out of the file — confirm that is intended.
df_actions.to_csv('actions1.csv', index=False)

In [None]:
import datetime
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

print("==============Get Backtest Results===========")
# Timestamp string; only referenced by the disabled CSV export below.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# Compute the backtest statistics for the account-value table and hold them as a DataFrame.
perf_stats_all = pd.DataFrame(backtest_stats(account_value=df_account_value))
#perf_stats_all.to_csv("./"+RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

In [None]:
# Baseline stats: fetch the ^DJI baseline over the same date span as the
# account-value table and compute its backtest statistics.
print("==============Get Baseline Stats===========")
# First and last dates are read by position, so the lookup does not depend on
# df_account_value carrying a default 0..n-1 index.
baseline_df = get_baseline(
        ticker="^DJI",
        start = df_account_value['date'].iloc[0],
        end = df_account_value['date'].iloc[-1])

stats = backtest_stats(baseline_df, value_col_name = 'close')

In [None]:
print("==============Compare to DJIA===========")
%matplotlib inline
# S&P 500: ^GSPC
# Dow Jones Index: ^DJI
# NASDAQ 100: ^NDX
backtest_plot(df_account_value, 
             baseline_ticker = '^DJI', 
             baseline_start = df_account_value.loc[0,'date'],
             baseline_end = df_account_value.loc[len(df_account_value)-1,'date'])