In [1]:
import sys
sys.path.append('./Ray_finrl/findrl_ray/finenv')
from finenv.env_stocktrading import StockTradingEnv
from finenv.preprocessors import FeatureEngineer, data_split
import pandas as pd
import pickle 
# Load the trading-period data, promote the CSV's first column to the index,
# and give that index an empty name.
trade = (
    pd.read_csv('dataset/trade_data.csv')
    .pipe(lambda frame: frame.set_index(frame.columns[0]))
    .rename_axis('')
)

In [2]:
# Names of the technical-indicator columns handed to the trading environment
# as its `tech_indicator_list`.
INDICATORS = [
    'macd',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
]

In [3]:
import psutil
import ray
# Replace Ray's private `get_system_memory` helper with one that reports the
# machine's total memory as seen by psutil.
# NOTE(review): presumably a workaround for Ray mis-detecting available memory
# on this host; it patches a private API, so confirm it is still needed and
# still exists for the installed Ray version.
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env
from gymnasium.wrappers import EnvCompatibility

In [4]:
# Number of distinct tickers in the trading data; every per-stock setting
# below is sized from it.
stock_dimension = len(trade.tic.unique())

# Length of the observation vector: one scalar slot, two slots per ticker, and
# one slot per (indicator, ticker) pair.
# NOTE(review): presumably cash + (price, holdings) per ticker + indicators —
# confirm against StockTradingEnv.
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension

# Per-ticker transaction-cost fractions. Two independent lists are built on
# purpose: a chained assignment (`a = b = [...]`) would bind both names to the
# SAME list object, so editing buy costs would silently change sell costs.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension

# Start the episode holding no shares of any ticker.
num_stock_shares = [0] * stock_dimension

# Keyword arguments shared by every StockTradingEnv built in this notebook.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [5]:
def env_creator_trading(env_config):
    """Environment factory for RLlib: build the wrapped trading environment.

    Each setting is looked up in ``env_config``; a key that is absent falls
    back to the notebook-level default listed below (the trading data frame,
    the per-ticker cost lists, the indicator list, and sizes derived from
    ``stock_dimension``).

    Args:
        env_config: Mapping of overrides; may be empty.

    Returns:
        A ``StockTradingEnv`` wrapped in ``EnvCompatibility``.
    """
    # Defaults for the trading phase; `df` is the trading data set rather
    # than the one used for training.
    defaults = {
        'df': trade,
        'hmax': 100,
        'initial_amount': 1000000,
        'num_stock_shares': [0] * stock_dimension,
        'buy_cost_pct': buy_cost_list,
        'sell_cost_pct': sell_cost_list,
        'state_space': 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension,
        'stock_dim': stock_dimension,
        'tech_indicator_list': INDICATORS,
        'action_space': stock_dimension,
        'reward_scaling': 1e-4,
    }
    # Only the keys named above are forwarded; anything else in env_config
    # is ignored.
    settings = {
        key: env_config.get(key, fallback)
        for key, fallback in defaults.items()
    }
    return EnvCompatibility(StockTradingEnv(**settings))

In [6]:
from ray.rllib.agents import ppo
# Shut down any Ray runtime that is already running in this kernel.
# NOTE(review): presumably so that re-running the notebook starts from a clean
# Ray session — confirm.
ray.shutdown()

In [10]:
# PPO configuration used to rebuild the agent for inference.
#   - environment: overrides forwarded to the env factory as `env_config`
#   - framework:   PyTorch
#   - rollouts:    no remote rollout workers (sampling happens in-process)
#   - exploration: disabled, so actions are computed without exploration
#   - debugging:   fixed seed, set through the builder API instead of the
#                  legacy dict-style `config['seed'] = 0`
# NOTE(review): the checkpoint restored later must match the model settings it
# was trained with; a custom network (fcnet_hiddens=[1024, 256, 128, 32]) was
# previously sketched here but left disabled — confirm against training.
config = (
    ppo.PPOConfig()
    .environment(env_config={'hmax': 100, 'initial_amount': 1000000})
    .framework(framework="torch")
    .rollouts(num_rollout_workers=0)
    .exploration(explore=False)
    .debugging(seed=0)
)

In [11]:
# Register the environment factory with Ray under the name "finrl_trading",
# then build the algorithm from `config` against that registered name.
register_env("finrl_trading", env_creator_trading)
trainer = config.build(env="finrl_trading") 

  logger.warn("Casting input x to numpy array.")


In [12]:
# Load the saved agent: restore the trainer's state from a checkpoint path
# given relative to the current working directory.
cwd_checkpoint = 'model/checkpoint_000026'
trainer.restore(cwd_checkpoint)

2023-04-02 10:07:46,107	INFO trainable.py:791 -- Restored on 172.28.238.49 from checkpoint: model/checkpoint_000026
2023-04-02 10:07:46,108	INFO trainable.py:800 -- Current state after restoring: {'_iteration': 26, '_timesteps_total': None, '_time_total': 595.7833752632141, '_episodes_total': 30}


In [13]:
# Stand-alone (unwrapped) trading environment used to replay the restored
# agent. On top of the shared env_kwargs it sets no turbulence threshold and
# names 'vix' as the risk-indicator column.
# Variant without the risk settings: StockTradingEnv(df=trade, **env_kwargs)
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=None,
    risk_indicator_col='vix',
    **env_kwargs,
)

In [14]:
def DRL_prediction(model, environment):
    """Replay a trained agent over an environment and collect its records.

    The agent is stepped once per unique index value of ``environment.df``
    (or until the environment reports ``done``). The asset and action
    memories are captured right after the second-to-last step; if the episode
    never reaches that step (it ends early, or the frame has fewer than two
    unique index values) they are captured once after the loop instead, so
    the function always returns a result rather than raising
    ``UnboundLocalError``.

    Args:
        model: Object exposing ``compute_single_action(state)``.
        environment: Object exposing ``reset()``, ``step(action)`` returning
            ``(state, reward, done, info)``, a ``df`` attribute with an
            index, ``save_asset_memory()`` and ``save_action_memory()``.

    Returns:
        Tuple ``(account_memory, actions_memory)`` as returned by the
        environment's two ``save_*`` methods.
    """
    state = environment.reset()
    # Loop-invariant: number of distinct index values to iterate over.
    n_steps = len(environment.df.index.unique())
    account_memory = actions_memory = None
    captured = False
    for i in range(n_steps):
        action = model.compute_single_action(state)
        state, reward, done, _ = environment.step(action)
        if i == n_steps - 2:
            account_memory = environment.save_asset_memory()
            actions_memory = environment.save_action_memory()
            captured = True
        if done:
            break
    if not captured:
        # The capture step was never reached; take the memories as they
        # stand now instead of failing on unbound locals.
        account_memory = environment.save_asset_memory()
        actions_memory = environment.save_action_memory()
    return account_memory, actions_memory

In [16]:
# Replay the restored agent over the trading environment; keep the account
# record and the per-step actions it returns, then display the actions.
df_account_value, df_actions = DRL_prediction(model=trainer, environment = e_trade_gym)
df_actions

Unnamed: 0_level_0,AAL,AAPL,ADBE,ADI,ADP,ADSK,ALGN,AMAT,AMD,AMGN,...,TTWO,TXN,UAL,ULTA,VRSN,VRTX,WBA,WDC,WYNN,XEL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-05-13,0,0,17,0,0,0,0,100,0,92,...,0,0,100,0,3,0,28,73,100,75
2019-05-14,17,100,-17,0,68,0,45,-28,0,-53,...,100,0,21,25,100,26,37,100,24,-66
2019-05-15,21,-100,51,0,-46,83,-45,39,100,100,...,-19,23,-100,-25,13,88,13,-100,-39,-9
2019-05-16,-38,0,0,0,-22,65,0,100,-89,58,...,-29,94,-21,74,15,-100,-78,-2,-51,100
2019-05-17,0,0,0,0,0,0,0,0,-11,-5,...,0,-100,0,0,-96,0,0,-71,77,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-23,0,0,0,0,0,0,0,-28,0,-24,...,-17,0,0,0,0,0,1,0,0,0
2023-03-24,0,0,-77,0,0,0,0,-72,0,0,...,0,0,0,0,0,0,0,0,0,-100
2023-03-27,0,0,-23,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-03-28,0,0,100,0,0,31,0,0,1,0,...,0,0,0,0,0,0,-1,0,0,0


In [None]:
import json
# Move the index back into a regular column and turn each row of the actions
# frame into one dict.
data = df_actions.reset_index().to_dict(orient='records')
# Round-trip through a JSON string so `j` holds only JSON-compatible values.
json_str = json.dumps(data)
j = json.loads(json_str)
# Display the last record (a dict), i.e. the actions from the final row of
# df_actions.
j[-1]