In [1]:
import pandas as pd
import pickle 
import os
import finrl
from finenv.env_stocktrading import StockTradingEnv
from finenv.preprocessors import data_split
from finenv.save_model import *
import psutil
import ray
from datetime import datetime
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env
from gymnasium.wrappers import EnvCompatibility
from ray.rllib.agents import ppo
from ray.rllib.algorithms.td3 import TD3Config
from ray.rllib.algorithms.ddpg.ddpg import DDPGConfig

In [17]:
# Select which training set to load; each dataset ships its own indicator columns.
csv = True
if csv:
    train_path = 'dataset/train_data.csv'
    INDICATORS = ['macd','boll_ub','boll_lb','rsi_30','cci_30','dx_30','close_30_sma','close_60_sma']
else:
    train_path = 'dataset/idxetf_train.csv'
    INDICATORS = ['macd','rsi','cci','adx']

# Both datasets are loaded the same way: the first CSV column becomes the
# index and the index name is blanked.
train = pd.read_csv(train_path)
train = train.set_index(train.columns[0])
#train.reset_index(drop=True, inplace=True)
train.index.names = ['']

# Number of distinct tickers in the training frame.
stock_dimension = len(train.tic.unique())

# Two independent lists: the original chained assignment bound both names to
# the SAME list object, so an in-place change to one would silently change the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension

# One scalar, two values per ticker, plus every indicator per ticker.
# NOTE(review): presumably cash + (price, holding) per ticker -- confirm against StockTradingEnv.
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
train.head(10)

Stock Dimension: 82, State Space: 821


Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
,,,,,,,,,,,,,,,,,,
0.0,2010-01-04,AAL,4.84,4.94,4.66,4.496876,9837300.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,4.496876,4.496876,20.040001,0.0
0.0,2010-01-04,AAPL,7.6225,7.660714,7.585,6.50528,493729600.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,6.50528,6.50528,20.040001,0.0
0.0,2010-01-04,ADBE,36.650002,37.299999,36.650002,37.09,4710200.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,37.09,37.09,20.040001,0.0
0.0,2010-01-04,ADI,31.790001,32.189999,31.610001,22.960766,2102700.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,22.960766,22.960766,20.040001,0.0
0.0,2010-01-04,ADP,38.226513,38.226513,37.489025,27.336391,3930120.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,27.336391,27.336391,20.040001,0.0
0.0,2010-01-04,ADSK,25.610001,25.83,25.610001,25.67,2228600.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,25.67,25.67,20.040001,0.0
0.0,2010-01-04,ALGN,18.049999,18.5,18.049999,18.5,374200.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,18.5,18.5,20.040001,0.0
0.0,2010-01-04,AMAT,14.05,14.57,14.03,11.302063,18615100.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,11.302063,11.302063,20.040001,0.0
0.0,2010-01-04,AMD,9.79,9.9,9.68,9.7,18748700.0,0.0,0.0,5.471366,4.031468,100.0,66.666667,100.0,9.7,9.7,20.040001,0.0


In [18]:
def env_creator(env_config):
    """Build the trading environment that is registered with RLlib as "finrl".

    Args:
        env_config: Mapping of optional overrides; it may be empty. Recognised
            keys are ``df``, ``hmax``, ``initial_amount``, ``num_stock_shares``,
            ``buy_cost_pct``, ``sell_cost_pct``, ``state_space``, ``stock_dim``,
            ``tech_indicator_list``, ``action_space`` and ``reward_scaling``.
            Any key that is absent falls back to the notebook-level globals
            (``train``, ``stock_dimension``, ``INDICATORS``, ``buy_cost_list``,
            ``sell_cost_list``) or to the literal defaults below.

    Returns:
        A ``StockTradingEnv`` wrapped in gymnasium's ``EnvCompatibility``.
    """
    df = env_config.get('df', train)
    hmax = env_config.get('hmax', 200)
    initial_amount = env_config.get('initial_amount', 1000000)
    reward_scaling = env_config.get('reward_scaling', 1e-2)

    # Resolve the two sizing inputs first so every size-dependent default
    # below agrees with them, even when only one of them is overridden.
    stock_dim = env_config.get('stock_dim', stock_dimension)
    tech_indicator_list = env_config.get('tech_indicator_list', INDICATORS)

    num_stock_shares = env_config.get('num_stock_shares', [0] * stock_dim)
    state_space = env_config.get(
        'state_space',
        1 + 2*stock_dim + len(tech_indicator_list)*stock_dim,
    )
    action_space = env_config.get('action_space', stock_dim)

    # Hand each environment its own copy of the shared fee lists so no
    # environment instance can mutate the notebook-level defaults.
    buy_cost_pct = env_config.get('buy_cost_pct', list(buy_cost_list))
    sell_cost_pct = env_config.get('sell_cost_pct', list(sell_cost_list))

    return EnvCompatibility(StockTradingEnv(
        df=df,
        hmax=hmax,
        initial_amount=initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_pct,
        sell_cost_pct=sell_cost_pct,
        state_space=state_space,
        stock_dim=stock_dim,
        tech_indicator_list=tech_indicator_list,
        action_space=action_space,
        reward_scaling=reward_scaling
    ))


In [19]:
# Stop any Ray instance left over from a previous run of this notebook before
# (re-)registering the environment factory under the name "finrl".
ray.shutdown()
register_env(name="finrl", env_creator=env_creator)

# An explicit start-up is intentionally left disabled here:
#   ray.init(num_cpus=122, dashboard_port=8080)
print("ray is being initialized")

ray is being initialized


In [20]:
# TD3 on the torch framework with one GPU and ten rollout workers.
# (A DDPG variant can be built the same way by starting from DDPGConfig().)
config = (
    TD3Config()
    .training(lr=0.01)
    .resources(num_gpus=1)
    .framework(framework="torch")
    .rollouts(num_rollout_workers=10)
)

# NOTE(review): TD3/DDPG configs expose actor_hiddens / critic_hiddens and
# actor_lr / critic_lr; model["fcnet_hiddens"] and lr may not be what the
# TD3 networks actually consume -- confirm against the RLlib version in use.
config["model"]["fcnet_hiddens"] = [1024, 1024]
config["train_batch_size"] = 1024
#print(config.to_dict())

# Build the trainer for the registered "finrl" environment.
trainer = config.build(env="finrl")

2023-04-09 20:49:28,604	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[2m[36m(RolloutWorker pid=3856)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3869)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3896)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3883)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3910)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3939)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3924)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3997)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=3953)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=4026)[0m   logger.warn("Casting input x to nump

In [21]:
from tqdm.notebook import tqdm
# Train away -------------------------------------------------------------
total_episodes = 500  # number of trainer.train() calls to perform
agent_name = 'td3'
ep = 0                # completed train() calls; read again by the checkpoint cell below
results = []          # one result dict per train() call
date = datetime.now().strftime('%y%m%d')
bar = tqdm(total=total_episodes, desc="Episodes")

try:
    # Strict '<' so exactly total_episodes iterations run; '<=' performed one
    # extra iteration and overran the progress bar's total.
    while ep < total_episodes:
        results.append(trainer.train())
        # Incremented only after train() returns, so an interrupt leaves `ep`
        # equal to the number of iterations that actually completed.
        ep += 1
        bar.update(n=1)
        rwd = results[-1]['episode_reward_mean']
        if ep % 20 == 0:
            print(f'Mean Rwd:{rwd}')
        if ep % 100 == 0:
            cwd_checkpoint = f"model/{agent_name}_{date}"
            trainer.save(cwd_checkpoint)
            print(f"Checkpoint{ep} saved in directory {cwd_checkpoint}")
finally:
    # Always release the progress bar, including when training is interrupted
    # by hand (KeyboardInterrupt still propagates to the notebook).
    bar.close()

Episodes:   0%|          | 0/500 [00:00<?, ?it/s]



Mean Rwd:nan
Mean Rwd:49705.832041753965


KeyboardInterrupt: 

In [None]:
# Save the latest checkpoint of the trained agent.
cwd_checkpoint = f"model/org_{agent_name}_{date}_{ep}"
trainer.save(cwd_checkpoint)

# Extract the trained policy weights.
model_weights = trainer.get_policy().get_weights()
print('passed model weights')

# Build a second TD3 trainer with no rollout workers and load the trained
# weights into it, then save that trainer as a separate checkpoint.
config2 = TD3Config()
print('config created')
config2 = config2.environment(env_config={'hmax':500,'initial_amount':1000000})
config2 = config2.rollouts(num_rollout_workers=0)
config2 = config2.framework(framework="torch")
# Mirror the hidden-layer setting of the training config instead of hard-coding
# a different one: weights can only be transferred between identical
# architectures, so the two configs must never diverge on this value.
config2["model"]["fcnet_hiddens"] = list(config["model"]["fcnet_hiddens"])
trainer2 = config2.build(env="finrl")
trainer2.get_policy().set_weights(model_weights)
print('New Weights loaded. ')

ckpt2 = f"{cwd_checkpoint}_wt"
trainer2.save(ckpt2)
# Archive name used by the following cell.
zipped_filename = f'ckpt_wt{date}_{ep}.zip'


In [None]:
# Archive the weight-only checkpoint and send the archive that was just created.
# NOTE(review): zipfilem / sendfile presumably come from the star-import of
# finenv.save_model; their return values are not visible here -- confirm.
zipped = zipfilem(ckpt2,zipped_filename)
# Send the archive produced above rather than a hard-coded file name left over
# from an earlier run, which would upload a stale (or missing) checkpoint.
st = sendfile(zipped_filename)
print(f'file: {zipped} ; {st}')