In [1]:
import pandas as pd
import pickle 
import sys
import os
import finrl
from finenv.env_stocktrading import StockTradingEnv
from finenv.preprocessors import data_split
from finenv.save_model import ftpsavemodel
import psutil
import ray
import numpy as np
from datetime import datetime
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env
from gymnasium.wrappers import EnvCompatibility
from ray.rllib.agents import ppo
from ray.rllib.algorithms.td3 import TD3Config
# load the DataFrame from a pickle file
os.environ["OMP_NUM_THREADS"] = "1"
#os.environ["CUBLAS_WORKSPACE_CONFIG"]=":4096:8"

In [2]:
daily =  False
if daily:
    train = pd.read_csv('dataset/train_data.csv')
    train = train.set_index(train.columns[0])
    #train.reset_index(drop=True, inplace=True)
    train.index.names = ['']
    INDICATORS = ['macd','boll_ub','boll_lb','rsi_30','cci_30','dx_30','close_30_sma','close_60_sma']

else: 
    train = pd.read_csv('dataset/idxetf_train0416.csv')
    train = train.set_index(train.columns[0])
    train.index.names = ['']
    train = train.sort_values(['date'])
    train.index = train['date'].factorize()[0]
    INDICATORS = ['macd','rsi_14','cci','adx','bollup','bolllb','sma_14','sma_21']
    train.head()
    
stock_dimension = len(train.tic.unique())
buy_cost_list = sell_cost_list = [0.001] * stock_dimension    
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
train.shape

Stock Dimension: 3, State Space: 19


(18660, 11)

In [3]:
def env_creator(env_config):
    # env_config is passed as {} and defaults are set here
    df = env_config.get('df', train)
    hmax = env_config.get('hmax', 200)
    initial_amount = env_config.get('initial_amount', 1000000)
    num_stock_shares = env_config.get('num_stock_shares', [0] * stock_dimension)
    buy_cost_pct = env_config.get('buy_cost_pct', buy_cost_list)
    sell_cost_pct = env_config.get('sell_cost_pct', sell_cost_list)
    state_space = env_config.get('state_space', 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension)
    stock_dim = env_config.get('stock_dim', stock_dimension)
    tech_indicator_list = env_config.get('tech_indicator_list', INDICATORS)
    action_space = env_config.get('action_space', stock_dimension)
    reward_scaling = env_config.get('reward_scaling', 1e-3)

    return EnvCompatibility(StockTradingEnv(
        df=df,
        hmax=hmax,
        initial_amount=initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_pct,
        sell_cost_pct=sell_cost_pct,
        state_space=state_space,
        stock_dim=stock_dim,
        tech_indicator_list=tech_indicator_list,
        action_space=action_space,
        reward_scaling=reward_scaling
    ))

register_env("finrl", env_creator)

In [1]:
ray.shutdown()
ray.init()
print(f"ray is being initialized")

NameError: name 'ray' is not defined

In [5]:
config = ppo.PPOConfig()
config = config.environment(env_config={'hmax':500,'initial_amount':1000000})
config = config.training(lr=0.001)  
config = config.resources(num_gpus=0,num_gpus_per_worker=0)
config = config.rollouts(num_rollout_workers=10) 
config = config.framework(framework="torch")
config["model"]["fcnet_hiddens"] = [512,512]
config['train_batch_size'] = 1024
#Build trainer with above config.
trainer = config.build(env="finrl") 

In [7]:
from tqdm.notebook import tqdm
# Train away -------------------------------------------------------------
total_episodes = 5 
agent_name = 'ppo'
ep = 0
results = []
bar = tqdm(total=total_episodes, desc="Episodes")
date = datetime.now().strftime('%y%m%d')

while ep <= total_episodes:
    results.append(trainer.train())
    ep += 1
    bar.update(n=1)
    rwd = results[-1]['episode_reward_mean']
    if ep % 5 == 0:
        print(f'Mean Rwd:{rwd}')
    if ep % 5 == 0:
        #cwd_checkpoint = "results/checkpoints/" +  + '_' + str(ep)
        cwd_checkpoint = f"model/{agent_name}_{date}"
        trainer.save(cwd_checkpoint)
        print(f"Checkpoint{ep} saved in directory {cwd_checkpoint}")
    if ep % 100 == 0:
        ep_rw = np.array(results[-1]['hist_stats']['episode_reward']).mean()
        total_ts = results[-1]['timesteps_total']
        print(f'EP Rwd:{ep_rw:.3f} ; Total ts:{total_ts}')

bar.close()

Episodes:   0%|          | 0/5 [00:00<?, ?it/s]



Mean Rwd:41856.129477016555
Checkpoint5 saved in directory model/ppo_230405


In [None]:
results[-1]

In [8]:
#Save latest ckpt point
cwd_checkpoint = f"model/org_{date}_{ep}"
trainer.save(cwd_checkpoint)
#Extract model weights 
model_weights = trainer.get_policy().get_weights()
print('passed model weights')
config2 = ppo.PPOConfig()
print('config created')
#config2 = config2.environment(env_config={'hmax':500,'initial_amount':1000000})
#config2 = config2.training(gamma=0.9, lr=0.005, kl_coeff=0.3)  
config2 = config2.rollouts(num_rollout_workers=0) 
config2 = config2.framework(framework="torch")
config2["model"]["fcnet_hiddens"] = [512,512]
config2['train_batch_size'] = 1000
trainer2 = ppo.PPOTrainer(env='finrl', config=config2)
trainer2.get_policy().set_weights(model_weights)
print('New Weights loaded. ')
ckpt2 = f"{cwd_checkpoint}_wt"
trainer2.save(ckpt2)
zip_filename = f'ckpt_wt{date}_{ep}.zip'
savefile = ftpsavemodel(ckpt2,zip_filename)
print(f'file{savefile}in{ckpt2}')

'model/checkpoint_000006'