In [2]:
import pandas as pd
import pickle 
import sys
sys.path.append("/Ray_finrl/findrl_ray/finenv")
sys.path.append("./FinRL")
import finrl
from finenv.env_stocktrading import StockTradingEnv
from finenv.preprocessors import data_split
from finenv.save_model import upload_files
import psutil
import ray
from datetime import datetime
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env
from gymnasium.wrappers import EnvCompatibility

# load the DataFrame from a pickle file
#df = pd.read_pickle('dataset/nasdaq74_proc_test.pkl')
train = pd.read_csv('dataset/train_data.csv')
train = train.set_index(train.columns[0])
train.index.names = ['']

In [2]:
INDICATORS = ['macd','boll_ub','boll_lb','rsi_30','cci_30','dx_30','close_30_sma','close_60_sma']

In [3]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 80, State Space: 801


In [4]:
train.head()
train.dtypes

date             object
tic              object
open            float64
high            float64
low             float64
close           float64
volume          float64
day             float64
macd            float64
boll_ub         float64
boll_lb         float64
rsi_30          float64
cci_30          float64
dx_30           float64
close_30_sma    float64
close_60_sma    float64
dtype: object

In [5]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

In [7]:
def env_creator(env_config):
    # env_config is passed as {} and defaults are set here
    df = env_config.get('df', train)
    hmax = env_config.get('hmax', 200)
    initial_amount = env_config.get('initial_amount', 1000000)
    num_stock_shares = env_config.get('num_stock_shares', [0] * stock_dimension)
    buy_cost_pct = env_config.get('buy_cost_pct', buy_cost_list)
    sell_cost_pct = env_config.get('sell_cost_pct', sell_cost_list)
    state_space = env_config.get('state_space', 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension)
    stock_dim = env_config.get('stock_dim', stock_dimension)
    tech_indicator_list = env_config.get('tech_indicator_list', INDICATORS)
    action_space = env_config.get('action_space', stock_dimension)
    reward_scaling = env_config.get('reward_scaling', 1e-2)

    return EnvCompatibility(StockTradingEnv(
        df=df,
        hmax=hmax,
        initial_amount=initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_pct,
        sell_cost_pct=sell_cost_pct,
        state_space=state_space,
        stock_dim=stock_dim,
        tech_indicator_list=tech_indicator_list,
        action_space=action_space,
        reward_scaling=reward_scaling
    ))

In [1]:
from ray.rllib.agents import ppo
ray.shutdown()
#ray.init(num_cpus=45,num_gpus=4,dashboard_port=8300)
print(f"ray is being initialized")

NameError: name 'ray' is not defined

In [10]:
config = ppo.PPOConfig()
config = config.environment(env_config={'hmax':500,'initial_amount':300000})
config = config.training(gamma=0.9, lr=0.001, kl_coeff=0.3)  
config = config.resources(num_gpus=0)
config = config.rollouts(num_rollout_workers=8) 
config = config.framework(framework="torch")
config['seed'] = 42
config["model"]["fcnet_hiddens"] = [1024, 256, 128,16]

In [11]:
# registering the environment to ray
register_env("finrl", env_creator)
#trainer = config.build(env="finrl") 
trainer = ppo.PPOTrainer(env='finrl', config=config)

2023-04-02 21:29:55,213	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[2m[36m(RolloutWorker pid=1639)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1626)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1652)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1666)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1708)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1680)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1695)[0m   logger.warn("Casting input x to numpy array.")
[2m[36m(RolloutWorker pid=1722)[0m   logger.warn("Casting input x to numpy array.")
2023-04-02 21:30:05,597	INFO trainable.py:172 -- Trainable.setup took 16.993 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True t

In [12]:
from tqdm.notebook import tqdm
# Train away -------------------------------------------------------------
total_episodes = 25 
agent_name = 'ppo'
ep = 0
results = []
bar = tqdm(total=total_episodes, desc="Episodes")
date = datetime.now().strftime('%y%m%d')

while ep <= total_episodes:
    results.append(trainer.train())
    ep += 1
    bar.update(n=1)
    rwd = results[-1]['episode_reward_mean']
    print(f'Mean Rwd:{rwd}')
    if ep % 25 == 0:
        #cwd_checkpoint = "results/checkpoints/" +  + '_' + str(ep)
        cwd_checkpoint = f"model/{agent_name}_{date}"
        trainer.save(cwd_checkpoint)
        print(f"Checkpoint{ep} saved in directory {cwd_checkpoint}")
bar.close()

Episodes:   0%|          | 0/25 [00:00<?, ?it/s]

Mean Rwd:nan
Mean Rwd:nan
Mean Rwd:nan
Mean Rwd:nan
Mean Rwd:nan
Mean Rwd:5375.648789950077
Mean Rwd:5375.648789950077
Mean Rwd:5375.648789950077
Mean Rwd:5375.648789950077
Mean Rwd:5375.648789950077
Mean Rwd:1555.9627553721207
Mean Rwd:1555.9627553721207
Mean Rwd:1555.9627553721207
Mean Rwd:1555.9627553721207
Mean Rwd:1555.9627553721207
Mean Rwd:221.35846138788997
Mean Rwd:221.35846138788997
Mean Rwd:221.35846138788997
Mean Rwd:221.35846138788997
Mean Rwd:221.35846138788997
Mean Rwd:-405.1647962186432
Mean Rwd:-405.1647962186432
Mean Rwd:-405.1647962186432
Mean Rwd:-405.1647962186432
Mean Rwd:-405.1647962186432
Checkpoint saved in directory model/
Mean Rwd:-405.1647962186432


In [13]:
## Mean Rewards ### 
print(results[-1]['episode_reward_mean'])
#print(results[-1])

-405.1647962186432


In [14]:
# save the trained agent
#cwd_checkpoint = "model/" + str(agent_name) + '_' + str(ep)
cwd_checkpoint = "model/"
trainer.save(cwd_checkpoint)

'model/checkpoint_000026'