In [1]:
import pandas as pd
import pickle 
import os
import finrl
from finenv.env_stocktrading import StockTradingEnv
from finenv.preprocessors import data_split
from finenv.save_model import *
import psutil
import ray
from datetime import datetime
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total
from ray.tune.registry import register_env
from gymnasium.wrappers import EnvCompatibility
from ray.rllib.agents import ppo
from ray.rllib.algorithms.td3 import TD3Config

#os.environ["OMP_NUM_THREADS"] = "1"
#os.environ["CUBLAS_WORKSPACE_CONFIG"]=":4096:8"

In [2]:
csv = False
if csv:
    train = pd.read_csv('dataset/train_data.csv')
    train = train.set_index(train.columns[0])
    train.index.names = ['']
    INDICATORS = ['macd','boll_ub','boll_lb','rsi_30','cci_30','dx_30','close_30_sma','close_60_sma']

else: 
    train = pd.read_pickle('dataset/qqq_train.pkl')
    train['date'] = pd.to_datetime(train['date'])
    train['date'] = train['date'].dt.strftime('%Y-%m-%d %H:%M')
    INDICATORS = ['macd','rsi','cci','adx']
    
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
train.head(5) 

Stock Dimension: 1, State Space: 7


Unnamed: 0,date,open,high,low,close,volume,tic,macd,rsi,cci,adx
0,2016-01-04 09:30,110.26,110.26,110.22,110.24,12700.0,QQQ,0.0,0.0,0.0,0.0
1,2016-01-04 09:45,110.14,110.14,110.0,110.0,200.0,QQQ,0.0,0.0,0.0,0.0
2,2016-01-04 10:00,110.07,110.07,110.07,110.07,400.0,QQQ,0.0,0.0,0.0,0.0
3,2016-01-04 10:15,110.05,110.05,110.02,110.03,700.0,QQQ,0.0,0.0,0.0,0.0
4,2016-01-04 10:30,109.95,110.03,109.95,110.03,800.0,QQQ,0.0,0.0,0.0,0.0


In [3]:
def env_creator(env_config):
    # env_config is passed as {} and defaults are set here
    df = env_config.get('df', train)
    hmax = env_config.get('hmax', 200)
    initial_amount = env_config.get('initial_amount', 1000000)
    num_stock_shares = env_config.get('num_stock_shares', [0] * stock_dimension)
    buy_cost_pct = env_config.get('buy_cost_pct', buy_cost_list)
    sell_cost_pct = env_config.get('sell_cost_pct', sell_cost_list)
    state_space = env_config.get('state_space', 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension)
    stock_dim = env_config.get('stock_dim', stock_dimension)
    tech_indicator_list = env_config.get('tech_indicator_list', INDICATORS)
    action_space = env_config.get('action_space', stock_dimension)
    reward_scaling = env_config.get('reward_scaling', 1e-3)

    return EnvCompatibility(StockTradingEnv(
        df=df,
        hmax=hmax,
        initial_amount=initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_pct,
        sell_cost_pct=sell_cost_pct,
        state_space=state_space,
        stock_dim=stock_dim,
        tech_indicator_list=tech_indicator_list,
        action_space=action_space,
        reward_scaling=reward_scaling
    ))
register_env("finrl", env_creator)

In [4]:
ray.shutdown()
#ray.init(num_cpus=122,dashboard_port=8080)
print(f"ray is being initialized")

ray is being initialized


In [5]:
from ray.rllib.algorithms.ddpg.ddpg import DDPGConfig
#config = DDPGConfig().training(lr=0.01).resources(num_gpus=1).framework(framework="torch").rollouts(num_rollout_workers=10)
config = TD3Config().training(lr=0.01).resources(num_gpus=1).framework(framework="torch").rollouts(num_rollout_workers=10)
config["model"]["fcnet_hiddens"] = [256, 256, 256]
config['train_batch_size'] = 1024
print(config.to_dict())  
# Build a Trainer object from the config and run one training iteration.
trainer = config.build(env="finrl") 

{'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 1, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'num_trainer_workers': 0, 'num_gpus_per_trainer_worker': 0, 'num_cpus_per_trainer_worker': 1, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': False, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'env': None, 'env_config': {}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': True, 'clip_actions': False, 'disable_env_checking': False, 'is_atari': None, 'auto_wrap_old_gym_envs': True, 'num_envs_per_worker': 1, 'samp

2023-04-08 10:26:43,159	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[2m[36m(RolloutWorker pid=30421)[0m 2023-04-08 10:26:54,789	ERROR worker.py:772 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=30421, ip=172.21.0.25, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f71c67bac70>)
[2m[36m(RolloutWorker pid=30421)[0m   File "/home/ga_aiot/anaconda3/envs/finrl/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 607, in __init__
[2m[36m(RolloutWorker pid=30421)[0m     self.env = env_creator(copy.deepcopy(self.env_context))
[2m[36m(RolloutWorker pid=30421)[0m   File "/tmp/ipykernel_28521/4124244707.py", line 7, in env_creator
[2m[36m(RolloutWorker pid=30421)[0m NameError: name 'buy_cost_list' is not defined
[2m[36m(RolloutWorker pid=30400)[0m 2023-04-08 10:26:54

NameError: name 'buy_cost_list' is not defined

In [None]:
from tqdm.notebook import tqdm
# Train away -------------------------------------------------------------
total_episodes = 200
agent_name = 'td3'
ep = 0
results = []
bar = tqdm(total=total_episodes, desc="Episodes")
date = datetime.now().strftime('%y%m%d')

while ep <= total_episodes:
    results.append(trainer.train())
    ep += 1
    bar.update(n=1)
    rwd = results[-1]['episode_reward_mean']
    if ep % 20 == 0:
        print(f'Mean Rwd:{rwd}')
    if ep % 100 == 0:
        #cwd_checkpoint = "results/checkpoints/" +  + '_' + str(ep)
        cwd_checkpoint = f"model/{agent_name}_{date}"
        trainer.save(cwd_checkpoint)
        print(f"Checkpoint{ep} saved in directory {cwd_checkpoint}")
bar.close()

In [None]:
#Save latest ckpt point
cwd_checkpoint = f"model/org_{agent_name}_{date}_{ep}"
trainer.save(cwd_checkpoint)
#Extract model weights 
model_weights = trainer.get_policy().get_weights()
print('passed model weights')
config2 = TD3Config()
print('config created')
config2 = config2.environment(env_config={'hmax':500,'initial_amount':1000000})  
config2 = config2.rollouts(num_rollout_workers=0) 
config2 = config2.framework(framework="torch")
config2["model"]["fcnet_hiddens"] = [256, 256, 256]
trainer2 = config2.build(env="finrl") 
trainer2.get_policy().set_weights(model_weights)
print('New Weights loaded. ')
ckpt2 = f"{cwd_checkpoint}_wt"
trainer2.save(ckpt2)
zipped_filename = f'ckpt_wt{date}_{ep}.zip'


In [None]:
#zipped = zipfilem(ckpt2,zipped_filename)
zipped = 'test'
ckpt2 = 'test'
st = sendfile('ckpt_wt230408_53.zip')
print(f'file {zipped} from {ckpt2} ; {st}')