In [1]:
import os
import pandas as pd
import gymnasium as gym
import pickle

from finrl.main import check_and_make_directories
from finrl.main import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

from stable_baselines3 import PPO
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure

# Prepare the output folder for trained models up front.
# (Presumably creates any directory in the list that does not exist yet --
# confirm against finrl.main.check_and_make_directories.)
required_dirs = [TRAINED_MODEL_DIR]
check_and_make_directories(required_dirs)

In [2]:
# Load the pre-split training and trading frames.
#
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file,
# so only load pickles that were produced by this project's own
# preprocessing step -- never files from an untrusted source.
#
# pd.read_pickle is the pandas-provided loader for pickled frames: it opens
# and closes the file itself, so no manual file-handle management is needed.
train = pd.read_pickle('data/train.pickle')
trade = pd.read_pickle('data/trade.pickle')

# Quick visual sanity check of the first rows.
train.head()

Unnamed: 0,date,close,high,low,open,volume,tic,day,gdp_log,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence,daily_return
0,2009-01-02,10.320797,10.358929,9.85687,9.914066,4236220,A,4,9.577127,0.0,10.989876,10.00125,100.0,66.666667,100.0,10.320797,10.320797,0.0,-0.233625
0,2009-01-02,7.909602,7.994448,7.230828,7.287392,5167000,AAL,4,9.577127,0.0,10.989876,10.00125,100.0,66.666667,100.0,7.909602,7.909602,0.0,-0.233625
0,2009-01-02,29.373983,29.511647,28.453358,29.133073,795900,AAP,4,9.577127,0.0,10.989876,10.00125,100.0,66.666667,100.0,29.373983,29.373983,0.0,2.713712
0,2009-01-02,2.724325,2.733032,2.556513,2.578127,746015200,AAPL,4,9.577127,0.0,10.989876,10.00125,100.0,66.666667,100.0,2.724325,2.724325,0.0,-0.907254
0,2009-01-02,17.51866,17.613515,17.129429,17.508848,13163193,ABT,4,9.577127,0.0,10.989876,10.00125,100.0,66.666667,100.0,17.51866,17.51866,0.0,5.430459


In [3]:
# Show the distinct tickers in the training set next to the indicator names
# imported from finrl.
train['tic'].unique(), INDICATORS

(array(['A', 'AAL', 'AAP', 'AAPL', 'ABT', 'ACN', 'ADBE', 'ADI', 'ADM',
        'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AFL', 'AIG', 'AIV', 'AIZ',
        'AJG', 'AKAM', 'ALB', 'ALGN', 'ALK', 'ALL', 'AMAT', 'AMD', 'AME',
        'AMG', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AON', 'AOS', 'APA', 'APD',
        'APH', 'ARE', 'ATO', 'AVB', 'AVY', 'AWK', 'AXP', 'AZO', 'BA',
        'BAC', 'BAX', 'BBT', 'BBY', 'BDX', 'BEN', 'BIIB', 'BK', 'BKNG',
        'BLK', 'BMY', 'BR', 'BSX', 'BWA', 'BXP', 'C', 'CAG', 'CAH', 'CAT',
        'CB', 'CBRE', 'CCI', 'CCL', 'CDNS', 'CE', 'CF', 'CHD', 'CHRW',
        'CI', 'CINF', 'CL', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI',
        'CMS', 'CNC', 'CNP', 'COF', 'COO', 'COP', 'COST', 'CPB', 'CPRT',
        'CRM', 'CSCO', 'CSX', 'CTAS', 'CTSH', 'CVS', 'CVX', 'D', 'DAL',
        'DD', 'DE', 'DGX', 'DHI', 'DHR', 'DIS', 'DLR', 'DLTR', 'DOV',
        'DRI', 'DTE', 'DUK', 'DVA', 'DVN', 'DXC', 'EA', 'EBAY', 'ECL',
        'ED', 'EFX', 'EIX', 'EL', 'EMN', 'EMR', 'EOG', 'EQIX', '

In [4]:
# Derive the observation size from the number of distinct tickers.
tickers = train['tic'].unique()
stock_dimension = len(tickers)

# Features tracked per stock: 2 base slots (presumably price and holdings --
# confirm against the env), one slot per entry in INDICATORS, plus 1 extra
# slot for the additional 'gdp_log' feature that is appended to the indicator
# list when the environment is built.
per_stock_features = 2 + len(INDICATORS) + 1

# The leading 1 is a single scalar slot (presumably the cash balance).
state_space = 1 + per_stock_features * stock_dimension
print(f'Stock Dimension: {stock_dimension}, state space: {state_space}')

Stock Dimension: 393, state space: 4324


In [7]:
# NOTE(review): notebook convention is to keep all imports in the first cell;
# consider moving this one there.
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# One cost entry per ticker for each side of a trade. The two lists are built
# independently: a chained assignment (`a = b = [...]`) would bind both names
# to the *same* list object, so an in-place change to one would silently
# change the other.
buy_cost_list = [0.005] * stock_dimension
sell_cost_list = [0.005] * stock_dimension

# Start with no shares held in any ticker.
num_stock_shares = [0] * stock_dimension

# Indicators fed to the environment: the finrl defaults plus the extra
# 'gdp_log' column present in the data.
tech_indicator_list = INDICATORS + ['gdp_log']

# Guard against the observation size drifting out of sync with the indicator
# list (same formula as the cell that computes `state_space`).
expected_state_space = 1 + (2 + len(tech_indicator_list)) * stock_dimension
assert state_space == expected_state_space, (
    f'state_space={state_space} does not match the indicator list '
    f'(expected {expected_state_space})'
)

env_kwargs = {
    'hmax': 100,
    'initial_amount': 1000000,
    'num_stock_shares': num_stock_shares,
    'buy_cost_pct': buy_cost_list,
    'sell_cost_pct': sell_cost_list,
    'state_space': state_space,
    'stock_dim': stock_dimension,
    'tech_indicator_list': tech_indicator_list,
    'action_space': stock_dimension,
    'reward_scaling': 1e-4,
}

# Build the training environment and obtain the wrapped env handed to the
# agent; the second value returned by get_sb_env() is not needed here.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

In [8]:
# Number of distinct index values in the environment's frame, minus one
# (presumably the position of the last trading day -- confirm).
e_train_gym.df.index.unique().size - 1

3396

In [9]:
# Count of non-null ticker entries in the environment's frame.
e_train_gym.df['tic'].count()

np.int64(1335021)

In [None]:
# Build a PPO model on top of the training environment.
agent = DRLAgent(env=env_train)
model_ppo = agent.get_model('ppo')

# Send training logs to <RESULTS_DIR>/ppo_model.
# RESULTS_DIR comes from finrl's config as a plain string, so the original
# `RESULTS_DIR / 'ppo_model'` raises TypeError (str does not support `/`).
# os.path.join accepts both str and path-like values.
tmp_path = os.path.join(RESULTS_DIR, 'ppo_model')
new_logger = configure(tmp_path, ['stdout', 'csv', 'tensorboard'])
model_ppo.set_logger(new_logger)

# Run the training loop; the fitted model is kept in `trained_ppo`.
trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=50000,
)