In [9]:
import os
import pandas as pd
import gymnasium as gym
import pickle

from finrl.main import check_and_make_directories
from finrl.main import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

from stable_baselines3 import PPO
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure

# Directory the trained model is saved into later on; presumably created here
# if it does not exist yet (behaviour of the FinRL helper — TODO confirm).
model_output_dirs = [TRAINED_MODEL_DIR]
check_and_make_directories(model_output_dirs)

In [10]:
def _read_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load pickles that were produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train = _read_pickle('data/train.pickle')
trade = _read_pickle('data/trade.pickle')

# Preview the first rows of the training frame.
train.head()

Unnamed: 0,date,close,high,low,open,volume,tic,day,gdp_log,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence,daily_return
0,2009-01-02,10.320794,10.358926,9.856867,9.914063,4236220,A,4,9.577127,0.0,10.989866,10.001251,100.0,66.666667,100.0,10.320794,10.320794,0.0,-0.233625
0,2009-01-02,7.909602,7.994448,7.230828,7.287392,5167000,AAL,4,9.577127,0.0,10.989866,10.001251,100.0,66.666667,100.0,7.909602,7.909602,0.0,-0.233625
0,2009-01-02,29.373985,29.511649,28.45336,29.133075,795900,AAP,4,9.577127,0.0,10.989866,10.001251,100.0,66.666667,100.0,29.373985,29.373985,0.0,2.713712
0,2009-01-02,2.724325,2.733032,2.556513,2.578127,746015200,AAPL,4,9.577127,0.0,10.989866,10.001251,100.0,66.666667,100.0,2.724325,2.724325,0.0,-0.907254
0,2009-01-02,17.518652,17.613507,17.129422,17.50884,13163193,ABT,4,9.577127,0.0,10.989866,10.001251,100.0,66.666667,100.0,17.518652,17.518652,0.0,5.430456


In [11]:
# Distinct ticker symbols in the training frame, next to the FinRL indicator list.
(train['tic'].unique(), INDICATORS)

(array(['A', 'AAL', 'AAP', 'AAPL', 'ABT', 'ACN', 'ADBE', 'ADI', 'ADM',
        'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AFL', 'AIG', 'AIV', 'AIZ',
        'AJG', 'AKAM', 'ALB', 'ALGN', 'ALK', 'ALL', 'AMAT', 'AMD', 'AME',
        'AMG', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AON', 'AOS', 'APA', 'APD',
        'APH', 'ARE', 'ATO', 'AVB', 'AVY', 'AWK', 'AXP', 'AZO', 'BA',
        'BAC', 'BAX', 'BBT', 'BBY', 'BDX', 'BEN', 'BIIB', 'BK', 'BKNG',
        'BLK', 'BMY', 'BR', 'BSX', 'BWA', 'BXP', 'C', 'CAG', 'CAH', 'CAT',
        'CB', 'CBRE', 'CCI', 'CCL', 'CDNS', 'CE', 'CF', 'CHD', 'CHRW',
        'CI', 'CINF', 'CL', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI',
        'CMS', 'CNC', 'CNP', 'COF', 'COO', 'COP', 'COST', 'CPB', 'CPRT',
        'CRM', 'CSCO', 'CSX', 'CTAS', 'CTSH', 'CVS', 'CVX', 'D', 'DAL',
        'DD', 'DE', 'DGX', 'DHI', 'DHR', 'DIS', 'DLR', 'DLTR', 'DOV',
        'DRI', 'DTE', 'DUK', 'DVA', 'DVN', 'DXC', 'EA', 'EBAY', 'ECL',
        'ED', 'EFX', 'EIX', 'EL', 'EMN', 'EMR', 'EOG', 'EQIX', '

In [12]:
# Size of the flat observation vector: one global slot plus a fixed number of
# slots per ticker. Per ticker that is 2 slots (presumably price and holdings —
# TODO confirm against the env), one per entry of INDICATORS, and 1 extra slot
# (presumably for 'gdp_log', which the environment setup appends to the
# indicator list).
tickers = train['tic'].unique()
stock_dimension = len(tickers)
slots_per_ticker = 2 + len(INDICATORS) + 1
state_space = 1 + slots_per_ticker * stock_dimension
print(f'Stock Dimension: {stock_dimension}, state space: {state_space}')

Stock Dimension: 393, state space: 4324


In [13]:
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# Indicator columns handed to the environment: the FinRL list plus 'gdp_log'.
tech_indicator_list = INDICATORS + ['gdp_log']

# Fail early, with a readable message, when the precomputed state size does not
# match the indicator list used here. The expected layout (1 global slot plus
# 2 + n_indicators slots per ticker) is an assumption about StockTradingEnv —
# TODO confirm against the installed FinRL version.
expected_state_space = 1 + (2 + len(tech_indicator_list)) * stock_dimension
if state_space != expected_state_space:
    raise ValueError(
        f'state_space={state_space} does not match the expected size '
        f'{expected_state_space} for {stock_dimension} tickers and '
        f'{len(tech_indicator_list)} indicators'
    )

# Build the two cost lists separately: a chained assignment would bind both
# names to the same list object, so mutating one would change the other.
buy_cost_list = [0.005] * stock_dimension
sell_cost_list = [0.005] * stock_dimension
# Every ticker starts with zero shares held.
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    'hmax': 100,
    'initial_amount': 1000000,
    'num_stock_shares': num_stock_shares,
    'buy_cost_pct': buy_cost_list,
    'sell_cost_pct': sell_cost_list,
    'state_space': state_space,
    'stock_dim': stock_dimension,
    'tech_indicator_list': tech_indicator_list,
    'action_space': stock_dimension,
    'reward_scaling': 1e-4
}

e_train_gym = StockTradingEnv(df=train, **env_kwargs)
# get_sb_env returns the wrapped env and a second value that is not needed here.
env_train, _ = e_train_gym.get_sb_env()

In [14]:
# Number of distinct index values in the environment's frame, minus one.
unique_index_values = e_train_gym.df.index.unique()
len(unique_index_values) - 1

3396

In [15]:
# Count of non-null ticker entries in the environment's frame.
e_train_gym.df['tic'].count()

np.int64(1335021)

In [None]:
# Train a PPO agent on the training environment, logging to stdout, CSV and
# TensorBoard under the results directory.
tmp_path = f'{RESULTS_DIR}/ppo'
new_logger_ppo = configure(tmp_path, ['stdout', 'csv', 'tensorboard'])

agent = DRLAgent(env=env_train)
model_ppo = agent.get_model('ppo')
model_ppo.set_logger(new_logger_ppo)

trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=50000,
)

In [None]:
# Persist the trained PPO model under the trained-models directory.
trained_ppo.save(f'{TRAINED_MODEL_DIR}/agent_ppo')

In [16]:
# Load the PPO model from the same directory constant the save cell writes to,
# instead of a hard-coded folder name that can drift from TRAINED_MODEL_DIR.
model = PPO.load(TRAINED_MODEL_DIR + '/agent_ppo.zip')



In [18]:
import numpy as np

# Roll out the loaded PPO policy on the training environment and record the
# transitions as fixed-length chunks, plus per-dimension statistics of the
# observations returned by each step.
#
# NOTE(review): the policy is queried with deterministic=True and the episode
# summaries printed by the environment are identical from one episode to the
# next, so the recorded data looks like the same episode repeated — confirm
# that this much repetition is intended.
# NOTE(review): chunks are cut every `chunk_size` steps regardless of the
# `dones` flags, so a chunk may span an episode boundary.

chunk_size = 75           # transitions stored per chunk
max_steps = 400000        # total environment steps to record
progress_every = 100000   # print the step counter this often


def _pack_chunk(observations, actions, rewards, dones):
    """Stack per-step buffers into a single chunk dictionary.

    Args:
        observations: list of 1-D float32 arrays, one per step.
        actions: list of 1-D float32 arrays, one per step.
        rewards: list of floats, one per step.
        dones: list of bools, one per step.

    Returns:
        dict with keys 'observations', 'actions', 'rewards' and 'dones'.
    """
    return {
        'observations': np.stack(observations, axis=0),
        'actions': np.stack(actions, axis=0),
        'rewards': np.asarray(rewards, dtype=np.float32),
        'dones': np.asarray(dones, dtype=np.bool_),
    }


env_train, obs = e_train_gym.get_sb_env()
# Start the rollout from the observation returned by this reset rather than
# discarding it and relying on an observation produced by an earlier reset.
obs = env_train.reset()

ds = []       # finished chunks
states = []   # every post-step observation; only used for the statistics below
s, a, r, d = [], [], [], []   # buffers for the chunk currently being filled

# Count from 1 so the progress print lands on round numbers, and include
# max_steps itself so that exactly max_steps transitions are recorded.
for i in range(1, max_steps + 1):
    if i % progress_every == 0:
        print(i)

    # Copy so the stored observation cannot be altered by later env steps.
    current_obs = np.asarray(obs[0], dtype=np.float32).copy()
    action, _states = model.predict(obs, deterministic=True)

    s.append(current_obs)
    a.append(np.asarray(action, dtype=np.float32).reshape(-1))

    obs, rewards, dones, info = env_train.step(action)

    # The vectorised env returns arrays; keep the first (only used) entry.
    r.append(float(np.asarray(rewards).reshape(-1)[0]))
    d.append(bool(np.asarray(dones).reshape(-1)[0]))

    states.append(np.asarray(obs[0], dtype=np.float32).copy())

    if len(s) >= chunk_size:
        ds.append(_pack_chunk(s, a, r, d))
        s, a, r, d = [], [], [], []

# Keep the trailing partial chunk, if any steps are left over.
if s:
    ds.append(_pack_chunk(s, a, r, d))

states = np.vstack(states)
# The added epsilon keeps every std entry strictly positive.
state_mean, state_std = np.mean(states, axis=0), np.std(states, axis=0) + 1e-6

day: 3396, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
day: 3396, episode: 20
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
day: 3396, episode: 30
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
100000
day: 3396, episode: 40
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
day: 3396, episode: 50
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
day: 3396, episode: 60
begin_total_asset: 1000000.00
end_total_asset: 6323435.65
total_reward: 5323435.65
total_cost: 18023.62
total_trades: 620203
Sharpe: 0.799
200000
day: 3396, epi

In [19]:
# First five entries of each statistic, plus the shape of the mean vector.
(state_mean[:5], state_std[:5], state_mean.shape)

(array([3984.7595  ,   52.783367,   24.06745 ,  106.868385,   41.663994],
       dtype=float32),
 array([5.1447590e+04, 3.6451923e+01, 1.5039037e+01, 4.7111446e+01,
        4.3134056e+01], dtype=float32),
 (4324,))

In [20]:
# Zero-pad the state statistics up to the number of chunks, presumably so that
# all columns later passed to Dataset.from_dict have the same length.
# Consumers must read only the first `state_space` entries of each vector.
len_ds = len(ds)
pad_width = len_ds - state_space

if pad_width < 0:
    # np.pad rejects negative widths with an opaque message; explain instead.
    raise ValueError(
        f'Cannot pad state statistics: the dataset has {len_ds} chunks but '
        f'state_space is {state_space}; at least {state_space} chunks are needed.'
    )

# Slice back to the real statistics first so that re-running this cell does
# not keep appending more zeros to already padded arrays.
state_mean = np.pad(state_mean[:state_space], (0, pad_width))
state_std = np.pad(state_std[:state_space], (0, pad_width))

In [21]:
# Padded mean vector and its length.
(state_mean, state_mean.shape[0])

(array([3984.7595  ,   52.783367,   24.06745 , ...,    0.      ,
           0.      ,    0.      ], shape=(5334,), dtype=float32),
 5334)

In [22]:
# Number of chunks, and number of keys in the first chunk.
first_chunk = ds[0]
(len(ds), len(first_chunk))

(5334, 4)

In [23]:
# Number of transitions stored in the first chunk.
feature = ds[0]
feature['rewards'].shape[0]

75

In [24]:
# Bundle the trajectory chunks with the state statistics.
input_data = {
    'train': ds,
    'state_mean': state_mean,
    'state_std': state_std,
}

In [25]:
# Show which columns the dataset will be built from.
input_data.keys()

dict_keys(['train', 'state_mean', 'state_std'])

In [26]:
from datasets import Dataset

# Build a Hugging Face dataset whose columns are the entries of input_data.
dataset = Dataset.from_dict(mapping=input_data)

In [27]:
# Write the dataset to the data/dataset/ directory.
dataset.save_to_disk("data/dataset/")

Saving the dataset (0/16 shards):   0%|          | 0/5334 [00:00<?, ? examples/s]

In [None]:
# NOTE(review): this unexecuted cell repeats the earlier cell that builds
# `dataset` from `input_data`; it looks redundant — confirm and remove.
from datasets import Dataset

dataset = Dataset.from_dict(input_data)