In this notebook, we train a DRL agent that uses a multi-period EIIE model as its policy function on the constituents of the Dow Jones Industrial Average. The evaluation and performance, including the agent's actions, are displayed more explicitly in the notebook.

## 1. Importing Libraries

In [1]:
import json
import os

import pandas as pd
import seaborn as sns
import torch
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler

from modules.scalers import GroupByScaler
from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodEIIE
from modules.models import DRLAgent

# Switch on seaborn's default plot styling for the whole notebook.
sns.set()

# Use the first CUDA GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## 2. Getting Config Data

In [2]:
# Read the experiment configuration (tickers, timeframe, window sizes, ...) from disk.
config_file_name = "config.json"
with open(config_file_name, mode="r") as jsonfile:
    config_data = json.loads(jsonfile.read())

In [3]:
# Ticker list stored under tickers -> America -> DJIA in the config.
djia = config_data["tickers"]["America"]["DJIA"]
# Number of assets in the portfolio universe.
NUM_ASSETS = len(djia)
# Show the asset count as the cell output.
NUM_ASSETS

30

In [4]:
# Download window, bar interval, train/test ratio and RNG seed, all read from the config.
start_date = config_data["timeframe"]["djia"]["start_date"]
end_date = config_data["timeframe"]["djia"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

# Seed torch's global RNG so that policy-weight initialisation and any sampling done
# during training give the same results on every Restart & Run All.
# A missing (None) seed is tolerated and simply leaves torch unseeded.
if random_seed is not None:
    torch.manual_seed(random_seed)

## 3. Data Retrieval

In [5]:
# Download price/volume bars for every DJIA ticker over the configured window.
portfolio_raw_df = yf.download(tickers=djia, start=start_date, end=end_date, interval=data_interval)

# Back-fill missing values. `.bfill()` replaces `fillna(method="bfill")`, which pandas has
# deprecated, and reassigning avoids mutating the downloaded frame in place.
portfolio_raw_df = portfolio_raw_df.bfill()

# Move the ticker column level into the rows so there is one row per (date, ticker) pair.
# NOTE(review): this call still emits the `stack` FutureWarning; opting in to
# `future_stack=True` may change which rows are kept, so it is left unchanged — confirm first.
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df["Date"] = portfolio_raw_df["Date"].astype(str)

# Rename by column name instead of by position: a positional rename silently mislabels
# the data if the downloaded column order ever changes.
portfolio_raw_df = portfolio_raw_df.rename(
    columns={
        "Date": "date",
        "Ticker": "tic",
        "Close": "close",
        "High": "high",
        "Low": "low",
        "Open": "open",
        "Volume": "volume",
    }
)
# Keep only the columns used downstream. This also discards "Adj Close" when it is present,
# without raising when the download does not include that column.
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

# Chronological split (shuffle=False); `random_state` has no effect without shuffling.
# NOTE(review): the split is done on rows of the long-format frame, so the boundary could
# fall in the middle of a date (some tickers in train, the rest in test) — verify.
df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)

# Scale each ticker's columns with its own MaxAbsScaler.
# NOTE(review): the test set is scaled with a scaler fitted on the test data itself, so
# train and test end up on different scales and test scaling uses future values — confirm
# that this is intended.
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  30 of 30 completed
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


## 4. DRL Environment
### 4.1 Hyperparameter selection

In [6]:
# --- Hyperparameters ---
# Tag passed to the environments and used in the saved policy's file name.
experiment_type = "EIIE_DJIA"

# Feature columns handed to the environments; their count sizes the policy's input.
FEATURES = ["close", "high", "low", "volume"]
NUM_FEATURES = len(FEATURES)

# N: look-back window, T: multi-step horizon — both read from the config.
N, T = config_data["lookback_window"], config_data["multi_step_horizon"]

# Show the chosen (N, T) pair as the cell output.
(N, T)

(24, 3)

### 4.2 Train Environment Initialization

In [7]:
# Training environment built on the scaled training frame.
train_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_train,
    # data layout
    features=FEATURES,
    time_window=N,
    multi_period_horizon=T,
    normalize_df=None,  # the frame was already scaled by GroupByScaler
    # portfolio settings
    initial_amount=100000,
    comission_fee_pct=0.0025,
    # run settings
    is_train_mode=True,
    experiment_type=experiment_type,
)

### 4.3 Model Parameters

In [8]:
# set PolicyGradient parameters
model_kwargs = {
    "lr": 0.0001,
    "policy": MultiPeriodEIIE,
    "multi_period_horizon": T
}

# here, we can set EIIE's parameters
policy_kwargs = {
    "initial_features": NUM_FEATURES,
    "k_size": 5, # Size of Initial Kernel
    "time_window": N,
    "prediction_horizon": T
}

### 4.4 Agent initialization

In [9]:
# Wrap the training environment in an agent and ask it for a "pg" model on `device`,
# configured with the trainer and policy settings defined above.
model = DRLAgent(train_environment).get_model(
    "pg",
    device,
    model_kwargs,
    policy_kwargs,
)

## 5. Model Training

In [10]:
# Announce the start of training, then train the model for 35 episodes.
# The call is the cell's last expression, so its return value is shown as the cell output.
print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=35)

TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 2052130.6373720712
Final accumulative portfolio value: 20.521306373720712
Maximum DrawDown: -0.7375013473495553
Sharpe ratio: 6.247011436679249


  3%|▎         | 1/35 [00:01<00:53,  1.57s/it]

Initial portfolio value:100000
Final portfolio value: 2052220.2633846032
Final accumulative portfolio value: 20.52220263384603
Maximum DrawDown: -0.7375000740048303
Sharpe ratio: 6.247083585348032


  6%|▌         | 2/35 [00:03<00:49,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2052302.5301899135
Final accumulative portfolio value: 20.523025301899136
Maximum DrawDown: -0.7374993007069215
Sharpe ratio: 6.247148389008831


  9%|▊         | 3/35 [00:04<00:48,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2052389.9947064111
Final accumulative portfolio value: 20.523899947064113
Maximum DrawDown: -0.7374982138913739
Sharpe ratio: 6.247217722462787


 11%|█▏        | 4/35 [00:06<00:46,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2052478.168889487
Final accumulative portfolio value: 20.52478168889487
Maximum DrawDown: -0.7374972915597352
Sharpe ratio: 6.2472876160785855


 14%|█▍        | 5/35 [00:07<00:44,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2052567.4995074605
Final accumulative portfolio value: 20.525674995074606
Maximum DrawDown: -0.7374962321827394
Sharpe ratio: 6.247358022283462


 17%|█▋        | 6/35 [00:08<00:42,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2052656.3555436046
Final accumulative portfolio value: 20.526563555436045
Maximum DrawDown: -0.7374952662363874
Sharpe ratio: 6.247429588258103


 20%|██        | 7/35 [00:10<00:41,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2052752.3302363663
Final accumulative portfolio value: 20.52752330236366
Maximum DrawDown: -0.7374940895121849
Sharpe ratio: 6.247505785728876


 23%|██▎       | 8/35 [00:12<00:40,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2052850.4809018606
Final accumulative portfolio value: 20.528504809018607
Maximum DrawDown: -0.7374930266310458
Sharpe ratio: 6.247584430446705


 26%|██▌       | 9/35 [00:13<00:39,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2052947.7165949463
Final accumulative portfolio value: 20.529477165949462
Maximum DrawDown: -0.737491888739856
Sharpe ratio: 6.247661603019325


 29%|██▊       | 10/35 [00:15<00:37,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2053041.5114658792
Final accumulative portfolio value: 20.53041511465879
Maximum DrawDown: -0.7374908404251183
Sharpe ratio: 6.247736689392777


 31%|███▏      | 11/35 [00:16<00:35,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2053146.0092599853
Final accumulative portfolio value: 20.531460092599854
Maximum DrawDown: -0.7374894267162692
Sharpe ratio: 6.2478220673097224


 34%|███▍      | 12/35 [00:17<00:34,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2053248.0373170373
Final accumulative portfolio value: 20.532480373170372
Maximum DrawDown: -0.7374882273611708
Sharpe ratio: 6.247901716164014


 37%|███▋      | 13/35 [00:19<00:32,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2053358.234328327
Final accumulative portfolio value: 20.53358234328327
Maximum DrawDown: -0.7374870691360449
Sharpe ratio: 6.247987647593694


 40%|████      | 14/35 [00:20<00:30,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2053463.569002823
Final accumulative portfolio value: 20.534635690028228
Maximum DrawDown: -0.7374858677307128
Sharpe ratio: 6.248071850807328


 43%|████▎     | 15/35 [00:22<00:29,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2053577.228921115
Final accumulative portfolio value: 20.53577228921115
Maximum DrawDown: -0.7374845659350221
Sharpe ratio: 6.248159577233298


 46%|████▌     | 16/35 [00:23<00:28,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2053691.926940295
Final accumulative portfolio value: 20.53691926940295
Maximum DrawDown: -0.7374836045174584
Sharpe ratio: 6.248247187968943


 49%|████▊     | 17/35 [00:25<00:26,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2053805.6405846525
Final accumulative portfolio value: 20.538056405846525
Maximum DrawDown: -0.7374823945583715
Sharpe ratio: 6.248335577358461


 51%|█████▏    | 18/35 [00:26<00:25,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2053917.1058571904
Final accumulative portfolio value: 20.539171058571906
Maximum DrawDown: -0.7374811459767099
Sharpe ratio: 6.248420626677034


 54%|█████▍    | 19/35 [00:28<00:23,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2054035.332960544
Final accumulative portfolio value: 20.540353329605438
Maximum DrawDown: -0.7374799360869544
Sharpe ratio: 6.248511683858347


 57%|█████▋    | 20/35 [00:29<00:22,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2054150.2458174664
Final accumulative portfolio value: 20.541502458174666
Maximum DrawDown: -0.7374787617768606
Sharpe ratio: 6.2485993322361635


 60%|██████    | 21/35 [00:31<00:20,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2054276.9710573317
Final accumulative portfolio value: 20.542769710573317
Maximum DrawDown: -0.7374773457012738
Sharpe ratio: 6.248697934552046


 63%|██████▎   | 22/35 [00:32<00:19,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2054394.6828019945
Final accumulative portfolio value: 20.543946828019944
Maximum DrawDown: -0.7374761322307337
Sharpe ratio: 6.248787577436906


 66%|██████▌   | 23/35 [00:34<00:18,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2054525.930965337
Final accumulative portfolio value: 20.54525930965337
Maximum DrawDown: -0.7374746324422912
Sharpe ratio: 6.248889116813441


 69%|██████▊   | 24/35 [00:35<00:16,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2054648.3846073735
Final accumulative portfolio value: 20.546483846073734
Maximum DrawDown: -0.7374733347876496
Sharpe ratio: 6.248984819895745


 71%|███████▏  | 25/35 [00:37<00:14,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2054785.6288335214
Final accumulative portfolio value: 20.547856288335215
Maximum DrawDown: -0.7374717034071336
Sharpe ratio: 6.249093349827336


 74%|███████▍  | 26/35 [00:38<00:13,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2054919.0524149083
Final accumulative portfolio value: 20.549190524149083
Maximum DrawDown: -0.7374700914581473
Sharpe ratio: 6.249197775003143


 77%|███████▋  | 27/35 [00:40<00:11,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2055057.6159192733
Final accumulative portfolio value: 20.550576159192733
Maximum DrawDown: -0.737468612051766
Sharpe ratio: 6.249306178480906


 80%|████████  | 28/35 [00:41<00:10,  1.54s/it]

Initial portfolio value:100000
Final portfolio value: 2055186.052812633
Final accumulative portfolio value: 20.55186052812633
Maximum DrawDown: -0.7374669638403648
Sharpe ratio: 6.249408611418717


 83%|████████▎ | 29/35 [00:43<00:09,  1.53s/it]

Initial portfolio value:100000
Final portfolio value: 2055319.7174661893
Final accumulative portfolio value: 20.553197174661893
Maximum DrawDown: -0.7374652654239358
Sharpe ratio: 6.249514762418319


 86%|████████▌ | 30/35 [00:44<00:07,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2055459.7182966417
Final accumulative portfolio value: 20.55459718296642
Maximum DrawDown: -0.7374632885472534
Sharpe ratio: 6.249625074298911


 89%|████████▊ | 31/35 [00:46<00:06,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2055595.830394298
Final accumulative portfolio value: 20.55595830394298
Maximum DrawDown: -0.7374613672576897
Sharpe ratio: 6.249734129019814


 91%|█████████▏| 32/35 [00:47<00:04,  1.52s/it]

Initial portfolio value:100000
Final portfolio value: 2055732.744221108
Final accumulative portfolio value: 20.55732744221108
Maximum DrawDown: -0.7374595498934386
Sharpe ratio: 6.249843154063158


 94%|█████████▍| 33/35 [00:49<00:03,  1.54s/it]

Initial portfolio value:100000
Final portfolio value: 2055876.5115421284
Final accumulative portfolio value: 20.558765115421284
Maximum DrawDown: -0.7374573939885427
Sharpe ratio: 6.249958983152294


 97%|█████████▋| 34/35 [00:50<00:01,  1.51s/it]

Initial portfolio value:100000
Final portfolio value: 2056019.1394571343
Final accumulative portfolio value: 20.560191394571344
Maximum DrawDown: -0.7374552520081783
Sharpe ratio: 6.2500741812887615


100%|██████████| 35/35 [00:52<00:00,  1.50s/it]


<modules.algorithms.PolicyGradient at 0x284683110>

In [11]:
print("PERSISTING MODEL.....")
# Make sure the checkpoint folder exists: torch.save does not create missing directories,
# so saving would fail on a fresh checkout without a "models" folder.
os.makedirs("models", exist_ok=True)
# Persist only the trained policy's weights (its state dict), keyed by experiment name.
torch.save(model.train_policy.state_dict(), f"models/policy_{experiment_type}.pt")

PERSISTING MODEL.....


In [None]:
print("TESTING.....")
# Result container: the "final" entry of the training environment's asset memory,
# plus an empty slot that is filled in after the test run.
MEIIE_results = dict(
    training=train_environment._asset_memory["final"],
    test=dict(),
)

## 6. Model Evaluation

In [13]:
# Evaluation environment: same settings as the training environment, but built on the
# scaled test frame and with training mode switched off.
test_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_test,
    # data layout
    features=FEATURES,
    time_window=N,
    multi_period_horizon=T,
    normalize_df=None,  # the frame was already scaled by GroupByScaler
    # portfolio settings
    initial_amount=100000,
    comission_fee_pct=0.0025,
    # run settings
    is_train_mode=False,
    experiment_type=experiment_type,
)

In [14]:
# Fresh policy network that will receive the saved weights. The arguments repeat the
# values used in `policy_kwargs` for training so that the state dict keys and shapes match.
policy = MultiPeriodEIIE(
    time_window=N,
    prediction_horizon=T,
    initial_features=NUM_FEATURES,
    k_size=5,
)

In [15]:
# Load the weights saved after training into `policy`.
# map_location="cpu" lets a checkpoint written from a GPU be read on a CPU-only machine;
# load_state_dict then copies the values into the policy's own parameters.
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt", map_location="cpu"))

<All keys matched successfully>

In [16]:
# Run the loaded policy on the test environment; the run's summary metrics are printed below.
DRLAgent.DRL_validation(model=model, test_env=test_environment, policy=policy)

Initial portfolio value:100000
Final portfolio value: 386638.7936356264
Final accumulative portfolio value: 3.8663879363562637
Maximum DrawDown: -0.3422769582624733
Sharpe ratio: 5.172089596751162


## 7. Postprocessing

In [17]:
# Store the "final" entry of the test environment's asset memory next to the training results.
MEIIE_results["test"]["value"] = test_environment._asset_memory["final"]

In [20]:
# Take the test run's recorded values, skipping the first entry
# (presumably the initial amount — TODO confirm), and show how many remain.
drl_portfolio_performance = test_environment._asset_memory["final"][1:]
len(drl_portfolio_performance)

70

In [21]:
# Express each recorded value as a multiple of the 100000 initial amount.
# The series is recomputed from the environment's memory instead of from the variable
# itself, so re-running this cell cannot divide the values a second time.
drl_portfolio_performance = [
    value / 100000 for value in test_environment._asset_memory["final"][1:]
]

In [22]:
# Load the existing performance table; its first CSV column is used as the index.
performance_dataset = pd.read_csv("data/processed/performances_djia.csv", index_col=0)

In [23]:
# Add (or overwrite) the agent's column. Assigning a list requires it to have exactly
# as many entries as the table has rows.
performance_dataset["DRL_EIIE"] = drl_portfolio_performance

In [24]:
# Write the table back to the same file it was read from, overwriting the previous contents.
performance_dataset.to_csv("data/processed/performances_djia.csv")