In this notebook, we train a DRL agent that uses a multi-period EIIE model as its policy function on the Dow Jones Industrial Average. The evaluation and performance, including the agent's actions, are displayed more explicitly in the notebook.

## 1. Importing Libraries

In [1]:
import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodEIIE
from modules.models import DRLAgent

# Apply seaborn's default theme to any plots drawn later.
sns.set()

# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## 2. Getting Config Data

In [2]:
# Read the experiment configuration; later cells pull tickers, the timeframe
# and the hyperparameters from `config_data`.
config_file_name = "config.json"
with open(config_file_name, mode="r") as config_handle:
    config_data = json.loads(config_handle.read())

In [3]:
# Ticker symbols listed under tickers -> America -> DJIA in the config file.
american_tickers = config_data["tickers"]["America"]
djia = american_tickers["DJIA"]
NUM_ASSETS = len(djia)
NUM_ASSETS

30

In [4]:
# Download window for the DJIA experiment.
djia_timeframe = config_data["timeframe"]["djia"]
start_date, end_date = djia_timeframe["start_date"], djia_timeframe["end_date"]

# Sampling interval handed to yfinance, plus the train/test split settings.
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

## 3. Data Retrieval

In [5]:
# Download the price/volume history of every DJIA ticker over the configured window.
portfolio_raw_df = yf.download(tickers=djia, start=start_date, end=end_date, interval=data_interval)

# Back-fill missing values from the next available observation.
# `DataFrame.bfill()` replaces `fillna(method="bfill")`, which pandas has deprecated;
# reassigning instead of mutating in place keeps the step explicit.
portfolio_raw_df = portfolio_raw_df.bfill()

# Reshape from wide (one column per field/ticker pair) to long (one row per date/ticker).
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df = portfolio_raw_df.drop("Adj Close", axis=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df.Date = portfolio_raw_df.Date.astype(str)
# Positional rename: assumes the remaining columns arrive in the order
# Date, Ticker, Close, High, Low, Open, Volume -- TODO confirm for the installed yfinance version.
portfolio_raw_df.columns = ["date", "tic", "close", "high", "low", "open", "volume"]
# Keep only the columns used downstream ("open" is dropped here).
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

# Split without shuffling so the original row order is preserved across train and test.
df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)
# Scale with MaxAbsScaler grouped by the "tic" column.
# NOTE(review): the test split is scaled by a scaler fitted on the test data itself
# rather than by the train-fitted one -- confirm this is intended.
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  30 of 30 completed
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


## 4. DRL Environment
### 4.1 Hyperparameter selection

In [6]:
# Hyperparameters shared by the environments and the policy network.
experiment_type = "EIIE_DJIA"  # tag embedded in the saved policy's file name
FEATURES = ["close", "high", "low", "volume"]
NUM_FEATURES = len(FEATURES)
# N is passed on as `time_window`, T as the multi-period / prediction horizon.
N, T = config_data["lookback_window"], config_data["multi_step_horizon"]
(N, T)

(24, 3)

### 4.2 Train Environment Initialization

In [7]:
# Settings for the training environment, collected in one place before construction.
train_env_settings = dict(
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=True,
    experiment_type=experiment_type,
)
# The environment is built on the scaled training split.
train_environment = MultiPeriodPortfolioOptimizationEnv(df_portfolio_train, **train_env_settings)

### 4.3 Model Parameters

In [8]:
# Arguments for the policy-gradient algorithm: learning rate, policy class and horizon.
model_kwargs = dict(
    lr=0.0001,
    policy=MultiPeriodEIIE,
    multi_period_horizon=T,
)

# Arguments forwarded to the EIIE policy network itself.
policy_kwargs = dict(
    initial_features=NUM_FEATURES,
    k_size=5,  # size of the initial kernel
    time_window=N,
    prediction_horizon=T,
)

### 4.4 Agent initialization

In [9]:
# Wrap the training environment in an agent, then request the "pg" model
# on the selected device with the parameters defined above.
agent = DRLAgent(train_environment)
model = agent.get_model("pg", device, model_kwargs, policy_kwargs)

## 5. Model Training

In [10]:
# Number of training episodes to run.
num_training_episodes = 35

print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=num_training_episodes)

TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 2051062.8386826129
Final accumulative portfolio value: 20.510628386826127
Maximum DrawDown: -0.7375416177654324
Sharpe ratio: 1.3629840311500507


  3%|▎         | 1/35 [00:01<00:53,  1.57s/it]

Initial portfolio value:100000
Final portfolio value: 2051075.9844941504
Final accumulative portfolio value: 20.510759844941504
Maximum DrawDown: -0.7375417217224748
Sharpe ratio: 1.3629856533296734


  6%|▌         | 2/35 [00:03<00:51,  1.56s/it]

Initial portfolio value:100000
Final portfolio value: 2051100.8945364803
Final accumulative portfolio value: 20.511008945364804
Maximum DrawDown: -0.7375415538861501
Sharpe ratio: 1.3629902280428159


  9%|▊         | 3/35 [00:04<00:49,  1.55s/it]

Initial portfolio value:100000
Final portfolio value: 2051109.5824522164
Final accumulative portfolio value: 20.511095824522165
Maximum DrawDown: -0.7375415929632794
Sharpe ratio: 1.3629908740313932


 11%|█▏        | 4/35 [00:06<00:46,  1.52s/it]

Initial portfolio value:100000
Final portfolio value: 2051127.2333378703
Final accumulative portfolio value: 20.511272333378702
Maximum DrawDown: -0.7375415670218268
Sharpe ratio: 1.3629935147391672


 14%|█▍        | 5/35 [00:07<00:44,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2051149.9046445966
Final accumulative portfolio value: 20.511499046445966
Maximum DrawDown: -0.7375413468738925
Sharpe ratio: 1.3629972811135778


 17%|█▋        | 6/35 [00:09<00:42,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2051167.5412050814
Final accumulative portfolio value: 20.511675412050813
Maximum DrawDown: -0.7375412066366747
Sharpe ratio: 1.362999931575751


 20%|██        | 7/35 [00:10<00:41,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2051191.0980095975
Final accumulative portfolio value: 20.511910980095976
Maximum DrawDown: -0.737540952816125
Sharpe ratio: 1.3630037732193534


 23%|██▎       | 8/35 [00:12<00:40,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2051211.6668318845
Final accumulative portfolio value: 20.512116668318846
Maximum DrawDown: -0.7375407120157724
Sharpe ratio: 1.3630069651835657


 26%|██▌       | 9/35 [00:13<00:38,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2051232.7797794316
Final accumulative portfolio value: 20.512327797794317
Maximum DrawDown: -0.7375407705999721
Sharpe ratio: 1.3630101844389164


 29%|██▊       | 10/35 [00:14<00:36,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2051254.4721085648
Final accumulative portfolio value: 20.512544721085646
Maximum DrawDown: -0.7375404232699119
Sharpe ratio: 1.3630139813676059


 31%|███▏      | 11/35 [00:16<00:35,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051278.555104711
Final accumulative portfolio value: 20.51278555104711
Maximum DrawDown: -0.7375404122639625
Sharpe ratio: 1.3630174347000765


 34%|███▍      | 12/35 [00:17<00:33,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051303.854775673
Final accumulative portfolio value: 20.51303854775673
Maximum DrawDown: -0.7375398766629124
Sharpe ratio: 1.363021872188985


 37%|███▋      | 13/35 [00:19<00:31,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051328.3728313302
Final accumulative portfolio value: 20.5132837283133
Maximum DrawDown: -0.7375398333491517
Sharpe ratio: 1.363025794130566


 40%|████      | 14/35 [00:20<00:30,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051359.5325716157
Final accumulative portfolio value: 20.513595325716157
Maximum DrawDown: -0.7375395036618564
Sharpe ratio: 1.3630308256444936


 43%|████▎     | 15/35 [00:22<00:29,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2051381.9849239038
Final accumulative portfolio value: 20.513819849239038
Maximum DrawDown: -0.7375392086529405
Sharpe ratio: 1.3630344476536151


 46%|████▌     | 16/35 [00:23<00:27,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051412.023893828
Final accumulative portfolio value: 20.51412023893828
Maximum DrawDown: -0.7375389534936558
Sharpe ratio: 1.3630395271435918


 49%|████▊     | 17/35 [00:25<00:26,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051441.6508817356
Final accumulative portfolio value: 20.514416508817355
Maximum DrawDown: -0.7375387142603871
Sharpe ratio: 1.3630442750021372


 51%|█████▏    | 18/35 [00:26<00:24,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051472.1988433881
Final accumulative portfolio value: 20.51472198843388
Maximum DrawDown: -0.7375385563693109
Sharpe ratio: 1.3630492224986912


 54%|█████▍    | 19/35 [00:27<00:23,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051508.490485599
Final accumulative portfolio value: 20.51508490485599
Maximum DrawDown: -0.7375380437671497
Sharpe ratio: 1.3630551934763924


 57%|█████▋    | 20/35 [00:29<00:21,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051542.9437570116
Final accumulative portfolio value: 20.515429437570116
Maximum DrawDown: -0.7375377211026248
Sharpe ratio: 1.3630609397211413


 60%|██████    | 21/35 [00:30<00:20,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2051581.878461698
Final accumulative portfolio value: 20.515818784616982
Maximum DrawDown: -0.737537301765813
Sharpe ratio: 1.3630673151935704


 63%|██████▎   | 22/35 [00:32<00:19,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2051623.3972551657
Final accumulative portfolio value: 20.516233972551657
Maximum DrawDown: -0.7375366418752216
Sharpe ratio: 1.3630744249307487


 66%|██████▌   | 23/35 [00:33<00:17,  1.50s/it]

Initial portfolio value:100000
Final portfolio value: 2051654.8320913317
Final accumulative portfolio value: 20.516548320913316
Maximum DrawDown: -0.7375364653971535
Sharpe ratio: 1.3630791182295237


 69%|██████▊   | 24/35 [00:35<00:16,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2051696.1965835255
Final accumulative portfolio value: 20.516961965835254
Maximum DrawDown: -0.7375359120522346
Sharpe ratio: 1.363086069735528


 71%|███████▏  | 25/35 [00:36<00:14,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2051740.4316830365
Final accumulative portfolio value: 20.517404316830365
Maximum DrawDown: -0.7375353917589673
Sharpe ratio: 1.3630933988617615


 74%|███████▍  | 26/35 [00:38<00:13,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2051783.4503108205
Final accumulative portfolio value: 20.517834503108205
Maximum DrawDown: -0.7375349571675461
Sharpe ratio: 1.3631006416046039


 77%|███████▋  | 27/35 [00:39<00:11,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051831.2807934608
Final accumulative portfolio value: 20.518312807934606
Maximum DrawDown: -0.7375344150739565
Sharpe ratio: 1.3631087458246562


 80%|████████  | 28/35 [00:41<00:10,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051873.544007682
Final accumulative portfolio value: 20.51873544007682
Maximum DrawDown: -0.7375338031580664
Sharpe ratio: 1.363115665279121


 83%|████████▎ | 29/35 [00:42<00:08,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051920.5291197218
Final accumulative portfolio value: 20.519205291197217
Maximum DrawDown: -0.7375328963636117
Sharpe ratio: 1.3631237302684611


 86%|████████▌ | 30/35 [00:44<00:07,  1.46s/it]

Initial portfolio value:100000
Final portfolio value: 2051971.0187100214
Final accumulative portfolio value: 20.519710187100213
Maximum DrawDown: -0.7375323773220746
Sharpe ratio: 1.3631319654757195


 89%|████████▊ | 31/35 [00:45<00:05,  1.45s/it]

Initial portfolio value:100000
Final portfolio value: 2052026.4554658968
Final accumulative portfolio value: 20.52026455465897
Maximum DrawDown: -0.7375313868676601
Sharpe ratio: 1.363141907115699


 91%|█████████▏| 32/35 [00:47<00:04,  1.49s/it]

Initial portfolio value:100000
Final portfolio value: 2052078.5894223466
Final accumulative portfolio value: 20.520785894223465
Maximum DrawDown: -0.7375308569827537
Sharpe ratio: 1.3631501900197727


 94%|█████████▍| 33/35 [00:48<00:02,  1.48s/it]

Initial portfolio value:100000
Final portfolio value: 2052130.372921066
Final accumulative portfolio value: 20.521303729210658
Maximum DrawDown: -0.7375298285335696
Sharpe ratio: 1.3631594124554802


 97%|█████████▋| 34/35 [00:50<00:01,  1.47s/it]

Initial portfolio value:100000
Final portfolio value: 2052188.1962844545
Final accumulative portfolio value: 20.521881962844546
Maximum DrawDown: -0.737528800789583
Sharpe ratio: 1.363169457037799


100%|██████████| 35/35 [00:51<00:00,  1.47s/it]


<modules.algorithms.PolicyGradient at 0x283d45520>

In [11]:
print("PERSISTING MODEL.....")
# Save only the trained policy's weights, keyed by the experiment tag.
policy_checkpoint_path = f"models/policy_{experiment_type}.pt"
trained_policy_weights = model.train_policy.state_dict()
torch.save(trained_policy_weights, policy_checkpoint_path)

PERSISTING MODEL.....


In [12]:
print("TESTING.....")
# Collect results: the training environment's "final" asset memory now,
# and an empty slot that the test run fills in later.
training_final_values = train_environment._asset_memory["final"]
MEIIE_results = {"training": training_final_values, "test": {}}

TESTING.....


## 6. Model Evaluation

In [13]:
# Same settings as the training environment, except that train mode is switched off.
test_env_settings = dict(
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=False,
    experiment_type=experiment_type,
)
# The evaluation environment is built on the scaled test split.
test_environment = MultiPeriodPortfolioOptimizationEnv(df_portfolio_test, **test_env_settings)

In [14]:
# Build a fresh policy network with the same architecture arguments used for
# training, so the saved weights can be loaded into it.
eval_policy_settings = dict(
    initial_features=NUM_FEATURES,
    k_size=5,
    time_window=N,
    prediction_horizon=T,
)
policy = MultiPeriodEIIE(**eval_policy_settings)

In [15]:
# Load the weights saved after training into the freshly built policy.
# `map_location=device` remaps the stored tensors onto the device selected at the
# top of the notebook, so a checkpoint written on a GPU machine can still be
# deserialized on a CPU-only one.
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt", map_location=device))

<All keys matched successfully>

In [16]:
# Run the restored policy through the test environment.
validation_settings = dict(model=model, test_env=test_environment, policy=policy)
DRLAgent.DRL_validation(**validation_settings)

Initial portfolio value:100000
Final portfolio value: 386176.7602853223
Final accumulative portfolio value: 3.8617676028532233
Maximum DrawDown: -0.3424335191363347
Sharpe ratio: 1.127687493513427


## 7. Postprocessing

In [17]:
# Store the test environment's "final" asset memory alongside the training results.
test_final_values = test_environment._asset_memory["final"]
MEIIE_results["test"]["value"] = test_final_values

In [18]:
# Skip the first recorded entry of the test run's "final" asset memory.
test_value_history = test_environment._asset_memory["final"]
drl_portfolio_performance = test_value_history[1:]
len(drl_portfolio_performance)

70

In [19]:
# Express each test-period portfolio value as a multiple of the 100000 starting capital.
# Reading from the environment's memory, instead of reassigning the list from itself,
# keeps the cell idempotent: re-running it no longer divides already-normalised
# values by 100000 a second time.
drl_portfolio_performance = [
    value / 100000 for value in test_environment._asset_memory["final"][1:]
]

In [20]:
# Load the existing performance table; its first column is used as the index.
performance_csv_path = "data/processed/performances_djia.csv"
performance_dataset = pd.read_csv(performance_csv_path, index_col=0)

In [21]:
# Add (or overwrite) the column holding this agent's normalised test performance.
drl_column_name = "DRL_EIIE"
performance_dataset[drl_column_name] = drl_portfolio_performance

In [22]:
# Write the updated table back to the same CSV it was read from.
performance_output_path = "data/processed/performances_djia.csv"
performance_dataset.to_csv(performance_output_path)