In this notebook, we train a DRL agent that uses a multi-period Hybrid-Transformer model as its policy function on the Dow Jones Industrial Average. The evaluation and performance, including the agent's actions, are displayed more explicitly in the notebook.

## 1. Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodConvAttentionNetwork
from modules.models import DRLAgent

# Activate seaborn's default plot styling for the notebook.
sns.set()

# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## 2. Getting Config Data

In [2]:
# Read the experiment configuration from the JSON file next to the notebook.
config_file_name = "config.json"
with open(config_file_name, "r") as jsonfile:
    raw_config_text = jsonfile.read()
config_data = json.loads(raw_config_text)

In [3]:
# Ticker list stored under tickers -> America -> DJIA in the config file.
america_tickers = config_data["tickers"]["America"]
djia = america_tickers["DJIA"]

# Number of assets in the portfolio; shown as the cell output.
NUM_ASSETS = len(djia)
NUM_ASSETS

30

In [4]:
# Download window, bar interval, train/test split ratio and RNG seed all come
# from the config file.
start_date = config_data["timeframe"]["djia"]["start_date"]
end_date = config_data["timeframe"]["djia"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

# Seed the global NumPy and PyTorch generators so that any randomness drawn
# from them (e.g. network weight initialisation) is repeatable after
# Restart Kernel -> Run All. Without this the configured seed is never applied
# to either library. A null seed in the config leaves both generators unseeded.
if random_seed is not None:
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

## 3. Data Retrieval

In [5]:
# Download price/volume bars for every configured ticker. The frame is wide:
# stack(level=1) below moves the ticker column level into the row index.
portfolio_raw_df = yf.download(tickers=djia, start=start_date, end=end_date, interval=data_interval)

# Back-fill missing values. DataFrame.bfill() replaces the deprecated
# fillna(method="bfill") spelling and returns a new frame instead of mutating
# the downloaded one in place.
portfolio_raw_df = portfolio_raw_df.bfill()

# Reshape wide -> long: one row per (Date, Ticker) pair.
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df["Date"] = portfolio_raw_df["Date"].astype(str)

# Rename by column *name* rather than by position, then keep only the columns
# the environment consumes. This yields the same frame as dropping "Adj Close"
# and relabelling positionally, but it cannot silently mislabel columns if the
# download returns them in a different order, and it does not fail when
# "Adj Close" is absent.
portfolio_raw_df = portfolio_raw_df.rename(
    columns={
        "Date": "date",
        "Ticker": "tic",
        "Close": "close",
        "High": "high",
        "Low": "low",
        "Volume": "volume",
    }
)
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

# Order-preserving split (shuffle=False): the leading rows become the training
# set and the trailing rows the test set. random_state is ignored by
# train_test_split when shuffle=False.
# NOTE(review): the split is by row count on the long frame, so the boundary
# may fall inside a single date (some tickers in train, the rest in test) —
# confirm this is acceptable for the environment.
df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)

# Scale each ticker's features independently with MaxAbsScaler.
# NOTE(review): the test set is scaled with a scaler fitted on the test data
# itself rather than the one fitted on the training data — confirm this is
# intended, since it uses test-period information for normalisation.
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  30 of 30 completed
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


## 4. DRL Environment
### 4.1 Hyperparameter selection

In [6]:
# Hyperparameters shared by the environments and the policy network.
# Columns of the price frame that are fed to the model.
FEATURES = ["close", "high", "low", "volume"]
NUM_FEATURES = len(FEATURES)

# N: look-back window length, T: multi-step horizon (both read from the config).
N, T = config_data["lookback_window"], config_data["multi_step_horizon"]

# Label used for the environment's experiment_type and the saved policy file name.
experiment_type = "HYBRID_TRANSFORMER_DJIA"

# Show the resulting dimensions as the cell output.
(N, T, NUM_ASSETS, NUM_FEATURES)

(24, 3, 30, 4)

### 4.2 Train Environment Initialization

In [7]:
# Keyword settings for the training environment, gathered in one place.
train_env_settings = dict(
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,  # presumably skips env-side normalisation; the frame was scaled earlier — TODO confirm
    is_train_mode=True,
    experiment_type=experiment_type,
)

# Environment built on the scaled training frame.
train_environment = MultiPeriodPortfolioOptimizationEnv(df_portfolio_train, **train_env_settings)

### 4.3 Model Parameters

In [8]:
# Arguments for the policy-gradient algorithm: learning rate, policy network
# class, and the multi-period horizon.
model_kwargs = dict(
    lr=0.0001,
    policy=MultiPeriodConvAttentionNetwork,
    multi_period_horizon=T,
)

# Constructor arguments for the Hybrid-Transformer policy network.
policy_kwargs = dict(
    num_features=NUM_FEATURES,
    num_stocks=NUM_ASSETS,
    W=N,
    T=T,
)

### 4.4 Agent initialization

In [9]:
# Wrap the training environment in an agent, then ask it for a "pg" model
# configured with the algorithm and policy arguments defined above.
agent = DRLAgent(train_environment)
model = agent.get_model("pg", device, model_kwargs, policy_kwargs)



## 5. Model Training

In [10]:
# Number of training episodes to run.
NUM_EPISODES = 35

print("TRAINING AGENT.....")
# The call's return value is the cell's last expression, so it is displayed as the cell output.
DRLAgent.train_model(model, episodes=NUM_EPISODES)

TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 2019266.8162732192
Final accumulative portfolio value: 20.192668162732193
Maximum DrawDown: -0.7373560487726822
Sharpe ratio: 1.3573405664745306


  3%|▎         | 1/35 [00:03<01:47,  3.17s/it]

Initial portfolio value:100000
Final portfolio value: 2099246.896240765
Final accumulative portfolio value: 20.992468962407653
Maximum DrawDown: -0.7361869691485308
Sharpe ratio: 1.3735433128474392


  6%|▌         | 2/35 [00:06<01:50,  3.36s/it]

Initial portfolio value:100000
Final portfolio value: 2138486.6185626695
Final accumulative portfolio value: 21.384866185626695
Maximum DrawDown: -0.736455996817204
Sharpe ratio: 1.3788668702636122


  9%|▊         | 3/35 [00:10<01:48,  3.38s/it]

Initial portfolio value:100000
Final portfolio value: 2174173.6451522
Final accumulative portfolio value: 21.741736451522
Maximum DrawDown: -0.7371536281262281
Sharpe ratio: 1.382809780863135


 11%|█▏        | 4/35 [00:13<01:40,  3.24s/it]

Initial portfolio value:100000
Final portfolio value: 2223284.9708254756
Final accumulative portfolio value: 22.232849708254758
Maximum DrawDown: -0.7351532656540145
Sharpe ratio: 1.391999684942356


 14%|█▍        | 5/35 [00:16<01:35,  3.17s/it]

Initial portfolio value:100000
Final portfolio value: 2266167.363756149
Final accumulative portfolio value: 22.661673637561492
Maximum DrawDown: -0.7355311674374418
Sharpe ratio: 1.3978919398652019


 17%|█▋        | 6/35 [00:19<01:30,  3.13s/it]

Initial portfolio value:100000
Final portfolio value: 2289036.640408566
Final accumulative portfolio value: 22.89036640408566
Maximum DrawDown: -0.7363499283106201
Sharpe ratio: 1.400539220049588


 20%|██        | 7/35 [00:22<01:26,  3.09s/it]

Initial portfolio value:100000
Final portfolio value: 2322244.052402707
Final accumulative portfolio value: 23.222440524027068
Maximum DrawDown: -0.7350750513521649
Sharpe ratio: 1.4040331826644614


 23%|██▎       | 8/35 [00:25<01:22,  3.07s/it]

Initial portfolio value:100000
Final portfolio value: 2388107.600600503
Final accumulative portfolio value: 23.881076006005028
Maximum DrawDown: -0.7333897005505704
Sharpe ratio: 1.4172729915641895


 26%|██▌       | 9/35 [00:28<01:19,  3.06s/it]

Initial portfolio value:100000
Final portfolio value: 2537081.973712628
Final accumulative portfolio value: 25.37081973712628
Maximum DrawDown: -0.7316221326429662
Sharpe ratio: 1.4355339513321874


 29%|██▊       | 10/35 [00:31<01:15,  3.03s/it]

Initial portfolio value:100000
Final portfolio value: 2613473.1899900185
Final accumulative portfolio value: 26.134731899900185
Maximum DrawDown: -0.7318215831912912
Sharpe ratio: 1.4437473517172215


 31%|███▏      | 11/35 [00:34<01:12,  3.01s/it]

Initial portfolio value:100000
Final portfolio value: 2663315.4990881123
Final accumulative portfolio value: 26.633154990881124
Maximum DrawDown: -0.7308301990953789
Sharpe ratio: 1.4486772793255238


 34%|███▍      | 12/35 [00:37<01:08,  2.99s/it]

Initial portfolio value:100000
Final portfolio value: 2691330.3420154005
Final accumulative portfolio value: 26.913303420154005
Maximum DrawDown: -0.7307965422055166
Sharpe ratio: 1.4515618490048312


 37%|███▋      | 13/35 [00:40<01:05,  2.99s/it]

Initial portfolio value:100000
Final portfolio value: 2722348.459554836
Final accumulative portfolio value: 27.22348459554836
Maximum DrawDown: -0.7302768567646061
Sharpe ratio: 1.4554980601295437


 40%|████      | 14/35 [00:43<01:02,  2.99s/it]

Initial portfolio value:100000
Final portfolio value: 2727800.210609151
Final accumulative portfolio value: 27.27800210609151
Maximum DrawDown: -0.730629667786541
Sharpe ratio: 1.4554941326072768


 43%|████▎     | 15/35 [00:46<01:01,  3.08s/it]

Initial portfolio value:100000
Final portfolio value: 2730277.3299324918
Final accumulative portfolio value: 27.30277329932492
Maximum DrawDown: -0.7300723951248196
Sharpe ratio: 1.4560937847791044


 46%|████▌     | 16/35 [00:49<00:58,  3.07s/it]

Initial portfolio value:100000
Final portfolio value: 2734893.929143798
Final accumulative portfolio value: 27.34893929143798
Maximum DrawDown: -0.730443014511589
Sharpe ratio: 1.4559360726788222


 49%|████▊     | 17/35 [00:52<00:54,  3.05s/it]

Initial portfolio value:100000
Final portfolio value: 2733205.2503231433
Final accumulative portfolio value: 27.332052503231434
Maximum DrawDown: -0.7303092878844217
Sharpe ratio: 1.4562285009736131


 51%|█████▏    | 18/35 [00:55<00:52,  3.06s/it]

Initial portfolio value:100000
Final portfolio value: 2742679.092123835
Final accumulative portfolio value: 27.42679092123835
Maximum DrawDown: -0.7300826825414164
Sharpe ratio: 1.4573523943714128


 54%|█████▍    | 19/35 [00:58<00:49,  3.07s/it]

Initial portfolio value:100000
Final portfolio value: 2742045.843867553
Final accumulative portfolio value: 27.420458438675528
Maximum DrawDown: -0.7302650922779328
Sharpe ratio: 1.4569938686159534


 57%|█████▋    | 20/35 [01:01<00:46,  3.10s/it]

Initial portfolio value:100000
Final portfolio value: 2744588.2965371595
Final accumulative portfolio value: 27.445882965371595
Maximum DrawDown: -0.7301945745836291
Sharpe ratio: 1.4573656033898195


 60%|██████    | 21/35 [01:04<00:43,  3.07s/it]

Initial portfolio value:100000
Final portfolio value: 2746274.7890806817
Final accumulative portfolio value: 27.462747890806817
Maximum DrawDown: -0.7301728910661099
Sharpe ratio: 1.4573412776728925


 63%|██████▎   | 22/35 [01:07<00:39,  3.04s/it]

Initial portfolio value:100000
Final portfolio value: 2745842.3759473977
Final accumulative portfolio value: 27.458423759473977
Maximum DrawDown: -0.7300961773920078
Sharpe ratio: 1.4575428633554361


 66%|██████▌   | 23/35 [01:10<00:36,  3.02s/it]

Initial portfolio value:100000
Final portfolio value: 2752203.067590538
Final accumulative portfolio value: 27.52203067590538
Maximum DrawDown: -0.7300563793912271
Sharpe ratio: 1.4582789711825863


 69%|██████▊   | 24/35 [01:13<00:33,  3.03s/it]

Initial portfolio value:100000
Final portfolio value: 2750922.135274314
Final accumulative portfolio value: 27.50922135274314
Maximum DrawDown: -0.7298755710511307
Sharpe ratio: 1.4584098306524367


 71%|███████▏  | 25/35 [01:16<00:29,  3.00s/it]

Initial portfolio value:100000
Final portfolio value: 2752318.9957981105
Final accumulative portfolio value: 27.523189957981106
Maximum DrawDown: -0.7299397433872463
Sharpe ratio: 1.4582293340823


 74%|███████▍  | 26/35 [01:19<00:26,  2.97s/it]

Initial portfolio value:100000
Final portfolio value: 2751035.5438333177
Final accumulative portfolio value: 27.51035543833318
Maximum DrawDown: -0.7301890791782515
Sharpe ratio: 1.458083926547711


 77%|███████▋  | 27/35 [01:22<00:23,  2.96s/it]

Initial portfolio value:100000
Final portfolio value: 2752381.264778439
Final accumulative portfolio value: 27.52381264778439
Maximum DrawDown: -0.7302035093182002
Sharpe ratio: 1.458298835241928


 80%|████████  | 28/35 [01:25<00:20,  2.94s/it]

Initial portfolio value:100000
Final portfolio value: 2751063.410285149
Final accumulative portfolio value: 27.510634102851487
Maximum DrawDown: -0.7300599435609361
Sharpe ratio: 1.458035603730406


 83%|████████▎ | 29/35 [01:28<00:17,  2.94s/it]

Initial portfolio value:100000
Final portfolio value: 2757162.2113570883
Final accumulative portfolio value: 27.571622113570882
Maximum DrawDown: -0.7299164589989418
Sharpe ratio: 1.4589800511766364


 86%|████████▌ | 30/35 [01:31<00:14,  2.94s/it]

Initial portfolio value:100000
Final portfolio value: 2752864.8522915626
Final accumulative portfolio value: 27.528648522915628
Maximum DrawDown: -0.7299301229869887
Sharpe ratio: 1.4584004486348345


 89%|████████▊ | 31/35 [01:34<00:11,  2.93s/it]

Initial portfolio value:100000
Final portfolio value: 2753060.071783372
Final accumulative portfolio value: 27.53060071783372
Maximum DrawDown: -0.7301086403120296
Sharpe ratio: 1.4583111384965786


 91%|█████████▏| 32/35 [01:37<00:08,  2.93s/it]

Initial portfolio value:100000
Final portfolio value: 2751841.49908204
Final accumulative portfolio value: 27.5184149908204
Maximum DrawDown: -0.7301726840217566
Sharpe ratio: 1.458088834658055


 94%|█████████▍| 33/35 [01:40<00:06,  3.02s/it]

Initial portfolio value:100000
Final portfolio value: 2754649.5299879294
Final accumulative portfolio value: 27.546495299879293
Maximum DrawDown: -0.7299682661111813
Sharpe ratio: 1.4586319252776863


 97%|█████████▋| 34/35 [01:43<00:03,  3.06s/it]

Initial portfolio value:100000
Final portfolio value: 2753104.9372223113
Final accumulative portfolio value: 27.531049372223112
Maximum DrawDown: -0.7300070730807402
Sharpe ratio: 1.4582026120486802


100%|██████████| 35/35 [01:46<00:00,  3.04s/it]


<modules.algorithms.PolicyGradient at 0x2bc7e0cb0>

In [11]:
print("PERSISTING MODEL.....")
# Save only the trained policy's parameters (its state dict), not the whole model object.
trained_policy_state = model.train_policy.state_dict()
policy_checkpoint_path = f"models/policy_{experiment_type}.pt"
torch.save(trained_policy_state, policy_checkpoint_path)

PERSISTING MODEL.....


In [12]:
print("TESTING.....")
# Result container: the "final" entry of the training environment's asset
# memory, plus an empty slot to be filled with test-run results.
MEIIE_results = dict(
    training=train_environment._asset_memory["final"],
    test=dict(),
)

TESTING.....


## 6. Model Evaluation

In [13]:
# Same settings as the training environment, except is_train_mode is off.
test_env_settings = dict(
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=False,
    experiment_type=experiment_type,
)

# Environment built on the scaled test frame.
test_environment = MultiPeriodPortfolioOptimizationEnv(df_portfolio_test, **test_env_settings)

In [14]:
# Fresh policy network with the same dimensions used for training, so the
# saved parameters can be loaded into it.
policy_init_args = dict(
    num_stocks=NUM_ASSETS,
    num_features=NUM_FEATURES,
    W=N,
    T=T,
)
policy = MultiPeriodConvAttentionNetwork(**policy_init_args)



In [15]:
# Read the parameters persisted after training and copy them into the fresh policy.
# load_state_dict's return value is the cell output.
saved_policy_state = torch.load(f"models/policy_{experiment_type}.pt")
policy.load_state_dict(saved_policy_state)

<All keys matched successfully>

In [16]:
# Run the restored policy on the test environment.
DRLAgent.DRL_validation(
    model=model,
    test_env=test_environment,
    policy=policy,
)

Initial portfolio value:100000
Final portfolio value: 411457.11404766777
Final accumulative portfolio value: 4.114571140476678
Maximum DrawDown: -0.3604384746970777
Sharpe ratio: 1.1776113082035955


## 7. Postprocessing

In [17]:
# Record the "final" entry of the test environment's asset memory alongside the training results.
test_value_history = test_environment._asset_memory["final"]
MEIIE_results["test"]["value"] = test_value_history

In [18]:
# Drop the first recorded entry and keep the rest of the test-run value history.
test_final_values = test_environment._asset_memory["final"]
drl_portfolio_performance = test_final_values[1:]

# Number of retained entries, shown as the cell output.
len(drl_portfolio_performance)

70

In [19]:
# Express each retained portfolio value as a multiple of the 100000 starting
# capital. The list is rebuilt from the environment's value history rather than
# from drl_portfolio_performance itself, so re-running this cell gives the same
# result instead of dividing the already-normalised values a second time.
drl_portfolio_performance = [
    value / 100000 for value in test_environment._asset_memory["final"][1:]
]

In [20]:
# Load the performance table; the first CSV column is used as the index.
performances_csv_path = "data/processed/performances_djia.csv"
performance_dataset = pd.read_csv(performances_csv_path, index_col=0)

In [21]:
# Add (or overwrite) this experiment's column in the performance table.
# Assigning a list requires its length to match the table's row count.
drl_column_name = "DRL_HYBRID_TRANSFORMER"
performance_dataset[drl_column_name] = drl_portfolio_performance

In [22]:
# Write the updated table back to the same CSV it was loaded from.
performances_output_path = "data/processed/performances_djia.csv"
performance_dataset.to_csv(performances_output_path)