In this notebook, we train a DRL agent that uses a multi-period Hybrid-Transformer model as its policy function for the French stock market index (CAC 40). The evaluation and performance, including the agent's actions, are displayed more explicitly in the notebook.

## 1. Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodConvAttentionNetwork
from modules.models import DRLAgent

# Apply seaborn's default plotting theme for this notebook.
sns.set()

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## 2. Getting Config Data

In [2]:
# Load the experiment configuration; later cells read tickers, timeframe and
# window sizes from `config_data`.
config_file_name = "config.json"
with open(config_file_name, "r") as config_handle:
    config_data = json.loads(config_handle.read())

In [3]:
# Ticker symbols configured for the CAC 40 index.
european_tickers = config_data["tickers"]["Europe"]
cac_40 = european_tickers["CAC_40"]
len(cac_40)

40

In [4]:
# Experiment settings read from the shared configuration.
start_date = config_data["timeframe"]["cac40"]["start_date"]
end_date = config_data["timeframe"]["cac40"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

# Seed the global NumPy and PyTorch RNGs so that anything drawn from them
# (e.g. the initial weights of a torch network) is repeatable across
# "Restart Kernel -> Run All". Skipped when the config leaves the seed unset.
if random_seed is not None:
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

## 3. Data Retrieval

In [5]:
# Download the price/volume history for every configured CAC 40 ticker.
portfolio_raw_df = yf.download(tickers=cac_40, start=start_date, end=end_date, interval=data_interval)

# Back-fill missing observations from the next available row.
# DataFrame.bfill() replaces the deprecated fillna(method="bfill") and returns a
# new frame instead of mutating in place.
portfolio_raw_df = portfolio_raw_df.bfill()

# Move column level 1 (named "Ticker" below) into the rows: one row per (Date, Ticker).
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df = portfolio_raw_df.drop("Adj Close", axis=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df.Date = portfolio_raw_df.Date.astype(str)
# Positional rename: relies on the remaining columns being ordered
# Date, Ticker, Close, High, Low, Open, Volume.
portfolio_raw_df.columns = ["date", "tic", "close", "high", "low", "open", "volume"]
# Keep only the columns used downstream ("open" is dropped here).
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

# Ordered split (shuffle=False): the last `test_ratio` share of rows becomes the test set.
# NOTE(review): the split is by row count on the long-format frame, so the
# boundary may fall in the middle of a date -- confirm the environment tolerates this.
df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)
# Scale train and test with GroupByScaler, grouped by ticker, using MaxAbsScaler.
# NOTE(review): the test set is scaled by a scaler fitted on the test data itself,
# not by the scaler fitted on the training data -- confirm this is intended.
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  40 of 40 completed

2 Failed downloads:
['STLA.PA', 'STM.PA']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


In [6]:
# Number of distinct tickers present in the downloaded data.
distinct_tickers = portfolio_raw_df["tic"].unique()
NUM_ASSETS = len(distinct_tickers)
NUM_ASSETS

38

## 4. DRL Environment
### 4.1 Hyperparameter selection

In [7]:
# Hyperparameters shared by the environments and the policy network.
experiment_type = "HYBRID_TRANSFORMER_CAC40"  # tag passed to the environments and used in the saved-model filename

FEATURES = ["close", "high", "low", "volume"]  # feature columns handed to the environments
NUM_FEATURES = len(FEATURES)

N = config_data["lookback_window"]     # passed to the environments as `time_window`
T = config_data["multi_step_horizon"]  # passed to the environments as `multi_period_horizon`

(N, T, NUM_FEATURES)

(24, 3, 4)

### 4.2 Train Environment Initialization

In [8]:
# Keyword settings for the training environment.
# `comission_fee_pct` (sic) is the keyword used by the environment's constructor.
train_env_settings = {
    "initial_amount": 100000,
    "comission_fee_pct": 0.0025,
    "time_window": N,
    "multi_period_horizon": T,
    "features": FEATURES,
    "normalize_df": None,  # presumably because the frame was already scaled above -- TODO confirm
    "is_train_mode": True,
    "experiment_type": experiment_type,
}

train_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_train,
    **train_env_settings,
)

### 4.3 Model Parameters

In [9]:
# Arguments for the policy-gradient algorithm: learning rate, the policy
# network class to instantiate, and the multi-period horizon.
model_kwargs = dict(
    lr=0.0001,
    policy=MultiPeriodConvAttentionNetwork,
    multi_period_horizon=T,
)

# Constructor arguments for the policy network (MultiPeriodConvAttentionNetwork).
policy_kwargs = dict(
    num_features=NUM_FEATURES,
    num_stocks=NUM_ASSETS,
    W=N,
    T=T,
)

### 4.4 Agent initialization

In [10]:
# Wrap the training environment in an agent, then build the "pg" model on `device`.
agent = DRLAgent(train_environment)
model = agent.get_model("pg", device, model_kwargs, policy_kwargs)



## 5. Model Training

In [11]:
# Number of training episodes to run.
training_episodes = 35

print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=training_episodes)


TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 474581.3753718804
Final accumulative portfolio value: 4.745813753718804
Maximum DrawDown: -0.8546353738128416
Sharpe ratio: 0.6830115636936307


  3%|▎         | 1/35 [00:03<02:10,  3.85s/it]

Initial portfolio value:100000
Final portfolio value: 485863.739912027
Final accumulative portfolio value: 4.85863739912027
Maximum DrawDown: -0.8546137659067568
Sharpe ratio: 0.6910211097354267


  6%|▌         | 2/35 [00:07<02:04,  3.78s/it]

Initial portfolio value:100000
Final portfolio value: 504712.8021456986
Final accumulative portfolio value: 5.047128021456985
Maximum DrawDown: -0.8540287328089216
Sharpe ratio: 0.7044071985736994


  9%|▊         | 3/35 [00:10<01:54,  3.59s/it]

Initial portfolio value:100000
Final portfolio value: 520984.8558968727
Final accumulative portfolio value: 5.209848558968727
Maximum DrawDown: -0.8539342850020928
Sharpe ratio: 0.715216508125824


 11%|█▏        | 4/35 [00:14<01:48,  3.49s/it]

Initial portfolio value:100000
Final portfolio value: 534078.4457806257
Final accumulative portfolio value: 5.340784457806257
Maximum DrawDown: -0.8541557406075825
Sharpe ratio: 0.7229863012011343


 14%|█▍        | 5/35 [00:17<01:42,  3.42s/it]

Initial portfolio value:100000
Final portfolio value: 551688.3538311338
Final accumulative portfolio value: 5.516883538311338
Maximum DrawDown: -0.8534648412697657
Sharpe ratio: 0.7342198149080935


 17%|█▋        | 6/35 [00:20<01:38,  3.39s/it]

Initial portfolio value:100000
Final portfolio value: 558708.2423494334
Final accumulative portfolio value: 5.5870824234943335
Maximum DrawDown: -0.8543257599537382
Sharpe ratio: 0.7385953651976025


 20%|██        | 7/35 [00:24<01:34,  3.37s/it]

Initial portfolio value:100000
Final portfolio value: 565745.5498580666
Final accumulative portfolio value: 5.657455498580666
Maximum DrawDown: -0.853285852262587
Sharpe ratio: 0.7428310083302321


 23%|██▎       | 8/35 [00:27<01:31,  3.39s/it]

Initial portfolio value:100000
Final portfolio value: 567436.4960040143
Final accumulative portfolio value: 5.674364960040143
Maximum DrawDown: -0.8532313123880957
Sharpe ratio: 0.7441757862501952


 26%|██▌       | 9/35 [00:31<01:27,  3.38s/it]

Initial portfolio value:100000
Final portfolio value: 568563.3855512643
Final accumulative portfolio value: 5.685633855512643
Maximum DrawDown: -0.8533283475797846
Sharpe ratio: 0.7445711316441795


 29%|██▊       | 10/35 [00:34<01:23,  3.35s/it]

Initial portfolio value:100000
Final portfolio value: 573774.3516370313
Final accumulative portfolio value: 5.737743516370313
Maximum DrawDown: -0.8525616848021851
Sharpe ratio: 0.7479764547194739


 31%|███▏      | 11/35 [00:37<01:20,  3.34s/it]

Initial portfolio value:100000
Final portfolio value: 579077.5837232245
Final accumulative portfolio value: 5.790775837232244
Maximum DrawDown: -0.8528571085671715
Sharpe ratio: 0.7508285042414712


 34%|███▍      | 12/35 [00:40<01:16,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 584571.9293671545
Final accumulative portfolio value: 5.845719293671545
Maximum DrawDown: -0.8522879512750826
Sharpe ratio: 0.7538175178271339


 37%|███▋      | 13/35 [00:44<01:13,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 596901.6264264267
Final accumulative portfolio value: 5.969016264264267
Maximum DrawDown: -0.8514254045117869
Sharpe ratio: 0.7612225326116819


 40%|████      | 14/35 [00:47<01:09,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 598673.9894312402
Final accumulative portfolio value: 5.986739894312403
Maximum DrawDown: -0.8513852741609993
Sharpe ratio: 0.762053201598257


 43%|████▎     | 15/35 [00:51<01:07,  3.37s/it]

Initial portfolio value:100000
Final portfolio value: 599549.4936098043
Final accumulative portfolio value: 5.995494936098043
Maximum DrawDown: -0.850828345374215
Sharpe ratio: 0.7631458291975207


 46%|████▌     | 16/35 [00:54<01:03,  3.35s/it]

Initial portfolio value:100000
Final portfolio value: 607606.3284406234
Final accumulative portfolio value: 6.076063284406233
Maximum DrawDown: -0.8501746327594666
Sharpe ratio: 0.767759693872579


 49%|████▊     | 17/35 [00:57<01:00,  3.35s/it]

Initial portfolio value:100000
Final portfolio value: 612079.628392908
Final accumulative portfolio value: 6.120796283929081
Maximum DrawDown: -0.8493823394483881
Sharpe ratio: 0.7699103951692883


 51%|█████▏    | 18/35 [01:00<00:56,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 615195.860505022
Final accumulative portfolio value: 6.15195860505022
Maximum DrawDown: -0.8497868258070884
Sharpe ratio: 0.7713664083320549


 54%|█████▍    | 19/35 [01:04<00:53,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 602665.8867275201
Final accumulative portfolio value: 6.026658867275201
Maximum DrawDown: -0.8479032085109587
Sharpe ratio: 0.7657590270933261


 57%|█████▋    | 20/35 [01:07<00:49,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 623205.0336442711
Final accumulative portfolio value: 6.232050336442711
Maximum DrawDown: -0.8464489301701084
Sharpe ratio: 0.7767658325406265


 60%|██████    | 21/35 [01:10<00:46,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 620833.3077090849
Final accumulative portfolio value: 6.20833307709085
Maximum DrawDown: -0.8473064777534169
Sharpe ratio: 0.7751523239708671


 63%|██████▎   | 22/35 [01:14<00:43,  3.35s/it]

Initial portfolio value:100000
Final portfolio value: 637312.5970999772
Final accumulative portfolio value: 6.373125970999772
Maximum DrawDown: -0.8465754199185368
Sharpe ratio: 0.7843047676965943


 66%|██████▌   | 23/35 [01:17<00:40,  3.35s/it]

Initial portfolio value:100000
Final portfolio value: 639106.4767525793
Final accumulative portfolio value: 6.391064767525793
Maximum DrawDown: -0.8465718878593735
Sharpe ratio: 0.7845954119443338


 69%|██████▊   | 24/35 [01:21<00:36,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 636944.5488737901
Final accumulative portfolio value: 6.369445488737901
Maximum DrawDown: -0.8463406553966355
Sharpe ratio: 0.7836932046057219


 71%|███████▏  | 25/35 [01:24<00:33,  3.34s/it]

Initial portfolio value:100000
Final portfolio value: 643397.3449435728
Final accumulative portfolio value: 6.433973449435728
Maximum DrawDown: -0.8458761278521493
Sharpe ratio: 0.787277396932392


 74%|███████▍  | 26/35 [01:27<00:30,  3.34s/it]

Initial portfolio value:100000
Final portfolio value: 635396.2780668552
Final accumulative portfolio value: 6.353962780668551
Maximum DrawDown: -0.8460189882187445
Sharpe ratio: 0.7826124006748921


 77%|███████▋  | 27/35 [01:31<00:26,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 634735.0364277707
Final accumulative portfolio value: 6.347350364277707
Maximum DrawDown: -0.8459149667379335
Sharpe ratio: 0.7823455959566422


 80%|████████  | 28/35 [01:34<00:23,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 633514.4494470197
Final accumulative portfolio value: 6.335144494470197
Maximum DrawDown: -0.8460588701902065
Sharpe ratio: 0.7818220555551545


 83%|████████▎ | 29/35 [01:37<00:19,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 642406.8629862724
Final accumulative portfolio value: 6.424068629862725
Maximum DrawDown: -0.8458611114209837
Sharpe ratio: 0.7867118473786132


 86%|████████▌ | 30/35 [01:40<00:16,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 647181.289001135
Final accumulative portfolio value: 6.471812890011351
Maximum DrawDown: -0.8457651948460985
Sharpe ratio: 0.7893960541700633


 89%|████████▊ | 31/35 [01:44<00:13,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 644321.6303614273
Final accumulative portfolio value: 6.443216303614273
Maximum DrawDown: -0.8458681579125735
Sharpe ratio: 0.7872570758722945


 91%|█████████▏| 32/35 [01:47<00:10,  3.36s/it]

Initial portfolio value:100000
Final portfolio value: 653638.3523656374
Final accumulative portfolio value: 6.536383523656374
Maximum DrawDown: -0.8458106841292188
Sharpe ratio: 0.7967794320436601


 94%|█████████▍| 33/35 [01:51<00:06,  3.36s/it]

Initial portfolio value:100000
Final portfolio value: 608469.3004030174
Final accumulative portfolio value: 6.084693004030174
Maximum DrawDown: -0.8458114344905697
Sharpe ratio: 0.7726482277698955


 97%|█████████▋| 34/35 [01:54<00:03,  3.38s/it]

Initial portfolio value:100000
Final portfolio value: 587833.4360907153
Final accumulative portfolio value: 5.878334360907154
Maximum DrawDown: -0.8456521378537483
Sharpe ratio: 0.7616287374870923


100%|██████████| 35/35 [01:57<00:00,  3.37s/it]


<modules.algorithms.PolicyGradient at 0x28db21cd0>

In [12]:
print("PERSISTING MODEL.....")
# Save only the trained policy's weights (its state dict), keyed by experiment name.
trained_policy_weights = model.train_policy.state_dict()
torch.save(trained_policy_weights, f"models/policy_{experiment_type}.pt")

PERSISTING MODEL.....


In [13]:
print("TESTING.....")
# Results container: the training environment's "final" asset memory, plus an
# empty slot that a later cell fills with the test values.
MEIIE_results = dict(
    training=train_environment._asset_memory["final"],
    test=dict(),
)

TESTING.....


## 6. Model Evaluation

In [14]:
# New policy network instance; the saved weights are loaded into it in a later cell.
policy = MultiPeriodConvAttentionNetwork(
    num_features=NUM_FEATURES,
    num_stocks=NUM_ASSETS,
    W=N,
    T=T,
)



In [15]:
# Keyword settings for the evaluation environment; same values as the training
# environment except `is_train_mode=False`.
# `comission_fee_pct` (sic) is the keyword used by the environment's constructor.
test_env_settings = {
    "initial_amount": 100000,
    "comission_fee_pct": 0.0025,
    "time_window": N,
    "multi_period_horizon": T,
    "features": FEATURES,
    "normalize_df": None,
    "is_train_mode": False,
    "experiment_type": experiment_type,
}

test_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_test,
    **test_env_settings,
)

In [16]:
# Load the persisted policy weights into the fresh policy network.
# map_location=device remaps the stored tensors onto this session's device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt", map_location=device))

<All keys matched successfully>

In [17]:
# Run the validation pass on the test environment with the reloaded policy.
DRLAgent.DRL_validation(
    policy=policy,
    test_env=test_environment,
    model=model,
)

Initial portfolio value:100000
Final portfolio value: 313312.35378394224
Final accumulative portfolio value: 3.133123537839422
Maximum DrawDown: -0.5112760564685955
Sharpe ratio: 0.8405319067131358


## 7. Postprocessing

In [18]:
# Record the test environment's "final" asset memory alongside the training values.
test_value_history = test_environment._asset_memory["final"]
MEIIE_results["test"]["value"] = test_value_history

In [19]:
# Test-run values with the first entry of the "final" asset memory dropped.
final_test_values = test_environment._asset_memory["final"]
drl_portfolio_performance = final_test_values[1:]
len(drl_portfolio_performance)

70

In [20]:
# Express each test-run value as a multiple of the initial capital.
# Derived directly from the environment's memory rather than from
# `drl_portfolio_performance` itself, so re-running this cell does not divide
# the values a second time. 100000 is the `initial_amount` given to the test
# environment.
drl_portfolio_performance = [
    value / 100000 for value in test_environment._asset_memory["final"][1:]
]

In [21]:
# Load the existing CAC 40 performance table; its first CSV column is the index.
performance_csv_path = "data/processed/performances_cac40.csv"
performance_dataset = pd.read_csv(performance_csv_path, index_col=0)

In [22]:
# Add (or overwrite) this experiment's column in the performance table.
drl_column_name = "DRL_HYBRID_TRANSFORMER"
performance_dataset[drl_column_name] = drl_portfolio_performance

In [23]:
# Write the updated table back to the CSV file it was read from.
performance_output_path = "data/processed/performances_cac40.csv"
performance_dataset.to_csv(performance_output_path)