In this Notebook, we will train a DRL Agent using an multi-period Hybrid-Transformer model as a policy function for the French stock market index. The evaluation and performance including the agent's actions are displayed more explicity in the notebook.

## 1. Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodConvAttentionNetwork
from modules.models import DRLAgent

sns.set()

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

## 2. Getting Config Data

In [2]:
config_file_name = "config.json"
with open(config_file_name, "r") as jsonfile:
    config_data = json.load(jsonfile)

In [3]:
cac_40 = config_data["tickers"]["Europe"]["CAC_40"]
len(cac_40)

40

In [4]:
start_date = config_data["timeframe"]["cac40"]["start_date"]
end_date = config_data["timeframe"]["cac40"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

## 3. Data Retrieval

In [5]:
portfolio_raw_df = yf.download(tickers=cac_40, start=start_date, end=end_date, interval=data_interval)

portfolio_raw_df.fillna(method="bfill", inplace=True)
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df = portfolio_raw_df.drop("Adj Close", axis=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df.Date = portfolio_raw_df.Date.astype(str)
portfolio_raw_df.columns = ["date", "tic", "close", "high", "low", "open", "volume"]
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  40 of 40 completed

2 Failed downloads:
['STM.PA', 'STLA.PA']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


In [6]:
NUM_ASSETS = len(portfolio_raw_df.tic.unique().tolist())
NUM_ASSETS

38

## 4. DRL Environment
### 4.1 Hyperparameter selection

In [7]:
# SETTING HYPERPARAMETERS
FEATURES = ["close", "high", "low", "volume"]
N = config_data["lookback_window"]
T = config_data["multi_step_horizon"]
NUM_FEATURES = len(FEATURES)
experiment_type = "HYBRID_TRANSFORMER_CAC40" 
N, T, NUM_FEATURES

(24, 3, 4)

### 4.2 Train Environment Initialization

In [8]:
train_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_train,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=True,
    experiment_type=experiment_type
)

### 4.3 Model Parameters

In [9]:
# set PolicyGradient parameters
model_kwargs = {
    "lr": 0.0001,
    "policy": MultiPeriodConvAttentionNetwork,
    "multi_period_horizon": T
}

# here, we can set EIIE's parameters
policy_kwargs = {
    "num_features": NUM_FEATURES,
    "num_stocks": NUM_ASSETS,
    "W": N,
    "T": T
}

### 4.4 Agent initialization

In [10]:
model = DRLAgent(train_environment).get_model("pg", device, model_kwargs, policy_kwargs)



## 5. Model Training

In [11]:
print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=35)


TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 473628.12876374193
Final accumulative portfolio value: 4.73628128763742
Maximum DrawDown: -0.8556373804958776
Sharpe ratio: 0.6816329757524531


  3%|▎         | 1/35 [00:03<02:01,  3.57s/it]

Initial portfolio value:100000
Final portfolio value: 485744.812847591
Final accumulative portfolio value: 4.85744812847591
Maximum DrawDown: -0.8539333926717468
Sharpe ratio: 0.6913128571604664


  6%|▌         | 2/35 [00:07<01:59,  3.63s/it]

Initial portfolio value:100000
Final portfolio value: 496161.40447018633
Final accumulative portfolio value: 4.9616140447018635
Maximum DrawDown: -0.8534049242206097
Sharpe ratio: 0.6984671604271961


  9%|▊         | 3/35 [00:10<01:52,  3.52s/it]

Initial portfolio value:100000
Final portfolio value: 507731.11008272023
Final accumulative portfolio value: 5.0773111008272025
Maximum DrawDown: -0.8531845729786325
Sharpe ratio: 0.7065520745872835


 11%|█▏        | 4/35 [00:14<01:47,  3.48s/it]

Initial portfolio value:100000
Final portfolio value: 526610.4617742979
Final accumulative portfolio value: 5.266104617742979
Maximum DrawDown: -0.8513466102060316
Sharpe ratio: 0.7199729587013922


 14%|█▍        | 5/35 [00:17<01:44,  3.50s/it]

Initial portfolio value:100000
Final portfolio value: 554370.9544694076
Final accumulative portfolio value: 5.543709544694075
Maximum DrawDown: -0.849146201990839
Sharpe ratio: 0.737601646527631


 17%|█▋        | 6/35 [00:21<01:41,  3.49s/it]

Initial portfolio value:100000
Final portfolio value: 571703.4659530498
Final accumulative portfolio value: 5.717034659530499
Maximum DrawDown: -0.8485631606104969
Sharpe ratio: 0.7478612160140823


 20%|██        | 7/35 [00:24<01:35,  3.42s/it]

Initial portfolio value:100000
Final portfolio value: 581362.7735267103
Final accumulative portfolio value: 5.8136277352671035
Maximum DrawDown: -0.8484579336382032
Sharpe ratio: 0.7535082227230813


 23%|██▎       | 8/35 [00:27<01:32,  3.41s/it]

Initial portfolio value:100000
Final portfolio value: 587859.1809308147
Final accumulative portfolio value: 5.878591809308147
Maximum DrawDown: -0.8480032280359964
Sharpe ratio: 0.7567931147727802


 26%|██▌       | 9/35 [00:31<01:27,  3.37s/it]

Initial portfolio value:100000
Final portfolio value: 595280.0950722393
Final accumulative portfolio value: 5.952800950722393
Maximum DrawDown: -0.8487347045611003
Sharpe ratio: 0.7606468345403431


 29%|██▊       | 10/35 [00:34<01:23,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 594515.1785479375
Final accumulative portfolio value: 5.945151785479375
Maximum DrawDown: -0.8476419074942537
Sharpe ratio: 0.7610273374930085


 31%|███▏      | 11/35 [00:37<01:19,  3.30s/it]

Initial portfolio value:100000
Final portfolio value: 609827.184579165
Final accumulative portfolio value: 6.09827184579165
Maximum DrawDown: -0.8475562748414777
Sharpe ratio: 0.7689680708122916


 34%|███▍      | 12/35 [00:40<01:15,  3.27s/it]

Initial portfolio value:100000
Final portfolio value: 611960.7492443366
Final accumulative portfolio value: 6.119607492443366
Maximum DrawDown: -0.8471703586854544
Sharpe ratio: 0.7704995323929613


 37%|███▋      | 13/35 [00:43<01:11,  3.25s/it]

Initial portfolio value:100000
Final portfolio value: 606418.2223379031
Final accumulative portfolio value: 6.0641822233790315
Maximum DrawDown: -0.8470662251109908
Sharpe ratio: 0.7680193217876106


 40%|████      | 14/35 [00:47<01:08,  3.28s/it]

Initial portfolio value:100000
Final portfolio value: 616967.7224445014
Final accumulative portfolio value: 6.169677224445014
Maximum DrawDown: -0.8466712390788016
Sharpe ratio: 0.7728798853445241


 43%|████▎     | 15/35 [00:50<01:07,  3.37s/it]

Initial portfolio value:100000
Final portfolio value: 621442.8974939102
Final accumulative portfolio value: 6.214428974939102
Maximum DrawDown: -0.8466404686227684
Sharpe ratio: 0.775081268239512


 46%|████▌     | 16/35 [00:54<01:03,  3.34s/it]

Initial portfolio value:100000
Final portfolio value: 621155.9808605046
Final accumulative portfolio value: 6.211559808605045
Maximum DrawDown: -0.8463997601813373
Sharpe ratio: 0.77517828231276


 49%|████▊     | 17/35 [00:57<00:59,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 624597.0351009726
Final accumulative portfolio value: 6.245970351009726
Maximum DrawDown: -0.8469667920027724
Sharpe ratio: 0.7773024083153097


 51%|█████▏    | 18/35 [01:00<00:56,  3.31s/it]

Initial portfolio value:100000
Final portfolio value: 615977.6090580126
Final accumulative portfolio value: 6.159776090580126
Maximum DrawDown: -0.8465367417805447
Sharpe ratio: 0.773366453757275


 54%|█████▍    | 19/35 [01:03<00:52,  3.30s/it]

Initial portfolio value:100000
Final portfolio value: 631085.4259722383
Final accumulative portfolio value: 6.310854259722383
Maximum DrawDown: -0.846182495289844
Sharpe ratio: 0.7808430763990147


 57%|█████▋    | 20/35 [01:07<00:49,  3.28s/it]

Initial portfolio value:100000
Final portfolio value: 633165.764394769
Final accumulative portfolio value: 6.33165764394769
Maximum DrawDown: -0.8458910064018554
Sharpe ratio: 0.7820546690707155


 60%|██████    | 21/35 [01:10<00:45,  3.24s/it]

Initial portfolio value:100000
Final portfolio value: 628221.0802541673
Final accumulative portfolio value: 6.282210802541673
Maximum DrawDown: -0.8462791800065335
Sharpe ratio: 0.779092241463914


 63%|██████▎   | 22/35 [01:13<00:42,  3.26s/it]

Initial portfolio value:100000
Final portfolio value: 634252.1758086184
Final accumulative portfolio value: 6.342521758086185
Maximum DrawDown: -0.8460082790612996
Sharpe ratio: 0.7826632683852202


 66%|██████▌   | 23/35 [01:16<00:38,  3.24s/it]

Initial portfolio value:100000
Final portfolio value: 620369.68371505
Final accumulative portfolio value: 6.2036968371505
Maximum DrawDown: -0.8460688471555081
Sharpe ratio: 0.7757528086530356


 69%|██████▊   | 24/35 [01:20<00:35,  3.24s/it]

Initial portfolio value:100000
Final portfolio value: 636290.5137729706
Final accumulative portfolio value: 6.362905137729706
Maximum DrawDown: -0.8456838876599834
Sharpe ratio: 0.7840382496303572


 71%|███████▏  | 25/35 [01:23<00:32,  3.23s/it]

Initial portfolio value:100000
Final portfolio value: 639415.5693173499
Final accumulative portfolio value: 6.3941556931734995
Maximum DrawDown: -0.8459181186292303
Sharpe ratio: 0.7853027230421343


 74%|███████▍  | 26/35 [01:26<00:29,  3.24s/it]

Initial portfolio value:100000
Final portfolio value: 642952.7407107406
Final accumulative portfolio value: 6.429527407107406
Maximum DrawDown: -0.8457613669827533
Sharpe ratio: 0.7871126880709992


 77%|███████▋  | 27/35 [01:29<00:26,  3.28s/it]

Initial portfolio value:100000
Final portfolio value: 641122.0568556671
Final accumulative portfolio value: 6.411220568556671
Maximum DrawDown: -0.8456068651869262
Sharpe ratio: 0.7862634051363385


 80%|████████  | 28/35 [01:33<00:23,  3.29s/it]

Initial portfolio value:100000
Final portfolio value: 640279.0360231216
Final accumulative portfolio value: 6.402790360231216
Maximum DrawDown: -0.8456950476479003
Sharpe ratio: 0.785785062981941


 83%|████████▎ | 29/35 [01:36<00:19,  3.31s/it]

Initial portfolio value:100000
Final portfolio value: 642815.0907991296
Final accumulative portfolio value: 6.428150907991296
Maximum DrawDown: -0.845716700648333
Sharpe ratio: 0.787017642514095


 86%|████████▌ | 30/35 [01:39<00:16,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 642935.9248372702
Final accumulative portfolio value: 6.429359248372702
Maximum DrawDown: -0.845770764836729
Sharpe ratio: 0.7871307672696238


 89%|████████▊ | 31/35 [01:43<00:13,  3.32s/it]

Initial portfolio value:100000
Final portfolio value: 644144.6026135717
Final accumulative portfolio value: 6.441446026135717
Maximum DrawDown: -0.8456529023525071
Sharpe ratio: 0.7877330458211099


 91%|█████████▏| 32/35 [01:46<00:10,  3.37s/it]

Initial portfolio value:100000
Final portfolio value: 642872.910135231
Final accumulative portfolio value: 6.42872910135231
Maximum DrawDown: -0.845661983737143
Sharpe ratio: 0.7871135593551353


 94%|█████████▍| 33/35 [01:49<00:06,  3.33s/it]

Initial portfolio value:100000
Final portfolio value: 638350.0808049907
Final accumulative portfolio value: 6.383500808049908
Maximum DrawDown: -0.8456975283645597
Sharpe ratio: 0.7850359937835446


 97%|█████████▋| 34/35 [01:53<00:03,  3.31s/it]

Initial portfolio value:100000
Final portfolio value: 640567.8460226165
Final accumulative portfolio value: 6.405678460226165
Maximum DrawDown: -0.8456273806544178
Sharpe ratio: 0.7860791659055957


100%|██████████| 35/35 [01:56<00:00,  3.33s/it]


<modules.algorithms.PolicyGradient at 0x28e47fc80>

In [12]:
print("PERSISTING MODEL.....")
torch.save(model.train_policy.state_dict(), f"models/policy_{experiment_type}.pt")

PERSISTING MODEL.....


In [13]:
print("TESTING.....")
MEIIE_results = {
    "training": train_environment._asset_memory["final"],
    "test": {}
}

TESTING.....


## 6. Model Evaluation

In [14]:
policy = MultiPeriodConvAttentionNetwork(num_stocks=NUM_ASSETS,
                                         num_features=NUM_FEATURES,
                                         W=N,
                                         T=T)



In [15]:
test_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_test,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=False,
    experiment_type=experiment_type
)

In [16]:
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt"))

<All keys matched successfully>

In [17]:
DRLAgent.DRL_validation(model=model, test_env=test_environment, policy=policy)

Initial portfolio value:100000
Final portfolio value: 313318.42484725907
Final accumulative portfolio value: 3.133184248472591
Maximum DrawDown: -0.5112611018657609
Sharpe ratio: 0.8405653840319385


## 7. Postprocessing

In [18]:
MEIIE_results["test"]["value"] = test_environment._asset_memory["final"]

In [19]:
drl_portfolio_performance = test_environment._asset_memory["final"][1:]
len(drl_portfolio_performance)

70

In [20]:
drl_portfolio_performance = [(x/100000) for x in drl_portfolio_performance]

In [21]:
performance_dataset = pd.read_csv("data/processed/performances_cac40.csv", index_col=0)

In [22]:
performance_dataset["DRL_HYBRID_TRANSFORMER"] = drl_portfolio_performance

In [23]:
performance_dataset.to_csv("data/processed/performances_cac40.csv")