In this Notebook, we will train a DRL Agent using an multi-period EIIE model as a policy function for the French stock market. The evaluation and performance including the agent's actions are displayed more explicity in the notebook.

## 1. Importing Libraries

In [1]:
import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.architectures import MultiPeriodEIIE
from modules.models import DRLAgent

sns.set()

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

## 2. Getting Config Data

In [2]:
config_file_name = "config.json"
with open(config_file_name, "r") as jsonfile:
    config_data = json.load(jsonfile)

In [3]:
cac40 = config_data["tickers"]["Europe"]["CAC_40"]
NUM_ASSETS = len(cac40)
len(cac40)

40

In [4]:
start_date = config_data["timeframe"]["cac40"]["start_date"]
end_date = config_data["timeframe"]["cac40"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

## 3. Data Retrieval

In [5]:
portfolio_raw_df = yf.download(tickers=cac40, start=start_date, end=end_date, interval=data_interval)

portfolio_raw_df.fillna(method="bfill", inplace=True)
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df = portfolio_raw_df.drop("Adj Close", axis=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df.Date = portfolio_raw_df.Date.astype(str)
portfolio_raw_df.columns = ["date", "tic", "close", "high", "low", "open", "volume"]
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  40 of 40 completed

2 Failed downloads:
['STM.PA', 'STLA.PA']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


## 4. DRL Environment
### 4.1 Hyperparameter selection

In [6]:
# SETTING HYPERPARAMETERS
FEATURES = ["close", "high", "low", "volume"]
N = config_data["lookback_window"]
T = config_data["multi_step_horizon"]
NUM_FEATURES = len(FEATURES)
experiment_type = "EIIE_CAC40" 
NUM_ASSETS = len(portfolio_raw_df.tic.unique().tolist())
NUM_ASSETS
N, T

(24, 3)

### 4.2 Train Environment Initialization

In [7]:
train_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_train,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=True,
    experiment_type=experiment_type
)

### 4.3 Model Parameters

In [8]:
# set PolicyGradient parameters
model_kwargs = {
    "lr": 0.0001,
    "policy": MultiPeriodEIIE,
    "multi_period_horizon": T
}

# here, we can set EIIE's parameters
policy_kwargs = {
    "initial_features": NUM_FEATURES,
    "k_size": 5, # Size of Initial Kernel
    "time_window": N,
    "prediction_horizon": T
}

### 4.4 Agent initialization

In [9]:
model = DRLAgent(train_environment).get_model("pg", device, model_kwargs, policy_kwargs)

## 5. Model Training

In [10]:
print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=35)

TRAINING AGENT.....


  0%|          | 0/35 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 470174.5308365409
Final accumulative portfolio value: 4.7017453083654095
Maximum DrawDown: -0.8547502196390458
Sharpe ratio: 0.6794521870342528


  3%|▎         | 1/35 [00:01<01:03,  1.87s/it]

Initial portfolio value:100000
Final portfolio value: 470175.691557255
Final accumulative portfolio value: 4.70175691557255
Maximum DrawDown: -0.8547501085033495
Sharpe ratio: 0.679452938374466


  6%|▌         | 2/35 [00:03<01:00,  1.85s/it]

Initial portfolio value:100000
Final portfolio value: 470176.9028256585
Final accumulative portfolio value: 4.701769028256585
Maximum DrawDown: -0.8547498866263292
Sharpe ratio: 0.6794538404243219


  9%|▊         | 3/35 [00:05<00:57,  1.80s/it]

Initial portfolio value:100000
Final portfolio value: 470177.75977890164
Final accumulative portfolio value: 4.701777597789016
Maximum DrawDown: -0.8547499830887488
Sharpe ratio: 0.6794543766433937


 11%|█▏        | 4/35 [00:07<00:54,  1.75s/it]

Initial portfolio value:100000
Final portfolio value: 470179.754502892
Final accumulative portfolio value: 4.701797545028921
Maximum DrawDown: -0.854749676453437
Sharpe ratio: 0.6794556912177938


 14%|█▍        | 5/35 [00:08<00:51,  1.72s/it]

Initial portfolio value:100000
Final portfolio value: 470180.783584537
Final accumulative portfolio value: 4.701807835845369
Maximum DrawDown: -0.854749687889572
Sharpe ratio: 0.6794564293246242


 17%|█▋        | 6/35 [00:10<00:49,  1.70s/it]

Initial portfolio value:100000
Final portfolio value: 470182.08962572593
Final accumulative portfolio value: 4.701820896257259
Maximum DrawDown: -0.8547497379474577
Sharpe ratio: 0.679457223648087


 20%|██        | 7/35 [00:12<00:47,  1.70s/it]

Initial portfolio value:100000
Final portfolio value: 470184.4726587129
Final accumulative portfolio value: 4.701844726587129
Maximum DrawDown: -0.8547494075399471
Sharpe ratio: 0.679458961387841


 23%|██▎       | 8/35 [00:13<00:46,  1.72s/it]

Initial portfolio value:100000
Final portfolio value: 470185.00698163314
Final accumulative portfolio value: 4.701850069816332
Maximum DrawDown: -0.8547494360332426
Sharpe ratio: 0.6794592437882919


 26%|██▌       | 9/35 [00:15<00:44,  1.72s/it]

Initial portfolio value:100000
Final portfolio value: 470186.5219218808
Final accumulative portfolio value: 4.701865219218808
Maximum DrawDown: -0.8547493951497165
Sharpe ratio: 0.6794603079231447


 29%|██▊       | 10/35 [00:17<00:42,  1.70s/it]

Initial portfolio value:100000
Final portfolio value: 470187.3639092676
Final accumulative portfolio value: 4.701873639092676
Maximum DrawDown: -0.8547494122707499
Sharpe ratio: 0.6794607297751574


 31%|███▏      | 11/35 [00:19<00:40,  1.71s/it]

Initial portfolio value:100000
Final portfolio value: 470188.490999544
Final accumulative portfolio value: 4.70188490999544
Maximum DrawDown: -0.854749271594879
Sharpe ratio: 0.679461566003987


 34%|███▍      | 12/35 [00:20<00:38,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470190.1293198902
Final accumulative portfolio value: 4.701901293198902
Maximum DrawDown: -0.854748982005431
Sharpe ratio: 0.6794627864687033


 37%|███▋      | 13/35 [00:22<00:37,  1.68s/it]

Initial portfolio value:100000
Final portfolio value: 470189.67208341835
Final accumulative portfolio value: 4.701896720834183
Maximum DrawDown: -0.8547491738215447
Sharpe ratio: 0.679462292762853


 40%|████      | 14/35 [00:24<00:35,  1.71s/it]

Initial portfolio value:100000
Final portfolio value: 470192.87457746576
Final accumulative portfolio value: 4.701928745774658
Maximum DrawDown: -0.8547489629974993
Sharpe ratio: 0.6794645368069682


 43%|████▎     | 15/35 [00:25<00:33,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470193.5009181985
Final accumulative portfolio value: 4.701935009181985
Maximum DrawDown: -0.8547488163857404
Sharpe ratio: 0.6794649077538021


 46%|████▌     | 16/35 [00:27<00:32,  1.68s/it]

Initial portfolio value:100000
Final portfolio value: 470193.2833606091
Final accumulative portfolio value: 4.701932833606091
Maximum DrawDown: -0.8547487993291191
Sharpe ratio: 0.6794646592219313


 49%|████▊     | 17/35 [00:29<00:30,  1.68s/it]

Initial portfolio value:100000
Final portfolio value: 470194.83824589255
Final accumulative portfolio value: 4.701948382458926
Maximum DrawDown: -0.8547487385384761
Sharpe ratio: 0.6794656629586137


 51%|█████▏    | 18/35 [00:30<00:28,  1.67s/it]

Initial portfolio value:100000
Final portfolio value: 470196.33979520714
Final accumulative portfolio value: 4.701963397952071
Maximum DrawDown: -0.8547487103707323
Sharpe ratio: 0.6794666510390033


 54%|█████▍    | 19/35 [00:32<00:26,  1.67s/it]

Initial portfolio value:100000
Final portfolio value: 470198.67541831743
Final accumulative portfolio value: 4.701986754183174
Maximum DrawDown: -0.8547484478717188
Sharpe ratio: 0.6794683196385736


 57%|█████▋    | 20/35 [00:34<00:25,  1.67s/it]

Initial portfolio value:100000
Final portfolio value: 470199.15154682484
Final accumulative portfolio value: 4.701991515468248
Maximum DrawDown: -0.8547484348671377
Sharpe ratio: 0.679468583940521


 60%|██████    | 21/35 [00:35<00:23,  1.67s/it]

Initial portfolio value:100000
Final portfolio value: 470200.7807348306
Final accumulative portfolio value: 4.702007807348306
Maximum DrawDown: -0.8547482554819941
Sharpe ratio: 0.6794695847309831


 63%|██████▎   | 22/35 [00:37<00:22,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470202.2533339868
Final accumulative portfolio value: 4.702022533339868
Maximum DrawDown: -0.8547481108443665
Sharpe ratio: 0.6794706821323502


 66%|██████▌   | 23/35 [00:39<00:20,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470205.76950702735
Final accumulative portfolio value: 4.702057695070273
Maximum DrawDown: -0.8547478270207554
Sharpe ratio: 0.6794730734852442


 69%|██████▊   | 24/35 [00:40<00:18,  1.68s/it]

Initial portfolio value:100000
Final portfolio value: 470205.84404744837
Final accumulative portfolio value: 4.702058440474484
Maximum DrawDown: -0.8547477860727077
Sharpe ratio: 0.6794729739981463


 71%|███████▏  | 25/35 [00:42<00:16,  1.68s/it]

Initial portfolio value:100000
Final portfolio value: 470207.16490485455
Final accumulative portfolio value: 4.702071649048546
Maximum DrawDown: -0.8547477125223084
Sharpe ratio: 0.6794738859169354


 74%|███████▍  | 26/35 [00:44<00:15,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470208.3010581753
Final accumulative portfolio value: 4.702083010581752
Maximum DrawDown: -0.8547476081184762
Sharpe ratio: 0.6794746530988619


 77%|███████▋  | 27/35 [00:45<00:13,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470210.9178145706
Final accumulative portfolio value: 4.702109178145706
Maximum DrawDown: -0.8547473541726811
Sharpe ratio: 0.6794764454678919


 80%|████████  | 28/35 [00:47<00:11,  1.70s/it]

Initial portfolio value:100000
Final portfolio value: 470211.6553024191
Final accumulative portfolio value: 4.702116553024191
Maximum DrawDown: -0.8547472170979602
Sharpe ratio: 0.6794770063158976


 83%|████████▎ | 29/35 [00:49<00:10,  1.70s/it]

Initial portfolio value:100000
Final portfolio value: 470213.2707111876
Final accumulative portfolio value: 4.702132707111876
Maximum DrawDown: -0.854747195326707
Sharpe ratio: 0.6794780010452538


 86%|████████▌ | 30/35 [00:51<00:08,  1.69s/it]

Initial portfolio value:100000
Final portfolio value: 470215.0282605524
Final accumulative portfolio value: 4.7021502826055235
Maximum DrawDown: -0.8547468622314869
Sharpe ratio: 0.6794792852223974


 89%|████████▊ | 31/35 [00:52<00:06,  1.73s/it]

Initial portfolio value:100000
Final portfolio value: 470215.9118414701
Final accumulative portfolio value: 4.702159118414701
Maximum DrawDown: -0.8547467607343229
Sharpe ratio: 0.6794798123535214


 91%|█████████▏| 32/35 [00:54<00:05,  1.72s/it]

Initial portfolio value:100000
Final portfolio value: 470217.10686528974
Final accumulative portfolio value: 4.7021710686528975
Maximum DrawDown: -0.8547464941057793
Sharpe ratio: 0.6794806556496324


 94%|█████████▍| 33/35 [00:56<00:03,  1.72s/it]

Initial portfolio value:100000
Final portfolio value: 470218.50341730803
Final accumulative portfolio value: 4.70218503417308
Maximum DrawDown: -0.8547463015987639
Sharpe ratio: 0.6794815000262666


 97%|█████████▋| 34/35 [00:57<00:01,  1.71s/it]

Initial portfolio value:100000
Final portfolio value: 470220.85601290705
Final accumulative portfolio value: 4.702208560129071
Maximum DrawDown: -0.85474621839091
Sharpe ratio: 0.6794831785461478


100%|██████████| 35/35 [00:59<00:00,  1.70s/it]


<modules.algorithms.PolicyGradient at 0x2ad057c80>

In [11]:
print("PERSISTING MODEL.....")
torch.save(model.train_policy.state_dict(), f"models/policy_{experiment_type}.pt")

PERSISTING MODEL.....


In [12]:
print("TESTING.....")
MEIIE_results = {
    "training": train_environment._asset_memory["final"],
    "test": {}
}

TESTING.....


## 6. Model Evaluation

In [13]:
test_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_test,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=False,
    experiment_type=experiment_type
)

In [14]:
policy = MultiPeriodEIIE(
    initial_features=NUM_FEATURES,
    k_size=5,
    time_window=N,
    prediction_horizon=T
)

In [15]:
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt"))

<All keys matched successfully>

In [16]:
DRLAgent.DRL_validation(model=model, test_env=test_environment, policy=policy)

Initial portfolio value:100000
Final portfolio value: 272448.12123615056
Final accumulative portfolio value: 2.7244812123615056
Maximum DrawDown: -0.5261476749224261
Sharpe ratio: 0.7533717145106762


## 7. Postprocessing

In [17]:
MEIIE_results["test"]["value"] = test_environment._asset_memory["final"]

In [18]:
drl_portfolio_performance = test_environment._asset_memory["final"][1:]
len(drl_portfolio_performance)

70

In [19]:
drl_portfolio_performance = [(x/100000) for x in drl_portfolio_performance]

In [20]:
performance_dataset = pd.read_csv("data/processed/performances_cac40.csv", index_col=0)

In [21]:
performance_dataset["DRL_EIIE"] = drl_portfolio_performance

In [22]:
performance_dataset.to_csv("data/processed/performances_cac40.csv")