In [1]:
import torch
import torch.nn as nn
import numpy as np

import yfinance as yf
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from modules.scalers import GroupByScaler
from sklearn.preprocessing import MaxAbsScaler
import json
import torch

from modules.environment import MultiPeriodPortfolioOptimizationEnv
from modules.models import DRLAgent

sns.set()

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [2]:
config_file_name = "config.json"
with open(config_file_name, "r") as jsonfile:
    config_data = json.load(jsonfile)

In [3]:
sp_100 = config_data["tickers"]["America"]["SP100"]
NUM_ASSETS = len(sp_100)
len(sp_100)

101

In [4]:
start_date = config_data["timeframe"]["bull_market"]["start_date"]
end_date = config_data["timeframe"]["bull_market"]["end_date"]
data_interval = config_data["data_interval"]
test_ratio = config_data["train_test_ratio"]
random_seed = config_data["random_state_seed"]

In [5]:
portfolio_raw_df = yf.download(tickers=sp_100, start=start_date, end=end_date, interval=data_interval)

portfolio_raw_df.fillna(method="bfill", inplace=True)
portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)
portfolio_raw_df = portfolio_raw_df.drop("Adj Close", axis=1)
portfolio_raw_df.columns.name = None
portfolio_raw_df = portfolio_raw_df.reset_index()
portfolio_raw_df.Date = portfolio_raw_df.Date.astype(str)
portfolio_raw_df.columns = ["date", "tic", "close", "high", "low", "open", "volume"]
portfolio_raw_df = portfolio_raw_df[["date", "tic", "close", "high", "low", "volume"]]

df_portfolio_raw_train, df_portfolio_raw_test = train_test_split(portfolio_raw_df, test_size=test_ratio, shuffle=False, random_state=random_seed)
df_portfolio_train = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_train)
df_portfolio_test = GroupByScaler(by="tic", scaler=MaxAbsScaler).fit_transform(df_portfolio_raw_test)


[*********************100%%**********************]  101 of 101 completed
  portfolio_raw_df.fillna(method="bfill", inplace=True)
  portfolio_raw_df = portfolio_raw_df.stack(level=1).rename_axis(["Date", "Ticker"]).reset_index(level=1)


In [6]:
# SETTING HYPERPARAMETERS
FEATURES = ["close", "high", "low", "volume"]
N = config_data["lookback_window"]
T = config_data["multi_step_horizon"]
NUM_FEATURES = len(FEATURES)
experiment_type = "HYBRID_TRANSFORMER" 
N, T, NUM_ASSETS, NUM_FEATURES

(24, 3, 101, 4)

In [7]:
train_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_train,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=True,
    experiment_type=experiment_type
)

In [8]:
class ConvolutionalEmbedding(nn.Module):
    def __init__(self, in_channels, out_channels, window_length=24):
        super(ConvolutionalEmbedding, self).__init__()
        self.convolution = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, window_length))

    def forward(self, X):
        conv_tokens = self.convolution(X)
        return conv_tokens

class MultiPeriodConvAttentionNetwork(nn.Module):
    def __init__(self, num_stocks, num_features, W, T, num_layers=10, device="cpu"):
        super(MultiPeriodConvAttentionNetwork, self).__init__()
        self.device = device
        num_assets = num_stocks + 1
        self.W = W
        self.T = T
        self.state_conv_tokenization = ConvolutionalEmbedding(in_channels=num_features,
                                                         out_channels=T,
                                                         window_length=W)
        self.last_action_tokenization = ConvolutionalEmbedding(in_channels=1,
                                                               out_channels=1,
                                                               window_length=1)
        self.encoder_state = nn.TransformerEncoder(encoder_layer=nn.TransformerEncoderLayer(d_model=num_assets, nhead=1, batch_first=True), num_layers=num_layers)
        self.encoder_last_action = nn.TransformerEncoder(encoder_layer=nn.TransformerEncoderLayer(d_model=num_assets, nhead=1, batch_first=True), num_layers=num_layers)
        self.decoder = nn.TransformerDecoder(decoder_layer=nn.TransformerDecoderLayer(d_model=num_assets, nhead=1, batch_first=True), num_layers=num_layers)
        
        self.norm = nn.Softmax(dim=2)

    def forward(self, observation, last_action):
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze(0)
        return action

    def mu(self, state, last_action):
        if isinstance(state, np.ndarray):
            state = torch.from_numpy(state)
        state = state.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()
        
        state_tokens = self.state_conv_tokenization(state)
        last_action = last_action.unsqueeze(1)
        last_action_tokens = self.last_action_tokenization(last_action)
        state_tokens = state_tokens.squeeze(3)
        last_action_tokens = last_action_tokens.squeeze(1)

        
        cash_bias = self.process_last_action(last_action)
        state_tokens = torch.cat([state_tokens, cash_bias], dim=2)
        state_embeddings = self.encoder_state(state_tokens)
        last_action_embeddings = self.encoder_last_action(last_action_tokens)
        embeddings = torch.cat([state_embeddings, last_action_embeddings], dim=1)
        logits = self.decoder(tgt=last_action_tokens, memory=embeddings)
        portfolio_weights = self.norm(logits)
        return portfolio_weights
    
    def process_last_action(self, last_action):
        batch_size = last_action.shape[0]
        cash_allocation = last_action[:, :, :, 0]
        cash_allocation = cash_allocation.reshape((batch_size, self.T, 1))
        return cash_allocation

In [18]:
# policy = MultiPeriodConvAttentionNetwork(num_stocks=NUM_ASSETS,
#                                          num_features=NUM_FEATURES,
#                                          W=N,
#                                          T=T)



In [19]:
# state = torch.rand(1, NUM_FEATURES, NUM_ASSETS, N)
# last_action = torch.rand(1, T, NUM_ASSETS+1)
# state.shape, last_action.shape

(torch.Size([1, 4, 101, 24]), torch.Size([1, 3, 102]))

In [23]:
# policy(state, last_action).min(1)

array([0.00050308, 0.00039293, 0.00058882], dtype=float32)

In [9]:
# set PolicyGradient parameters
model_kwargs = {
    "lr": 0.001,
    "policy": MultiPeriodConvAttentionNetwork,
    "multi_period_horizon": T
}

# here, we can set EIIE's parameters
policy_kwargs = {
    "num_features": NUM_FEATURES,
    "num_stocks": NUM_ASSETS,
    "W": N,
    "T": T
}

In [10]:
model = DRLAgent(train_environment).get_model("pg", device, model_kwargs, policy_kwargs)



In [11]:
print("TRAINING AGENT.....")
DRLAgent.train_model(model, episodes=50)

TRAINING AGENT.....


  0%|          | 0/50 [00:00<?, ?it/s]

Initial portfolio value:100000
Final portfolio value: 2004674.2109971049
Final accumulative portfolio value: 20.04674210997105
Maximum DrawDown: -0.7717208128857289
Sharpe ratio: 5.839784640611192


  2%|▏         | 1/50 [00:05<04:39,  5.71s/it]

Initial portfolio value:100000
Final portfolio value: 2072447.1752922295
Final accumulative portfolio value: 20.724471752922295
Maximum DrawDown: -0.7697776594579664
Sharpe ratio: 5.904474435103387


  4%|▍         | 2/50 [00:11<04:29,  5.61s/it]

Initial portfolio value:100000
Final portfolio value: 2328766.9679312827
Final accumulative portfolio value: 23.287669679312827
Maximum DrawDown: -0.7609233876895951
Sharpe ratio: 6.089484278688962


  6%|▌         | 3/50 [00:16<04:21,  5.57s/it]

Initial portfolio value:100000
Final portfolio value: 2417175.3960412405
Final accumulative portfolio value: 24.171753960412406
Maximum DrawDown: -0.7584066549526516
Sharpe ratio: 6.144124977769671


  8%|▊         | 4/50 [00:22<04:18,  5.62s/it]

Initial portfolio value:100000
Final portfolio value: 2422350.2431995096
Final accumulative portfolio value: 24.223502431995097
Maximum DrawDown: -0.7579876206372003
Sharpe ratio: 6.147110087398016


 10%|█         | 5/50 [00:27<04:08,  5.51s/it]

Initial portfolio value:100000
Final portfolio value: 2421458.9571476653
Final accumulative portfolio value: 24.214589571476655
Maximum DrawDown: -0.7586165491592503
Sharpe ratio: 6.14573439361705


 12%|█▏        | 6/50 [00:33<04:02,  5.51s/it]

Initial portfolio value:100000
Final portfolio value: 2432888.6914905994
Final accumulative portfolio value: 24.328886914905993
Maximum DrawDown: -0.7580794917043607
Sharpe ratio: 6.153437384582067


 14%|█▍        | 7/50 [00:38<03:55,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2440520.5274910745
Final accumulative portfolio value: 24.405205274910745
Maximum DrawDown: -0.7578620993308739
Sharpe ratio: 6.160470184142186


 16%|█▌        | 8/50 [00:44<03:50,  5.48s/it]

Initial portfolio value:100000
Final portfolio value: 2436910.114230465
Final accumulative portfolio value: 24.36910114230465
Maximum DrawDown: -0.7579612510558345
Sharpe ratio: 6.155279563262874


 18%|█▊        | 9/50 [00:49<03:45,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2437351.2649769424
Final accumulative portfolio value: 24.373512649769424
Maximum DrawDown: -0.7578487251203215
Sharpe ratio: 6.156366023046901


 20%|██        | 10/50 [00:55<03:39,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2437511.2456605667
Final accumulative portfolio value: 24.375112456605667
Maximum DrawDown: -0.7577310723285087
Sharpe ratio: 6.156858504863925


 22%|██▏       | 11/50 [01:00<03:33,  5.47s/it]

Initial portfolio value:100000
Final portfolio value: 2439352.938402414
Final accumulative portfolio value: 24.393529384024138
Maximum DrawDown: -0.7577202295221761
Sharpe ratio: 6.158551674395821


 24%|██▍       | 12/50 [01:06<03:28,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2436055.16001672
Final accumulative portfolio value: 24.360551600167202
Maximum DrawDown: -0.7577274560966052
Sharpe ratio: 6.155355229287532


 26%|██▌       | 13/50 [01:12<03:28,  5.62s/it]

Initial portfolio value:100000
Final portfolio value: 2439493.7855059085
Final accumulative portfolio value: 24.394937855059087
Maximum DrawDown: -0.757701452817202
Sharpe ratio: 6.157863077419364


 28%|██▊       | 14/50 [01:17<03:24,  5.68s/it]

Initial portfolio value:100000
Final portfolio value: 2442848.992750577
Final accumulative portfolio value: 24.428489927505773
Maximum DrawDown: -0.7576986408434171
Sharpe ratio: 6.16006577550833


 30%|███       | 15/50 [01:23<03:19,  5.70s/it]

Initial portfolio value:100000
Final portfolio value: 2436873.2031755117
Final accumulative portfolio value: 24.368732031755115
Maximum DrawDown: -0.7578302914462882
Sharpe ratio: 6.1557438880984


 32%|███▏      | 16/50 [01:29<03:10,  5.60s/it]

Initial portfolio value:100000
Final portfolio value: 2440313.818352518
Final accumulative portfolio value: 24.403138183525183
Maximum DrawDown: -0.7576428105500936
Sharpe ratio: 6.158593989454165


 34%|███▍      | 17/50 [01:34<03:01,  5.51s/it]

Initial portfolio value:100000
Final portfolio value: 2437413.063754197
Final accumulative portfolio value: 24.37413063754197
Maximum DrawDown: -0.7578393192404825
Sharpe ratio: 6.156046391626546


 36%|███▌      | 18/50 [01:39<02:56,  5.50s/it]

Initial portfolio value:100000
Final portfolio value: 2439822.537239266
Final accumulative portfolio value: 24.398225372392663
Maximum DrawDown: -0.7577048927983544
Sharpe ratio: 6.158444991793789


 38%|███▊      | 19/50 [01:45<02:48,  5.44s/it]

Initial portfolio value:100000
Final portfolio value: 2439777.317087387
Final accumulative portfolio value: 24.397773170873872
Maximum DrawDown: -0.7576988924161514
Sharpe ratio: 6.158212291641735


 40%|████      | 20/50 [01:50<02:42,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2440166.242161098
Final accumulative portfolio value: 24.40166242161098
Maximum DrawDown: -0.7576939030993679
Sharpe ratio: 6.158340170887467


 42%|████▏     | 21/50 [01:55<02:36,  5.40s/it]

Initial portfolio value:100000
Final portfolio value: 2439638.909913227
Final accumulative portfolio value: 24.39638909913227
Maximum DrawDown: -0.7577067900163567
Sharpe ratio: 6.1579711831059045


 44%|████▍     | 22/50 [02:01<02:32,  5.44s/it]

Initial portfolio value:100000
Final portfolio value: 2439105.765079892
Final accumulative portfolio value: 24.391057650798917
Maximum DrawDown: -0.7577194138084993
Sharpe ratio: 6.1576574583668675


 46%|████▌     | 23/50 [02:06<02:26,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2439820.0339545817
Final accumulative portfolio value: 24.398200339545816
Maximum DrawDown: -0.7576926862630773
Sharpe ratio: 6.158210067590837


 48%|████▊     | 24/50 [02:12<02:20,  5.42s/it]

Initial portfolio value:100000
Final portfolio value: 2439750.735430954
Final accumulative portfolio value: 24.39750735430954
Maximum DrawDown: -0.7577079105617976
Sharpe ratio: 6.157980030666944


 50%|█████     | 25/50 [02:17<02:15,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2444592.380194343
Final accumulative portfolio value: 24.445923801943433
Maximum DrawDown: -0.7577093810665442
Sharpe ratio: 6.161775773757921


 52%|█████▏    | 26/50 [02:23<02:10,  5.45s/it]

Initial portfolio value:100000
Final portfolio value: 2439928.2351759365
Final accumulative portfolio value: 24.399282351759364
Maximum DrawDown: -0.7576838853758665
Sharpe ratio: 6.158180172625644


 54%|█████▍    | 27/50 [02:28<02:04,  5.43s/it]

Initial portfolio value:100000
Final portfolio value: 2439490.068525893
Final accumulative portfolio value: 24.394900685258932
Maximum DrawDown: -0.7577382772463278
Sharpe ratio: 6.15790273866176


 56%|█████▌    | 28/50 [02:33<01:59,  5.45s/it]

Initial portfolio value:100000
Final portfolio value: 2439969.774226928
Final accumulative portfolio value: 24.399697742269282
Maximum DrawDown: -0.7576857628547494
Sharpe ratio: 6.158192373928897


 58%|█████▊    | 29/50 [02:39<01:55,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2440173.979893666
Final accumulative portfolio value: 24.401739798936664
Maximum DrawDown: -0.7576960877877044
Sharpe ratio: 6.158291284702822


 60%|██████    | 30/50 [02:45<01:49,  5.49s/it]

Initial portfolio value:100000
Final portfolio value: 2439868.0455243713
Final accumulative portfolio value: 24.398680455243714
Maximum DrawDown: -0.7576847290391779
Sharpe ratio: 6.158114800168031


 62%|██████▏   | 31/50 [02:50<01:44,  5.48s/it]

Initial portfolio value:100000
Final portfolio value: 2440038.2586615854
Final accumulative portfolio value: 24.400382586615855
Maximum DrawDown: -0.7576889755342847
Sharpe ratio: 6.158178628252419


 64%|██████▍   | 32/50 [02:55<01:37,  5.44s/it]

Initial portfolio value:100000
Final portfolio value: 2436493.593192529
Final accumulative portfolio value: 24.364935931925288
Maximum DrawDown: -0.7576902620675323
Sharpe ratio: 6.156244097222039


 66%|██████▌   | 33/50 [03:01<01:32,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2441284.1218250585
Final accumulative portfolio value: 24.412841218250584
Maximum DrawDown: -0.7576863519224186
Sharpe ratio: 6.159114866548119


 68%|██████▊   | 34/50 [03:06<01:26,  5.39s/it]

Initial portfolio value:100000
Final portfolio value: 2440106.864612867
Final accumulative portfolio value: 24.40106864612867
Maximum DrawDown: -0.7576856130826297
Sharpe ratio: 6.158408397079489


 70%|███████   | 35/50 [03:11<01:21,  5.42s/it]

Initial portfolio value:100000
Final portfolio value: 2440180.5926980847
Final accumulative portfolio value: 24.401805926980845
Maximum DrawDown: -0.7577077212552954
Sharpe ratio: 6.15837602835515


 72%|███████▏  | 36/50 [03:17<01:15,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2440482.6827418497
Final accumulative portfolio value: 24.404826827418496
Maximum DrawDown: -0.7576687571926141
Sharpe ratio: 6.1586130130360734


 74%|███████▍  | 37/50 [03:22<01:10,  5.44s/it]

Initial portfolio value:100000
Final portfolio value: 2440314.5292698173
Final accumulative portfolio value: 24.403145292698174
Maximum DrawDown: -0.7576786003730297
Sharpe ratio: 6.158366779573983


 76%|███████▌  | 38/50 [03:28<01:05,  5.47s/it]

Initial portfolio value:100000
Final portfolio value: 2441227.315685252
Final accumulative portfolio value: 24.41227315685252
Maximum DrawDown: -0.7576764532606741
Sharpe ratio: 6.159075312998781


 78%|███████▊  | 39/50 [03:33<01:00,  5.46s/it]

Initial portfolio value:100000
Final portfolio value: 2440703.9320476083
Final accumulative portfolio value: 24.407039320476084
Maximum DrawDown: -0.7576730683519804
Sharpe ratio: 6.158698692264005


 80%|████████  | 40/50 [03:39<00:54,  5.42s/it]

Initial portfolio value:100000
Final portfolio value: 2440397.3274266636
Final accumulative portfolio value: 24.403973274266637
Maximum DrawDown: -0.7576802447352847
Sharpe ratio: 6.158518711657677


 82%|████████▏ | 41/50 [03:44<00:48,  5.42s/it]

Initial portfolio value:100000
Final portfolio value: 2440503.7479560454
Final accumulative portfolio value: 24.405037479560455
Maximum DrawDown: -0.7576738644923395
Sharpe ratio: 6.158533781195001


 84%|████████▍ | 42/50 [03:50<00:43,  5.42s/it]

Initial portfolio value:100000
Final portfolio value: 2440651.253125529
Final accumulative portfolio value: 24.406512531255288
Maximum DrawDown: -0.757684448186049
Sharpe ratio: 6.15861715608892


 86%|████████▌ | 43/50 [03:55<00:37,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2440776.0194753404
Final accumulative portfolio value: 24.407760194753404
Maximum DrawDown: -0.7576697418845231
Sharpe ratio: 6.158742575632659


 88%|████████▊ | 44/50 [04:00<00:32,  5.40s/it]

Initial portfolio value:100000
Final portfolio value: 2440794.6380958003
Final accumulative portfolio value: 24.407946380958002
Maximum DrawDown: -0.7576709375390577
Sharpe ratio: 6.158783318683246


 90%|█████████ | 45/50 [04:06<00:27,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2440746.397094808
Final accumulative portfolio value: 24.40746397094808
Maximum DrawDown: -0.7576745475012694
Sharpe ratio: 6.158696106908702


 92%|█████████▏| 46/50 [04:11<00:21,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2440651.518126452
Final accumulative portfolio value: 24.40651518126452
Maximum DrawDown: -0.7576745922809419
Sharpe ratio: 6.158627416706004


 94%|█████████▍| 47/50 [04:17<00:16,  5.41s/it]

Initial portfolio value:100000
Final portfolio value: 2428240.736041932
Final accumulative portfolio value: 24.28240736041932
Maximum DrawDown: -0.757672958937698
Sharpe ratio: 6.153797902507914


 96%|█████████▌| 48/50 [04:22<00:10,  5.47s/it]

Initial portfolio value:100000
Final portfolio value: 2440721.102322946
Final accumulative portfolio value: 24.407211023229458
Maximum DrawDown: -0.7576869208608812
Sharpe ratio: 6.1586368430498295


 98%|█████████▊| 49/50 [04:28<00:05,  5.53s/it]

Initial portfolio value:100000
Final portfolio value: 2440453.999837825
Final accumulative portfolio value: 24.40453999837825
Maximum DrawDown: -0.7576793011065849
Sharpe ratio: 6.1584896925508446


100%|██████████| 50/50 [04:33<00:00,  5.47s/it]


<modules.algorithms.PolicyGradient at 0x294ab4ef0>

In [12]:
print("PERSISTING MODEL.....")
torch.save(model.train_policy.state_dict(), f"models/policy_{experiment_type}.pt")

PERSISTING MODEL.....


In [13]:
print("TESTING.....")
MEIIE_results = {
    "training": train_environment._asset_memory["final"],
    "test": {}
}

TESTING.....


In [14]:
test_environment = MultiPeriodPortfolioOptimizationEnv(
    df_portfolio_test,
    initial_amount=100000,
    comission_fee_pct=0.0025,
    time_window=N,
    multi_period_horizon=T,
    features=FEATURES,
    normalize_df=None,
    is_train_mode=False,
    experiment_type=experiment_type
)

In [15]:
policy = MultiPeriodConvAttentionNetwork(num_stocks=NUM_ASSETS,
                                         num_features=NUM_FEATURES,
                                         W=N,
                                         T=T)



In [16]:
policy.load_state_dict(torch.load(f"models/policy_{experiment_type}.pt"))

<All keys matched successfully>

In [17]:
DRLAgent.DRL_validation(model=model, test_env=test_environment, policy=policy)

Initial portfolio value:100000
Final portfolio value: 533552.0558170116
Final accumulative portfolio value: 5.335520558170116
Maximum DrawDown: -0.39917030214508575
Sharpe ratio: 5.6634839201682325


In [18]:
MEIIE_results["test"]["value"] = test_environment._asset_memory["final"]

In [19]:
drl_portfolio_performance = test_environment._asset_memory["final"][1:]
len(drl_portfolio_performance)

70

In [20]:
drl_portfolio_performance = [(x/100000)-1 for x in drl_portfolio_performance]

In [21]:
performance_dataset = pd.read_csv("data/processed/performances_sp100.csv", index_col=0)

In [22]:
performance_dataset["DRL_HYBRID_TRANSFORMER"] = drl_portfolio_performance

In [23]:
performance_dataset.to_csv("data/processed/performances_sp100.csv")