In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
#
import importlib
import utilities.lstm_utils as lstm_utils
import utilities.mpt_utils as mpt_utils
import utilities.variables as variables

In [10]:
# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data

In [11]:
df = pd.read_csv('../../../data/df_monthly_prices_complete_euro.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

## LSTM Model

In [12]:
# Define 4.3. LSTM model
class LSTM_Multi_Model(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2): # , hidden_size=128
        super(LSTM_Multi_Model, self).__init__()
        self.hidden_size = hidden_size
        # init LSTM
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            # num_layers=num_layers,
                            batch_first=True)

        # FC layer for final prediction
        self.fc_final = nn.Linear(hidden_size, 12)

    def forward(self, ts_batch): # ts_batch (64, 1653, 10), static_data (64, 1653, 44)
        # Time-Series Data
        # Reshape dynamic data for LSTM (requires time-step as 2nd dimension)
        batch_size, num_stocks, sequence_length = ts_batch.shape[0], ts_batch.shape[1], ts_batch.shape[2]
        ts_batch_reshaped = ts_batch.view(batch_size * num_stocks, sequence_length)
        #
        ts_output_1, (hidden, cell)  = self.lstm(ts_batch_reshaped) # ts_batch_reshaped
        #
        ts_output = ts_output_1.view(batch_size, num_stocks, self.hidden_size)

        return self.fc_final(ts_output)#.squeeze(-1) # ts_output_2

We use a 12 month lookback for the sequential data to predict the upcoming 12 months.

After that, based on the currently predicted time-horizon, we get the respective sub-range, 
be it 1-month, 6-month or 12-month ahead.

In [13]:
# Set sequence length (12 months)
in_seq_length = 12
out_seq_length = 12
#
out_seq_length_1m = 1
out_seq_length_6m = 6
out_seq_length_12m = 12

### LSTM Multivariate

In [14]:
df_to_evaluate = df_pct - 1

#### Train-Test Splits

Split the data into training and testing sets

In [15]:
importlib.reload(lstm_utils)
importlib.reload(variables)

# Set sequence length (e.g., 12 time points)
X_train, X_test, y_train, y_test = lstm_utils.split_train_test(df_to_evaluate, [], 
                                                               in_seq_length=in_seq_length, 
                                                               out_seq_length=out_seq_length, 
                                                               validation_months=(variables.TEST_YEARS_NR * 12),
                                                               )

# Check the shapes of the training and test data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([263, 1332, 12])
Shape of y_train: torch.Size([263, 1332, 12])
Shape of X_test: torch.Size([12, 1332, 12])
Shape of y_test: torch.Size([12, 1332, 12])


### Model Training

In [16]:
# Model, Loss, Optimizer
model = LSTM_Multi_Model(input_size=in_seq_length, output_size=out_seq_length).to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

importlib.reload(lstm_utils)
#
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(model, optimizer, X_train, X_test, y_train, y_test, epochs=10)

Epoch 1/10, Loss: 0.0146, Train RMSE: 0.1215, Test RMSE: 0.1038. 
Epoch 2/10, Loss: 0.0137, Train RMSE: 0.1178, Test RMSE: 0.1034. 
Epoch 3/10, Loss: 0.0136, Train RMSE: 0.1171, Test RMSE: 0.1032. 
Epoch 4/10, Loss: 0.0136, Train RMSE: 0.1171, Test RMSE: 0.1032. 
Epoch 5/10, Loss: 0.0135, Train RMSE: 0.1170, Test RMSE: 0.1033. 
Epoch 6/10, Loss: 0.0135, Train RMSE: 0.1169, Test RMSE: 0.1033. 
Epoch 7/10, Loss: 0.0135, Train RMSE: 0.1169, Test RMSE: 0.1034. 
Epoch 8/10, Loss: 0.0135, Train RMSE: 0.1169, Test RMSE: 0.1035. 
Epoch 9/10, Loss: 0.0135, Train RMSE: 0.1169, Test RMSE: 0.1035. 
Epoch 10/10, Loss: 0.0135, Train RMSE: 0.1169, Test RMSE: 0.1036. 
Model training complete and saved.


### LSTM Multivariate - 1 Month

Get the known data (train data).
After that, get the first predicted month, or the first predicted sequence of test data

In [22]:
# 1 month
df_train = X_train[:, :, -1].clone()
y_test_pred_1m = y_test_pred[0,:,:].T[0:0] # y_test_pred[:,:,0][0] 
y_test_pred_1m

tensor([], size=(0, 1332))

In [23]:
df_forecast_1m = pd.DataFrame(df_train.clone())
df_forecast_1m = pd.concat([df_forecast_1m, pd.DataFrame(y_test_pred_1m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_1m.columns = df.columns
df_forecast_1m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_1m) + (in_seq_length - 1)].index)
#
df_forecast_1m = df_forecast_1m.tail(variables.TEST_YEARS_NR * 12) 
#
df_forecast_1m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-06-01,-0.02,-0.04,-0.01,-0.08,0.03,-0.12,-0.13,-0.11,-0.14,-0.14,...,-0.15,0.0,0.01,-0.03,-0.17,0.0,-0.05,-0.09,0.02,-0.09
2022-07-01,0.11,0.13,-0.12,-0.02,-0.11,0.15,0.47,0.27,0.25,0.19,...,0.14,0.0,0.08,0.12,0.31,0.0,0.13,0.09,-0.06,-0.05
2022-08-01,-0.16,0.01,-0.07,-0.01,0.12,0.0,0.03,-0.14,0.01,-0.08,...,-0.01,-0.09,-0.06,-0.16,-0.05,0.0,-0.12,-0.04,0.03,-0.01


In [24]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_1m, mu_1m, S_1m, allocations_1m, weights_all_1m = mpt_utils.portfolio_and_plot(df_forecast_1m, df)

Expected annual return: 49.8%
Annual volatility: 17.2%
Sharpe Ratio: 2.79
-- Allocation --
{'2395.T': 15, 'TEP.L': 1, '2767.T': 12, '3186.T': 17, '7451.T': 10, '6460.T': 12, '2685.T': 8, '5334.T': 5, '4568.T': 4, '9433.T': 5, '9987.T': 5, '3191.T': 12, '7433.T': 2, '6430.T': 4, '6417.T': 14, '7734.T': 5, '8129.T': 7, 'VOD.L': 1, '8897.T': 42, '6055.T': 6, '3402.T': 27, '8309.T': 6, 'SLB': 2, 'CAL': 5, 'ADV.DE': 4, '2379.T': 6, 'DLTR': 1, 'RELL': 6, '8804.T': 7, '3231.T': 4, 'HRB': 1, '5988.T': 5, '9434.T': 7, '9627.T': 3, '8141.T': 8, '7974.T': 1, '8219.T': 5, '8060.T': 2, 'LRN': 1, 'GBF.DE': 2, '8802.T': 5, 'CPRX': 2, 'IOT': 3, '8923.T': 4, '4732.T': 5, '9024.T': 6, '7906.T': 3, '8282.T': 6, '9869.T': 2, '6471.T': 11, '3048.T': 7, 'RGP': 3, 'BLCO': 3, 'MERC': 5, '7313.T': 4, '7867.T': 3, '4665.T': 1, '8252.T': 1, '2792.T': 3, '2874.T': 4, 'CHEF': 1, '7552.T': 1, '2154.T': 1, '3086.T': 1, '3289.T': 1}
-- Weights Percentage --
{'2395.T': 0.0584, '2685.T': 0.0255, '2767.T': 0.0456, '6430


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rate

In [25]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_1m, weights=weights_all_1m)

Portfolio real return rate:  0.5%


#### Overview table

In [26]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_1m, mu_1m, S_1m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
2395.T,15,-0.080781,141.87%,-37.13%,-37.52%
TEP.L,1,0.002969,25.42%,-25.04%,26.40%
2767.T,12,0.037031,41.45%,56.75%,-0.47%
3186.T,17,-0.031562,182.36%,-26.44%,-22.57%
7451.T,10,0.016406,43.81%,37.33%,34.63%
...,...,...,...,...,...
CHEF,1,0.037969,6.95%,-46.81%,83.27%
7552.T,1,0.035156,1.68%,30.66%,68.61%
2154.T,1,-0.017188,21.98%,6.06%,14.74%
3086.T,1,0.047187,10.57%,23.85%,-1.41%


### LSTM Multivariate - 6 Months

Get the known data (train data)
After that, get the first 6 predicted months, or the first 6 predicted sequences of test data

In [27]:
df_train = X_train[:, :, -1].clone()
y_test_pred_6m =  y_test_pred[0,:,:].T[0:6] # y_test_pred[:,:,5][0:6]
y_test_pred_6m

tensor([[ 0.0161,  0.0169,  0.0087,  ...,  0.0127,  0.0080,  0.0072],
        [ 0.0140,  0.0110,  0.0129,  ...,  0.0056,  0.0083,  0.0079],
        [-0.0530, -0.0232, -0.0141,  ...,  0.0143,  0.0123,  0.0111],
        [-0.0245, -0.0108, -0.0005,  ...,  0.0019,  0.0078,  0.0076],
        [ 0.0339,  0.0219,  0.0228,  ...,  0.0140,  0.0135,  0.0142],
        [-0.0234, -0.0024,  0.0064,  ...,  0.0210,  0.0162,  0.0157]])

In [28]:
df_forecast_6m = pd.DataFrame(df_train.clone())
df_forecast_6m = pd.concat([df_forecast_6m, pd.DataFrame(y_test_pred_6m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_6m.columns = df.columns
df_forecast_6m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_6m) + (in_seq_length - 1)].index)
#
df_forecast_6m = df_forecast_6m.tail(variables.TEST_YEARS_NR * 12)
#
df_forecast_6m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-01,-0.024488,-0.010837,-0.000476,0.005681,0.008273,0.00296,-0.00587,-0.007818,-0.007399,-0.007669,...,0.000522,0.006671,0.006618,0.004678,-0.003518,0.002884,0.001697,0.00186,0.007779,0.007602
2023-01-01,0.033894,0.021879,0.022828,0.020778,0.01237,0.012309,0.016839,0.020519,0.017474,0.017325,...,0.015178,0.014899,0.012951,0.016309,0.017031,0.016559,0.015538,0.013951,0.013493,0.014155
2023-02-01,-0.023443,-0.002355,0.006367,0.008799,0.012045,0.015933,0.018946,0.022796,0.02765,0.028613,...,0.022365,0.018565,0.01772,0.018788,0.022229,0.018441,0.020985,0.020974,0.01618,0.015724


In [29]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_6m, mu_6m, S_6m, allocations_6m, weights_all_6m = mpt_utils.portfolio_and_plot(df_forecast_6m, df)

Expected annual return: 19.8%
Annual volatility: 6.7%
Sharpe Ratio: 2.64
-- Allocation --
{'TAL': 7, 'CHEF': 3, '7451.T': 2, '2685.T': 2, 'MIDW.L': 1, 'DORM': 1, '2670.T': 3, '8129.T': 2, '3421.T': 5, '2379.T': 2, '2784.T': 3, 'BLCO': 3, '7518.T': 3, '4568.T': 1, '6430.T': 1, 'FCN': 1, '7976.T': 3, '9434.T': 4, '3231.T': 1, '9470.T': 8, '6055.T': 2, '9882.T': 3, '5101.T': 1, '9076.T': 3, 'PLUS.L': 1, '3738.T': 4, '9409.T': 4, '3289.T': 6, '6737.T': 1, '9869.T': 1, '9024.T': 3, '4745.T': 14, '3086.T': 4, '7606.T': 3, '9842.T': 4, '8014.T': 1, '8985.T': 1, '3116.T': 2, 'IBM': 1, '4665.T': 1, '2715.T': 2, 'AV.L': 1, 'SYY': 1, '9832.T': 4, 'VRTX': 1, '4503.T': 3, '8173.T': 2, '7483.T': 2, 'FINV': 7, '2502.T': 1, '8309.T': 2, '9831.T': 10, '3591.T': 1, 'HURN': 1, '3402.T': 6, '7947.T': 2, '8057.T': 1, '9069.T': 5, '1973.T': 2, '7593.T': 11, '8130.T': 2, '8802.T': 2, 'HRB': 1, '4151.T': 2, '8008.T': 2, '2874.T': 4, '8876.T': 2, '3003.T': 4, 'FRPH': 1, 'CASS': 1, '8923.T': 2, '3191.T': 2, '26


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rate

In [30]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_6m, weights=weights_all_6m)

Portfolio real return rate:  0.19%


#### Overview table

In [31]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_6m, mu_6m, S_6m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
TAL,7,0.039489,14.72%,84.94%,-17.19%
CHEF,3,0.008864,27.56%,-46.81%,83.27%
7451.T,2,0.028352,37.14%,37.33%,34.63%
2685.T,2,-0.014943,31.26%,61.48%,22.88%
MIDW.L,1,-0.024034,20.70%,-20.42%,-22.78%
...,...,...,...,...,...
INS.DE,2,0.001818,43.85%,-8.84%,69.79%
4549.T,1,0.008636,15.18%,-14.84%,79.20%
8439.T,1,0.020455,26.99%,16.81%,17.89%
2471.T,4,-0.049034,13.78%,-62.38%,-23.08%


### LSTM Multivariate - 12 Months

Get the known data (train data)
After that, get the first 12 predicted months, or the first 12 predicted sequences of test data

In [32]:
df_train = X_train[:, :, -1].clone()
y_test_pred_12m = y_test_pred[0,:,:].T[0:12]
y_test_pred_12m

tensor([[ 0.0161,  0.0169,  0.0087,  ...,  0.0127,  0.0080,  0.0072],
        [ 0.0140,  0.0110,  0.0129,  ...,  0.0056,  0.0083,  0.0079],
        [-0.0530, -0.0232, -0.0141,  ...,  0.0143,  0.0123,  0.0111],
        ...,
        [-0.0180, -0.0004,  0.0105,  ...,  0.0097,  0.0099,  0.0107],
        [ 0.0103,  0.0075,  0.0066,  ...,  0.0117,  0.0139,  0.0139],
        [-0.0086, -0.0010,  0.0085,  ...,  0.0108,  0.0118,  0.0116]])

In [33]:
df_forecast_12m = pd.DataFrame(df_train.clone())
df_forecast_12m = pd.concat([df_forecast_12m, pd.DataFrame(y_test_pred_12m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_12m.columns = df.columns
df_forecast_12m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_12m) + (in_seq_length - 1)].index)
#
df_forecast_12m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-01,-0.017955,-0.000441,0.010481,0.015758,0.01165,0.014822,0.015853,0.009909,0.007633,0.004707,...,0.005456,0.005545,0.006752,0.006863,0.008337,0.009472,0.009939,0.00967,0.00989,0.010682
2023-07-01,0.010268,0.007465,0.006643,0.006281,0.011142,0.007693,0.005767,0.00634,0.009693,0.011747,...,0.012687,0.015248,0.013914,0.011684,0.008502,0.011481,0.011635,0.011728,0.013877,0.013903
2023-08-01,-0.008615,-0.000965,0.008505,0.012038,0.01161,0.009803,0.008227,0.01285,0.009204,0.009414,...,0.010297,0.012577,0.012372,0.015011,0.010545,0.010724,0.011639,0.010823,0.011831,0.011599


In [34]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_12m, mu_12m, S_12m, allocations_12m, weights_all_12m = mpt_utils.portfolio_and_plot(df_forecast_12m, df)

Expected annual return: 15.2%
Annual volatility: 2.7%
Sharpe Ratio: 4.95
-- Allocation --
{'2395.T': 42, '6430.T': 16, '7606.T': 37, '8022.T': 16, '7976.T': 38, '7518.T': 30, 'ABM': 12, 'MUV2.DE': 2, 'AV.L': 1, '4665.T': 21, '9434.T': 41, '8012.T': 30, '9069.T': 61, 'HCSG': 36, '7272.T': 41, 'STRA': 4, 'BKE': 9, '8150.T': 18, 'CASS': 7, '2685.T': 11, '9470.T': 40, '8060.T': 9, 'RRTL.DE': 4, '3421.T': 14, 'CHEF': 7, '9076.T': 6, '9757.T': 4, 'PRI': 1, '9260.T': 1}
-- Weights Percentage --
{'2395.T': 0.0512, '2685.T': 0.0252, '6430.T': 0.0512, 'BKE': 0.0296, 'HCSG': 0.0364, 'STRA': 0.0351, 'CASS': 0.0272, '7272.T': 0.0354, '7606.T': 0.0512, '8022.T': 0.0512, '8150.T': 0.0295, '9069.T': 0.0459, '7976.T': 0.0512, '7518.T': 0.0512, 'ABM': 0.0512, '9470.T': 0.0247, '8012.T': 0.0501, 'MUV2.DE': 0.0512, '8060.T': 0.0234, 'AV.L': 0.0512, '4665.T': 0.0512, '9434.T': 0.0512, 'Other(7)': 0.0744}



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rate

In [None]:
df_pct

In [38]:
df_forecast_12m = df_forecast_12m +1

In [39]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_12m, weights=weights_all_12m)

Portfolio real return rate:  0.35%


#### Overview Table

In [36]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_12m, mu_12m, S_12m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
2395.T,42,0.0,14.92%,-37.13%,-37.52%
6430.T,16,0.0,16.24%,209.51%,-37.68%
7606.T,37,0.0,14.95%,-3.76%,17.96%
8022.T,16,0.0,14.87%,75.44%,93.58%
7976.T,38,0.0,15.67%,29.58%,28.31%
7518.T,30,0.0,15.82%,-24.10%,33.77%
ABM,12,0.0,15.10%,-8.89%,26.68%
MUV2.DE,2,0.0,15.76%,50.96%,35.62%
AV.L,1,0.0,15.07%,3.34%,31.69%
4665.T,21,0.0,14.74%,17.57%,23.03%
