In [320]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
#
import importlib
import utilities.lstm_utils as lstm_utils
import utilities.mpt_utils as mpt_utils
import utilities.variables as variables

In [321]:
# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Preparing Data

In [322]:
df = pd.read_csv('../../../data/df_monthly_prices_complete_euro.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

## Normalisation

In [323]:
''' 
df_ts_torch = torch.from_numpy(df_time_series.values)
# Reshape to (num_samples, num_features) for normalization
df_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)

# Calculate min and max per feature
df_min = df_ts_flat.min(dim=0, keepdim=True)[0]
df_max = df_ts_flat.max(dim=0, keepdim=True)[0]

# Apply Min-Max normalization
df_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)

# Reshape back to original shape
df_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)
'''


' \ndf_ts_torch = torch.from_numpy(df_time_series.values)\n# Reshape to (num_samples, num_features) for normalization\ndf_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)\n\n# Calculate min and max per feature\ndf_min = df_ts_flat.min(dim=0, keepdim=True)[0]\ndf_max = df_ts_flat.max(dim=0, keepdim=True)[0]\n\n# Apply Min-Max normalization\ndf_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)\n\n# Reshape back to original shape\ndf_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)\n'

## LSTM Model

In [324]:
# Define 4.3. LSTM model
class LSTM_Uni_Model(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2): # , hidden_size=128
        super(LSTM_Uni_Model, self).__init__()
        self.hidden_size = hidden_size
        # 4.3. LSTM for time-series data (stock returns)
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            # num_layers=num_layers,
                            batch_first=True)

        # FC layer for final prediction
        self.fc_final = nn.Linear(hidden_size, 12)

    def forward(self, ts_batch): # ts_batch (64, 1653, 10), static_data (64, 1653, 44)
        # Time-Series Data
        # Reshape dynamic data for 4.3. LSTM (requires time-step as 2nd dimension)
        batch_size, num_stocks, sequence_length = ts_batch.shape[0], ts_batch.shape[1], ts_batch.shape[2]
        ts_batch_reshaped = ts_batch.view(batch_size * num_stocks, sequence_length)
        #
        ts_output_1, (hidden, cell)  = self.lstm(ts_batch_reshaped) # ts_batch_reshaped

        ts_output = ts_output_1.view(batch_size, num_stocks, self.hidden_size)
        #
        # ts_output_2 = self.fc_lstm(ts_output)
        #fc_final = nn.Linear(sequence_length, 1)
        # prediction =   # (64, 1653, 10)

        return self.fc_final(ts_output)#.squeeze(-1) # ts_output_2

We use a 12 month lookback for the sequential data to predict the upcoming 12 months.

After that, based on the currently predicted time-horizon, we get the respective sub-range, 
be it 1-month, 6-month or 12-month ahead.

In [325]:
# Set sequence length (12 months)
in_seq_length = 12
out_seq_length = 12
#
out_seq_length_1m = 1
out_seq_length_6m = 6
out_seq_length_12m = 12

### LSTM Univariate

In [326]:
df_to_evaluate = df_pct - 1

#### Train-Test Splits

Split the data into training and testing sets

In [327]:
importlib.reload(lstm_utils)

# Set sequence length (e.g., 12 time points)
X_train, X_test, y_train, y_test = lstm_utils.split_train_test(df_to_evaluate, [], in_seq_length=in_seq_length, out_seq_length=out_seq_length)

# Check the shapes of the training and test data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([227, 1653, 12])
Shape of y_train: torch.Size([227, 1653, 12])
Shape of X_test: torch.Size([48, 1653, 12])
Shape of y_test: torch.Size([48, 1653, 12])


### Model Training

In [328]:
# Model, Loss, Optimizer
model = LSTM_Uni_Model(input_size=in_seq_length, output_size=out_seq_length).to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

importlib.reload(lstm_utils)
#
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(model, optimizer, X_train, X_test, y_train, y_test)

Epoch 1/1, Loss: 31.5989, Train RMSE: 5.9693, Test RMSE: 0.2867. 
Model training complete and saved.


### LSTM Univariate - 1 Month

Get the known data (train data).
After that, get the first predicted month, or the first predicted sequence of test data

In [364]:
# 1 month
df_train = X_train[:, :, -1].clone()
y_test_pred_1m = y_test_pred[0,:,:].T[0:0] # y_test_pred[:,:,0][0] 
y_test_pred_1m

tensor([], size=(0, 1653))

In [365]:
#len(y_test_pred_1m)

In [366]:
df_forecast_1m = pd.DataFrame(df_train.clone())
df_forecast_1m = pd.concat([df_forecast_1m, pd.DataFrame(y_test_pred_1m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_1m.columns = df.columns
df_forecast_1m.index = df_pct[(in_seq_length - 1) : len(df_forecast_1m) + (in_seq_length - 1)].index
#
df_forecast_1m = df_forecast_1m.tail(variables.TEST_YEARS_NR * 12) 
#
df_forecast_1m.tail(3)

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-06-01,0.04,0.14,0.0,-0.04,0.07,0.08,0.11,0.08,-0.08,-0.03,...,-0.09,0.0,0.16,0.03,-0.01,0.17,0.0,0.01,0.12,0.02
2019-07-01,-0.02,-0.02,0.05,0.06,0.0,-0.03,0.01,0.24,0.05,0.0,...,0.07,-0.09,-0.01,-0.02,0.0,-0.07,0.0,0.11,-0.07,0.06
2019-08-01,-0.05,-0.17,-0.01,0.02,-0.08,-0.09,-0.08,-0.05,0.02,0.0,...,-0.04,-0.09,-0.03,-0.14,-0.09,-0.08,-0.01,0.03,-0.24,-0.1


In [367]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_1m, mu_1m, S_1m, weights_all_1m = mpt_utils.portfolio_and_plot(df_forecast_1m, df)

Expected annual return: 33.1%
Annual volatility: 21.6%
Sharpe Ratio: 1.44
-- Allocation --
{'7575.T': 1, 'MKTX': 1, 'SLP': 1, 'SRT3.DE': 1, 'BURL': 1, '4816.T': 5, 'PAR': 1, '2685.T': 4, 'TAL': 1, 'TPE.DE': 2, 'ADUS': 1, 'MRL.L': 1, '2471.T': 1, 'CENT': 2, 'AOF.DE': 1, 'NSSC': 1, 'FLGT': 4, '2733.T': 2, '9697.T': 5, '7458.T': 2, '4549.T': 3, '7947.T': 1, 'ITI': 3, '7508.T': 2, 'MPX': 2, '2124.T': 5, 'BVB.DE': 2, 'FRPH': 1, '3050.T': 4, '3659.T': 1, '7832.T': 1, 'APLD': 7, '4218.T': 1, '7906.T': 3, 'EBF': 1, '8141.T': 2, '9143.T': 3, '3635.T': 2, '2874.T': 3, '2170.T': 2, '2331.T': 1, '9470.T': 1, '8282.T': 1, '7944.T': 14, 'O5G.DE': 7}
-- Weights Percentage --
{'MKTX': 0.0485, 'MRL.L': 0.0372, 'CENT': 0.0355, 'NSSC': 0.0325, 'BURL': 0.0468, 'TPE.DE': 0.0399, 'SRT3.DE': 0.0474, 'PAR': 0.0446, 'AOF.DE': 0.035, 'SLP': 0.0474, '4816.T': 0.0449, '7575.T': 0.0578, 'TAL': 0.0421, 'FLGT': 0.0248, 'ADUS': 0.0372, '2685.T': 0.0427, '2471.T': 0.0355, 'Other(28)': 0.3004}



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [379]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_1m, mu_1m, S_1m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
7575.T,1,0.0675,16.14%,4.51%,-17.45%
MKTX,1,-0.010227,13.33%,77.62%,49.93%
SLP,1,0.051364,26.96%,77.26%,118.41%
SRT3.DE,1,0.009091,6.41%,36.06%,109.75%
BURL,1,-0.0025,16.67%,13.37%,1.41%
4816.T,5,-0.040682,19.81%,33.20%,39.47%
PAR,1,-0.071818,18.27%,41.04%,70.88%
2685.T,4,-0.034091,6.33%,49.31%,-30.06%
TAL,1,-0.046364,47.28%,48.59%,120.88%
TPE.DE,2,-0.007045,45.29%,-6.28%,3.02%


### LSTM Univariate - 6 Months

Get the known data (train data)
After that, get the first 6 predicted months, or the first 6 predicted sequences of test data

In [368]:
df_train = X_train[:, :, -1].clone()
y_test_pred_6m =  y_test_pred[0,:,:].T[0:6] # y_test_pred[:,:,5][0:6]
y_test_pred_6m

tensor([[-0.0099, -0.0017,  0.0086,  ...,  0.0147,  0.0081,  0.0128],
        [-0.0555, -0.0416, -0.0237,  ...,  0.0158,  0.0128,  0.0142],
        [ 0.0420,  0.0371,  0.0280,  ...,  0.0206,  0.0315,  0.0284],
        [ 0.0410,  0.0349,  0.0331,  ...,  0.0319,  0.0319,  0.0322],
        [ 0.0484,  0.0439,  0.0396,  ...,  0.0335,  0.0400,  0.0392],
        [ 0.0188,  0.0140,  0.0117,  ...,  0.0039,  0.0074,  0.0077]])

In [369]:
df_forecast_6m = pd.DataFrame(df_train.clone())
df_forecast_6m = pd.concat([df_forecast_6m, pd.DataFrame(y_test_pred_6m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_6m.columns = df.columns
df_forecast_6m.index = df_pct[(in_seq_length - 1) : len(df_forecast_6m) + (in_seq_length - 1)].index
#
df_forecast_6m = df_forecast_6m.tail(variables.TEST_YEARS_NR * 12)
#
df_forecast_6m.tail(3)

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-01,0.041008,0.034942,0.033075,0.03328,0.031389,0.031647,0.031352,0.030384,0.032315,0.033274,...,0.035429,0.034211,0.031641,0.033479,0.034301,0.031232,0.030997,0.031858,0.031871,0.032196
2020-01-01,0.048416,0.043874,0.039584,0.038893,0.040554,0.041433,0.043634,0.039937,0.038421,0.040792,...,0.039704,0.036943,0.039709,0.04208,0.043241,0.040815,0.041498,0.033466,0.040028,0.039172
2020-02-01,0.018807,0.014038,0.011664,0.010549,0.012998,0.012465,0.011376,0.010558,0.00925,0.009803,...,0.008285,0.005615,0.00747,0.013697,0.00967,0.008978,0.011212,0.003901,0.007373,0.007662


In [370]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_6m, mu_6m, S_6m, weights_all_6m = mpt_utils.portfolio_and_plot(df_forecast_6m, df)

Expected annual return: 31.6%
Annual volatility: 15.7%
Sharpe Ratio: 1.89
-- Allocation --
{'TPE.DE': 1, '7575.T': 1, '2471.T': 1, 'TAL': 1, 'SLP': 1, 'SRT3.DE': 1, 'ADUS': 1, 'NSSC': 1, 'PAR': 1, 'MKTX': 1, '7564.T': 1, '4816.T': 3, 'CENT': 1, 'PAYC': 1, 'VAR1.DE': 1, 'BURL': 1, 'AOF.DE': 1, 'FLGT': 2, '7832.T': 1, 'ITI': 3, '2685.T': 2, '2733.T': 1, 'NVDA': 1, '9697.T': 3, 'BVB.DE': 1, 'MITK': 1, '7508.T': 2, '7780.T': 1, '2170.T': 2, '4549.T': 1, '7458.T': 1, 'EBF': 1, '2124.T': 3, 'MPX': 1, '3659.T': 1, '4218.T': 1, 'APLD': 4, '9470.T': 1, 'ECV.DE': 1, '2874.T': 2, '7944.T': 16, '8141.T': 1, '3050.T': 1, '3635.T': 1, '7906.T': 1, '9143.T': 1, '9434.T': 1, 'O5G.DE': 10}
-- Weights Percentage --
{'MKTX': 0.0341, 'CENT': 0.0322, 'ITI': 0.0202, 'VAR1.DE': 0.0292, '7832.T': 0.021, 'NSSC': 0.036, 'BURL': 0.0281, 'TPE.DE': 0.054, 'SRT3.DE': 0.0364, 'PAR': 0.0349, 'AOF.DE': 0.0259, 'SLP': 0.0435, '4816.T': 0.0322, '7575.T': 0.0476, 'TAL': 0.0439, 'FLGT': 0.0221, 'PAYC': 0.0315, 'ADUS': 0.0


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [378]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_6m, mu_6m, S_6m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
TPE.DE,1,-0.039362,15.02%,-6.28%,3.02%
7575.T,1,-0.025532,16.58%,4.51%,-17.45%
2471.T,1,-0.024255,27.14%,119.57%,24.38%
TAL,1,-0.005106,5.02%,48.59%,120.88%
SLP,1,0.015319,19.12%,77.26%,118.41%
SRT3.DE,1,-0.008511,13.36%,36.06%,109.75%
ADUS,1,-0.012766,54.84%,30.00%,19.98%
NSSC,1,-0.012766,16.43%,118.24%,-8.54%
PAR,1,-0.049787,2.03%,41.04%,70.88%
MKTX,1,0.015532,41.17%,77.62%,49.93%


### LSTM Univariate - 12 Months

Get the known data (train data)
After that, get the first 12 predicted months, or the first 12 predicted sequences of test data

In [371]:
df_train = X_train[:, :, -1].clone()
y_test_pred_12m = y_test_pred[0,:,:].T[0:12]
y_test_pred_12m

tensor([[-0.0099, -0.0017,  0.0086,  ...,  0.0147,  0.0081,  0.0128],
        [-0.0555, -0.0416, -0.0237,  ...,  0.0158,  0.0128,  0.0142],
        [ 0.0420,  0.0371,  0.0280,  ...,  0.0206,  0.0315,  0.0284],
        ...,
        [ 0.0157,  0.0190,  0.0209,  ...,  0.0152,  0.0114,  0.0144],
        [ 0.0586,  0.0505,  0.0408,  ...,  0.0181,  0.0249,  0.0260],
        [-0.0470, -0.0272, -0.0117,  ...,  0.0266,  0.0227,  0.0197]])

In [372]:
df_forecast_12m = pd.DataFrame(df_train.clone())
df_forecast_12m = pd.concat([df_forecast_12m, pd.DataFrame(y_test_pred_12m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_12m.columns = df.columns
df_forecast_12m.index = df_pct[(in_seq_length - 1) : len(df_forecast_12m) + (in_seq_length - 1)].index
#
df_forecast_12m.tail(3)

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-06-01,0.015691,0.018993,0.020892,0.020854,0.0152,0.015656,0.0174,0.016475,0.017437,0.014194,...,0.015332,0.014762,0.020982,0.017755,0.017769,0.017532,0.014772,0.015169,0.011357,0.014432
2020-07-01,0.058608,0.050501,0.040831,0.033366,0.033835,0.03381,0.031462,0.02982,0.025498,0.024062,...,0.026029,0.027941,0.030552,0.033252,0.030063,0.030649,0.030197,0.0181,0.024855,0.026024
2020-08-01,-0.047012,-0.027225,-0.011695,0.000789,0.006284,0.011069,0.015455,0.018258,0.019303,0.02202,...,0.017684,0.021815,0.021152,0.018096,0.018734,0.021494,0.021594,0.026639,0.022727,0.019714


In [373]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_12m, mu_12m, S_12m, weights_all_12m = mpt_utils.portfolio_and_plot(df_forecast_12m, df)

Expected annual return: 28.2%
Annual volatility: 15.7%
Sharpe Ratio: 1.67
-- Allocation --
{'TAL': 1, 'TPE.DE': 2, 'SLP': 1, '2471.T': 1, '7575.T': 1, 'PAR': 1, 'BURL': 1, 'MKTX': 1, 'ADUS': 1, 'PLUS.L': 1, 'RGLD': 1, 'NSSC': 1, 'AMD': 1, 'AWK': 1, 'PSDL.L': 1, '7780.T': 1, '4816.T': 3, 'BVB.DE': 2, 'FLGT': 2, '2170.T': 3, 'LOVE': 3, '7947.T': 1, 'APLD': 7, '6814.T': 5, 'IOT': 1, 'MITK': 1, '4549.T': 2, '7458.T': 2, '3050.T': 4, 'IVAC': 5, 'BFSA.DE': 1, 'MPX': 1, '9470.T': 1, '1925.T': 1, 'EVLV': 7, 'EBF': 1, '9697.T': 2, 'ENR.DE': 2, 'AGNC': 1, 'FBRT': 1, '2124.T': 3, '9434.T': 2, '7944.T': 17, 'UDMY': 1, '9831.T': 5, 'CRSR': 1, '4751.T': 1, '9143.T': 1, 'ACEL': 1, '2685.T': 1, 'THRY': 1, '3151.T': 1, 'DOLE': 1, '8141.T': 1, 'LAUR': 1, 'RNW': 1, 'LSEA': 1, 'MIR': 1, 'INS.DE': 1, 'ALIT': 1}
-- Weights Percentage --
{'RGLD': 0.0306, 'MKTX': 0.0329, 'AMD': 0.028, '7780.T': 0.0267, 'BVB.DE': 0.0235, 'NSSC': 0.028, 'BURL': 0.0355, 'PSDL.L': 0.0277, 'TPE.DE': 0.0437, 'PAR': 0.0358, 'SLP': 0


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview Table

In [377]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_12m, mu_12m, S_12m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
TAL,1,-0.029831,30.05%,48.59%,120.88%
TPE.DE,2,-0.026441,9.83%,-6.28%,3.02%
SLP,1,0.024237,14.83%,77.26%,118.41%
2471.T,1,0.018644,1.74%,119.57%,24.38%
7575.T,1,0.028644,-3.02%,4.51%,-17.45%
PAR,1,-0.034237,24.58%,41.04%,70.88%
BURL,1,0.004237,5.98%,13.37%,1.41%
MKTX,1,-0.015763,24.68%,77.62%,49.93%
ADUS,1,0.007966,9.46%,30.00%,19.98%
PLUS.L,1,-0.021356,6.09%,-32.76%,131.69%
