In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
#
import importlib
import utilities.lstm_utils as lstm_utils
import utilities.mpt_utils as mpt_utils
import utilities.variables as variables

In [2]:
# set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data

In [3]:
df = pd.read_csv('../../../data/df_monthly_prices_complete_euro.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

## Normalisation

In [4]:
''' 
df_ts_torch = torch.from_numpy(df_time_series.values)
# Reshape to (num_samples, num_features) for normalization
df_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)

# Calculate min and max per feature
df_min = df_ts_flat.min(dim=0, keepdim=True)[0]
df_max = df_ts_flat.max(dim=0, keepdim=True)[0]

# Apply Min-Max normalization
df_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)

# Reshape back to original shape
df_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)
'''


' \ndf_ts_torch = torch.from_numpy(df_time_series.values)\n# Reshape to (num_samples, num_features) for normalization\ndf_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)\n\n# Calculate min and max per feature\ndf_min = df_ts_flat.min(dim=0, keepdim=True)[0]\ndf_max = df_ts_flat.max(dim=0, keepdim=True)[0]\n\n# Apply Min-Max normalization\ndf_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)\n\n# Reshape back to original shape\ndf_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)\n'

## LSTM Model

In [5]:
# Define 4.3. LSTM model
class LSTM_Uni_Model(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2): # , hidden_size=128
        super(LSTM_Uni_Model, self).__init__()
        self.hidden_size = hidden_size
        # init LSTM
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            # num_layers=num_layers,
                            batch_first=True)

        # FC layer for final prediction
        self.fc_final = nn.Linear(hidden_size, 12)

    def forward(self, ts_batch): # ts_batch (64, 1653, 10), static_data (64, 1653, 44)
        # Time-Series Data
        # Reshape dynamic data for LSTM (requires time-step as 2nd dimension)
        batch_size, num_stocks, sequence_length = ts_batch.shape[0], ts_batch.shape[1], ts_batch.shape[2]
        ts_batch_reshaped = ts_batch.view(batch_size * num_stocks, sequence_length)
        #
        ts_output_1, (hidden, cell)  = self.lstm(ts_batch_reshaped) # ts_batch_reshaped
        #
        ts_output = ts_output_1.view(batch_size, num_stocks, self.hidden_size)

        return self.fc_final(ts_output)#.squeeze(-1) # ts_output_2

We use a 12 month lookback for the sequential data to predict the upcoming 12 months.

After that, based on the currently predicted time-horizon, we get the respective sub-range, 
be it 1-month, 6-month or 12-month ahead.

In [6]:
# Set sequence length (12 months)
in_seq_length = 12
out_seq_length = 12
#
out_seq_length_1m = 1
out_seq_length_6m = 6
out_seq_length_12m = 12

### LSTM Univariate

In [7]:
df_to_evaluate = df_pct - 1

#### Train-Test Splits

Split the data into training and testing sets

In [8]:
importlib.reload(lstm_utils)
importlib.reload(variables)

# Set sequence length (e.g., 12 time points)
X_train, X_test, y_train, y_test = lstm_utils.split_train_test(df_to_evaluate, [], 
                                                               in_seq_length=in_seq_length, 
                                                               out_seq_length=out_seq_length, 
                                                               validation_months=(variables.TEST_YEARS_NR * 12))

# Check the shapes of the training and test data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([275, 1332, 12])
Shape of y_train: torch.Size([275, 1332, 12])
Shape of X_test: torch.Size([0, 1332, 12])
Shape of y_test: torch.Size([0, 1332, 12])


  return (torch.tensor(x_ts, dtype=torch.float32),


### Model Training

In [9]:
# Model, Loss, Optimizer
model = LSTM_Uni_Model(input_size=in_seq_length, output_size=out_seq_length).to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

importlib.reload(lstm_utils)
#
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(model, optimizer, X_train, X_test, y_train, y_test)

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (0) must match the size of tensor b (12) at non-singleton dimension 2

### LSTM Univariate - 1 Month

Get the known data (train data).
After that, get the first predicted month, or the first predicted sequence of test data

In [96]:
# 1 month
df_train = X_train[:, :, -1].clone()
y_test_pred_1m = y_test_pred[0,:,:].T[0:0] # y_test_pred[:,:,0][0] 
y_test_pred_1m

tensor([ 0.0393, -0.0033,  0.0122,  ..., -0.0065,  0.0092,  0.0150])

In [97]:
#len(y_test_pred_1m)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0400, -0.0200, -0.0500],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.1400, -0.0200, -0.1700],
        [ 0.0000, -0.1000,  0.0000,  ...,  0.0000,  0.0500, -0.0100],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0100,  0.1100,  0.0300],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.1200, -0.0700, -0.2400],
        [ 0.2100, -0.0100,  0.0900,  ...,  0.0200,  0.0600, -0.1000]])

In [98]:
df_forecast_1m = pd.DataFrame(df_train.clone())
df_forecast_1m = pd.concat([df_forecast_1m, pd.DataFrame(y_test_pred_1m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_1m.columns = df.columns
df_forecast_1m.index = df_pct[(in_seq_length - 1) : len(df_forecast_1m) + (in_seq_length - 1)].index
#
df_forecast_1m = df_forecast_1m.tail(variables.TEST_YEARS_NR * 12) 
#
df_forecast_1m.tail(3)

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-01,-0.02,-0.02,0.05,0.06,0.0,-0.03,0.01,0.24,0.05,0.0,...,0.07,-0.09,-0.01,-0.02,0.0,-0.07,0.0,0.11,-0.07,0.06
2019-08-01,-0.05,-0.17,-0.01,0.02,-0.08,-0.09,-0.08,-0.05,0.02,0.0,...,-0.04,-0.09,-0.03,-0.14,-0.09,-0.08,-0.01,0.03,-0.24,-0.1
2019-09-01,0.039306,-0.003277,0.01216,-0.007937,0.000626,0.005867,0.00707,0.01091,0.011936,-0.001916,...,-0.00324,-0.00076,0.008712,0.015328,0.016228,-0.006808,0.01261,-0.006512,0.009245,0.015005


In [99]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_1m, mu_1m, S_1m, weights_all_1m = mpt_utils.portfolio_and_plot(df_forecast_1m, df)

Expected annual return: 33.0%
Annual volatility: 21.5%
Sharpe Ratio: 1.44
-- Allocation --
{'7575.T': 1, 'SRT3.DE': 1, 'SLP': 1, '4816.T': 5, 'TPE.DE': 2, 'MKTX': 1, 'CENT': 2, '2685.T': 4, 'ADUS': 1, 'TAL': 1, 'NSSC': 1, 'BURL': 1, 'PLUS.L': 1, 'AOF.DE': 1, 'PAR': 1, 'NSP': 1, 'ELS': 1, '9697.T': 6, '2471.T': 1, 'FLGT': 4, '7947.T': 1, '7458.T': 2, 'BVB.DE': 3, '2733.T': 2, '4549.T': 3, 'MPX': 2, '7508.T': 2, '3659.T': 1, 'EBF': 1, '7832.T': 1, '7780.T': 1, 'FRPH': 1, '4218.T': 1, 'ITI': 2, '2874.T': 4, 'APLD': 7, '9470.T': 1, '9143.T': 3, '2170.T': 2, '3050.T': 2, '2331.T': 1, '7944.T': 15, '9831.T': 2, 'O5G.DE': 9, '2124.T': 1}
-- Weights Percentage --
{'NSP': 0.0296, 'MKTX': 0.0423, 'ELS': 0.0247, 'CENT': 0.0412, '7947.T': 0.0201, 'NSSC': 0.035, 'BURL': 0.0345, 'TPE.DE': 0.0426, 'SRT3.DE': 0.0456, 'PAR': 0.0326, 'AOF.DE': 0.0331, 'SLP': 0.0434, '4816.T': 0.0431, '7575.T': 0.0548, 'TAL': 0.0396, 'FLGT': 0.022, 'ADUS': 0.0404, '2685.T': 0.0407, 'PLUS.L': 0.0342, '9697.T': 0.0244, '24


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [100]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_1m, mu_1m, S_1m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
7575.T,1,0.055909,16.12%,4.51%,-17.45%
SRT3.DE,1,0.006591,15.73%,36.06%,109.75%
SLP,1,0.058182,22.77%,77.26%,118.41%
4816.T,5,-0.041364,6.53%,33.20%,39.47%
TPE.DE,2,-0.014545,52.08%,-6.28%,3.02%
MKTX,1,-0.001364,17.59%,77.62%,49.93%
CENT,2,0.041591,7.14%,-5.64%,36.67%
2685.T,4,-0.045682,46.29%,49.31%,-30.06%
ADUS,1,0.005,30.15%,30.00%,19.98%
TAL,1,-0.050682,28.72%,48.59%,120.88%


### LSTM Univariate - 6 Months

Get the known data (train data)
After that, get the first 6 predicted months, or the first 6 predicted sequences of test data

In [101]:
df_train = X_train[:, :, -1].clone()
y_test_pred_6m =  y_test_pred[0,:,:].T[0:6] # y_test_pred[:,:,5][0:6]
y_test_pred_6m

tensor([[ 0.0393,  0.0295,  0.0342,  0.0231,  0.0229,  0.0276,  0.0205,  0.0115,
          0.0088,  0.0138,  0.0205,  0.0092,  0.0041,  0.0051, -0.0072, -0.0137,
         -0.0029,  0.0071,  0.0080,  0.0070,  0.0050,  0.0060,  0.0075,  0.0101,
          0.0233,  0.0159,  0.0131,  0.0133,  0.0139,  0.0108,  0.0010,  0.0092,
          0.0160,  0.0179,  0.0059,  0.0127,  0.0169,  0.0054, -0.0054,  0.0029,
          0.0063,  0.0017,  0.0034,  0.0073,  0.0225,  0.0133,  0.0015,  0.0058],
        [ 0.0496,  0.0554,  0.0503,  0.0402,  0.0322,  0.0227,  0.0195,  0.0217,
          0.0295,  0.0374,  0.0267,  0.0019, -0.0054, -0.0051, -0.0044,  0.0056,
          0.0207,  0.0221,  0.0069,  0.0018,  0.0070,  0.0105,  0.0168,  0.0271,
          0.0290,  0.0167,  0.0136,  0.0088,  0.0031, -0.0009,  0.0039,  0.0174,
          0.0224,  0.0190,  0.0120,  0.0148,  0.0074, -0.0063, -0.0108,  0.0041,
          0.0122,  0.0096,  0.0148,  0.0202,  0.0108, -0.0038,  0.0033,  0.0139],
        [ 0.0337,  0.0441,

In [102]:
df_forecast_6m = pd.DataFrame(df_train.clone())
df_forecast_6m = pd.concat([df_forecast_6m, pd.DataFrame(y_test_pred_6m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_6m.columns = df.columns
df_forecast_6m.index = df_pct[(in_seq_length - 1) : len(df_forecast_6m) + (in_seq_length - 1)].index
#
df_forecast_6m = df_forecast_6m.tail(variables.TEST_YEARS_NR * 12)
#
df_forecast_6m.tail(3)

ValueError: Length mismatch: Expected axis has 227 elements, new values have 1653 elements

In [622]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_6m, mu_6m, S_6m, weights_all_6m = mpt_utils.portfolio_and_plot(df_forecast_6m, df)

Expected annual return: 31.3%
Annual volatility: 15.6%
Sharpe Ratio: 1.88
-- Allocation --
{'TPE.DE': 1, '7575.T': 1, '2471.T': 1, 'SLP': 1, 'TAL': 1, 'ADUS': 1, 'SRT3.DE': 1, 'NSSC': 1, 'PAR': 1, 'MKTX': 1, '7564.T': 1, 'CENT': 1, '4816.T': 3, 'PAYC': 1, 'VAR1.DE': 1, 'AOF.DE': 1, 'FLGT': 2, '7832.T': 1, 'ITI': 3, '2685.T': 2, '2733.T': 1, 'NVDA': 1, '9697.T': 3, 'BVB.DE': 1, '7508.T': 2, 'MITK': 1, '2170.T': 2, '7780.T': 1, '4549.T': 1, 'EBF': 1, '7458.T': 1, '2124.T': 3, 'MPX': 1, '3659.T': 1, '4218.T': 1, '9470.T': 1, 'APLD': 4, 'ECV.DE': 1, '2874.T': 2, '7944.T': 14, '8141.T': 1, '3050.T': 2, '9143.T': 1, '3635.T': 1, '7906.T': 1, '9434.T': 1, 'O5G.DE': 10}
-- Weights Percentage --
{'MKTX': 0.0356, 'CENT': 0.0333, 'ITI': 0.0213, 'VAR1.DE': 0.0314, '7832.T': 0.0213, 'NSSC': 0.0364, 'TPE.DE': 0.0546, 'SRT3.DE': 0.0368, 'PAR': 0.0356, 'AOF.DE': 0.0275, 'SLP': 0.0453, '4816.T': 0.0329, '7575.T': 0.0492, 'TAL': 0.0453, 'FLGT': 0.0225, 'PAYC': 0.0321, 'ADUS': 0.0376, '2685.T': 0.0201, '


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [623]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_6m, mu_6m, S_6m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
TPE.DE,1,-0.04108696,14.35%,-6.28%,3.02%
7575.T,1,-0.02413043,16.53%,4.51%,-17.45%
2471.T,1,-0.02391305,26.62%,119.57%,24.38%
SLP,1,0.01478261,4.76%,77.26%,118.41%
TAL,1,-0.005,18.76%,48.59%,120.88%
ADUS,1,-0.01434783,12.77%,30.00%,19.98%
SRT3.DE,1,-0.008695652,54.15%,36.06%,109.75%
NSSC,1,-0.01391304,16.18%,118.24%,-8.54%
PAR,1,-0.05043478,5.83%,41.04%,70.88%
MKTX,1,0.01456522,41.70%,77.62%,49.93%


### LSTM Univariate - 12 Months

Get the known data (train data)
After that, get the first 12 predicted months, or the first 12 predicted sequences of test data

In [624]:
df_train = X_train[:, :, -1].clone()
y_test_pred_12m = y_test_pred[0,:,:].T[0:12]
y_test_pred_12m

tensor([[ 0.0273,  0.0211,  0.0160,  ...,  0.0128,  0.0124,  0.0105],
        [ 0.0326,  0.0303,  0.0268,  ...,  0.0251,  0.0289,  0.0272],
        [-0.0419, -0.0217, -0.0053,  ...,  0.0214,  0.0150,  0.0159],
        ...,
        [ 0.0012,  0.0108,  0.0190,  ...,  0.0360,  0.0355,  0.0344],
        [-0.0399, -0.0249, -0.0087,  ...,  0.0135,  0.0068,  0.0082],
        [ 0.0559,  0.0430,  0.0331,  ...,  0.0204,  0.0225,  0.0214]])

In [625]:
df_forecast_12m = pd.DataFrame(df_train.clone())
df_forecast_12m = pd.concat([df_forecast_12m, pd.DataFrame(y_test_pred_12m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_12m.columns = df.columns
df_forecast_12m.index = df_pct[(in_seq_length - 1) : len(df_forecast_12m) + (in_seq_length - 1)].index
#
df_forecast_12m.tail(3)

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-06-01,0.001188,0.010785,0.018987,0.02504,0.028092,0.031946,0.03207,0.03277,0.035189,0.037047,...,0.037735,0.03781,0.035724,0.03621,0.037929,0.038319,0.039801,0.035975,0.03551,0.034434
2020-07-01,-0.039887,-0.024872,-0.008746,-7.1e-05,0.006735,0.011793,0.008621,0.008596,0.013545,0.017165,...,0.016548,0.021575,0.015906,0.020002,0.0178,0.018675,0.02646,0.013547,0.006763,0.008164
2020-08-01,0.055871,0.042957,0.033094,0.027165,0.025988,0.02386,0.02227,0.023676,0.019276,0.018502,...,0.017106,0.020371,0.021846,0.021464,0.018962,0.020853,0.018182,0.020366,0.022524,0.021389


In [626]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_12m, mu_12m, S_12m, weights_all_12m = mpt_utils.portfolio_and_plot(df_forecast_12m, df)

Expected annual return: 27.6%
Annual volatility: 15.6%
Sharpe Ratio: 1.64
-- Allocation --
{'TAL': 1, 'TPE.DE': 2, 'SLP': 1, '2471.T': 1, '7575.T': 1, 'PAR': 1, 'BURL': 1, 'MKTX': 1, 'PLUS.L': 1, 'ADUS': 1, 'RGLD': 1, 'NSSC': 1, 'PSDL.L': 1, 'AMD': 1, 'AWK': 1, '4816.T': 3, '7780.T': 1, 'BVB.DE': 2, 'FLGT': 2, '2170.T': 3, 'LOVE': 3, '7947.T': 1, 'IOT': 1, 'APLD': 6, 'MITK': 1, '6814.T': 5, '4549.T': 2, '7458.T': 2, '3050.T': 4, 'IVAC': 6, 'BFSA.DE': 1, '9143.T': 3, 'MPX': 1, '9470.T': 1, 'EVLV': 7, 'EBF': 1, 'PWSC': 1, 'ENR.DE': 2, '9697.T': 2, 'AGNC': 2, '9434.T': 2, '2124.T': 3, 'CRSR': 2, '7944.T': 16, 'FBRT': 1, 'UDMY': 1, '9831.T': 5, '4751.T': 1, '2685.T': 1, 'THRY': 1, 'ACEL': 1, '3151.T': 1, 'DOLE': 1, '8141.T': 1, 'LAUR': 1, 'RNW': 1, 'LSEA': 1, 'MIR': 1, 'XPER': 1, 'INS.DE': 1, 'ARHS': 1, '2874.T': 1, 'ALIT': 1}
-- Weights Percentage --
{'RGLD': 0.0297, 'MKTX': 0.0317, 'AMD': 0.0268, '7780.T': 0.0248, 'BVB.DE': 0.0225, 'NSSC': 0.0274, 'BURL': 0.0346, 'PSDL.L': 0.0271, 'TPE.D


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview Table

In [627]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(weights_all_12m, mu_12m, S_12m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
TAL,1,-0.030000,29.97%,48.59%,120.88%
TPE.DE,2,-0.027258,9.83%,-6.28%,3.02%
SLP,1,0.027258,14.75%,77.26%,118.41%
2471.T,1,0.017903,1.87%,119.57%,24.38%
7575.T,1,0.027258,-2.99%,4.51%,-17.45%
...,...,...,...,...,...
XPER,1,0.001129,11.92%,0.99%,-4.11%
INS.DE,1,0.004677,86.23%,-11.61%,16.32%
ARHS,1,0.001129,5.83%,-23.14%,35.18%
2874.T,1,0.007419,5.55%,22.31%,-10.26%
