In [49]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
#
import importlib
import utilities.lstm_utils as lstm_utils
import utilities.mpt_utils as mpt_utils
import utilities.variables as variables

In [50]:
# set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data

In [51]:
df = pd.read_csv('../../../data/df_monthly_prices_complete_euro.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

## LSTM Model

In [53]:
# Define 4.3. LSTM model
class LSTM_Uni_Model(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2): # , hidden_size=128
        super(LSTM_Uni_Model, self).__init__()
        self.hidden_size = hidden_size
        # init LSTM
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            # num_layers=num_layers,
                            batch_first=True)

        # FC layer for final prediction
        self.fc_final = nn.Linear(hidden_size, 12)

    def forward(self, ts_batch): # ts_batch (64, 1653, 10), static_data (64, 1653, 44)
        # Time-Series Data
        # Reshape dynamic data for LSTM (requires time-step as 2nd dimension)
        batch_size, num_stocks, sequence_length = ts_batch.shape[0], ts_batch.shape[1], ts_batch.shape[2]
        ts_batch_reshaped = ts_batch.view(batch_size * num_stocks, sequence_length)
        #
        ts_output_1, (hidden, cell)  = self.lstm(ts_batch_reshaped) # ts_batch_reshaped
        #
        ts_output = ts_output_1.view(batch_size, num_stocks, self.hidden_size)

        return self.fc_final(ts_output)#.squeeze(-1) # ts_output_2

We use a 12 month lookback for the sequential data to predict the upcoming 12 months.

After that, based on the currently predicted time-horizon, we get the respective sub-range, 
be it 1-month, 6-month or 12-month ahead.

In [54]:
# Set sequence length (12 months)
in_seq_length = 12
out_seq_length = 12
#
out_seq_length_1m = 1
out_seq_length_6m = 6
out_seq_length_12m = 12

### LSTM Univariate

In [55]:
df_to_evaluate = df_pct - 1

#### Train-Test Splits

Split the data into training and testing sets

In [56]:
importlib.reload(lstm_utils)
importlib.reload(variables)

# Set sequence length (e.g., 12 time points)
X_train, X_test, y_train, y_test = lstm_utils.split_train_test(df_to_evaluate, [], 
                                                               in_seq_length=in_seq_length, 
                                                               out_seq_length=out_seq_length, 
                                                               validation_months=(variables.LSTM_TEST_YEARS_NR * 12))

# Check the shapes of the training and test data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([215, 1332, 12])
Shape of y_train: torch.Size([215, 1332, 12])
Shape of X_test: torch.Size([60, 1332, 12])
Shape of y_test: torch.Size([60, 1332, 12])


### Model Training

In [57]:
X_test.shape

torch.Size([60, 1332, 12])

In [58]:
y_test.shape

torch.Size([60, 1332, 12])

In [59]:
# Model, Loss, Optimizer
model = LSTM_Uni_Model(input_size=in_seq_length, output_size=out_seq_length).to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

importlib.reload(lstm_utils)
#
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(model, optimizer, X_train, X_test, y_train, y_test)

Epoch 1/10, Loss: 0.0137, Train RMSE: 0.1174, Test RMSE: 0.1272. 
Epoch 2/10, Loss: 0.0130, Train RMSE: 0.1144, Test RMSE: 0.1269. 
Epoch 3/10, Loss: 0.0129, Train RMSE: 0.1136, Test RMSE: 0.1268. 
Epoch 4/10, Loss: 0.0128, Train RMSE: 0.1135, Test RMSE: 0.1268. 
Epoch 5/10, Loss: 0.0128, Train RMSE: 0.1134, Test RMSE: 0.1267. 
Epoch 6/10, Loss: 0.0128, Train RMSE: 0.1134, Test RMSE: 0.1267. 
Epoch 7/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1267. 
Epoch 8/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1268. 
Epoch 9/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1268. 
Epoch 10/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1268. 
Model training complete and saved.


### LSTM Univariate - 1 Month

Get the known data (train data).
After that, get the first predicted month, or the first predicted sequence of test data

In [60]:
# 1 month
df_train = X_train[:, :, -1].clone()
y_test_pred_1m = y_test_pred[0,:,:].T[0:0] # y_test_pred[:,:,0][0] 
y_test_pred_1m

tensor([], size=(0, 1332))

In [61]:
df_forecast_1m = pd.DataFrame(df_train.clone())
df_forecast_1m = pd.concat([df_forecast_1m, pd.DataFrame(y_test_pred_1m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_1m.columns = df.columns
df_forecast_1m.index = df_pct[(in_seq_length - 1) : len(df_forecast_1m) + (in_seq_length - 1)].index
#
df_forecast_1m = df_forecast_1m.tail(variables.LSTM_TEST_YEARS_NR * 12) 
#
df_forecast_1m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-06-01,0.1,0.1,0.25,-0.24,0.04,-0.07,-0.01,0.1,-0.18,-0.09,...,-0.03,0.0,0.08,0.0,0.0,0.0,0.02,0.02,0.0,0.0
2018-07-01,0.02,0.0,0.04,0.1,0.0,0.19,0.03,-0.01,0.1,0.06,...,0.07,0.0,-0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-08-01,-0.08,0.01,0.07,0.13,-0.09,-0.07,0.14,-0.07,-0.06,-0.08,...,0.02,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,0.0


In [81]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_1m, mu_1m, S_1m, allocations_1m = mpt_utils.portfolio_and_plot(df_forecast_1m, df, plot_threshold=0.018)

Expected annual return: 81.8%
Annual volatility: 6.9%
Sharpe Ratio: 11.54
-- Allocation --
{'ETSY': 1, '2491.T': 6, 'TPE.DE': 2, 'CVNA': 1, '3697.T': 1, 'PAR': 1, 'FCN': 1, 'FNKO': 4, 'NTNX': 1, 'RUN': 4, '6951.T': 1, '7518.T': 2, 'ABDP.L': 1, 'BOOT': 1, 'LULU': 1, 'HSII': 1, 'ADUS': 1, 'NSP': 1, 'IIPR': 1, '2471.T': 11, '6814.T': 3, 'WSC': 1, 'HAE': 1, 'MEDP': 1, 'AEO': 1, '4568.T': 1, 'QLYS': 1, '6866.T': 1, 'NSSC': 1, '9697.T': 1, '6200.T': 3, '4751.T': 5, 'CATO': 4, '2475.T': 2, 'MOV': 1, 'BBSI': 1, '2429.T': 3, '2389.T': 2, 'EH': 2, '2154.T': 2, '3994.T': 1, 'SHYF': 2, 'COK.DE': 1, '7730.T': 1, 'FARO': 1, '4218.T': 2, '4641.T': 1, 'GES': 1, 'PRDO': 1, '2170.T': 7, '6914.T': 2, '4503.T': 1, 'TPR': 1, 'UAA': 3, '9413.T': 1, '7868.T': 5, 'NEO': 1, '6183.T': 1, '7990.T': 1, 'FTK.DE': 1, 'NVRI': 3, '6055.T': 1, '7296.T': 1, '3151.T': 2, '7476.T': 1, '3048.T': 2, '7575.T': 3, 'CNK': 1, '7613.T': 2, '7936.T': 2, 'PKE': 1, '7220.T': 1, '4301.T': 1, '8439.T': 1, '3738.T': 1, '7752.T': 2, '


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [63]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_1m, mu_1m, S_1m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
ETSY,1,-0.004421,47.69%,-33.98%,-19.03%
2491.T,6,-0.024421,182.25%,-40.43%,-10.18%
TPE.DE,2,0.003474,183.13%,-14.53%,-17.65%
CVNA,1,-0.062842,27.62%,98.50%,211.25%
3697.T,1,0.013053,170.34%,15.27%,-56.94%
...,...,...,...,...,...
7893.T,1,0.019579,72.08%,27.21%,7.98%
TRTX,1,0.000421,14.39%,-25.34%,49.38%
AMCR,1,0.010947,-13.23%,-20.57%,26.52%
HBI,1,0.012737,11.15%,-37.03%,54.02%


### LSTM Univariate - 6 Months

Get the known data (train data)
After that, get the first 6 predicted months, or the first 6 predicted sequences of test data

In [64]:
df_train = X_train[:, :, -1].clone()
y_test_pred_6m =  y_test_pred[0,:,:].T[0:6] # y_test_pred[:,:,5][0:6]
y_test_pred_6m

tensor([[-0.0280, -0.0047,  0.0086,  ...,  0.0108,  0.0109,  0.0092],
        [-0.0090, -0.0007,  0.0065,  ...,  0.0117,  0.0117,  0.0108],
        [-0.0276, -0.0113,  0.0034,  ...,  0.0107,  0.0110,  0.0111],
        [ 0.0041,  0.0082,  0.0174,  ...,  0.0102,  0.0103,  0.0105],
        [ 0.0158,  0.0099,  0.0120,  ...,  0.0116,  0.0114,  0.0112],
        [ 0.0290,  0.0180,  0.0110,  ...,  0.0116,  0.0115,  0.0114]])

In [65]:
df_forecast_6m = pd.DataFrame(df_train.clone())
df_forecast_6m = pd.concat([df_forecast_6m, pd.DataFrame(y_test_pred_6m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_6m.columns = df.columns
df_forecast_6m.index = df_pct[(in_seq_length - 1) : len(df_forecast_6m) + (in_seq_length - 1)].index
#
df_forecast_6m = df_forecast_6m.tail(variables.LSTM_TEST_YEARS_NR * 12)
#
df_forecast_6m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-01,0.004123,0.008216,0.01745,0.024211,0.017563,0.011333,0.009798,0.011223,0.010479,0.007691,...,0.008862,0.009656,0.011665,0.010948,0.01068,0.010749,0.010192,0.010181,0.010302,0.010475
2019-01-01,0.015824,0.009935,0.011975,0.005139,0.013306,0.010762,0.009956,0.011044,0.009007,0.006143,...,0.011076,0.010927,0.012568,0.012185,0.011793,0.011307,0.011577,0.011584,0.011432,0.01118
2019-02-01,0.028988,0.01801,0.010951,0.013002,0.014813,0.01641,0.018504,0.016826,0.01373,0.014017,...,0.013819,0.012654,0.011406,0.011087,0.011089,0.011068,0.011597,0.011632,0.011451,0.011371


In [75]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_6m, mu_6m, S_6m, allocations_6m = mpt_utils.portfolio_and_plot(df_forecast_6m, df, plot_threshold=0.008)

Expected annual return: 17.7%
Annual volatility: 2.7%
Sharpe Ratio: 5.93
-- Allocation --
{'FNKO': 3, 'CVNA': 1, 'PAR': 1, '2685.T': 2, '7575.T': 6, 'MDB': 1, '2120.T': 25, '2389.T': 1, '6268.T': 2, 'CEC.DE': 19, 'ETSY': 1, 'ADUS': 1, 'OC': 1, '4218.T': 2, 'RUN': 1, 'CON.DE': 1, 'UNM': 1, '2429.T': 1, '8154.T': 1, 'VSEC': 1, '4151.T': 1, 'SHYF': 1, '3402.T': 4, '9842.T': 2, 'AMAT': 1, 'CSGS': 1, 'GEN': 1, 'KNX': 1, '8173.T': 1, '8219.T': 2, '3046.T': 1, '3697.T': 1, '7915.T': 2, 'WHR': 1, 'DAN': 2, 'CCK': 1, '9766.T': 1, '4549.T': 1, 'CROX': 1, 'LULU': 1, '6436.T': 1, 'DLX': 1, '8060.T': 1, '2874.T': 2, 'MOD': 1, 'CATO': 1, '9831.T': 6, '3086.T': 2, 'NSSC': 1, '3659.T': 1, 'STAA': 1, '9107.T': 1, 'CMPR': 1, 'DLTR': 1, '6645.T': 1, '7976.T': 1, 'HBH.DE': 1, '7947.T': 1, '2379.T': 1, 'RDUS': 1, 'NXPI': 1, 'TBCG.L': 1, '3104.T': 1, '8008.T': 1, '5110.T': 1, '9409.T': 1, 'MHO': 1, '3076.T': 1, '7988.T': 1, 'IGT': 1, 'MHK': 1, 'JLL': 1, 'EBAY': 1, '7868.T': 3, 'NYT': 1, '3191.T': 1, 'UTMD':


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview table

In [67]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_6m, mu_6m, S_6m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
FNKO,3,-0.093750,97.98%,-62.69%,34.43%
CVNA,1,-0.079318,19.60%,98.50%,211.25%
PAR,1,-0.032784,83.90%,1.20%,41.61%
2685.T,2,0.020341,-19.19%,61.48%,22.88%
7575.T,6,0.027273,-20.83%,19.33%,5.04%
...,...,...,...,...,...
KVHI,1,0.001705,10.13%,-56.31%,-11.70%
PDM,1,-0.004659,7.07%,-44.87%,82.09%
2170.T,2,-0.010909,6.98%,-54.33%,42.90%
APLD,1,0.000625,19.64%,115.04%,2.55%


### LSTM Univariate - 12 Months

Get the known data (train data)
After that, get the first 12 predicted months, or the first 12 predicted sequences of test data

In [68]:
df_train = X_train[:, :, -1].clone()
y_test_pred_12m = y_test_pred[0,:,:].T[0:12]
y_test_pred_12m

tensor([[-0.0280, -0.0047,  0.0086,  ...,  0.0108,  0.0109,  0.0092],
        [-0.0090, -0.0007,  0.0065,  ...,  0.0117,  0.0117,  0.0108],
        [-0.0276, -0.0113,  0.0034,  ...,  0.0107,  0.0110,  0.0111],
        ...,
        [-0.0019,  0.0016,  0.0016,  ...,  0.0113,  0.0115,  0.0118],
        [ 0.0500,  0.0340,  0.0271,  ...,  0.0103,  0.0104,  0.0118],
        [-0.0382, -0.0150,  0.0027,  ...,  0.0123,  0.0122,  0.0109]])

In [69]:
df_forecast_12m = pd.DataFrame(df_train.clone())
df_forecast_12m = pd.concat([df_forecast_12m, pd.DataFrame(y_test_pred_12m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_12m.columns = df.columns
df_forecast_12m.index = df_pct[(in_seq_length - 1) : len(df_forecast_12m) + (in_seq_length - 1)].index
#
df_forecast_12m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-06-01,-0.001887,0.00158,0.001584,0.006454,0.007882,0.010522,0.013018,0.010539,0.012357,0.014041,...,0.013725,0.012624,0.009815,0.010879,0.0112,0.011291,0.011459,0.011304,0.011462,0.011761
2019-07-01,0.04997,0.033988,0.02711,0.018694,0.013065,0.008116,0.006511,0.009205,0.009685,0.007651,...,0.010414,0.010591,0.012147,0.011469,0.010988,0.011125,0.010511,0.01032,0.010352,0.011836
2019-08-01,-0.038165,-0.01498,0.00268,0.001762,0.010951,0.008956,0.011387,0.011515,0.009335,0.008306,...,0.009062,0.010247,0.01318,0.012457,0.012176,0.011727,0.01194,0.012282,0.012238,0.010917


In [79]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_12m, mu_12m, S_12m, allocations_12m = mpt_utils.portfolio_and_plot(df_forecast_12m, df, plot_threshold=0.014)

Expected annual return: 14.5%
Annual volatility: 0.3%
Sharpe Ratio: 42.63
-- Allocation --
{'ENPH': 2, 'PEN': 1, '7575.T': 17, '2491.T': 16, 'SONO': 11, 'NSSC': 6, 'SHYF': 10, '2685.T': 4, '7864.T': 9, '3148.T': 4, 'OSIS': 1, '9076.T': 6, 'ADUS': 1, '4218.T': 7, 'CCK': 1, '8173.T': 5, '9627.T': 3, 'WOSG.L': 1, '4151.T': 5, '7947.T': 4, 'RGA': 1, 'TEP.L': 1, 'PFG': 1, '1973.T': 5, 'CGNX': 2, '7740.T': 5, '2471.T': 33, 'OKTA': 1, '2331.T': 12, '9433.T': 2, '9364.T': 3, '3994.T': 2, 'GEN': 4, '8022.T': 2, 'HNR1.DE': 1, 'ALGN': 1, '9409.T': 6, '9260.T': 5, 'UNM': 1, 'PGR': 1, 'CCS': 1, 'ATRI': 1, '9107.T': 5, 'CNK': 4, '7747.T': 3, '9072.T': 3, '8008.T': 5, '3608.T': 11, 'LOVE': 3, 'FDM.L': 1, '2124.T': 17, 'CNNE': 3, 'AVGO': 1, '2429.T': 3, '7915.T': 5, '2733.T': 2, 'YETI': 1, '7936.T': 6, '4549.T': 4, '8150.T': 3, 'CHEF': 2, 'KTB': 1, 'DX': 5, 'ARLO': 5, '6951.T': 1, 'MTCH': 1, '6326.T': 3, '8154.T': 1, 'EOAN.DE': 3, '2874.T': 5, 'COK.DE': 1, 'GME': 3, '8876.T': 4, '8439.T': 4, '8252.T':


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Overview Table

In [71]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_12m, mu_12m, S_12m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
ENPH,2,0.000236,-1.12%,-74.11%,-13.52%
PEN,1,0.000000,5.37%,10.43%,-16.55%
7575.T,17,0.000000,12.75%,19.33%,5.04%
2491.T,16,0.000236,14.96%,-40.43%,-10.18%
SONO,11,0.000000,15.43%,-33.04%,-11.84%
...,...,...,...,...,...
3612.T,1,0.000000,13.86%,15.39%,21.65%
4619.T,1,0.000000,14.09%,41.69%,-7.28%
9069.T,1,0.000000,15.48%,6.97%,19.62%
9831.T,2,0.000000,15.03%,2.35%,-0.04%
