In [132]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
#
import importlib
import utilities.lstm_utils as lstm_utils
import utilities.mpt_utils as mpt_utils
import utilities.variables as variables

In [133]:
# set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data

In [134]:
df = pd.read_csv('../../../data/df_monthly_prices_complete_euro.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

## LSTM Model

In [135]:
# Define 4.3. LSTM model
class LSTM_Uni_Model(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2): # , hidden_size=128
        super(LSTM_Uni_Model, self).__init__()
        self.hidden_size = hidden_size
        # init LSTM
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            # num_layers=num_layers,
                            batch_first=True)

        # FC layer for final prediction
        self.fc_final = nn.Linear(hidden_size, 12)

    def forward(self, ts_batch): # ts_batch (64, 1653, 10), static_data (64, 1653, 44)
        # Time-Series Data
        # Reshape dynamic data for LSTM (requires time-step as 2nd dimension)
        batch_size, num_stocks, sequence_length = ts_batch.shape[0], ts_batch.shape[1], ts_batch.shape[2]
        ts_batch_reshaped = ts_batch.view(batch_size * num_stocks, sequence_length)
        #
        ts_output_1, (hidden, cell)  = self.lstm(ts_batch_reshaped) # ts_batch_reshaped
        #
        ts_output = ts_output_1.view(batch_size, num_stocks, self.hidden_size)

        return self.fc_final(ts_output)#.squeeze(-1) # ts_output_2

We use a 12 month lookback for the sequential data to predict the upcoming 12 months.

After that, based on the currently predicted time-horizon, we get the respective sub-range, 
be it 1-month, 6-month or 12-month ahead.

In [136]:
# Set sequence length (12 months)
in_seq_length = 12
out_seq_length = 12
#
out_seq_length_1m = 1
out_seq_length_6m = 6
out_seq_length_12m = 12

### LSTM Univariate

In [137]:
df_to_evaluate = df_pct - 1

#### Train-Test Splits

Split the data into training and testing sets

In [138]:
importlib.reload(lstm_utils)
importlib.reload(variables)

# Set sequence length (e.g., 12 time points)
X_train, X_test, y_train, y_test = lstm_utils.split_train_test(df_to_evaluate, [], 
                                                               in_seq_length=in_seq_length, 
                                                               out_seq_length=out_seq_length, 
                                                               validation_months=(variables.LSTM_TEST_YEARS_NR * 12))

# Check the shapes of the training and test data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([215, 1332, 12])
Shape of y_train: torch.Size([215, 1332, 12])
Shape of X_test: torch.Size([60, 1332, 12])
Shape of y_test: torch.Size([60, 1332, 12])


### Model Training

In [139]:
X_test.shape

torch.Size([60, 1332, 12])

In [140]:
y_test.shape

torch.Size([60, 1332, 12])

In [141]:
# Model, Loss, Optimizer
model = LSTM_Uni_Model(input_size=in_seq_length, output_size=out_seq_length).to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

importlib.reload(lstm_utils)
#
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(model, optimizer, X_train, X_test, y_train, y_test, epochs=10)

Epoch 1/10, Loss: 0.0141, Train RMSE: 0.1193, Test RMSE: 0.1271. 
Epoch 2/10, Loss: 0.0130, Train RMSE: 0.1141, Test RMSE: 0.1272. 
Epoch 3/10, Loss: 0.0129, Train RMSE: 0.1138, Test RMSE: 0.1267. 
Epoch 4/10, Loss: 0.0128, Train RMSE: 0.1136, Test RMSE: 0.1267. 
Epoch 5/10, Loss: 0.0128, Train RMSE: 0.1135, Test RMSE: 0.1267. 
Epoch 6/10, Loss: 0.0128, Train RMSE: 0.1134, Test RMSE: 0.1267. 
Epoch 7/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1267. 
Epoch 8/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1267. 
Epoch 9/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1267. 
Epoch 10/10, Loss: 0.0128, Train RMSE: 0.1133, Test RMSE: 0.1268. 
Model training complete and saved.


### LSTM Univariate - 1 Month

Get the known data (train data).
After that, get the first predicted month, or the first predicted sequence of test data

In [142]:
# 1 month
df_train = X_train[:, :, -1].clone()
y_test_pred_1m = y_test_pred[0,:,:].T[0:0] # y_test_pred[:,:,0][0] 
y_test_pred_1m

tensor([], size=(0, 1332))

In [143]:
df_forecast_1m = pd.DataFrame(df_train.clone())
df_forecast_1m = pd.concat([df_forecast_1m, pd.DataFrame(y_test_pred_1m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_1m.columns = df.columns
df_forecast_1m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_1m) + (in_seq_length - 1)].index)
#
df_forecast_1m = df_forecast_1m.tail(variables.LSTM_TEST_YEARS_NR * 12) 
#
df_forecast_1m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-06-01,0.1,0.1,0.25,-0.24,0.04,-0.07,-0.01,0.1,-0.18,-0.09,...,-0.03,0.0,0.08,0.0,0.0,0.0,0.02,0.02,0.0,0.0
2018-07-01,0.02,0.0,0.04,0.1,0.0,0.19,0.03,-0.01,0.1,0.06,...,0.07,0.0,-0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-08-01,-0.08,0.01,0.07,0.13,-0.09,-0.07,0.14,-0.07,-0.06,-0.08,...,0.02,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,0.0


In [144]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_1m, mu_1m, S_1m, allocations_1m, weights_all_1m = mpt_utils.portfolio_and_plot(df_forecast_1m, df, plot_threshold=0.018)

Expected annual return: 81.8%
Annual volatility: 6.9%
Sharpe Ratio: 11.54
-- Allocation --
{'ETSY': 1, '2491.T': 6, 'TPE.DE': 2, 'CVNA': 1, '3697.T': 1, 'PAR': 1, 'FCN': 1, 'FNKO': 4, 'NTNX': 1, 'RUN': 4, '6951.T': 1, '7518.T': 2, 'ABDP.L': 1, 'BOOT': 1, 'LULU': 1, 'HSII': 1, 'ADUS': 1, 'NSP': 1, 'IIPR': 1, '2471.T': 11, '6814.T': 3, 'WSC': 1, 'HAE': 1, 'MEDP': 1, 'AEO': 1, '4568.T': 1, 'QLYS': 1, '6866.T': 1, 'NSSC': 1, '9697.T': 1, '6200.T': 3, '4751.T': 5, 'CATO': 4, '2475.T': 2, 'MOV': 1, 'BBSI': 1, '2429.T': 3, '2389.T': 2, 'EH': 2, '2154.T': 2, '3994.T': 1, 'SHYF': 2, 'COK.DE': 1, '7730.T': 1, 'FARO': 1, '4218.T': 2, '4641.T': 1, 'GES': 1, 'PRDO': 1, '2170.T': 7, '6914.T': 2, '4503.T': 1, 'TPR': 1, 'UAA': 3, '9413.T': 1, '7868.T': 5, 'NEO': 1, '6183.T': 1, '7990.T': 1, 'FTK.DE': 1, 'NVRI': 3, '6055.T': 1, '7296.T': 1, '3151.T': 2, '7476.T': 1, '3048.T': 2, '7575.T': 3, 'CNK': 1, '7613.T': 2, '7936.T': 2, 'PKE': 1, '7220.T': 1, '4301.T': 1, '8439.T': 1, '3738.T': 1, '7752.T': 2, '


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rate

In [145]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_1m, weights=weights_all_1m)

Portfolio real return rate:  0.82%


#### Overview table

In [146]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_1m, mu_1m, S_1m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
ETSY,1,-0.004421,47.69%,-33.98%,-19.03%
2491.T,6,-0.024421,182.25%,-40.43%,-10.18%
TPE.DE,2,0.003474,183.13%,-14.53%,-17.65%
CVNA,1,-0.062842,27.62%,98.50%,211.25%
3697.T,1,0.013053,170.34%,15.27%,-56.94%
...,...,...,...,...,...
7893.T,1,0.019579,72.08%,27.21%,7.98%
TRTX,1,0.000421,14.39%,-25.34%,49.38%
AMCR,1,0.010947,-13.23%,-20.57%,26.52%
HBI,1,0.012737,11.15%,-37.03%,54.02%


### LSTM Univariate - 6 Months

Get the known data (train data)
After that, get the first 6 predicted months, or the first 6 predicted sequences of test data

In [147]:
df_train = X_train[:, :, -1].clone()
y_test_pred_6m =  y_test_pred[0,:,:].T[0:6] # y_test_pred[:,:,5][0:6]
y_test_pred_6m

tensor([[-0.0335, -0.0101,  0.0035,  ...,  0.0101,  0.0100,  0.0091],
        [-0.0208, -0.0055,  0.0074,  ...,  0.0120,  0.0120,  0.0112],
        [-0.0404, -0.0180,  0.0005,  ...,  0.0101,  0.0103,  0.0105],
        [ 0.0477,  0.0286,  0.0229,  ...,  0.0102,  0.0101,  0.0105],
        [ 0.0007,  0.0048,  0.0119,  ...,  0.0106,  0.0105,  0.0105],
        [ 0.0156,  0.0169,  0.0135,  ...,  0.0122,  0.0121,  0.0119]])

In [148]:
df_forecast_6m = pd.DataFrame(df_train.clone())
df_forecast_6m = pd.concat([df_forecast_6m, pd.DataFrame(y_test_pred_6m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_6m.columns = df.columns
df_forecast_6m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_6m) + (in_seq_length - 1)].index)
#
df_forecast_6m = df_forecast_6m.tail(variables.LSTM_TEST_YEARS_NR * 12)
#
df_forecast_6m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-01,0.047733,0.028598,0.022883,0.017308,0.017454,0.011186,0.008255,0.011459,0.009113,0.006611,...,0.008876,0.009591,0.012648,0.011494,0.010843,0.010609,0.010388,0.010206,0.010088,0.010459
2019-01-01,0.000669,0.0048,0.011901,0.006883,0.013904,0.012305,0.008991,0.010621,0.009356,0.007768,...,0.010053,0.009969,0.00992,0.010228,0.0104,0.010324,0.010739,0.010585,0.010471,0.010536
2019-02-01,0.015591,0.016868,0.013543,0.015609,0.01537,0.015178,0.016904,0.015836,0.013221,0.013326,...,0.013914,0.013311,0.013155,0.012328,0.011962,0.011781,0.012162,0.012238,0.012098,0.01192


In [149]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_6m, mu_6m, S_6m, allocations_6m, weights_all_6m = mpt_utils.portfolio_and_plot(df_forecast_6m, df, plot_threshold=0.008)

Expected annual return: 17.5%
Annual volatility: 2.7%
Sharpe Ratio: 5.71
-- Allocation --
{'FNKO': 3, 'CVNA': 1, 'PAR': 1, '2685.T': 2, '7575.T': 6, '2120.T': 27, 'MDB': 1, 'CEC.DE': 20, '2389.T': 1, '6268.T': 2, '4218.T': 2, 'OC': 1, 'ETSY': 1, 'ADUS': 1, 'CON.DE': 1, 'UNM': 1, 'RUN': 1, '8154.T': 1, 'CATO': 1, 'VSEC': 1, 'KNX': 1, '4151.T': 1, '9842.T': 2, '3402.T': 4, '2429.T': 1, 'GEN': 1, 'SHYF': 1, 'AMAT': 1, 'CSGS': 1, 'DAN': 2, '8173.T': 1, 'WHR': 1, '8219.T': 2, '9766.T': 1, 'DLX': 1, 'CCK': 1, '4549.T': 1, '8060.T': 1, '2874.T': 2, '6436.T': 1, '7915.T': 2, '3046.T': 1, 'NSSC': 1, '3086.T': 2, '3697.T': 1, 'CROX': 1, 'MOD': 1, '6645.T': 1, 'LULU': 1, 'DLTR': 1, '3659.T': 1, '9107.T': 1, 'HBH.DE': 1, '9831.T': 6, '2331.T': 2, 'IGT': 1, '7976.T': 1, '8008.T': 1, 'NXPI': 1, 'MHK': 1, '7947.T': 1, 'STAA': 1, 'RDUS': 1, '2379.T': 1, '3104.T': 1, 'EBAY': 1, 'CMPR': 1, '7988.T': 1, '5332.T': 1, '5110.T': 1, 'RGLD': 1, 'MHO': 1, 'JLL': 1, '1925.T': 1, '7729.T': 1, 'TBCG.L': 1, '3076.


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rates

In [150]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_6m, weights=weights_all_6m)

Portfolio real return rate:  0.08%


#### Overview table

In [151]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_6m, mu_6m, S_6m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
FNKO,3,-0.092722,95.13%,-62.69%,34.43%
CVNA,1,-0.078757,19.00%,98.50%,211.25%
PAR,1,-0.036509,82.31%,1.20%,41.61%
2685.T,2,0.021479,-19.68%,61.48%,22.88%
7575.T,6,0.027574,-21.10%,19.33%,5.04%
...,...,...,...,...,...
KVHI,1,-0.001243,9.85%,-56.31%,-11.70%
PDM,1,-0.004793,6.78%,-44.87%,82.09%
2170.T,2,-0.012189,6.70%,-54.33%,42.90%
APLD,1,0.000473,19.57%,115.04%,2.55%


### LSTM Univariate - 12 Months

Get the known data (train data)
After that, get the first 12 predicted months, or the first 12 predicted sequences of test data

In [152]:
df_train = X_train[:, :, -1].clone()
y_test_pred_12m = y_test_pred[0,:,:].T[0:12]
y_test_pred_12m

tensor([[-0.0335, -0.0101,  0.0035,  ...,  0.0101,  0.0100,  0.0091],
        [-0.0208, -0.0055,  0.0074,  ...,  0.0120,  0.0120,  0.0112],
        [-0.0404, -0.0180,  0.0005,  ...,  0.0101,  0.0103,  0.0105],
        ...,
        [-0.0213, -0.0087,  0.0003,  ...,  0.0115,  0.0115,  0.0126],
        [ 0.0445,  0.0296,  0.0220,  ...,  0.0100,  0.0101,  0.0107],
        [ 0.0289,  0.0203,  0.0214,  ...,  0.0111,  0.0111,  0.0109]])

In [153]:
df_forecast_12m = pd.DataFrame(df_train.clone())
df_forecast_12m = pd.concat([df_forecast_12m, pd.DataFrame(y_test_pred_12m)], ignore_index=True)
# Assign back columns and indices to make human understandable
df_forecast_12m.columns = df.columns
df_forecast_12m.index = pd.to_datetime(df_pct[(in_seq_length - 1) : len(df_forecast_12m) + (in_seq_length - 1)].index)
#
df_forecast_12m.tail(3)

Unnamed: 0_level_0,GME,2124.T,2491.T,2471.T,3046.T,PAT.DE,CROX,AOF.DE,SFQ.DE,DAN,...,KREF,HLN.L,DBX,BNL,CBL,KVUE,PSTL,NTST,BLCO,NBS.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-06-01,-0.021329,-0.008716,0.00028,0.009266,0.008399,0.008911,0.012849,0.01218,0.010549,0.011212,...,0.013862,0.012927,0.011608,0.011082,0.011032,0.01139,0.011672,0.011533,0.011458,0.012601
2019-07-01,0.04449,0.029569,0.021953,0.014997,0.011593,0.009052,0.007685,0.010063,0.010982,0.009906,...,0.0097,0.010307,0.011639,0.010816,0.010437,0.010539,0.010028,0.010015,0.010103,0.010718
2019-08-01,0.028944,0.020312,0.021394,0.016694,0.016403,0.011914,0.009373,0.010621,0.01132,0.009033,...,0.008955,0.00983,0.011187,0.01127,0.01132,0.011305,0.011112,0.01113,0.011137,0.010924


In [154]:
importlib.reload(lstm_utils)
importlib.reload(mpt_utils)

weights_12m, mu_12m, S_12m, allocations_12m, weights_all_12m = mpt_utils.portfolio_and_plot(df_forecast_12m, df, plot_threshold=0.022)

Expected annual return: 14.3%
Annual volatility: 0.2%
Sharpe Ratio: 52.73
-- Allocation --
{'SHYF': 21, 'SONO': 16, '4218.T': 12, '7575.T': 20, 'NEXN.L': 1, '2874.T': 19, '2331.T': 24, 'GEN': 8, 'XPP.L': 1, '9470.T': 20, 'NSSC': 6, '7272.T': 14, 'XRAY': 3, '7599.T': 17, 'CCK': 1, 'UNM': 2, 'PINE.L': 1, 'DDS': 1, 'DLTR': 1, 'STGW': 23, '3086.T': 10, 'PDCO': 3, 'MMSI': 1, 'CLMB': 2, '4619.T': 11, '9364.T': 4, 'PTEC.L': 1, '8818.T': 8, '3608.T': 15, '8154.T': 2, '8012.T': 5, 'LIN': 1, '8060.T': 3, 'CTSH': 1, '4301.T': 7, '5332.T': 3, 'VOD.L': 1, '7846.T': 2, '4151.T': 4, 'HBI': 18, '4568.T': 2, '7701.T': 3, '9869.T': 2, 'CON.DE': 1, '8173.T': 4, 'ISRG': 1, '2685.T': 3, '4062.T': 1, 'AVGO': 1, '7780.T': 5, '7936.T': 7, '9104.T': 2, 'SBH': 6, '1911.T': 2, '2154.T': 4, 'BK': 1, '7732.T': 5, 'CHEF': 2, '2120.T': 34, '2502.T': 1, '5988.T': 3, '6902.T': 3, '9260.T': 3, '9401.T': 2, '1973.T': 3, '7239.T': 4, '3148.T': 2, '9729.T': 3, '9409.T': 4, 'GBF.DE': 1, '8591.T': 2, 'ATGE': 1, '4549.T': 4,


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



#### Actual return rates

In [155]:
importlib.reload(mpt_utils)
#
mpt_utils.get_portfolio_real_return_rate(df_pct, df_forecast_12m, weights=weights_all_12m, months=12)

Portfolio real return rate:  -0.03%


#### Overview Table

In [156]:
importlib.reload(mpt_utils)
# Create overview
mpt_utils.generate_overview_table(allocations_12m, mu_12m, S_12m, df_pct)

Unnamed: 0,Share Count,Average Covariance,Average Returns,Return Last 12 Months,Return (Actual) Next 12 Months
SHYF,21,0.000000,5.37%,-51.56%,-18.16%
SONO,16,0.000000,14.40%,-33.04%,-11.84%
4218.T,12,0.000000,14.25%,9.86%,7.02%
7575.T,20,0.000085,15.62%,19.33%,5.04%
NEXN.L,1,0.000000,15.52%,-59.21%,92.90%
...,...,...,...,...,...
3289.T,2,0.000000,13.68%,20.64%,12.31%
7613.T,1,0.000000,13.52%,26.43%,-27.69%
7220.T,1,0.000000,13.96%,-14.78%,21.29%
7250.T,1,0.000000,13.94%,32.22%,-1.82%
