In [333]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
#
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
#
import importlib
import utilities.train_test as train_test
import utilities.comparison as comparison
import utilities.lstm_utils as lstm_utils

### Preparing Data

In [145]:
# Load the monthly-returns table with the 'Date' column as the index, then keep
# only the columns whose name does not start with 'Unnamed'.
df_time_series = (
    pd.read_csv('../../../data/df_monthly_returns_complete_percentage.csv', index_col='Date')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)

In [147]:
# 
# Keep a handle on the table exactly as loaded, then work with every value
# shifted down by 1 (presumably the file stores 1 + return — TODO confirm).
# NOTE: this cell reads and rebinds `df_time_series`, so running it a second
# time without reloading the data subtracts 1 again.
df_time_series_plus1 = df_time_series
df_time_series = df_time_series_plus1 - 1

### Normalisation

In [148]:
# Disabled code: the per-feature min-max normalisation below is wrapped in a
# string literal, so none of it runs. Because the string is the last expression
# of the cell, the notebook echoes it as the cell output.
''' 
df_ts_torch = torch.from_numpy(df_time_series.values)
# Reshape to (num_samples, num_features) for normalization
df_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)

# Calculate min and max per feature
df_min = df_ts_flat.min(dim=0, keepdim=True)[0]
df_max = df_ts_flat.max(dim=0, keepdim=True)[0]

# Apply Min-Max normalization
df_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)

# Reshape back to original shape
df_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)
'''


' \ndf_ts_torch = torch.from_numpy(df_time_series.values)\n# Reshape to (num_samples, num_features) for normalization\ndf_ts_flat = df_ts_torch.view(-1, df_ts_torch.shape[-1])  # Shape: (1000*300, 5)\n\n# Calculate min and max per feature\ndf_min = df_ts_flat.min(dim=0, keepdim=True)[0]\ndf_max = df_ts_flat.max(dim=0, keepdim=True)[0]\n\n# Apply Min-Max normalization\ndf_ts_normalised = (df_ts_flat - df_min) / (df_max - df_min)\n\n# Reshape back to original shape\ndf_time_series_torch = df_ts_normalised.view(df_ts_torch.shape)\n'

### LSTM Model

In [None]:
# Define LSTM model
class LSTMModel(nn.Module):
    """LSTM followed by a linear head that maps each hidden state to `output_size` values.

    Args:
        input_size: Number of features the LSTM receives per step. `forward`
            feeds the last axis of its input as the feature axis, so this must
            equal that axis' length.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per (sample, stock) pair.
            The head width follows this argument, so pass it explicitly when
            more than one value per stock is needed (e.g. 6 or 12 horizons).
        learning_rate: Accepted for interface compatibility; not used by the module.
        dropout: Dropout probability between stacked LSTM layers. Only applied
            when `num_layers > 1`, because `nn.LSTM` has no layer to apply it
            to otherwise (and warns if asked to).
    """

    def __init__(self, input_size, hidden_size=128, num_layers=1, output_size=1, learning_rate=0.001, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        # LSTM for time-series data (stock returns)
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            dropout=dropout if num_layers > 1 else 0.0,
                            batch_first=True)

        # FC layer for final prediction: one output per requested horizon step
        self.fc_final = nn.Linear(hidden_size, output_size)

    def forward(self, ts_batch):
        """Run the model on a batch of per-stock series.

        Args:
            ts_batch: Tensor whose first three axes are
                (batch_size, num_stocks, sequence_length).

        Returns:
            Tensor of shape (batch_size, num_stocks, output_size).
        """
        batch_size, num_stocks, sequence_length = ts_batch.shape[:3]

        # Collapse batch and stock axes into one. `reshape` (rather than `view`)
        # also accepts non-contiguous inputs such as permuted or sliced tensors.
        flat_series = ts_batch.reshape(batch_size * num_stocks, sequence_length)

        # nn.LSTM interprets a 2-D tensor as ONE unbatched sequence: each row
        # (one stock of one sample) is a recurrent step and the
        # `sequence_length` values are that step's features.
        # NOTE(review): confirm this is intended rather than recurring over the
        # time axis of each stock.
        lstm_out, _ = self.lstm(flat_series)

        # Restore the (batch, stock) layout before the linear head
        lstm_out = lstm_out.reshape(batch_size, num_stocks, self.hidden_size)

        return self.fc_final(lstm_out)

## 1 Month

In [343]:
importlib.reload(lstm_utils)

# 1-month horizon: windows of 12 input time points, 1 target time point
X_train_1m, X_test_1m, y_train_1m, y_test_1m = lstm_utils.split_train_test(
    df_time_series,
    [],
    in_seq_length=12,
    out_seq_length=1,
)

# Report the shape of every split
for label, split in (
    ("Shape of X_train:", X_train_1m),
    ("Shape of y_train:", y_train_1m),
    ("Shape of X_test:", X_test_1m),
    ("Shape of y_test:", y_test_1m),
):
    print(label, split.shape)

torch.Size([287, 1653, 12]) torch.Size([287, 1653, 1])
Shape of X_train: torch.Size([227, 1653, 12])
Shape of y_train: torch.Size([227, 1653, 1])
Shape of X_test: torch.Size([60, 1653, 12])
Shape of y_test: torch.Size([60, 1653, 1])


## 6 Months

In [342]:
importlib.reload(lstm_utils)

# 6-month horizon: windows of 12 input time points, 6 target time points
X_train_6m, X_test_6m, y_train_6m, y_test_6m = lstm_utils.split_train_test(
    df_time_series,
    [],
    in_seq_length=12,
    out_seq_length=6,
)

# Report the shape of every split
for label, split in (
    ("Shape of X_train:", X_train_6m),
    ("Shape of y_train:", y_train_6m),
    ("Shape of X_test:", X_test_6m),
    ("Shape of y_test:", y_test_6m),
):
    print(label, split.shape)

torch.Size([282, 1653, 12]) torch.Size([282, 1653, 6])
Shape of X_train: torch.Size([222, 1653, 12])
Shape of y_train: torch.Size([222, 1653, 6])
Shape of X_test: torch.Size([60, 1653, 12])
Shape of y_test: torch.Size([60, 1653, 6])


In [None]:
importlib.reload(lstm_utils)

# Model, Loss, Optimizer
# Sizes come straight from the 6-month tensors so the model always matches the
# data it is trained on, and the model is placed on the device the data lives
# on; no separately defined `in_seq_length` / `out_seq_length` / `device`
# variables are needed.
# NOTE(review): training against 6-column targets only works if LSTMModel's
# output width follows `output_size` — verify the head's width before running.
model_6m = LSTMModel(
    input_size=X_train_6m.shape[-1],
    output_size=y_train_6m.shape[-1],
).to(X_train_6m.device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
# The optimizer must track the parameters of the model that is actually trained
optimizer = optim.Adam(model_6m.parameters(), lr=0.001)

# Train and validate on the 6-month split; results keep the `_6m` suffix so they
# are not confused with the 12-month model trained further down.
model_6m, y_train_pred_6m, y_test_pred_6m = lstm_utils.lstm_train_validate(
    model_6m, optimizer, X_train_6m, X_test_6m, y_train_6m, y_test_6m
)

## 12 Months

### Split the data into training and testing sets

### Train-Test Split

In [344]:
importlib.reload(lstm_utils)

# 12-month horizon: windows of 12 input time points, 12 target time points
X_train_12m, X_test_12m, y_train_12m, y_test_12m = lstm_utils.split_train_test(df_time_series, [], in_seq_length=12, out_seq_length=12)

# The remaining cells of this section refer to the un-suffixed names, so bind
# them to the 12-month tensors here instead of relying on leftover kernel state.
X_train, X_test, y_train, y_test = X_train_12m, X_test_12m, y_train_12m, y_test_12m

# Check the shapes of the training and test data
print("Shape of X_train:", X_train_12m.shape)
print("Shape of y_train:", y_train_12m.shape)
print("Shape of X_test:", X_test_12m.shape)
print("Shape of y_test:", y_test_12m.shape)

torch.Size([276, 1653, 12]) torch.Size([276, 1653, 12])
Shape of X_train: torch.Size([216, 1653, 12])
Shape of y_train: torch.Size([216, 1653, 12])
Shape of X_test: torch.Size([60, 1653, 12])
Shape of y_test: torch.Size([60, 1653, 12])


In [346]:
# Alignment check on the entry at index 0 of the second axis: input window 50
# and target window 38 (12 windows earlier) print the same 12 values.
print(X_train[50, 0])
print(y_train[38, 0])

tensor([ 0.1100, -0.0200, -0.0200,  0.0400, -0.0100,  0.0100, -0.0500, -0.0100,
        -0.0400, -0.0700,  0.0100, -0.0300])
tensor([ 0.1100, -0.0200, -0.0200,  0.0400, -0.0100,  0.0100, -0.0500, -0.0100,
        -0.0400, -0.0700,  0.0100, -0.0300])


In [329]:
# Model, Loss, Optimizer
# Input/output widths and the device are taken from the 12-month split tensors,
# so this cell does not depend on `in_seq_length`, `out_seq_length` or `device`
# having been defined elsewhere in the kernel.
model = LSTMModel(
    input_size=X_train_12m.shape[-1],
    output_size=y_train_12m.shape[-1],
).to(X_train_12m.device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [330]:
importlib.reload(lstm_utils)

# Train and validate; the call hands back the model together with its
# predictions for the training and test inputs.
model, y_train_pred, y_test_pred = lstm_utils.lstm_train_validate(
    model,
    optimizer,
    X_train,
    X_test,
    y_train,
    y_test,
)

Epoch 1/1, Loss: 36.1096, Train RMSE: 6.1193, Test RMSE: 0.2769. 
Model training complete and saved.


In [298]:
# Last test window, selected with a one-element index list so the leading
# (batch) axis is kept with length 1.
last_month = X_test[[-1]]

In [303]:
# Inference only: run the model on the last test window with gradient tracking
# switched off, so no autograd graph is built for the prediction.
with torch.no_grad():
    last_month_pred = model(last_month)

In [312]:
# Actual values: last test window, last position along the final axis.
# NOTE(review): this rebinds `y_test_12m`, the same name the 12-month split cell
# uses for the full test-target tensor — consider a distinct name.
y_test_12m = y_test[-1, :, -1]
# Predicted values for that same window and position (index 0 selects the
# single window in the batch).
y_test_pred_12m = last_month_pred[0, :, -1]

tensor([ 0.0553,  0.0107,  0.0534,  ...,  0.0438, -0.0076, -0.0185])