In [2]:
# --- Run configuration -------------------------------------------------------
# Earlier run, kept for reference: MAIN_SEED=1243, BATCH_SIZE=16, EPOCH=30
# (hidden_dim, output_dim, num_layers and LR were the same as below).

# Reproducibility / batching
MAIN_SEED = 1000   # seed passed to set_seed()
BATCH_SIZE = 20    # samples per DataLoader batch

# Network shape
hidden_dim = 64    # LSTM hidden size
output_dim = 19    # 19 outputs (0-18 index)
num_layers = 2     # stacked LSTM layers

# Optimisation
LR = 1e-5          # learning rate given to the Adam optimizer
EPOCH = 33         # number of training epochs

In [3]:
import math
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

def set_seed(seed_value=42):
    """Seed every RNG used by this notebook and pin the cuDNN switches.

    Args:
        seed_value: Integer seed applied to Python's ``random``, NumPy and
            PyTorch (CPU, current GPU and all GPUs). Defaults to 42.
    """
    # Same seed for each library's global generator, in the original order.
    for reseed in (
        random.seed,                 # Python's random module
        np.random.seed,              # NumPy
        torch.manual_seed,           # PyTorch
        torch.cuda.manual_seed,      # GPU (if available)
        torch.cuda.manual_seed_all,  # multi-GPU
    ):
        reseed(seed_value)

    # Request deterministic cuDNN kernels, switch off its benchmark mode and
    # finally disable cuDNN altogether.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    cudnn.enabled = False
    
# Seed all RNGs once, up front, with the notebook-wide MAIN_SEED so the cells
# that follow start from a known random state.
set_seed(MAIN_SEED)



In [4]:
# New: split before scaling
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import torch
from torch.utils.data import DataLoader, TensorDataset

# Source table (tagged "ternary output" by the original author).
sheet_url = "final_train_data.csv"
df = pd.read_csv(sheet_url)

# Separate handles on two of the column groups, kept under "*_original" names.
df_ret_original = df.iloc[:, 1:20]
df_bm_original = df.iloc[:, 61:80]

# Five 19-column groups, addressed purely by column position.
df_ret = df.iloc[:, 1:20]
df_out = df.iloc[:, 21:40]   # becomes the target (y) below
df_m = df.iloc[:, 41:60]
df_bm = df.iloc[:, 61:80]
df_b = df.iloc[:, 81:100]

# Which (combination of) indicator to use as model input.
df_selected = df_m.copy()

# Row-order split, no shuffling: first 80% train, next 10% validation,
# final 10% test.
train_split_idx = math.floor(len(df_selected) * 0.8)
val_split_idx = math.floor(len(df_selected) * 0.9)

# Features
df_X_train = df_selected.iloc[:train_split_idx].copy()               # 0%  -> 80%
df_X_val = df_selected.iloc[train_split_idx:val_split_idx].copy()    # 80% -> 90%
df_X_test = df_selected.iloc[val_split_idx:].copy()                  # 90% -> 100%

# Targets, cut at exactly the same rows
df_y_train = df_out.iloc[:train_split_idx].copy()
df_y_val = df_out.iloc[train_split_idx:val_split_idx].copy()
df_y_test = df_out.iloc[val_split_idx:].copy()

# Fit the scaler on the training rows only, then reuse its statistics for the
# validation and test rows.
scaler = StandardScaler().fit(df_X_train)
X_train_scaled = scaler.transform(df_X_train)
X_val_scaled = scaler.transform(df_X_val)
X_test_scaled = scaler.transform(df_X_test)

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their aligned targets.

    Row ``i`` of ``df`` supplies the target of one sample; that sample's input
    is ``sequence_length`` rows spaced ``step`` rows apart and ending at row
    ``i`` itself (rows ``i - (sequence_length - 1) * step, ..., i - step, i``).
    The first ``(sequence_length - 1) * step`` rows lack enough history and
    therefore produce no sample.

    Args:
        df: 2-D array-like (despite the name it is indexed like a NumPy array)
            whose first ``n_features`` columns are inputs and whose remaining
            columns are targets.
        sequence_length: Number of time steps per input window.
        step: Row gap between consecutive time steps inside a window.
        n_features: Number of leading input columns. Defaults to 19, the value
            that used to be hard-coded here.

    Returns:
        ``(X, y)`` as float32 tensors shaped
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. If ``df`` is too short for a
        single window, ``n_samples`` is 0 and the trailing dimensions are kept.
    """
    data = np.asarray(df)
    warmup = (sequence_length - 1) * step          # rows without a full history
    n_samples = max(len(data) - warmup, 0)

    # window_rows[s, j] = row that feeds time step j of sample s.
    window_rows = np.arange(n_samples)[:, None] + np.arange(sequence_length)[None, :] * step

    X = data[window_rows, :n_features]
    # Target comes from the same row as the last time step of each window.
    y = data[warmup:warmup + n_samples, n_features:]

    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Put each split's targets next to its scaled features so a single array
# carries both, then cut the look-back windows from it.
df_selected_aug_train = np.hstack((X_train_scaled, df_y_train))
df_selected_aug_val = np.hstack((X_val_scaled, df_y_val))
df_selected_aug_test = np.hstack((X_test_scaled, df_y_test))

X_train, y_train = create_lstm_dataset(df_selected_aug_train)
X_val, y_val = create_lstm_dataset(df_selected_aug_val)
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

print("\nData shape after split")
for _caption, _tensor in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:", X_val),
    ("y_val:", y_val),
    ("X_test:", X_test),
    ("y_test:", y_test),
):
    print(_caption, _tensor.shape)

train_dataset, val_dataset, test_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
    TensorDataset(X_test, y_test),
)

# Only the training loader shuffles; validation and test keep row order.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

# len() of a DataLoader is its number of batches.
print("\n Dataset:")
for _caption, _loader in (
    ("train:", train_dataloader),
    ("val:", val_dataloader),
    ("test:", test_dataloader),
):
    print(_caption, len(_loader))


Data shape after split
X_train: torch.Size([3466, 12, 19])
y_train: torch.Size([3466, 19])
X_val: torch.Size([241, 12, 19])
y_val: torch.Size([241, 19])
X_test: torch.Size([241, 12, 19])
y_test: torch.Size([241, 19])

 Dataset:
train: 174
val: 13
test: 13


In [5]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer and a softmax over its outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: LSTM hidden-state size.
        output_dim: Length of the output vector.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,  # input is laid out (batch, time, feature)
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)  # normalise across the output_dim axis

    def forward(self, x):
        """Return softmax scores computed from the last time step of ``x``."""
        sequence_out, _state = self.lstm(x)
        final_step = sequence_out[:, -1, :]   # keep only the last time step
        logits = self.fc(final_step)
        return self.softmax(logits)

In [6]:
# Train the LSTM for EPOCH epochs, tracking the mean batch loss on the training
# and validation loaders, then save the weights and a loss plot.
input_dim = df_selected.shape[1]  # Feature size (number of selected input columns)

# One entry per epoch; plotted at the end of this cell.
train_losses = []
val_losses = []

# Initialize Model, Loss, and Optimizer
model = LSTMModel(input_dim, hidden_dim, output_dim, num_layers)
# NOTE(review): the model ends in a softmax, while the data-loading cell tags
# the CSV as "ternary output". Confirm that MSE against these targets is the
# intended objective -- the losses logged for this run barely move.
criterion = nn.MSELoss()  # Mean squared error between model output and target vector
optimizer = optim.Adam(model.parameters(), lr=LR)

# Training Loop
for epoch in range(EPOCH):
    model.train()
    total_train_loss = 0
    for inputs, labels_one_hot in train_dataloader:
        optimizer.zero_grad()

        outputs = model(inputs)  # Softmax output of the model

        loss = criterion(outputs, labels_one_hot)  # MSE between model output and the batch targets
        total_train_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean of per-batch losses: every batch counts equally, including a
    # smaller final batch.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)

    # Validation loop (no gradient tracking, no parameter updates)
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_val_loss += loss.item()

    avg_val_loss = total_val_loss / len(val_dataloader)
    val_losses.append(avg_val_loss)

    print(f"Epoch {epoch + 1}/{EPOCH}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

# Save the trained model (weights only, via state_dict)
torch.save(model.state_dict(), "lstm_model.pth")
print("Model training complete and saved!")

# Plot training & validation loss
plt.figure(figsize=(8, 6))
plt.plot(range(1, EPOCH + 1), train_losses, label="Train Loss", marker="o")
plt.plot(range(1, EPOCH + 1), val_losses, label="Validation Loss", marker="o")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss plot")
plt.legend()
plt.savefig("./loss_plot.png")
# The figure is cleared right after saving, so the cell output shows an empty
# figure ("0 Axes"); the plot itself lives in loss_plot.png.
plt.clf()

Epoch 1/33, Train Loss: 0.3055, Val Loss: 0.3054
Epoch 2/33, Train Loss: 0.3054, Val Loss: 0.3054
Epoch 3/33, Train Loss: 0.3054, Val Loss: 0.3054
Epoch 4/33, Train Loss: 0.3054, Val Loss: 0.3054
Epoch 5/33, Train Loss: 0.3054, Val Loss: 0.3054
Epoch 6/33, Train Loss: 0.3053, Val Loss: 0.3054
Epoch 7/33, Train Loss: 0.3053, Val Loss: 0.3053
Epoch 8/33, Train Loss: 0.3053, Val Loss: 0.3053
Epoch 9/33, Train Loss: 0.3052, Val Loss: 0.3053
Epoch 10/33, Train Loss: 0.3052, Val Loss: 0.3053
Epoch 11/33, Train Loss: 0.3051, Val Loss: 0.3053
Epoch 12/33, Train Loss: 0.3051, Val Loss: 0.3053
Epoch 13/33, Train Loss: 0.3050, Val Loss: 0.3052
Epoch 14/33, Train Loss: 0.3050, Val Loss: 0.3052
Epoch 15/33, Train Loss: 0.3049, Val Loss: 0.3052
Epoch 16/33, Train Loss: 0.3048, Val Loss: 0.3052
Epoch 17/33, Train Loss: 0.3047, Val Loss: 0.3051
Epoch 18/33, Train Loss: 0.3046, Val Loss: 0.3051
Epoch 19/33, Train Loss: 0.3045, Val Loss: 0.3051
Epoch 20/33, Train Loss: 0.3045, Val Loss: 0.3051
Epoch 21/

<Figure size 800x600 with 0 Axes>

In [7]:
def transform_array(arr, k=4):
    """Turn each row of scores into a long/short position vector.

    Within every row the ``k`` largest entries are marked ``1``, the ``k``
    smallest entries are marked ``-1`` and all others stay ``0``.

    Args:
        arr: 2-D NumPy array of scores, one row per sample.
        k: How many entries to flag at each end of the ranking. Defaults to 4,
            the value that used to be hard-coded here.

    Returns:
        Array with the same shape and dtype as ``arr`` containing -1, 0 or 1.
        If a row has fewer than ``2 * k`` columns the two groups overlap and
        the ``1`` label wins because it is written last.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    result = np.zeros_like(arr)  # start with every position flat (0)
    if k == 0:
        # order[:, -0:] would select every column, so "flag nothing" is
        # handled explicitly.
        return result

    order = np.argsort(arr, axis=1)  # column indices, ascending by value per row

    # Assign -1 to the k lowest values, then 1 to the k highest values.
    np.put_along_axis(result, order[:, :k], -1, axis=1)
    np.put_along_axis(result, order[:, -k:], 1, axis=1)

    return result
# Score the test windows batch by batch; gradients are not needed here.
model.eval()
output_list = []
with torch.no_grad():
    for batch_inputs, _ in test_dataloader:
        # Softmax output for this batch, converted to a NumPy array.
        output_list.append(model(batch_inputs).numpy())

# Stack the per-batch outputs row-wise and convert them to -1 / 0 / 1 positions.
y_pred = np.concatenate(output_list, axis=0)
y_pos_pred = transform_array(y_pred)

In [8]:
def _evaluate_strategy(label, signals, returns, csv_path, n_positions=4):
    """Back-test one signal matrix against realised returns and export it.

    Args:
        label: Heading printed above the summary figures.
        signals: DataFrame of position weights with as many rows and columns
            as ``returns``; its labels are overwritten with those of ``returns``.
        returns: DataFrame of realised returns for the evaluated rows.
        csv_path: Destination of the exported result table.
        n_positions: Divisor that turns the summed return into the portfolio
            return (4 in every call below).

    Returns:
        ``(signals, strategy_returns, df_result)`` -- the relabelled signals,
        the per-column strategy returns plus portfolio columns, and the
        exported table.
    """
    signals = signals.copy()
    signals.columns = returns.columns
    signals.index = returns.index

    # The signal from the previous row is applied to the current row's return.
    # The first row has no previous signal (NaN); sum(axis=1) skips NaN, so
    # that row contributes a portfolio return of 0.
    strategy_returns = signals.shift(1) * returns

    strategy_returns["Portfolio_sum"] = strategy_returns.sum(axis=1)  # Aggregate portfolio returns
    strategy_returns["Portfolio"] = strategy_returns["Portfolio_sum"] / n_positions
    strategy_returns["Cumulative Returns"] = (1 + strategy_returns["Portfolio"]).cumprod()

    print(label)
    print("(Monthly)  Avg. Ret. =", strategy_returns["Portfolio"].mean())
    print("(Annual) Avg. Ret. =", 12 * strategy_returns["Portfolio"].mean())
    print()

    # Export: returns | signals | strategy returns + portfolio columns
    df_result = pd.concat([returns, signals, strategy_returns], axis=1)
    col_names = returns.columns.tolist()
    # col[:-4] drops the last four characters of each return column name
    # before the new suffix is appended.
    df_result.columns = col_names + \
                        [col[:-4] + "_signal_pred" for col in col_names] + \
                        [col[:-4] + "_strat_ret" for col in col_names] + \
                        ["Portfolio_sum", "Portfolio", "Cumulative Returns"]
    df_result.to_csv(csv_path)

    return signals, strategy_returns, df_result


# Realised returns aligned with the test predictions. 220 = (12 - 1) * 20 is
# the number of leading rows of a split for which create_lstm_dataset (default
# sequence_length / step) emits no sample.
df_ret_org_test = df_ret_original.iloc[220:, :].iloc[val_split_idx:, :]
base_col_names = df_ret_org_test.columns.tolist()

_positions = pd.DataFrame(y_pos_pred)
_strategies = [
    # H4: keep the +1 positions only (the -1 flags become 0).
    ("H4 strategy", _positions.replace(-1, 0), "result_test_ret_h4.csv"),
    # L4: keep the -1 positions only, expressed as +1 weights.
    ("L4 strategy", _positions.replace(1, 0).replace(-1, 1), "result_test_ret_l4.csv"),
    # H4-L4: +1 and -1 positions together.
    ("H4-L4 strategy", _positions.copy(), "result_test_h4_l4.csv"),
]

# After the loop, signals / strategy_returns / df_result hold the H4-L4 run,
# exactly as they did when the three sections were written out by hand.
for _label, _signals, _csv_path in _strategies:
    signals, strategy_returns, df_result = _evaluate_strategy(
        _label, _signals, df_ret_org_test, _csv_path
    )

H4 strategy
(Monthly)  Avg. Ret. = 0.015673106429460577
(Annual) Avg. Ret. = 0.18807727715352693

L4 strategy
(Monthly)  Avg. Ret. = -0.00463820608713693
(Annual) Avg. Ret. = -0.05565847304564316

H4-L4 strategy
(Monthly)  Avg. Ret. = 0.020311312516597506
(Annual) Avg. Ret. = 0.24373575019917007



In [9]:
# Summary statistics of the realised test-period returns used by the back-test.
df_ret_org_test.describe()

Unnamed: 0,CL_ret,NG_ret,HO_ret,XB_ret,LP_ret,LA_ret,LN_ret,GC_ret,SI_ret,W_ret,C_ret,S_ret,LC_ret,LH_ret,SB_ret,CT_ret,CC_ret,KC_ret,JO_ret
count,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0,241.0
mean,0.007443,-0.010101,-0.006546,0.001896,0.003898,0.010244,-0.005496,0.021707,0.02428,-0.015118,-0.002872,-0.011849,0.009655,0.008404,-0.01349,-0.024397,0.12086,0.056816,0.042245
std,0.058577,0.127635,0.059533,0.056811,0.054561,0.058845,0.072603,0.035171,0.071047,0.079227,0.052405,0.046864,0.031532,0.06229,0.077412,0.056507,0.217642,0.095282,0.079833
min,-0.158072,-0.354489,-0.149623,-0.161562,-0.118035,-0.127868,-0.20592,-0.058571,-0.131231,-0.222068,-0.157497,-0.10931,-0.068152,-0.201769,-0.138149,-0.151586,-0.358869,-0.170377,-0.106312
25%,-0.035473,-0.07841,-0.050436,-0.040016,-0.036929,-0.024389,-0.04983,-0.004595,-0.028555,-0.059081,-0.032961,-0.051493,-0.011393,-0.033107,-0.064417,-0.056034,-0.023339,-0.008874,-0.010953
50%,0.007675,-0.00897,-0.010734,0.002953,0.003101,0.007113,-0.015457,0.023617,0.024505,-0.02459,0.005233,-0.014035,0.01342,0.018483,-0.029224,-0.029403,0.063809,0.046133,0.024258
75%,0.046238,0.065442,0.027627,0.042136,0.043373,0.045198,0.049268,0.048461,0.07791,0.034096,0.032778,0.027651,0.030984,0.055639,0.016595,0.001392,0.298912,0.106293,0.075232
max,0.141168,0.370601,0.155627,0.135918,0.139266,0.171267,0.148454,0.101514,0.208186,0.210573,0.111741,0.104242,0.092862,0.222939,0.251513,0.174529,0.642142,0.354548,0.324093


In [35]:
# NO
# # LSTM Grid Search on Commodity Return Data (Colab Compatible)

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import numpy as np
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from torch.utils.data import DataLoader, TensorDataset
# import matplotlib.pyplot as plt

# # Run after uploading the CSV
# df = pd.read_csv("final_train_data.csv")
# df_ret_original = df.iloc[:, 1:20]
# df_out = df.iloc[:, 21:40]
# df_m = df.iloc[:, 41:60]
# df_selected = df_m.copy()

# train_split_idx = int(df_selected.shape[0] * 0.8)
# val_split_idx = int(df_selected.shape[0] * 0.9)

# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(df_selected.iloc[:train_split_idx])
# X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])
# df_y_train = df_out.iloc[:train_split_idx]
# df_y_test = df_out.iloc[val_split_idx:]

# def create_lstm_dataset(df, sequence_length=12, step=20):
#     X, y = [], []
#     for i in range((sequence_length - 1) * step, len(df)):
#         x_i = []
#         for j in range(sequence_length):
#             index = i - (sequence_length - 1 - j) * step
#             if index < 0:
#                 break
#             x_i.append(df[index, :19])
#         if len(x_i) == sequence_length:
#             X.append(np.array(x_i))
#             y.append(df[i, 19:])
#     return torch.tensor(np.array(X), dtype=torch.float32), torch.tensor(np.array(y), dtype=torch.float32)

# df_selected_aug_train = np.hstack([X_train_scaled, df_y_train])
# df_selected_aug_test = np.hstack([X_test_scaled, df_y_test])
# X_train, y_train = create_lstm_dataset(df_selected_aug_train)
# X_test, y_test = create_lstm_dataset(df_selected_aug_test)
# df_ret_org_test = df_ret_original.iloc[220:,:].iloc[val_split_idx:, :]

# class LSTMModel(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
#         super(LSTMModel, self).__init__()
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_dim, output_dim)
#         self.softmax = nn.Softmax(dim=1)
#     def forward(self, x):
#         lstm_out, _ = self.lstm(x)
#         out = self.fc(lstm_out[:, -1, :])
#         return self.softmax(out)

# def transform_array(arr):
#     result = np.zeros_like(arr)
#     sorted_indices = np.argsort(arr, axis=1)
#     lowest_indices = sorted_indices[:, :4]
#     highest_indices = sorted_indices[:, -4:]
#     np.put_along_axis(result, lowest_indices, -1, axis=1)
#     np.put_along_axis(result, highest_indices, 1, axis=1)
#     return result

# # Experiment combinations
# # param_grid = [(16, 30), (32, 30)]  # can be extended if needed
# # results = []
# import itertools

# batch_sizes = [16, 18, 20, 24, 32]
# epochs = [30, 32, 33, 35]
# param_grid = list(itertools.product(batch_sizes, epochs))  # generate every combination
# results = []

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# for batch_size, epoch in param_grid:
#     torch.manual_seed(1243)
#     model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=64, output_dim=19, num_layers=2).to(device)
#     criterion = nn.MSELoss()
#     optimizer = optim.Adam(model.parameters(), lr=0.00001)

#     train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

#     for ep in range(epoch):
#         model.train()
#         for inputs, labels in train_loader:
#             inputs, labels = inputs.to(device), labels.to(device)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()

#     model.eval()
#     output_list = []
#     with torch.no_grad():
#         for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
#             inputs = inputs.to(device)
#             outputs = model(inputs)
#             output_list.append(outputs.cpu().numpy())

#     y_pred = np.vstack(output_list)
#     y_pos_pred = transform_array(y_pred)
#     signals = pd.DataFrame(y_pos_pred)
#     signals.columns = df_ret_org_test.columns
#     signals.index = df_ret_org_test.index

#     strat_returns = (signals.shift(1) * df_ret_org_test)
#     strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
#     monthly_ret = strat_returns["Portfolio"].mean()
#     annual_ret = monthly_ret * 12

#     results.append({
#         "BATCH_SIZE": batch_size,
#         "EPOCH": epoch,
#         "Monthly": round(monthly_ret, 6),
#         "Annual": round(annual_ret, 6)
#     })

# # Print results
# results_df = pd.DataFrame(results)
# print(results_df)

In [42]:
# LSTM Grid Search (Extended: all hyperparameters tunable)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

# -----------------------------
# 1. Set seed function
# -----------------------------
MAIN_SEED = 999


def set_seed(seed_value):
    """Reset the Python, NumPy and PyTorch RNGs to ``seed_value`` and pin cuDNN.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    import random  # local import: this cell's import list does not include it

    for reseed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        reseed(seed_value)

    # Deterministic cuDNN kernels, benchmark mode off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# -----------------------------
# 2. Load and preprocess data
# -----------------------------
df = pd.read_csv("final_train_data.csv")

# 19-column groups addressed by position.
df_ret_original = df.iloc[:, 1:20]
df_out = df.iloc[:, 21:40]   # targets
df_m = df.iloc[:, 41:60]     # inputs
df_selected = df_m.copy()

# Row-order cut points at 80% and 90%. Rows between the two are not used in
# this cell: only the first 80% (train) and the last 10% (test) are kept.
train_split_idx = int(len(df_selected) * 0.8)
val_split_idx = int(len(df_selected) * 0.9)

# Scaling statistics come from the training rows only.
scaler = StandardScaler().fit(df_selected.iloc[:train_split_idx])
X_train_scaled = scaler.transform(df_selected.iloc[:train_split_idx])
X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])

df_y_train = df_out.iloc[:train_split_idx]
df_y_test = df_out.iloc[val_split_idx:]

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their aligned targets.

    Row ``i`` of ``df`` supplies the target of one sample; that sample's input
    is ``sequence_length`` rows spaced ``step`` rows apart and ending at row
    ``i`` itself. The first ``(sequence_length - 1) * step`` rows lack enough
    history and therefore produce no sample.

    Args:
        df: 2-D array-like (indexed like a NumPy array) whose first
            ``n_features`` columns are inputs and whose remaining columns are
            targets.
        sequence_length: Number of time steps per input window.
        step: Row gap between consecutive time steps inside a window.
        n_features: Number of leading input columns. Defaults to 19, the value
            that used to be hard-coded here.

    Returns:
        ``(X, y)`` as float32 tensors shaped
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. If ``df`` is too short for a
        single window, ``n_samples`` is 0 and the trailing dimensions are kept.
    """
    data = np.asarray(df)
    warmup = (sequence_length - 1) * step          # rows without a full history
    n_samples = max(len(data) - warmup, 0)

    # window_rows[s, j] = row that feeds time step j of sample s.
    window_rows = np.arange(n_samples)[:, None] + np.arange(sequence_length)[None, :] * step

    X = data[window_rows, :n_features]
    # Target comes from the same row as the last time step of each window.
    y = data[warmup:warmup + n_samples, n_features:]

    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Train windows: scaled features with their targets appended column-wise.
df_selected_aug_train = np.hstack((X_train_scaled, df_y_train))
X_train, y_train = create_lstm_dataset(df_selected_aug_train)

# Test windows, built the same way.
df_selected_aug_test = np.hstack((X_test_scaled, df_y_test))
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

# Realised returns aligned with the test windows. 220 = (12 - 1) * 20 is the
# number of leading rows for which create_lstm_dataset (default arguments)
# emits no sample.
df_ret_org_test = df_ret_original.iloc[220:].iloc[val_split_idx:]

# -----------------------------
# 3. LSTM model definition
# -----------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer and a softmax over its outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: LSTM hidden-state size.
        output_dim: Length of the output vector.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,  # input is laid out (batch, time, feature)
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax scores computed from the last time step of ``x``."""
        sequence_out, _state = self.lstm(x)
        logits = self.fc(sequence_out[:, -1, :])
        return self.softmax(logits)

# -----------------------------
# 4. Transformation for top/bottom 4
# -----------------------------
def transform_array(arr, k=4):
    """Turn each row of scores into a long/short position vector.

    Within every row the ``k`` largest entries are marked ``1``, the ``k``
    smallest entries are marked ``-1`` and all others stay ``0``.

    Args:
        arr: 2-D NumPy array of scores, one row per sample.
        k: How many entries to flag at each end of the ranking. Defaults to 4,
            the value that used to be hard-coded here.

    Returns:
        Array with the same shape and dtype as ``arr`` containing -1, 0 or 1.
        If a row has fewer than ``2 * k`` columns the two groups overlap and
        the ``1`` label wins because it is written last.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    result = np.zeros_like(arr)
    if k == 0:
        # order[:, -0:] would select every column, so "flag nothing" is
        # handled explicitly.
        return result

    order = np.argsort(arr, axis=1)  # column indices, ascending by value per row
    np.put_along_axis(result, order[:, :k], -1, axis=1)
    np.put_along_axis(result, order[:, -k:], 1, axis=1)
    return result

# -----------------------------
# 5. Extended Grid Search
# -----------------------------
# Hyperparameter values to combine; itertools.product below yields
# 2 * 2 * 2 * 2 * 2 = 32 configurations.
batch_sizes = [16, 20]
epochs = [30, 33]
hidden_dims = [64, 128]
learning_rates = [0.00001, 0.0001]
num_layers_list = [1, 2]

param_grid = list(itertools.product(batch_sizes, epochs, hidden_dims, learning_rates, num_layers_list))

results = []  # one summary dict per configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): the loop variables `epoch`, `hidden_dim` and `num_layers`
# share their names with settings/variables used by the single-run cells
# earlier in the notebook, so running this cell overwrites them.
# NOTE(review): every configuration is scored on the test split (X_test /
# df_ret_org_test); this cell builds no validation split, so the top row of
# the ranking is chosen with knowledge of the test data.
for batch_size, epoch, hidden_dim, lr, num_layers in param_grid:
    print(f"Running BS={batch_size}, EP={epoch}, HD={hidden_dim}, LR={lr}, NL={num_layers}")
    # Re-seed before each configuration so every run starts from the same RNG state.
    set_seed(MAIN_SEED)

    model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=hidden_dim, output_dim=19, num_layers=num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    # Plain training loop; no per-epoch loss is recorded here.
    for ep in range(epoch):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on the test windows, in order, without gradients.
    model.eval()
    output_list = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            output_list.append(outputs.cpu().numpy())

    # Model outputs -> -1 / 0 / 1 positions, labelled like the realised returns.
    y_pred = np.vstack(output_list)
    y_pos_pred = transform_array(y_pred)
    signals = pd.DataFrame(y_pos_pred)
    signals.columns = df_ret_org_test.columns
    signals.index = df_ret_org_test.index

    # The previous row's positions are applied to the current row's returns;
    # the +1 and -1 positions are used together, and the summed return is
    # divided by 4.
    strat_returns = (signals.shift(1) * df_ret_org_test)
    strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
    monthly_ret = strat_returns["Portfolio"].mean()
    annual_ret = monthly_ret * 12

    results.append({
        "BATCH_SIZE": batch_size,
        "EPOCH": epoch,
        "hidden_dim": hidden_dim,
        "LR": lr,
        "num_layers": num_layers,
        "Monthly": round(monthly_ret, 6),
        "Annual": round(annual_ret, 6)
    })

# -----------------------------
# 6. Save and print results
# -----------------------------
# The printed table is sorted by "Annual" (best first); the CSV keeps the
# original grid order.
results_df = pd.DataFrame(results)
print("\nExtended Grid Search Results:")
print(results_df.sort_values(by="Annual", ascending=False))
results_df.to_csv("extended_grid_search_results.csv", index=False)

Running BS=16, EP=30, HD=64, LR=1e-05, NL=1
Running BS=16, EP=30, HD=64, LR=1e-05, NL=2
Running BS=16, EP=30, HD=64, LR=0.0001, NL=1
Running BS=16, EP=30, HD=64, LR=0.0001, NL=2
Running BS=16, EP=30, HD=128, LR=1e-05, NL=1
Running BS=16, EP=30, HD=128, LR=1e-05, NL=2
Running BS=16, EP=30, HD=128, LR=0.0001, NL=1
Running BS=16, EP=30, HD=128, LR=0.0001, NL=2
Running BS=16, EP=33, HD=64, LR=1e-05, NL=1
Running BS=16, EP=33, HD=64, LR=1e-05, NL=2
Running BS=16, EP=33, HD=64, LR=0.0001, NL=1
Running BS=16, EP=33, HD=64, LR=0.0001, NL=2
Running BS=16, EP=33, HD=128, LR=1e-05, NL=1
Running BS=16, EP=33, HD=128, LR=1e-05, NL=2
Running BS=16, EP=33, HD=128, LR=0.0001, NL=1
Running BS=16, EP=33, HD=128, LR=0.0001, NL=2
Running BS=20, EP=30, HD=64, LR=1e-05, NL=1
Running BS=20, EP=30, HD=64, LR=1e-05, NL=2
Running BS=20, EP=30, HD=64, LR=0.0001, NL=1
Running BS=20, EP=30, HD=64, LR=0.0001, NL=2
Running BS=20, EP=30, HD=128, LR=1e-05, NL=1
Running BS=20, EP=30, HD=128, LR=1e-05, NL=2
Running BS=2

In [38]:
# LSTM Grid Search (Extended: all hyperparameters tunable)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

# -----------------------------
# 1. Set seed function
# -----------------------------
MAIN_SEED = 1342


def set_seed(seed_value):
    """Reset the Python, NumPy and PyTorch RNGs to ``seed_value`` and pin cuDNN.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    import random  # local import: this cell's import list does not include it

    for reseed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        reseed(seed_value)

    # Deterministic cuDNN kernels, benchmark mode off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# -----------------------------
# 2. Load and preprocess data
# -----------------------------
df = pd.read_csv("final_train_data.csv")

# 19-column groups addressed by position.
df_ret_original = df.iloc[:, 1:20]
df_out = df.iloc[:, 21:40]   # targets
df_m = df.iloc[:, 41:60]     # inputs
df_selected = df_m.copy()

# Row-order cut points at 80% and 90%. Rows between the two are not used in
# this cell: only the first 80% (train) and the last 10% (test) are kept.
train_split_idx = int(len(df_selected) * 0.8)
val_split_idx = int(len(df_selected) * 0.9)

# Scaling statistics come from the training rows only.
scaler = StandardScaler().fit(df_selected.iloc[:train_split_idx])
X_train_scaled = scaler.transform(df_selected.iloc[:train_split_idx])
X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])

df_y_train = df_out.iloc[:train_split_idx]
df_y_test = df_out.iloc[val_split_idx:]

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their aligned targets.

    Row ``i`` of ``df`` supplies the target of one sample; that sample's input
    is ``sequence_length`` rows spaced ``step`` rows apart and ending at row
    ``i`` itself. The first ``(sequence_length - 1) * step`` rows lack enough
    history and therefore produce no sample.

    Args:
        df: 2-D array-like (indexed like a NumPy array) whose first
            ``n_features`` columns are inputs and whose remaining columns are
            targets.
        sequence_length: Number of time steps per input window.
        step: Row gap between consecutive time steps inside a window.
        n_features: Number of leading input columns. Defaults to 19, the value
            that used to be hard-coded here.

    Returns:
        ``(X, y)`` as float32 tensors shaped
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. If ``df`` is too short for a
        single window, ``n_samples`` is 0 and the trailing dimensions are kept.
    """
    data = np.asarray(df)
    warmup = (sequence_length - 1) * step          # rows without a full history
    n_samples = max(len(data) - warmup, 0)

    # window_rows[s, j] = row that feeds time step j of sample s.
    window_rows = np.arange(n_samples)[:, None] + np.arange(sequence_length)[None, :] * step

    X = data[window_rows, :n_features]
    # Target comes from the same row as the last time step of each window.
    y = data[warmup:warmup + n_samples, n_features:]

    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Train windows: scaled features with their targets appended column-wise.
df_selected_aug_train = np.hstack((X_train_scaled, df_y_train))
X_train, y_train = create_lstm_dataset(df_selected_aug_train)

# Test windows, built the same way.
df_selected_aug_test = np.hstack((X_test_scaled, df_y_test))
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

# Realised returns aligned with the test windows. 220 = (12 - 1) * 20 is the
# number of leading rows for which create_lstm_dataset (default arguments)
# emits no sample.
df_ret_org_test = df_ret_original.iloc[220:].iloc[val_split_idx:]

# -----------------------------
# 3. LSTM model definition
# -----------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer and a softmax over its outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: LSTM hidden-state size.
        output_dim: Length of the output vector.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,  # input is laid out (batch, time, feature)
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax scores computed from the last time step of ``x``."""
        sequence_out, _state = self.lstm(x)
        logits = self.fc(sequence_out[:, -1, :])
        return self.softmax(logits)

# -----------------------------
# 4. Transformation for top/bottom 4
# -----------------------------
def transform_array(arr, k=4):
    """Turn each row of scores into a long/short position vector.

    Within every row the ``k`` largest entries are marked ``1``, the ``k``
    smallest entries are marked ``-1`` and all others stay ``0``.

    Args:
        arr: 2-D NumPy array of scores, one row per sample.
        k: How many entries to flag at each end of the ranking. Defaults to 4,
            the value that used to be hard-coded here.

    Returns:
        Array with the same shape and dtype as ``arr`` containing -1, 0 or 1.
        If a row has fewer than ``2 * k`` columns the two groups overlap and
        the ``1`` label wins because it is written last.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    result = np.zeros_like(arr)
    if k == 0:
        # order[:, -0:] would select every column, so "flag nothing" is
        # handled explicitly.
        return result

    order = np.argsort(arr, axis=1)  # column indices, ascending by value per row
    np.put_along_axis(result, order[:, :k], -1, axis=1)
    np.put_along_axis(result, order[:, -k:], 1, axis=1)
    return result

# -----------------------------
# 5. Extended Grid Search
# -----------------------------
# Hyperparameter values to combine; itertools.product below yields
# 2 * 2 * 2 * 2 * 2 = 32 configurations.
batch_sizes = [16, 20]
epochs = [30, 33]
hidden_dims = [64, 128]
learning_rates = [0.00001, 0.0001]
num_layers_list = [1, 2]

param_grid = list(itertools.product(batch_sizes, epochs, hidden_dims, learning_rates, num_layers_list))

results = []  # one summary dict per configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): the loop variables `epoch`, `hidden_dim` and `num_layers`
# share their names with settings/variables used by the single-run cells
# earlier in the notebook, so running this cell overwrites them.
# NOTE(review): every configuration is scored on the test split (X_test /
# df_ret_org_test); this cell builds no validation split, so the top row of
# the ranking is chosen with knowledge of the test data.
for batch_size, epoch, hidden_dim, lr, num_layers in param_grid:
    print(f"Running BS={batch_size}, EP={epoch}, HD={hidden_dim}, LR={lr}, NL={num_layers}")
    # Re-seed before each configuration so every run starts from the same RNG state.
    set_seed(MAIN_SEED)

    model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=hidden_dim, output_dim=19, num_layers=num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    # Plain training loop; no per-epoch loss is recorded here.
    for ep in range(epoch):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on the test windows, in order, without gradients.
    model.eval()
    output_list = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            output_list.append(outputs.cpu().numpy())

    # Model outputs -> -1 / 0 / 1 positions, labelled like the realised returns.
    y_pred = np.vstack(output_list)
    y_pos_pred = transform_array(y_pred)
    signals = pd.DataFrame(y_pos_pred)
    signals.columns = df_ret_org_test.columns
    signals.index = df_ret_org_test.index

    # The previous row's positions are applied to the current row's returns;
    # the +1 and -1 positions are used together, and the summed return is
    # divided by 4.
    strat_returns = (signals.shift(1) * df_ret_org_test)
    strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
    monthly_ret = strat_returns["Portfolio"].mean()
    annual_ret = monthly_ret * 12

    results.append({
        "BATCH_SIZE": batch_size,
        "EPOCH": epoch,
        "hidden_dim": hidden_dim,
        "LR": lr,
        "num_layers": num_layers,
        "Monthly": round(monthly_ret, 6),
        "Annual": round(annual_ret, 6)
    })

# -----------------------------
# 6. Save and print results
# -----------------------------
# The printed table is sorted by "Annual" (best first); the CSV keeps the
# original grid order.
results_df = pd.DataFrame(results)
print("\nExtended Grid Search Results:")
print(results_df.sort_values(by="Annual", ascending=False))
results_df.to_csv("extended_grid_search_results1.csv", index=False)

Running BS=16, EP=30, HD=64, LR=1e-05, NL=1
Running BS=16, EP=30, HD=64, LR=1e-05, NL=2
Running BS=16, EP=30, HD=64, LR=0.0001, NL=1
Running BS=16, EP=30, HD=64, LR=0.0001, NL=2
Running BS=16, EP=30, HD=128, LR=1e-05, NL=1
Running BS=16, EP=30, HD=128, LR=1e-05, NL=2
Running BS=16, EP=30, HD=128, LR=0.0001, NL=1
Running BS=16, EP=30, HD=128, LR=0.0001, NL=2
Running BS=16, EP=33, HD=64, LR=1e-05, NL=1
Running BS=16, EP=33, HD=64, LR=1e-05, NL=2
Running BS=16, EP=33, HD=64, LR=0.0001, NL=1
Running BS=16, EP=33, HD=64, LR=0.0001, NL=2
Running BS=16, EP=33, HD=128, LR=1e-05, NL=1
Running BS=16, EP=33, HD=128, LR=1e-05, NL=2
Running BS=16, EP=33, HD=128, LR=0.0001, NL=1
Running BS=16, EP=33, HD=128, LR=0.0001, NL=2
Running BS=20, EP=30, HD=64, LR=1e-05, NL=1
Running BS=20, EP=30, HD=64, LR=1e-05, NL=2
Running BS=20, EP=30, HD=64, LR=0.0001, NL=1
Running BS=20, EP=30, HD=64, LR=0.0001, NL=2
Running BS=20, EP=30, HD=128, LR=1e-05, NL=1
Running BS=20, EP=30, HD=128, LR=1e-05, NL=2
Running BS=2

In [39]:
# LSTM Grid Search (Extended: all hyperparameters tunable)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

# -----------------------------
# 1. Set seed function
# -----------------------------
# Seed applied before every grid-search run in this cell.
MAIN_SEED = 2000

def set_seed(seed_value):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and pin cuDNN settings.

    Args:
        seed_value: Integer seed handed unchanged to every generator.
    """
    import random

    # Same generators, in the same order: stdlib, NumPy, torch CPU, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed_value)

    # Request deterministic cuDNN kernels and turn off the auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

# -----------------------------
# 2. Load and preprocess data
# -----------------------------
df = pd.read_csv("final_train_data.csv")

# Column groups are picked by position in the CSV.
df_ret_original = df.iloc[:, 1:20]   # multiplied with the trading signals in the backtest below
df_out = df.iloc[:, 21:40]           # training targets
df_m = df.iloc[:, 41:60]             # model inputs
df_selected = df_m.copy()

# Row cut points at 80% and 90% of the table. Rows between the two cut points
# are not used anywhere in this cell.
train_split_idx, val_split_idx = (
    int(df_selected.shape[0] * fraction) for fraction in (0.8, 0.9)
)

# Targets are sliced with the same row ranges as the features.
df_y_train, df_y_test = df_out.iloc[:train_split_idx], df_out.iloc[val_split_idx:]

# The scaler is fitted on the training rows only and then reused for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(df_selected.iloc[:train_split_idx])
X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their targets from one 2-D table.

    Every row ``i`` with at least ``(sequence_length - 1) * step`` rows before
    it yields one sample: the feature columns of rows
    ``i - (sequence_length - 1) * step, ..., i - step, i`` as the input window
    and the remaining columns of row ``i`` as the target.

    Args:
        df: 2-D array-like (despite the name, callers pass a NumPy array) whose
            first ``n_features`` columns are inputs and whose remaining
            columns are targets.
        sequence_length: Number of time steps per window.
        step: Row distance between consecutive time steps of a window.
        n_features: Number of leading columns treated as inputs.

    Returns:
        ``(X, y)`` float32 tensors of shape
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. ``n_samples`` is 0 when the
        table has no more than ``(sequence_length - 1) * step`` rows.
    """
    data = np.asarray(df)
    lookback = (sequence_length - 1) * step

    # Row index of the last (most recent) time step of every window.
    end_rows = np.arange(lookback, len(data))
    # Position of each time step relative to the end row: -lookback, ..., -step, 0.
    offsets = np.arange(sequence_length) * step - lookback
    # Because end_rows starts at `lookback`, no index can be negative, so no
    # per-window bounds check is needed.
    window_rows = end_rows[:, None] + offsets[None, :]

    X = data[window_rows, :n_features]
    y = data[end_rows, n_features:]
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Glue the targets to the right of the scaled features; the windowing helper
# splits them apart again (first 19 columns are inputs, the rest are targets).
df_selected_aug_train = np.hstack([X_train_scaled, df_y_train])
X_train, y_train = create_lstm_dataset(df_selected_aug_train)

df_selected_aug_test = np.hstack([X_test_scaled, df_y_test])
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

# The first test window ends 220 rows (= (12 - 1) * 20, the helper's default
# look-back) into the test slice, so the matching returns start at that row.
df_ret_org_test = df_ret_original.iloc[val_split_idx + 220:, :]

# -----------------------------
# 3. LSTM model definition
# -----------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head and a softmax over the outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch, output_dim)``.

        Only the LSTM output of the last time step is passed to the linear
        head; each output row is softmax-normalised.
        """
        sequence_features, _state = self.lstm(x)
        last_step = sequence_features[:, -1, :]
        return self.softmax(self.fc(last_step))

# -----------------------------
# 4. Transformation for top/bottom 4
# -----------------------------
def transform_array(arr):
    """Turn each row of scores into a -1 / 0 / +1 signal row.

    The four smallest entries of a row become -1, the four largest become +1
    and every other entry is 0. The +1 marks are written last, so they win if
    a row is short enough for the two groups to overlap.

    Args:
        arr: 2-D NumPy array of scores, one row per observation.

    Returns:
        Array with the same shape and dtype as ``arr``.
    """
    order = np.argsort(arr, axis=1)
    signals = np.zeros_like(arr)
    for columns, mark in ((order[:, :4], -1), (order[:, -4:], 1)):
        np.put_along_axis(signals, columns, mark, axis=1)
    return signals

# -----------------------------
# 5. Extended Grid Search
# -----------------------------
# Candidate values for each hyperparameter; the full Cartesian product
# (2 * 2 * 2 * 2 * 2 = 32 combinations) is trained and scored below.
batch_sizes = [16, 20]
epochs = [30, 33]
hidden_dims = [64, 128]
learning_rates = [0.00001, 0.0001]
num_layers_list = [1, 2]

param_grid = list(itertools.product(batch_sizes, epochs, hidden_dims, learning_rates, num_layers_list))

# One dict per combination is appended to `results`.
results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): every combination is scored on X_test, and this cell builds no
# separate validation set, so the ranking produced from `results` is tuned on
# the test data — confirm this is intended.
for batch_size, epoch, hidden_dim, lr, num_layers in param_grid:
    print(f"Running BS={batch_size}, EP={epoch}, HD={hidden_dim}, LR={lr}, NL={num_layers}")
    # Re-seed before each run so every combination starts from the same RNG
    # state (weight initialisation and batch shuffling).
    set_seed(MAIN_SEED)

    # A fresh model and optimiser per combination; the loss is the mean squared
    # error between the model's softmax outputs and the targets.
    model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=hidden_dim, output_dim=19, num_layers=num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    # Plain mini-batch training for `epoch` passes over the training windows.
    for ep in range(epoch):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on the test windows in their original order (no shuffling) with
    # gradients disabled.
    model.eval()
    output_list = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            output_list.append(outputs.cpu().numpy())

    # Convert the scores to -1 / 0 / +1 signals and label them like the
    # returns frame so the two can be multiplied element-wise.
    y_pred = np.vstack(output_list)
    y_pos_pred = transform_array(y_pred)
    signals = pd.DataFrame(y_pos_pred)
    signals.columns = df_ret_org_test.columns
    signals.index = df_ret_org_test.index

    # shift(1) applies each row's signal to the following row's returns. The
    # first row becomes all-NaN, which sum(axis=1) counts as 0, so that row
    # enters the mean below as a zero return.
    strat_returns = (signals.shift(1) * df_ret_org_test)
    # Divided by 4: transform_array marks four +1 and four -1 entries per row.
    strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
    # "Monthly" is the mean per-row portfolio return and "Annual" is 12 times
    # that — presumably each row holds a monthly return; TODO confirm.
    monthly_ret = strat_returns["Portfolio"].mean()
    annual_ret = monthly_ret * 12

    results.append({
        "BATCH_SIZE": batch_size,
        "EPOCH": epoch,
        "hidden_dim": hidden_dim,
        "LR": lr,
        "num_layers": num_layers,
        "Monthly": round(monthly_ret, 6),
        "Annual": round(annual_ret, 6)
    })

# -----------------------------
# 6. Save and print results
# -----------------------------
# One row per hyperparameter combination; show them best-first by annualised
# return, but write the table to disk in the order the runs were executed.
results_df = pd.DataFrame(results)
ranked_results = results_df.sort_values(by="Annual", ascending=False)
print("\nExtended Grid Search Results:", ranked_results, sep="\n")
results_df.to_csv("extended_grid_search_results2.csv", index=False)

Running BS=16, EP=30, HD=64, LR=1e-05, NL=1
Running BS=16, EP=30, HD=64, LR=1e-05, NL=2
Running BS=16, EP=30, HD=64, LR=0.0001, NL=1
Running BS=16, EP=30, HD=64, LR=0.0001, NL=2
Running BS=16, EP=30, HD=128, LR=1e-05, NL=1
Running BS=16, EP=30, HD=128, LR=1e-05, NL=2
Running BS=16, EP=30, HD=128, LR=0.0001, NL=1
Running BS=16, EP=30, HD=128, LR=0.0001, NL=2
Running BS=16, EP=33, HD=64, LR=1e-05, NL=1
Running BS=16, EP=33, HD=64, LR=1e-05, NL=2
Running BS=16, EP=33, HD=64, LR=0.0001, NL=1
Running BS=16, EP=33, HD=64, LR=0.0001, NL=2
Running BS=16, EP=33, HD=128, LR=1e-05, NL=1
Running BS=16, EP=33, HD=128, LR=1e-05, NL=2
Running BS=16, EP=33, HD=128, LR=0.0001, NL=1
Running BS=16, EP=33, HD=128, LR=0.0001, NL=2
Running BS=20, EP=30, HD=64, LR=1e-05, NL=1
Running BS=20, EP=30, HD=64, LR=1e-05, NL=2
Running BS=20, EP=30, HD=64, LR=0.0001, NL=1
Running BS=20, EP=30, HD=64, LR=0.0001, NL=2
Running BS=20, EP=30, HD=128, LR=1e-05, NL=1
Running BS=20, EP=30, HD=128, LR=1e-05, NL=2
Running BS=2

In [40]:
# LSTM Grid Search (Extended: all hyperparameters tunable)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

# -----------------------------
# 1. Set seed function
# -----------------------------
# Seed applied before every grid-search run in this cell.
MAIN_SEED = 2025

def set_seed(seed_value):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and pin cuDNN settings.

    Args:
        seed_value: Integer seed handed unchanged to every generator.
    """
    import random

    # Same generators, in the same order: stdlib, NumPy, torch CPU, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed_value)

    # Request deterministic cuDNN kernels and turn off the auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

# -----------------------------
# 2. Load and preprocess data
# -----------------------------
df = pd.read_csv("final_train_data.csv")

# Column groups are picked by position in the CSV.
df_ret_original = df.iloc[:, 1:20]   # multiplied with the trading signals in the backtest below
df_out = df.iloc[:, 21:40]           # training targets
df_m = df.iloc[:, 41:60]             # model inputs
df_selected = df_m.copy()

# Row cut points at 80% and 90% of the table. Rows between the two cut points
# are not used anywhere in this cell.
train_split_idx, val_split_idx = (
    int(df_selected.shape[0] * fraction) for fraction in (0.8, 0.9)
)

# Targets are sliced with the same row ranges as the features.
df_y_train, df_y_test = df_out.iloc[:train_split_idx], df_out.iloc[val_split_idx:]

# The scaler is fitted on the training rows only and then reused for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(df_selected.iloc[:train_split_idx])
X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their targets from one 2-D table.

    Every row ``i`` with at least ``(sequence_length - 1) * step`` rows before
    it yields one sample: the feature columns of rows
    ``i - (sequence_length - 1) * step, ..., i - step, i`` as the input window
    and the remaining columns of row ``i`` as the target.

    Args:
        df: 2-D array-like (despite the name, callers pass a NumPy array) whose
            first ``n_features`` columns are inputs and whose remaining
            columns are targets.
        sequence_length: Number of time steps per window.
        step: Row distance between consecutive time steps of a window.
        n_features: Number of leading columns treated as inputs.

    Returns:
        ``(X, y)`` float32 tensors of shape
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. ``n_samples`` is 0 when the
        table has no more than ``(sequence_length - 1) * step`` rows.
    """
    data = np.asarray(df)
    lookback = (sequence_length - 1) * step

    # Row index of the last (most recent) time step of every window.
    end_rows = np.arange(lookback, len(data))
    # Position of each time step relative to the end row: -lookback, ..., -step, 0.
    offsets = np.arange(sequence_length) * step - lookback
    # Because end_rows starts at `lookback`, no index can be negative, so no
    # per-window bounds check is needed.
    window_rows = end_rows[:, None] + offsets[None, :]

    X = data[window_rows, :n_features]
    y = data[end_rows, n_features:]
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Glue the targets to the right of the scaled features; the windowing helper
# splits them apart again (first 19 columns are inputs, the rest are targets).
df_selected_aug_train = np.hstack([X_train_scaled, df_y_train])
X_train, y_train = create_lstm_dataset(df_selected_aug_train)

df_selected_aug_test = np.hstack([X_test_scaled, df_y_test])
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

# The first test window ends 220 rows (= (12 - 1) * 20, the helper's default
# look-back) into the test slice, so the matching returns start at that row.
df_ret_org_test = df_ret_original.iloc[val_split_idx + 220:, :]

# -----------------------------
# 3. LSTM model definition
# -----------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head and a softmax over the outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch, output_dim)``.

        Only the LSTM output of the last time step is passed to the linear
        head; each output row is softmax-normalised.
        """
        sequence_features, _state = self.lstm(x)
        last_step = sequence_features[:, -1, :]
        return self.softmax(self.fc(last_step))

# -----------------------------
# 4. Transformation for top/bottom 4
# -----------------------------
def transform_array(arr):
    """Turn each row of scores into a -1 / 0 / +1 signal row.

    The four smallest entries of a row become -1, the four largest become +1
    and every other entry is 0. The +1 marks are written last, so they win if
    a row is short enough for the two groups to overlap.

    Args:
        arr: 2-D NumPy array of scores, one row per observation.

    Returns:
        Array with the same shape and dtype as ``arr``.
    """
    order = np.argsort(arr, axis=1)
    signals = np.zeros_like(arr)
    for columns, mark in ((order[:, :4], -1), (order[:, -4:], 1)):
        np.put_along_axis(signals, columns, mark, axis=1)
    return signals

# -----------------------------
# 5. Extended Grid Search
# -----------------------------
# Candidate values for each hyperparameter; the full Cartesian product
# (2 * 2 * 2 * 2 * 2 = 32 combinations) is trained and scored below.
batch_sizes = [16, 20]
epochs = [30, 33]
hidden_dims = [64, 128]
learning_rates = [0.00001, 0.0001]
num_layers_list = [1, 2]

param_grid = list(itertools.product(batch_sizes, epochs, hidden_dims, learning_rates, num_layers_list))

# One dict per combination is appended to `results`.
results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): every combination is scored on X_test, and this cell builds no
# separate validation set, so the ranking produced from `results` is tuned on
# the test data — confirm this is intended.
for batch_size, epoch, hidden_dim, lr, num_layers in param_grid:
    print(f"Running BS={batch_size}, EP={epoch}, HD={hidden_dim}, LR={lr}, NL={num_layers}")
    # Re-seed before each run so every combination starts from the same RNG
    # state (weight initialisation and batch shuffling).
    set_seed(MAIN_SEED)

    # A fresh model and optimiser per combination; the loss is the mean squared
    # error between the model's softmax outputs and the targets.
    model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=hidden_dim, output_dim=19, num_layers=num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    # Plain mini-batch training for `epoch` passes over the training windows.
    for ep in range(epoch):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on the test windows in their original order (no shuffling) with
    # gradients disabled.
    model.eval()
    output_list = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            output_list.append(outputs.cpu().numpy())

    # Convert the scores to -1 / 0 / +1 signals and label them like the
    # returns frame so the two can be multiplied element-wise.
    y_pred = np.vstack(output_list)
    y_pos_pred = transform_array(y_pred)
    signals = pd.DataFrame(y_pos_pred)
    signals.columns = df_ret_org_test.columns
    signals.index = df_ret_org_test.index

    # shift(1) applies each row's signal to the following row's returns. The
    # first row becomes all-NaN, which sum(axis=1) counts as 0, so that row
    # enters the mean below as a zero return.
    strat_returns = (signals.shift(1) * df_ret_org_test)
    # Divided by 4: transform_array marks four +1 and four -1 entries per row.
    strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
    # "Monthly" is the mean per-row portfolio return and "Annual" is 12 times
    # that — presumably each row holds a monthly return; TODO confirm.
    monthly_ret = strat_returns["Portfolio"].mean()
    annual_ret = monthly_ret * 12

    results.append({
        "BATCH_SIZE": batch_size,
        "EPOCH": epoch,
        "hidden_dim": hidden_dim,
        "LR": lr,
        "num_layers": num_layers,
        "Monthly": round(monthly_ret, 6),
        "Annual": round(annual_ret, 6)
    })

# -----------------------------
# 6. Save and print results
# -----------------------------
# One row per hyperparameter combination; show them best-first by annualised
# return, but write the table to disk in the order the runs were executed.
results_df = pd.DataFrame(results)
ranked_results = results_df.sort_values(by="Annual", ascending=False)
print("\nExtended Grid Search Results:", ranked_results, sep="\n")
results_df.to_csv("extended_grid_search_results3.csv", index=False)

Running BS=16, EP=30, HD=64, LR=1e-05, NL=1
Running BS=16, EP=30, HD=64, LR=1e-05, NL=2
Running BS=16, EP=30, HD=64, LR=0.0001, NL=1
Running BS=16, EP=30, HD=64, LR=0.0001, NL=2
Running BS=16, EP=30, HD=128, LR=1e-05, NL=1
Running BS=16, EP=30, HD=128, LR=1e-05, NL=2
Running BS=16, EP=30, HD=128, LR=0.0001, NL=1
Running BS=16, EP=30, HD=128, LR=0.0001, NL=2
Running BS=16, EP=33, HD=64, LR=1e-05, NL=1
Running BS=16, EP=33, HD=64, LR=1e-05, NL=2
Running BS=16, EP=33, HD=64, LR=0.0001, NL=1
Running BS=16, EP=33, HD=64, LR=0.0001, NL=2
Running BS=16, EP=33, HD=128, LR=1e-05, NL=1
Running BS=16, EP=33, HD=128, LR=1e-05, NL=2
Running BS=16, EP=33, HD=128, LR=0.0001, NL=1
Running BS=16, EP=33, HD=128, LR=0.0001, NL=2
Running BS=20, EP=30, HD=64, LR=1e-05, NL=1
Running BS=20, EP=30, HD=64, LR=1e-05, NL=2
Running BS=20, EP=30, HD=64, LR=0.0001, NL=1
Running BS=20, EP=30, HD=64, LR=0.0001, NL=2
Running BS=20, EP=30, HD=128, LR=1e-05, NL=1
Running BS=20, EP=30, HD=128, LR=1e-05, NL=2
Running BS=2

In [41]:
# LSTM Grid Search (Extended: all hyperparameters tunable)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

# -----------------------------
# 1. Set seed function
# -----------------------------
# Seed applied before every grid-search run in this cell.
MAIN_SEED = 88

def set_seed(seed_value):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and pin cuDNN settings.

    Args:
        seed_value: Integer seed handed unchanged to every generator.
    """
    import random

    # Same generators, in the same order: stdlib, NumPy, torch CPU, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed_value)

    # Request deterministic cuDNN kernels and turn off the auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

# -----------------------------
# 2. Load and preprocess data
# -----------------------------
df = pd.read_csv("final_train_data.csv")

# Column groups are picked by position in the CSV.
df_ret_original = df.iloc[:, 1:20]   # multiplied with the trading signals in the backtest below
df_out = df.iloc[:, 21:40]           # training targets
df_m = df.iloc[:, 41:60]             # model inputs
df_selected = df_m.copy()

# Row cut points at 80% and 90% of the table. Rows between the two cut points
# are not used anywhere in this cell.
train_split_idx, val_split_idx = (
    int(df_selected.shape[0] * fraction) for fraction in (0.8, 0.9)
)

# Targets are sliced with the same row ranges as the features.
df_y_train, df_y_test = df_out.iloc[:train_split_idx], df_out.iloc[val_split_idx:]

# The scaler is fitted on the training rows only and then reused for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(df_selected.iloc[:train_split_idx])
X_test_scaled = scaler.transform(df_selected.iloc[val_split_idx:])

def create_lstm_dataset(df, sequence_length=12, step=20, n_features=19):
    """Build strided look-back windows and their targets from one 2-D table.

    Every row ``i`` with at least ``(sequence_length - 1) * step`` rows before
    it yields one sample: the feature columns of rows
    ``i - (sequence_length - 1) * step, ..., i - step, i`` as the input window
    and the remaining columns of row ``i`` as the target.

    Args:
        df: 2-D array-like (despite the name, callers pass a NumPy array) whose
            first ``n_features`` columns are inputs and whose remaining
            columns are targets.
        sequence_length: Number of time steps per window.
        step: Row distance between consecutive time steps of a window.
        n_features: Number of leading columns treated as inputs.

    Returns:
        ``(X, y)`` float32 tensors of shape
        ``(n_samples, sequence_length, n_features)`` and
        ``(n_samples, n_columns - n_features)``. ``n_samples`` is 0 when the
        table has no more than ``(sequence_length - 1) * step`` rows.
    """
    data = np.asarray(df)
    lookback = (sequence_length - 1) * step

    # Row index of the last (most recent) time step of every window.
    end_rows = np.arange(lookback, len(data))
    # Position of each time step relative to the end row: -lookback, ..., -step, 0.
    offsets = np.arange(sequence_length) * step - lookback
    # Because end_rows starts at `lookback`, no index can be negative, so no
    # per-window bounds check is needed.
    window_rows = end_rows[:, None] + offsets[None, :]

    X = data[window_rows, :n_features]
    y = data[end_rows, n_features:]
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Glue the targets to the right of the scaled features; the windowing helper
# splits them apart again (first 19 columns are inputs, the rest are targets).
df_selected_aug_train = np.hstack([X_train_scaled, df_y_train])
X_train, y_train = create_lstm_dataset(df_selected_aug_train)

df_selected_aug_test = np.hstack([X_test_scaled, df_y_test])
X_test, y_test = create_lstm_dataset(df_selected_aug_test)

# The first test window ends 220 rows (= (12 - 1) * 20, the helper's default
# look-back) into the test slice, so the matching returns start at that row.
df_ret_org_test = df_ret_original.iloc[val_split_idx + 220:, :]

# -----------------------------
# 3. LSTM model definition
# -----------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head and a softmax over the outputs.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch, output_dim)``.

        Only the LSTM output of the last time step is passed to the linear
        head; each output row is softmax-normalised.
        """
        sequence_features, _state = self.lstm(x)
        last_step = sequence_features[:, -1, :]
        return self.softmax(self.fc(last_step))

# -----------------------------
# 4. Transformation for top/bottom 4
# -----------------------------
def transform_array(arr):
    """Turn each row of scores into a -1 / 0 / +1 signal row.

    The four smallest entries of a row become -1, the four largest become +1
    and every other entry is 0. The +1 marks are written last, so they win if
    a row is short enough for the two groups to overlap.

    Args:
        arr: 2-D NumPy array of scores, one row per observation.

    Returns:
        Array with the same shape and dtype as ``arr``.
    """
    order = np.argsort(arr, axis=1)
    signals = np.zeros_like(arr)
    for columns, mark in ((order[:, :4], -1), (order[:, -4:], 1)):
        np.put_along_axis(signals, columns, mark, axis=1)
    return signals

# -----------------------------
# 5. Extended Grid Search
# -----------------------------
# Candidate values for each hyperparameter; the full Cartesian product
# (2 * 2 * 2 * 2 * 2 = 32 combinations) is trained and scored below.
batch_sizes = [16, 20]
epochs = [30, 33]
hidden_dims = [64, 128]
learning_rates = [0.00001, 0.0001]
num_layers_list = [1, 2]

param_grid = list(itertools.product(batch_sizes, epochs, hidden_dims, learning_rates, num_layers_list))

# One dict per combination is appended to `results`.
results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): every combination is scored on X_test, and this cell builds no
# separate validation set, so the ranking produced from `results` is tuned on
# the test data — confirm this is intended.
for batch_size, epoch, hidden_dim, lr, num_layers in param_grid:
    print(f"Running BS={batch_size}, EP={epoch}, HD={hidden_dim}, LR={lr}, NL={num_layers}")
    # Re-seed before each run so every combination starts from the same RNG
    # state (weight initialisation and batch shuffling).
    set_seed(MAIN_SEED)

    # A fresh model and optimiser per combination; the loss is the mean squared
    # error between the model's softmax outputs and the targets.
    model = LSTMModel(input_dim=df_selected.shape[1], hidden_dim=hidden_dim, output_dim=19, num_layers=num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    # Plain mini-batch training for `epoch` passes over the training windows.
    for ep in range(epoch):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on the test windows in their original order (no shuffling) with
    # gradients disabled.
    model.eval()
    output_list = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            output_list.append(outputs.cpu().numpy())

    # Convert the scores to -1 / 0 / +1 signals and label them like the
    # returns frame so the two can be multiplied element-wise.
    y_pred = np.vstack(output_list)
    y_pos_pred = transform_array(y_pred)
    signals = pd.DataFrame(y_pos_pred)
    signals.columns = df_ret_org_test.columns
    signals.index = df_ret_org_test.index

    # shift(1) applies each row's signal to the following row's returns. The
    # first row becomes all-NaN, which sum(axis=1) counts as 0, so that row
    # enters the mean below as a zero return.
    strat_returns = (signals.shift(1) * df_ret_org_test)
    # Divided by 4: transform_array marks four +1 and four -1 entries per row.
    strat_returns["Portfolio"] = strat_returns.sum(axis=1) / 4
    # "Monthly" is the mean per-row portfolio return and "Annual" is 12 times
    # that — presumably each row holds a monthly return; TODO confirm.
    monthly_ret = strat_returns["Portfolio"].mean()
    annual_ret = monthly_ret * 12

    results.append({
        "BATCH_SIZE": batch_size,
        "EPOCH": epoch,
        "hidden_dim": hidden_dim,
        "LR": lr,
        "num_layers": num_layers,
        "Monthly": round(monthly_ret, 6),
        "Annual": round(annual_ret, 6)
    })

# -----------------------------
# 6. Save and print results
# -----------------------------
# One row per hyperparameter combination; show them best-first by annualised
# return, but write the table to disk in the order the runs were executed.
results_df = pd.DataFrame(results)
ranked_results = results_df.sort_values(by="Annual", ascending=False)
print("\nExtended Grid Search Results:", ranked_results, sep="\n")
results_df.to_csv("extended_grid_search_results4.csv", index=False)

Running BS=16, EP=30, HD=64, LR=1e-05, NL=1
Running BS=16, EP=30, HD=64, LR=1e-05, NL=2
Running BS=16, EP=30, HD=64, LR=0.0001, NL=1
Running BS=16, EP=30, HD=64, LR=0.0001, NL=2
Running BS=16, EP=30, HD=128, LR=1e-05, NL=1
Running BS=16, EP=30, HD=128, LR=1e-05, NL=2
Running BS=16, EP=30, HD=128, LR=0.0001, NL=1
Running BS=16, EP=30, HD=128, LR=0.0001, NL=2
Running BS=16, EP=33, HD=64, LR=1e-05, NL=1
Running BS=16, EP=33, HD=64, LR=1e-05, NL=2
Running BS=16, EP=33, HD=64, LR=0.0001, NL=1
Running BS=16, EP=33, HD=64, LR=0.0001, NL=2
Running BS=16, EP=33, HD=128, LR=1e-05, NL=1
Running BS=16, EP=33, HD=128, LR=1e-05, NL=2
Running BS=16, EP=33, HD=128, LR=0.0001, NL=1
Running BS=16, EP=33, HD=128, LR=0.0001, NL=2
Running BS=20, EP=30, HD=64, LR=1e-05, NL=1
Running BS=20, EP=30, HD=64, LR=1e-05, NL=2
Running BS=20, EP=30, HD=64, LR=0.0001, NL=1
Running BS=20, EP=30, HD=64, LR=0.0001, NL=2
Running BS=20, EP=30, HD=128, LR=1e-05, NL=1
Running BS=20, EP=30, HD=128, LR=1e-05, NL=2
Running BS=2