In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import sys
import os

# Link to src
sys.path.append(os.path.abspath(os.path.join('..')))
from src.models import BaselineModel, MLP, LSTMModel

# --- 1. LOAD DATA ---
df = pd.read_csv('../data/processed/lstm_ready_data_multi.csv', index_col=0, parse_dates=True)

# Define Features (X) and Target (y)
feature_cols = ['Z_Score', 'Volatility', 'Momentum'] # Must match your CSV columns
X = df[feature_cols].values
y = df['Target_Direction'].values

# --- 2. PREPARE DATA (Normalize) ---
# Neural Networks require data to be centered around 0.
#
# The scaler is fitted on the leading (training-period) rows ONLY and then
# applied to the whole series. Fitting on every row would leak the test
# period's mean/std into the training features (look-ahead bias).
#
# The fit window int(len(X) * 0.8) never reaches past the chronological
# train/test boundary used in section 4, which sits at raw row
# LOOKBACK + int((len(X) - LOOKBACK) * 0.8). Keep this fraction <= the split
# fraction used there.
SCALER_FIT_FRACTION = 0.8
n_scaler_fit = int(len(X) * SCALER_FIT_FRACTION)

scaler = StandardScaler()
scaler.fit(X[:n_scaler_fit])
X_scaled = scaler.transform(X)

# --- 3. HELPER: CREATE SEQUENCES FOR LSTM ---
def create_sequences(data, target, lookback=10):
    """
    Converts 2D data into 3D look-back windows for a sequence model (LSTM).

    Sample ``i`` pairs the window ``data[i : i + lookback]`` (rows ``i`` up to
    and including ``i + lookback - 1``) with the label ``target[i + lookback]``,
    i.e. the label stored on the row immediately AFTER the window.

    Example: data (1000, 3), lookback=10 -> X (990, 10, 3), y (990,)

    Args:
        data: Row-indexable 2D feature array, one row per time step.
        target: Per-row labels, indexable by integer position and at least as
            long as ``data``.
        lookback: Number of consecutive rows in each window (must be >= 1).

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays. When ``data`` has no more
        than ``lookback`` rows, an empty batch is returned whose shape is
        still ``(0, lookback, ...)`` so downstream tensor code sees the
        expected rank.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    n_samples = len(data) - lookback
    if n_samples <= 0:
        # Not enough rows for a single window: return correctly-shaped empties
        # instead of two rank-1 arrays of shape (0,).
        data_arr = np.asarray(data)
        empty_X = np.empty((0, lookback) + data_arr.shape[1:], dtype=data_arr.dtype)
        empty_y = np.empty((0,), dtype=np.asarray(target).dtype)
        return empty_X, empty_y

    X_seq = [data[i:i + lookback] for i in range(n_samples)]  # rows i .. i+lookback-1
    y_seq = [target[i + lookback] for i in range(n_samples)]  # label of the row after the window
    return np.array(X_seq), np.array(y_seq)

# Create 3D data for Tier 3
LOOKBACK = 10
X_lstm, y_lstm = create_sequences(X_scaled, y, lookback=LOOKBACK)

# Align 2D data for Tier 1 & 2 (must match LSTM length to be fair)
X_flat = X_scaled[LOOKBACK:]
y_flat = y[LOOKBACK:]

# Every tier is trained/evaluated against y_flat, so the 3D sample set has to
# line up with the 2D one sample-for-sample.
assert len(X_lstm) == len(X_flat) == len(y_flat), "2D and 3D sample counts diverged"
# NOTE(review): flat sample i is row LOOKBACK + i, whereas LSTM window i covers
# rows i .. i + LOOKBACK - 1 (it stops one row earlier). Confirm this offset is
# intended given how 'Target_Direction' is defined in the CSV.

# --- 4. TRAIN/TEST SPLIT (No Random Shuffling!) ---
# Chronological split: the first 80% of samples train, the last 20% test.
# (Original note: train on 2020-2023, test on 2024 -- depends on the CSV's date range.)
split = int(len(X_flat) * 0.8)

# Data for Tier 1 & 2
X_train_2d, X_test_2d = X_flat[:split], X_flat[split:]
y_train, y_test = y_flat[:split], y_flat[split:]

# Data for Tier 3 (LSTM)
X_train_3d, X_test_3d = X_lstm[:split], X_lstm[split:]

# Convert to PyTorch Tensors
X_train_tensor_2d = torch.FloatTensor(X_train_2d)
X_test_tensor_2d = torch.FloatTensor(X_test_2d)
X_train_tensor_3d = torch.FloatTensor(X_train_3d)
X_test_tensor_3d = torch.FloatTensor(X_test_3d)
y_train_tensor = torch.FloatTensor(y_train).view(-1, 1)  # column vector, matching the shape passed to BCELoss

print(f"Training Data Shape (2D): {X_train_2d.shape}")
print(f"Training Data Shape (3D): {X_train_3d.shape}")

# --- Training configuration / shared helpers ---
SEED = 42            # fixed seed so weight init (and thus reported accuracy) is reproducible
N_EPOCHS = 50        # full-batch gradient steps per neural model
LEARNING_RATE = 0.001
N_FEATURES = X_train_2d.shape[1]  # derived from the data instead of hard-coding 3


def train_binary_classifier(model, inputs, targets, criterion, epochs=N_EPOCHS, lr=LEARNING_RATE):
    """
    Full-batch training loop with a fresh Adam optimizer.

    Assumes `model` is a torch.nn.Module (it must expose .parameters() and
    .train()) -- the model classes live in src.models and are not visible here.

    Returns the final training loss as a float (NaN if `epochs` is 0).
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()  # make sure train-only layers (dropout/batch-norm, if any) are active
    last_loss = float('nan')
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        last_loss = loss.item()
    return last_loss


def predict_probabilities(model, inputs):
    """Run `model` on `inputs` in eval mode without tracking gradients."""
    model.eval()  # disable train-only behaviour (dropout/batch-norm, if any) for inference
    with torch.no_grad():
        return model(inputs)


# ==========================================
# TIER 1: RIDGE REGRESSION (Baseline)
# ==========================================
print("\n--- Training Tier 1: Ridge Regression ---")
model_ridge = BaselineModel(alpha=1.0)
model_ridge.fit(X_train_2d, y_train)
pred_ridge = model_ridge.predict(X_test_2d)
# NOTE(review): accuracy_score needs discrete class labels; this assumes
# BaselineModel.predict already returns 0/1 labels rather than raw regression
# outputs -- confirm in src.models.
acc_ridge = accuracy_score(y_test, pred_ridge)
print(f"Tier 1 Accuracy: {acc_ridge:.4f}")

# ==========================================
# TIER 2: MLP (Classical ML)
# ==========================================
print("\n--- Training Tier 2: MLP ---")
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model_mlp = MLP(input_dim=N_FEATURES)
criterion = nn.BCELoss() # Binary Cross Entropy (expects probabilities in [0, 1])

train_binary_classifier(model_mlp, X_train_tensor_2d, y_train_tensor, criterion)

# Predict
pred_mlp_prob = predict_probabilities(model_mlp, X_test_tensor_2d)
pred_mlp = (pred_mlp_prob > 0.5).float().numpy().flatten()

acc_mlp = accuracy_score(y_test, pred_mlp)
print(f"Tier 2 Accuracy: {acc_mlp:.4f}")

# ==========================================
# TIER 3: LSTM (Deep Learning)
# ==========================================
print("\n--- Training Tier 3: LSTM ---")
torch.manual_seed(SEED)  # re-seed so this tier does not depend on how Tier 2 consumed the RNG
model_lstm = LSTMModel(input_dim=N_FEATURES, hidden_dim=64)

train_binary_classifier(model_lstm, X_train_tensor_3d, y_train_tensor, criterion)  # Feeding 3D Data!

# Predict
pred_lstm_prob = predict_probabilities(model_lstm, X_test_tensor_3d)
pred_lstm = (pred_lstm_prob > 0.5).float().numpy().flatten()

acc_lstm = accuracy_score(y_test, pred_lstm)
print(f"Tier 3 Accuracy: {acc_lstm:.4f}")

# ==========================================
# FINAL REPORT
# ==========================================
results = pd.DataFrame({
    'Model': ['Ridge (Baseline)', 'MLP (Non-Linear)', 'LSTM (Sequential)'],
    'Accuracy': [acc_ridge, acc_mlp, acc_lstm]
})
print("\nFinal Results:")
print(results)

KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import sys
import os

# Link to src
sys.path.append(os.path.abspath(os.path.join('..')))
from src.models import BaselineModel, MLP, LSTMModel

# --- 1. LOAD DATA ---
df = pd.read_csv('../data/processed/lstm_ready_data_multi.csv', index_col=0, parse_dates=True)

# Define Features (X) and Target (y)
feature_cols = ['Z_Score', 'Volatility', 'Momentum'] # Must match your CSV columns
X = df[feature_cols].values
y = df['Target_Direction'].values

# --- 2. PREPARE DATA (Normalize) ---
# Neural Networks require data to be centered around 0.
#
# The scaler is fitted on the leading (training-period) rows ONLY and then
# applied to the whole series. Fitting on every row would leak the test
# period's mean/std into the training features (look-ahead bias).
#
# The fit window int(len(X) * 0.8) never reaches past the chronological
# train/test boundary used in section 4, which sits at raw row
# LOOKBACK + int((len(X) - LOOKBACK) * 0.8). Keep this fraction <= the split
# fraction used there.
SCALER_FIT_FRACTION = 0.8
n_scaler_fit = int(len(X) * SCALER_FIT_FRACTION)

scaler = StandardScaler()
scaler.fit(X[:n_scaler_fit])
X_scaled = scaler.transform(X)

# --- 3. HELPER: CREATE SEQUENCES FOR LSTM ---
def create_sequences(data, target, lookback=10):
    """
    Converts 2D data into 3D look-back windows for a sequence model (LSTM).

    Sample ``i`` pairs the window ``data[i : i + lookback]`` (rows ``i`` up to
    and including ``i + lookback - 1``) with the label ``target[i + lookback]``,
    i.e. the label stored on the row immediately AFTER the window.

    Example: data (1000, 3), lookback=10 -> X (990, 10, 3), y (990,)

    Args:
        data: Row-indexable 2D feature array, one row per time step.
        target: Per-row labels, indexable by integer position and at least as
            long as ``data``.
        lookback: Number of consecutive rows in each window (must be >= 1).

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays. When ``data`` has no more
        than ``lookback`` rows, an empty batch is returned whose shape is
        still ``(0, lookback, ...)`` so downstream tensor code sees the
        expected rank.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    n_samples = len(data) - lookback
    if n_samples <= 0:
        # Not enough rows for a single window: return correctly-shaped empties
        # instead of two rank-1 arrays of shape (0,).
        data_arr = np.asarray(data)
        empty_X = np.empty((0, lookback) + data_arr.shape[1:], dtype=data_arr.dtype)
        empty_y = np.empty((0,), dtype=np.asarray(target).dtype)
        return empty_X, empty_y

    X_seq = [data[i:i + lookback] for i in range(n_samples)]  # rows i .. i+lookback-1
    y_seq = [target[i + lookback] for i in range(n_samples)]  # label of the row after the window
    return np.array(X_seq), np.array(y_seq)

# Create 3D data for Tier 3
LOOKBACK = 10
X_lstm, y_lstm = create_sequences(X_scaled, y, lookback=LOOKBACK)

# Align 2D data for Tier 1 & 2 (must match LSTM length to be fair)
X_flat = X_scaled[LOOKBACK:]
y_flat = y[LOOKBACK:]

# Every tier is trained/evaluated against y_flat, so the 3D sample set has to
# line up with the 2D one sample-for-sample.
assert len(X_lstm) == len(X_flat) == len(y_flat), "2D and 3D sample counts diverged"
# NOTE(review): flat sample i is row LOOKBACK + i, whereas LSTM window i covers
# rows i .. i + LOOKBACK - 1 (it stops one row earlier). Confirm this offset is
# intended given how 'Target_Direction' is defined in the CSV.

# --- 4. TRAIN/TEST SPLIT (No Random Shuffling!) ---
# Chronological split: the first 80% of samples train, the last 20% test.
# (Original note: train on 2020-2023, test on 2024 -- depends on the CSV's date range.)
split = int(len(X_flat) * 0.8)

# Data for Tier 1 & 2
X_train_2d, X_test_2d = X_flat[:split], X_flat[split:]
y_train, y_test = y_flat[:split], y_flat[split:]

# Data for Tier 3 (LSTM)
X_train_3d, X_test_3d = X_lstm[:split], X_lstm[split:]

# Convert to PyTorch Tensors
X_train_tensor_2d = torch.FloatTensor(X_train_2d)
X_test_tensor_2d = torch.FloatTensor(X_test_2d)
X_train_tensor_3d = torch.FloatTensor(X_train_3d)
X_test_tensor_3d = torch.FloatTensor(X_test_3d)
y_train_tensor = torch.FloatTensor(y_train).view(-1, 1)  # column vector, matching the shape passed to BCELoss

print(f"Training Data Shape (2D): {X_train_2d.shape}")
print(f"Training Data Shape (3D): {X_train_3d.shape}")

# --- Training configuration / shared helpers ---
SEED = 42            # fixed seed so weight init (and thus reported accuracy) is reproducible
N_EPOCHS = 50        # full-batch gradient steps per neural model
LEARNING_RATE = 0.001
N_FEATURES = X_train_2d.shape[1]  # derived from the data instead of hard-coding 3


def train_binary_classifier(model, inputs, targets, criterion, epochs=N_EPOCHS, lr=LEARNING_RATE):
    """
    Full-batch training loop with a fresh Adam optimizer.

    Assumes `model` is a torch.nn.Module (it must expose .parameters() and
    .train()) -- the model classes live in src.models and are not visible here.

    Returns the final training loss as a float (NaN if `epochs` is 0).
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()  # make sure train-only layers (dropout/batch-norm, if any) are active
    last_loss = float('nan')
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        last_loss = loss.item()
    return last_loss


def predict_probabilities(model, inputs):
    """Run `model` on `inputs` in eval mode without tracking gradients."""
    model.eval()  # disable train-only behaviour (dropout/batch-norm, if any) for inference
    with torch.no_grad():
        return model(inputs)


# ==========================================
# TIER 1: RIDGE REGRESSION (Baseline)
# ==========================================
print("\n--- Training Tier 1: Ridge Regression ---")
model_ridge = BaselineModel(alpha=1.0)
model_ridge.fit(X_train_2d, y_train)
pred_ridge = model_ridge.predict(X_test_2d)
# NOTE(review): accuracy_score needs discrete class labels; this assumes
# BaselineModel.predict already returns 0/1 labels rather than raw regression
# outputs -- confirm in src.models.
acc_ridge = accuracy_score(y_test, pred_ridge)
print(f"Tier 1 Accuracy: {acc_ridge:.4f}")

# ==========================================
# TIER 2: MLP (Classical ML)
# ==========================================
print("\n--- Training Tier 2: MLP ---")
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model_mlp = MLP(input_dim=N_FEATURES)
criterion = nn.BCELoss() # Binary Cross Entropy (expects probabilities in [0, 1])

train_binary_classifier(model_mlp, X_train_tensor_2d, y_train_tensor, criterion)

# Predict
pred_mlp_prob = predict_probabilities(model_mlp, X_test_tensor_2d)
pred_mlp = (pred_mlp_prob > 0.5).float().numpy().flatten()

acc_mlp = accuracy_score(y_test, pred_mlp)
print(f"Tier 2 Accuracy: {acc_mlp:.4f}")

# ==========================================
# TIER 3: LSTM (Deep Learning)
# ==========================================
print("\n--- Training Tier 3: LSTM ---")
torch.manual_seed(SEED)  # re-seed so this tier does not depend on how Tier 2 consumed the RNG
model_lstm = LSTMModel(input_dim=N_FEATURES, hidden_dim=64)

train_binary_classifier(model_lstm, X_train_tensor_3d, y_train_tensor, criterion)  # Feeding 3D Data!

# Predict
pred_lstm_prob = predict_probabilities(model_lstm, X_test_tensor_3d)
pred_lstm = (pred_lstm_prob > 0.5).float().numpy().flatten()

acc_lstm = accuracy_score(y_test, pred_lstm)
print(f"Tier 3 Accuracy: {acc_lstm:.4f}")

# ==========================================
# FINAL REPORT
# ==========================================
results = pd.DataFrame({
    'Model': ['Ridge (Baseline)', 'MLP (Non-Linear)', 'LSTM (Sequential)'],
    'Accuracy': [acc_ridge, acc_mlp, acc_lstm]
})
print("\nFinal Results:")
print(results)

KeyboardInterrupt: 