#THIS NOTEBOOK TESTS THE PREVIOUSLY TRAINED MODELS WITH NEW DATA RANGING FROM JANUARY 2024 TO DECEMBER 2025

In [8]:
#GENERATE PREDICTIONS BASED ON THE PREVIOUSLY TRAINED MODELS

import pandas as pd
import numpy as np
import torch
import joblib 
import json
import sys
import os
from sklearn.metrics import accuracy_score, classification_report

sys.path.append(os.path.abspath('..'))
from src.models import BaselineModel, LSTMModel


print("PREDICTION PHASE")


# 1. LOAD CONFIGURATION
# config.json supplies the feature list, the lookback window length and the
# training cut-off date that every later step in this cell relies on.
print("Loading config...")
with open('../models/config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

SELECTED_FEATURES = config['features']
LOOKBACK = config['lookback']
TRAIN_END = config['train_end_date']

print(f"   Features: {SELECTED_FEATURES}")

# 2. LOAD MODELS & SCALER
# NOTE(review): joblib.load unpickles arbitrary objects -- only load artifacts
# produced by this project.
print("Loading models...")
scaler = joblib.load('../models/scaler.pkl')
model_ridge = joblib.load('../models/ridge_model.pkl')

# For PyTorch the architecture must be instantiated before the weights can be
# restored into it.
model_lstm = LSTMModel(input_dim=len(SELECTED_FEATURES))
# map_location='cpu' lets a checkpoint that was saved on a GPU machine load on
# a CPU-only host; the inference step later calls .numpy(), which requires CPU
# tensors anyway.
# NOTE(review): consider weights_only=True if the installed torch supports it.
model_lstm.load_state_dict(
    torch.load('../models/lstm_model.pth', map_location='cpu')
)
model_lstm.eval()  # Important: switch to evaluation mode before inference

# 3. LOAD & PREPARE TEST DATA
df = pd.read_csv('../data/processed/04_ml_ready_features.csv', index_col=0, parse_dates=True)
# Keep only rows strictly after the training cut-off.
df_test = df[df.index > TRAIN_END].copy()

# Scale using the LOADED scaler (transform only -- do not refit on test data!)
X_test_vals = scaler.transform(df_test[SELECTED_FEATURES])

# Rebuild a DataFrame of scaled features for sequencing
df_test_proc = pd.DataFrame(X_test_vals, columns=SELECTED_FEATURES, index=df_test.index)
df_test_proc['Target'] = df_test['Target_Direction'].values
df_test_proc['Pair_ID'] = df_test['Pair_ID'].values
# Positional row number within df_test, used later to map each generated
# sequence back to its source row via .iloc.
df_test_proc['Original_Index'] = np.arange(len(df_test))

# 4. GENERATE SEQUENCES (Test Version - keeps indices)
def create_test_sequences(data_df, feature_cols, lookback=10):
    """Build per-pair sliding windows of features, keeping row indices.

    For every distinct ``Pair_ID`` in ``data_df`` (in order of first
    appearance), each run of ``lookback`` consecutive rows becomes one
    window, and the ``Original_Index`` value of the row immediately
    following the window is recorded alongside it. Pairs with
    ``lookback`` rows or fewer contribute no windows.

    Args:
        data_df: DataFrame containing ``feature_cols`` plus the columns
            ``Pair_ID`` and ``Original_Index``.
        feature_cols: List of column names to place in each window.
        lookback: Number of consecutive rows per window (must be >= 1).

    Returns:
        A tuple ``(X_seq, indices)`` where ``X_seq`` has shape
        ``(n_windows, lookback, len(feature_cols))`` and ``indices`` has
        shape ``(n_windows,)``. When no window can be built the arrays are
        empty but keep those shapes, so downstream slicing still works.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be a positive integer, got {lookback!r}")

    X_seq, indices = [], []
    # sort=False keeps pairs in order of first appearance.
    for _, pair_df in data_df.groupby('Pair_ID', sort=False):
        X_vals = pair_df[feature_cols].to_numpy()
        orig_idxs = pair_df['Original_Index'].to_numpy()

        # range() is empty when the pair has <= lookback rows, so short
        # pairs are skipped without a separate guard.
        for start in range(len(X_vals) - lookback):
            end = start + lookback
            X_seq.append(X_vals[start:end])
            indices.append(orig_idxs[end])

    if not X_seq:
        # np.array([]) would be 1-D float; return correctly shaped empties.
        return (
            np.empty((0, lookback, len(feature_cols))),
            np.empty(0, dtype=np.int64),
        )
    return np.array(X_seq), np.array(indices)

X_test_3d, test_indices = create_test_sequences(df_test_proc, SELECTED_FEATURES, lookback=LOOKBACK)
if len(X_test_3d) == 0:
    # Fail early with a clear message instead of an obscure shape error
    # from inside the models.
    raise ValueError(
        f"No test sequences could be built: no Pair_ID has more than "
        f"{LOOKBACK} rows after {TRAIN_END}."
    )

# The Ridge model is fed only the last timestep of each window; the LSTM
# receives the full window.
X_test_2d = X_test_3d[:, -1, :]
X_test_t3d = torch.FloatTensor(X_test_3d)

# 5. PREDICT
print("\n Forecasting...")
ridge_preds = model_ridge.predict(X_test_2d)

# no_grad: inference only, so skip building the autograd graph.
with torch.no_grad():
    lstm_probs = model_lstm(X_test_t3d).numpy().flatten()

# 6. SAVE RESULTS
# test_indices are positional row numbers into df_test, hence .iloc.
results = df_test.iloc[test_indices].copy()
results['Ridge_Pred'] = ridge_preds
results['LSTM_Prob'] = lstm_probs
# Threshold the LSTM output at 0.5 to obtain a 0/1 prediction.
results['LSTM_Pred'] = (lstm_probs >= 0.5).astype(int)

results.to_csv('../data/processed/05_model_predictions.csv')
print("predictions saved to ../data/processed/05_model_predictions.csv")


PREDICTION PHASE
Loading config...
   Features: ['Z_Score', 'Range_Position', 'MR_Strength']
Loading models...

 Forecasting...
predictions saved to ../data/processed/05_model_predictions.csv
