## Hybrid model evaluation: averaging LightGBM and MLP predictions

In [1]:
import os
import numpy as np # linear algebra
import pandas as pd
import polars as pls
import lightgbm as lgb
from pathlib import Path

import pytorch_lightning as pl
import torch
from torch import nn

In [None]:
# Root directory of the competition data; the evaluation partition is read
# from the "train.parquet" dataset underneath it.
data_path = "/home/yang/kaggle/jane/data"

# Locations of the two trained models that are combined in this notebook:
# a LightGBM booster saved as text and a Lightning checkpoint of the MLP
# (original note: model uploaded via the Kaggle UI).
path_to_model_lgb = "../mono_model/model_init/jane_lgbm_baseline.txt"
path_to_model_nn = "./jane_mlp_hidden_32_epoch_50.ckpt"

## Architecture of NN

In [3]:
class MLPRegressor(pl.LightningModule):
    """Fully connected regressor with three hidden layers and one output.

    Layer widths are ``input_dim -> hidden_dim -> 2 * hidden_dim ->
    hidden_dim -> 1`` with a ReLU after every hidden linear layer. Each
    train/validation/test step is scored with mean squared error; the
    per-batch loss values are buffered so that their mean can be logged when
    the corresponding epoch ends.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the first and last hidden layers; the middle
            hidden layer is twice as wide.
        lr: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, input_dim: int, hidden_dim: int = 128, lr: float = 1e-3):
        super().__init__()
        self.save_hyperparameters()
        self.lr = lr

        widened_dim = hidden_dim * 2
        stack = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, widened_dim),
            nn.ReLU(),
            nn.Linear(widened_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        # Keep the attribute name and layer positions stable: they determine
        # the parameter names stored in checkpoints.
        self.model = nn.Sequential(*stack)

        # Per-batch scalar losses, accumulated until the epoch-end hooks run.
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.test_step_outputs = []
        self.criterion = nn.MSELoss()

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw output."""
        return self.model(x)

    def _batch_loss(self, batch):
        """Return the MSE between squeezed predictions and squeezed targets."""
        features, target = batch
        prediction = self(features).squeeze()
        return self.criterion(prediction, target.squeeze())

    def _log_epoch_mean(self, metric_name, losses):
        """Log the mean of the buffered losses, then empty the buffer."""
        self.log(metric_name, torch.tensor(losses).mean())
        losses.clear()  # free memory

    def training_step(self, batch, batch_idx):
        """Compute, buffer and log the training loss; return it for backprop."""
        loss = self._batch_loss(batch)
        self.training_step_outputs.append(loss.item())
        self.log("train_loss", loss)
        return loss

    def on_train_epoch_end(self):
        """Log the mean training loss of the finished epoch."""
        self._log_epoch_mean("training_epoch_average", self.training_step_outputs)

    def validation_step(self, batch, batch_idx):
        """Compute, buffer and log the validation loss for one batch."""
        loss = self._batch_loss(batch)
        self.validation_step_outputs.append(loss.item())
        self.log("val_loss", loss, prog_bar=True)
        return {"val_loss": loss}

    def on_validation_epoch_end(self):
        """Log the mean validation loss of the finished epoch."""
        self._log_epoch_mean("avg_val_loss", self.validation_step_outputs)

    def test_step(self, batch, batch_idx):
        """Compute, buffer and log the test loss for one batch."""
        loss = self._batch_loss(batch)
        self.test_step_outputs.append(loss.item())
        self.log("test_loss", loss)
        return {"test_loss": loss}

    def on_test_epoch_end(self):
        """Log the mean test loss of the finished epoch."""
        self._log_epoch_mean("test_epoch_average", self.test_step_outputs)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [4]:
# Model input columns: the two identifier columns followed by
# feature_00 ... feature_78 (zero-padded to two digits).
train_feature_list = [
    "time_id",
    "symbol_id",
    *(f"feature_{idx:02d}" for idx in range(79)),
]

In [5]:
# Restore both trained models for inference.
model_lgb = lgb.Booster(model_file=path_to_model_lgb)
# Load from the configured checkpoint path instead of repeating the literal,
# and map the weights to CPU: the MLP inference cell feeds CPU tensors and
# converts the output with .numpy(), which requires the model to be on CPU.
model_nn = MLPRegressor.load_from_checkpoint(path_to_model_nn, map_location="cpu")

In [6]:
# Partition used for evaluation (noted by the author as a completely new
# dataset for testing).
test_raw_data_num = "7"

test_partition_file = (
    Path(data_path)
    / "train.parquet"
    / f"partition_id={test_raw_data_num}"
    / "part-0.parquet"
)
test_data = pls.read_parquet(test_partition_file)

In [7]:
# Keep only the model input columns, in the order they appear in test_data.
wanted_columns = set(train_feature_list)
input_columns = [name for name in test_data.columns if name in wanted_columns]
test_data_subset = test_data.select(input_columns)

In [8]:
# LightGBM predictions on the selected feature columns. This runs before the
# null-filling step used for the MLP, so the booster receives the columns as
# loaded from the parquet file.
y_pred_lgb = model_lgb.predict(test_data_subset)



In [9]:
# MLP predictions. Nulls are replaced by 0 before the frame is converted to a
# float32 tensor. NOTE: this rebinds test_data_subset, so re-running the
# LightGBM prediction cell afterwards would use the zero-filled frame.
test_data_subset = test_data_subset.fill_null(0)

model_nn.eval()  # put the module in evaluation mode
nn_inputs = torch.tensor(test_data_subset.to_numpy(), dtype=torch.float32)
with torch.no_grad():  # no gradients are needed for inference
    nn_outputs = model_nn(nn_inputs)
    y_pred_nn = nn_outputs.squeeze().numpy()

In [10]:
# Hybrid prediction: equal-weight average of the two models' outputs.
pred_hybrid = 0.5 * (y_pred_lgb + y_pred_nn)

In [11]:
def sample_weighted_zero_mean_r2(y_pred, y_truth, weight=None):
    """Compute the sample-weighted zero-mean R-squared score.

    The score is::

        1 - sum(w * (y_truth - y_pred) ** 2) / sum(w * y_truth ** 2)

    i.e. the weighted residual sum of squares is compared against the
    weighted sum of squares of the true values around zero (not around
    their mean).

    Args:
        y_pred: Array-like of predicted values.
        y_truth: Array-like of true values, broadcastable against ``y_pred``.
        weight: Array-like of sample weights, or ``None`` to weight every
            sample equally.

    Returns:
        The zero-mean R-squared as a NumPy scalar. If the weighted sum of
        squares of ``y_truth`` is zero the division follows NumPy semantics
        (``nan`` or ``-inf`` with a runtime warning).
    """
    # Accept lists/tuples as well as arrays; dtypes of array inputs are left
    # untouched so results for existing callers are unchanged.
    y_pred = np.asarray(y_pred)
    y_truth = np.asarray(y_truth)
    weight = np.ones_like(y_pred) if weight is None else np.asarray(weight)

    residual_ss = np.sum(weight * (y_truth - y_pred) ** 2)
    total_ss = np.sum(weight * y_truth ** 2)

    return 1 - residual_ss / total_ss

In [12]:
# Score the hybrid predictions against the "responder_6" column, weighting
# each row by the "weight" column.
target_values = test_data.select(pls.col("responder_6")).to_numpy()[:, 0]
row_weights = test_data.select(pls.col("weight")).to_numpy()[:, 0]
score = sample_weighted_zero_mean_r2(pred_hybrid, target_values, row_weights)
score

np.float64(0.005259295651295459)