In [33]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report

import torch
import torch.nn as nn

import joblib

import warnings

In [34]:
warnings.filterwarnings("ignore")

In [35]:
# Read the test split from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/test.csv')
df.head(n=5)

Unnamed: 0,MEAN_RR,MEDIAN_RR,SDRR,RMSSD,SDSD,SDRR_RMSSD,HR,pNN25,pNN50,SD1,...,HF,HF_PCT,HF_NU,TP,LF_HF,HF_LF,sampen,higuci,datasetId,condition
0,721.901897,727.26728,74.722315,12.361264,12.361069,6.044877,84.121868,4.933333,0.0,8.743513,...,66.617057,3.921868,9.760289,1698.60539,9.245599,0.10816,2.097342,1.243696,2,0
1,843.538633,844.40793,58.499429,19.29888,19.298795,3.031234,71.478642,21.0,0.2,13.650863,...,26.500086,1.123416,1.663151,2358.884694,59.126832,0.016913,2.217275,1.250056,2,2
2,958.523868,966.671125,132.84911,21.342715,21.342653,6.224565,63.874293,24.133333,1.8,15.096571,...,16.024935,0.370208,0.766416,4328.633724,129.477524,0.007723,2.217136,1.144943,2,0
3,824.838669,842.485905,117.822093,11.771814,11.771248,10.00883,74.330531,4.733333,0.533333,8.326307,...,17.58147,0.615932,3.358652,2854.449091,28.773854,0.034754,2.106863,1.142355,2,0
4,756.707933,747.94162,143.968457,13.357748,13.356388,10.777899,82.092049,5.933333,0.666667,9.447545,...,35.199054,0.662879,6.292253,5310.027472,14.892559,0.067148,1.912191,1.128098,2,1


In [36]:
# Map the CSV's original column headers onto the lowercase names that the
# column selection and `cont_cols` below rely on.
df = df.rename(
    {
        'MEAN_RR': 'mean_pr',
        'RMSSD': 'rmssd',
        'pNN25': 'pnn25',
        'pNN50': 'pnn50',
        'LF': 'lf',
        'HF': 'hf',
        'LF_HF': 'lf_hf',
    },
    axis='columns',
)

In [37]:
# Keep only the seven input columns plus the `condition` label, in this order.
df = df.loc[:, ['mean_pr', 'rmssd', 'pnn25', 'pnn50', 'lf', 'hf', 'lf_hf', 'condition']]

In [38]:
# Continuous input columns, in the order they are later stacked into the
# feature tensor.
cont_cols = [
    'mean_pr',
    'rmssd',
    'pnn25',
    'pnn50',
    'lf',
    'hf',
    'lf_hf',
]

# Label column, kept as a one-element list so it can be handed directly to
# `DataFrame.drop(columns=...)` and `df[...]`.
y_col = ['condition']

In [39]:
# Separate the inputs from the label, run them through the scaler persisted in
# 'standard_scaler.pkl', and glue the label column back on.
# NOTE(review): joblib.load unpickles the file; only load artifacts you trust.
features = df.drop(columns=y_col)
scaler = joblib.load('standard_scaler.pkl')

# scaler.transform returns a bare array, so restore column names and row index.
df_features = pd.DataFrame(
    scaler.transform(features),
    columns=features.columns,
    index=df.index,
)

# Both halves get a fresh 0..n-1 index so the column-wise concat lines up row by row.
last_column = df[y_col].reset_index(drop=True)
df_scaled = pd.concat(
    [df_features.reset_index(drop=True), last_column],
    axis="columns",
)

In [40]:
# Load the transformer persisted in 'yeo_johnson_transformer.pkl' and apply it to
# the scaled inputs; the label column is carried over unchanged.
# NOTE(review): joblib.load unpickles the file; only load artifacts you trust.
pt = joblib.load('yeo_johnson_transformer.pkl')

# Work on a copy so `df_scaled` itself is left untouched.
df_transformed = df_scaled.copy()
df_transformed[cont_cols] = pt.transform(df_scaled[cont_cols])

In [41]:
# Show the transformed frame as this cell's output.
df_transformed

Unnamed: 0,mean_pr,rmssd,pnn25,pnn50,lf,hf,lf_hf,condition
0,-1.066777,-0.591799,-0.539897,-1.140884,-0.512148,1.126456,-0.969137,0
1,0.104635,1.053384,1.340395,-0.684462,1.190115,0.041196,0.297112,2
2,0.941881,1.446617,1.522189,1.158862,1.633023,-0.513793,1.241313,0
3,-0.055697,-0.762186,-0.583383,-0.054378,-0.862966,-0.422186,-0.396923,0
4,-0.702899,-0.313905,-0.331724,0.152907,-0.802034,0.397978,-0.792364,1
...,...,...,...,...,...,...,...,...
41028,1.877049,0.910977,1.195474,-0.545488,0.364634,-1.627278,2.164678,2
41029,0.206714,1.311142,0.961341,1.812438,0.817327,1.634123,-0.925956,1
41030,-1.424536,1.512724,0.800566,1.928079,1.505597,2.040504,-0.982137,0
41031,-0.655238,-0.907358,-1.052004,-0.545488,-0.674825,0.611041,-0.835657,0


In [42]:
# Labels as a flat 1-D tensor of class indices. The dtype is pinned to int64
# because nn.CrossEntropyLoss expects class-index targets as long integers,
# rather than relying on whatever dtype pandas inferred for the column.
y = torch.tensor(df_transformed[y_col].values, dtype=torch.long).flatten()

# Inputs as a float32 matrix with one column per entry of `cont_cols`, in that order.
x = torch.tensor(df_transformed[cont_cols].values, dtype=torch.float)

# Only the last expression of a cell is displayed, so a bare `y.shape` here
# would be silently discarded; check the row-count invariant explicitly instead.
assert x.shape[0] == y.shape[0], "features and labels must have the same number of rows"

x[:5]

tensor([[-1.0668, -0.5918, -0.5399, -1.1409, -0.5121,  1.1265, -0.9691],
        [ 0.1046,  1.0534,  1.3404, -0.6845,  1.1901,  0.0412,  0.2971],
        [ 0.9419,  1.4466,  1.5222,  1.1589,  1.6330, -0.5138,  1.2413],
        [-0.0557, -0.7622, -0.5834, -0.0544, -0.8630, -0.4222, -0.3969],
        [-0.7029, -0.3139, -0.3317,  0.1529, -0.8020,  0.3980, -0.7924]])

In [43]:
class TabularModel(nn.Module):
    """Fully connected network over continuous tabular inputs.

    The input is batch-normalised, then passed through one
    Linear -> ReLU -> BatchNorm1d -> Dropout stage per entry of ``layers``,
    followed by a final Linear layer producing ``out_sz`` values per row.
    """

    def __init__(self, n_cont, out_sz, layers, p=0.5):
        """
        Parameters:
        n_cont: number of continuous input features
        out_sz: number of output units
        layers: iterable of hidden-layer widths; may be empty, in which case
            the network is just the input batch norm followed by one Linear layer
        p: dropout probability used after every hidden stage
        """
        super().__init__()
        self.bn_cont = nn.BatchNorm1d(n_cont)

        layerlist = []
        n_in = n_cont  # width feeding the next Linear layer

        for width in layers:
            layerlist.append(nn.Linear(n_in, width))
            layerlist.append(nn.ReLU(inplace=True))
            layerlist.append(nn.BatchNorm1d(width))
            layerlist.append(nn.Dropout(p))
            n_in = width

        # Size the output layer from the running width instead of layers[-1]:
        # identical when `layers` is non-empty, and it also works for an empty
        # list (no IndexError) or a non-indexable iterable.
        layerlist.append(nn.Linear(n_in, out_sz))

        self.layers = nn.Sequential(*layerlist)

    def forward(self, x_cont):
        """Return the raw network outputs for a batch of continuous features."""
        x_cont = self.bn_cont(x_cont)
        x = self.layers(x_cont)
        return x

In [44]:
# Network with one input per feature column and three output units.
# NOTE(review): the hidden widths and dropout presumably mirror the training
# configuration, since weights are loaded into this model from disk in the next cell.
model2 = TabularModel(
    n_cont=x.shape[1],
    out_sz=3,
    layers=[200, 100, 50, 20, 10],
    p=0.4,
)

# Loss used to score the raw network outputs against the integer class labels.
criterion = nn.CrossEntropyLoss()

In [45]:
# Restore the saved weights, then put the model in evaluation mode.
# map_location='cpu' lets a checkpoint written from a GPU session load on a
# CPU-only machine; `x` is built without an explicit device, so the model is
# kept on the CPU as well.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model2.load_state_dict(torch.load('ppg_regression.pt', map_location='cpu'))
model2.eval()

TabularModel(
  (bn_cont): BatchNorm1d(7, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Linear(in_features=7, out_features=200, bias=True)
    (1): ReLU(inplace=True)
    (2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=200, out_features=100, bias=True)
    (5): ReLU(inplace=True)
    (6): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.4, inplace=False)
    (8): Linear(in_features=100, out_features=50, bias=True)
    (9): ReLU(inplace=True)
    (10): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Dropout(p=0.4, inplace=False)
    (12): Linear(in_features=50, out_features=20, bias=True)
    (13): ReLU(inplace=True)
    (14): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Dropout(p=0.4, inplace=Fa

In [46]:
# Single forward pass over the full test tensor. Gradient tracking is switched
# off because nothing is being trained here.
with torch.no_grad():
    y_val = model2(x)
    loss = criterion(input=y_val, target=y)

print(f'CE Loss: {loss:.8f}')

CE Loss: 0.14719060


In [55]:
def evaluate_classification(y_true, y_pred):
    """
    Print accuracy, weighted precision/recall/F1, the confusion matrix and the
    per-class classification report for a set of predictions.

    Parameters:
    y_true: Tensor or array-like containing the true class labels
    y_pred: Tensor or array-like containing model predictions. Either class
        labels, or a 2-D array with more than one column of per-class scores,
        which is reduced to labels by taking the argmax along axis 1.

    Returns:
    None. All results are written to stdout.
    """

    def _as_numpy(values):
        # Convert a tensor or any array-like to a NumPy array.
        if torch.is_tensor(values):
            # detach() first: Tensor.numpy() raises on tensors that require
            # grad, e.g. model outputs computed outside torch.no_grad().
            return values.detach().cpu().numpy()
        # Plain lists/tuples have no .ndim/.shape, so coerce them as well.
        return np.asarray(values)

    y_true = _as_numpy(y_true)
    y_pred = _as_numpy(y_pred)

    # If y_pred holds one score per class, convert to class labels
    if y_pred.ndim > 1 and y_pred.shape[1] > 1:
        y_pred = np.argmax(y_pred, axis=1)

    # Calculate various metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("=" * 50)
    print("Classification Evaluation Metrics")
    print("=" * 50)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Weighted Precision: {precision:.4f}")
    print(f"Weighted Recall: {recall:.4f}")
    print(f"Weighted F1-Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(cm)
    print("\nComplete Classification Report:")
    print(classification_report(y_true, y_pred, zero_division=0))
    print("=" * 50)

# Report the metrics for the test-set outputs computed in the loss cell above.
evaluate_classification(y_true=y, y_pred=y_val)

Classification Evaluation Metrics
Accuracy: 0.9576
Weighted Precision: 0.9577
Weighted Recall: 0.9576
Weighted F1-Score: 0.9575

Confusion Matrix:
[[21708   301   149]
 [  553 11084   145]
 [  490   100  6503]]

Complete Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97     22158
           1       0.97      0.94      0.95     11782
           2       0.96      0.92      0.94      7093

    accuracy                           0.96     41033
   macro avg       0.96      0.95      0.95     41033
weighted avg       0.96      0.96      0.96     41033

