In [4]:
from pathlib import Path

import numpy as np
import pandas as pd

# Directory holding the competition CSVs. Change this single constant to
# relocate the data instead of editing every read_csv call.
DATA_DIR = Path(r'D:\AnacondaProj\KKKaGGLe\Tri\triagegeist\data')

df_history = pd.read_csv(DATA_DIR / "patient_history.csv")
df_train = pd.read_csv(DATA_DIR / "train.csv")
df_chief = pd.read_csv(DATA_DIR / "chief_complaints.csv")

# Enrich the training rows with the history columns and the raw chief
# complaint text, joining on patient_id. Left joins keep every training row.
df_full = df_train.merge(df_history, on="patient_id", how="left")
df_full = df_full.merge(
    df_chief[["patient_id", "chief_complaint_raw"]],
    on="patient_id",
    how="left",
)

# A left merge silently multiplies rows when patient_id is duplicated on the
# right-hand side, which would skew every metric computed below. Fail loudly.
assert len(df_full) == len(df_train), (
    f"merge changed the row count: {len(df_train)} -> {len(df_full)}; "
    "patient_id is duplicated in patient_history.csv or chief_complaints.csv"
)

df_full.shape

(80000, 66)

In [5]:


# Acuity levels; a larger number means a less urgent patient.
classes = [1, 2, 3, 4, 5]

# Signed error (pred - true) for every pair of levels:
# rows are the true level, columns are the predicted level.
levels = np.array(classes)
signed_error = levels[np.newaxis, :] - levels[:, np.newaxis]

# Undertriage (pred less urgent than truth, error > 0) costs 2 per level of
# distance, overtriage (error < 0) costs 1 per level, exact matches cost 0.
penalties = np.where(signed_error > 0, 2 * signed_error, -signed_error)

cost_matrix = pd.DataFrame(
    penalties.astype(float),
    index=classes,
    columns=classes,
)

cost_matrix

Unnamed: 0,1,2,3,4,5
1,0.0,2.0,4.0,6.0,8.0
2,1.0,0.0,2.0,4.0,6.0
3,2.0,1.0,0.0,2.0,4.0
4,3.0,2.0,1.0,0.0,2.0
5,4.0,3.0,2.0,1.0,0.0


In [6]:
# Define Cost Function
def compute_cost(y_true, y_pred, cost_matrix):
    """Return the mean misclassification cost of ``y_pred`` against ``y_true``.

    Every (true, predicted) pair is priced with the entry of ``cost_matrix``
    at row ``true`` / column ``pred``, and the prices are averaged over all
    samples.

    Parameters
    ----------
    y_true : array-like
        True class labels; each must be a row label of ``cost_matrix``.
    y_pred : array-like
        Predicted class labels, same length as ``y_true``; each must be a
        column label of ``cost_matrix``.
    cost_matrix : pandas.DataFrame
        Costs with rows indexed by true label and columns by predicted label.

    Returns
    -------
    numpy.float64
        Total cost divided by the number of samples.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    KeyError
        If any label (including NaN) is missing from ``cost_matrix``.
    """
    n_samples = len(y_true)
    # zip() used to truncate silently to the shorter input while still dividing
    # by len(y_true), yielding a plausible-looking but wrong average.
    if n_samples != len(y_pred):
        raise ValueError(
            f"y_true and y_pred differ in length: {n_samples} != {len(y_pred)}"
        )
    if n_samples == 0:
        raise ValueError("cannot compute the mean cost of zero samples")

    # Translate labels to positions once, then price all pairs with a single
    # fancy-indexing lookup instead of one .loc call per sample.
    rows = cost_matrix.index.get_indexer(np.asarray(y_true))
    cols = cost_matrix.columns.get_indexer(np.asarray(y_pred))
    # get_indexer marks labels it cannot find with -1.
    if (rows < 0).any() or (cols < 0).any():
        raise KeyError("y_true / y_pred contain labels missing from cost_matrix")

    return cost_matrix.to_numpy()[rows, cols].sum() / n_samples

In [9]:
# Baseline A: a constant predictor that assigns acuity level 3 to every row.

constant_level = 3
y_true = df_full["triage_acuity"]
y_pred_baseline = np.full_like(y_true, fill_value=constant_level)

# Mean misclassification cost of the constant prediction under cost_matrix.
baseline_cost = compute_cost(y_true, y_pred_baseline, cost_matrix)
baseline_cost


np.float64(1.069775)

In [7]:
# Create Baseline Models
#
# Baseline A (always predict 3) is computed in the preceding cell, which also
# defines y_true, so it is not recomputed here.

# Baseline B — NEWS2 Mapping
# Each row is assigned the rounded mean acuity of all rows that share its
# news2_score.
# NOTE(review): the group means are fitted on the same rows they are scored
# on, so news2_cost is an in-sample (optimistic) figure — confirm intended.
df_full["news2_pred"] = (
    df_full.groupby("news2_score")["triage_acuity"]
    .transform("mean")
    .round()
    # Store predictions as integer class labels, matching cost_matrix's labels.
    # This raises if a prediction is NaN, which could not be scored anyway.
    .astype(int)
)

news2_cost = compute_cost(y_true, df_full["news2_pred"], cost_matrix)
news2_cost

np.float64(0.5056125)

In [8]:
# Add Ordinal Metrics
from sklearn.metrics import cohen_kappa_score, classification_report

# Quadratic-weighted kappa: agreement between true and predicted levels with
# quadratic weighting of disagreements.
print("QWK:", cohen_kappa_score(y_true, df_full["news2_pred"], weights="quadratic"))

# The NEWS2 baseline can leave some acuity levels unpredicted, which makes
# their precision undefined. zero_division=0 reports those cells as 0
# explicitly instead of emitting an UndefinedMetricWarning per average.
print(classification_report(y_true, df_full["news2_pred"], zero_division=0))

QWK: 0.7874010402739823
              precision    recall  f1-score   support

           1       0.69      0.38      0.49      3222
           2       0.81      0.86      0.84     13439
           3       0.79      0.68      0.73     28921
           4       0.51      0.86      0.64     23020
           5       0.00      0.00      0.00     11398

    accuracy                           0.65     80000
   macro avg       0.56      0.56      0.54     80000
weighted avg       0.59      0.65      0.61     80000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
