In [1]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
df_history = pd.read_csv(r'D:\AnacondaProj\KKKaGGLe\Tri\triagegeist\data\patient_history.csv')

df_train = pd.read_csv(r'D:\AnacondaProj\KKKaGGLe\Tri\triagegeist\data\train.csv')

df_chief = pd.read_csv(r'D:\AnacondaProj\KKKaGGLe\Tri\triagegeist\data\chief_complaints.csv')

In [3]:
df_full = df_train.merge(df_history, on="patient_id", how="left")
df_full = df_full.merge(
    df_chief[["patient_id", "chief_complaint_raw"]],
    on="patient_id",
    how="left"
)

df_full.shape

(80000, 66)

In [11]:

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

df_full_info = pd.DataFrame({
    "Column": df_full.columns,
    "Non-Null Count": df_full.notnull().sum().values,
    "Dtype": df_full.dtypes.values
})

df_full_info

Unnamed: 0,Column,Non-Null Count,Dtype
0,patient_id,80000,object
1,site_id,80000,object
2,triage_nurse_id,80000,object
3,arrival_mode,80000,object
4,arrival_hour,80000,int64
5,arrival_day,80000,object
6,arrival_month,80000,int64
7,arrival_season,80000,object
8,shift,80000,object
9,age,80000,int64


In [None]:
target = "triage_acuity"

leakage_columns = [
    "disposition",     # outcome after triage
    "ed_los_hours"     # length of stay after triage
]

continuous_features = [
    "systolic_bp",
    "diastolic_bp",
    "mean_arterial_pressure",
    "pulse_pressure",
    "heart_rate",
    "respiratory_rate",
    "temperature_c",
    "spo2",
    "weight_kg",
    "height_cm",
    "bmi",
    "shock_index"
]

count_features = [
    "age",
    "num_prior_ed_visits_12m",
    "num_prior_admissions_12m",
    "num_active_medications",
    "num_comorbidities",
    "gcs_total",
    "pain_score",
    "news2_score"
]

binary_hx_features = [
    col for col in df_full.columns if col.startswith("hx_")
]

categorical_features = [
    "arrival_mode",
    "arrival_day",
    "arrival_month",
    "arrival_season",
    "shift",
    "age_group",
    "sex",
    "language",
    "insurance_type",
    "transport_origin",
    "pain_location",
    "mental_status_triage",
    "chief_complaint_system"
]

time_features = [
    "arrival_hour"
]

text_feature = "chief_complaint_raw"

**Check target distribution by site and shift.**

In [12]:
df_full.groupby("site_id")["triage_acuity"].value_counts(normalize=True)

site_id      triage_acuity
SITE-HEL-01  3                0.355671
             4                0.295217
             2                0.167007
             5                0.140400
             1                0.041705
SITE-HEL-02  3                0.360985
             4                0.288336
             2                0.166730
             5                0.144545
             1                0.039404
SITE-OUL-01  3                0.366883
             4                0.284029
             2                0.165773
             5                0.143497
             1                0.039818
SITE-TMP-01  3                0.361797
             4                0.286236
             2                0.168389
             5                0.143874
             1                0.039703
SITE-TUR-01  3                0.362324
             4                0.284851
             2                0.171971
             5                0.140143
             1                0.04071

**Observation 1**<br>
Variation between sites is extremely small.<br>
This is statistically negligible.<br>
**Conclusion from Step 1**<br>
We do NOT drop site_id yet.<br>
But we mark it as low-priority feature.

**Does night shift have more ESI 1–2?**

In [13]:
df_full.groupby("shift")["triage_acuity"].value_counts(normalize=True)

shift      triage_acuity
afternoon  3                0.362389
           4                0.289600
           2                0.163110
           5                0.144124
           1                0.040777
evening    3                0.364080
           4                0.291050
           2                0.167327
           5                0.138283
           1                0.039259
morning    3                0.361531
           4                0.282823
           2                0.171733
           5                0.143023
           1                0.040891
night      3                0.358960
           4                0.290281
           2                0.168289
           5                0.142843
           1                0.039626
Name: proportion, dtype: float64

**Observation 2**<br>
Distribution is again extremely stable.<br>
**Provisional Conclusion**<br>
Shift alone does not encode label imbalance.<br>
Likely low predictive power.<br>
But we keep it until feature importance confirms

**Ambulance arrival_mode.**

In [14]:
df_full.groupby("arrival_mode")["triage_acuity"].value_counts(normalize=True)

arrival_mode       triage_acuity
ambulance          3                0.359802
                   4                0.289323
                   2                0.167470
                   5                0.142832
                   1                0.040573
brought_by_family  3                0.359126
                   4                0.282823
                   2                0.169264
                   5                0.146158
                   1                0.042629
helicopter         3                0.375468
                   4                0.283160
                   2                0.153015
                   5                0.141372
                   1                0.046985
police             3                0.358950
                   4                0.282416
                   2                0.170778
                   5                0.148640
                   1                0.039216
transfer           3                0.363591
                   4  

**Observation 3 — Arrival Mode**<br>
The distribution of triage_acuity is again nearly identical.

**Does age distribution differ by acuity?**

In [15]:
df_full.groupby("triage_acuity")["age"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
triage_acuity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,3222.0,48.693979,24.253424,1.0,29.0,49.0,67.0,94.0
2,13439.0,48.625121,24.214939,1.0,29.0,48.0,67.0,94.0
3,28921.0,48.558521,24.169642,1.0,29.0,48.0,67.0,94.0
4,23020.0,48.604605,24.132619,1.0,29.0,48.0,67.0,94.0
5,11398.0,48.248289,24.294433,1.0,29.0,48.0,67.0,94.0


In [16]:
df_full.groupby("age_group")["triage_acuity"].value_counts(normalize=True)

age_group    triage_acuity
elderly      3                0.358842
             4                0.289429
             2                0.169630
             5                0.141228
             1                0.040872
middle_aged  3                0.362795
             4                0.285668
             2                0.169099
             5                0.142673
             1                0.039765
pediatric    3                0.360728
             4                0.282487
             2                0.169219
             5                0.146778
             1                0.040788
young_adult  3                0.362653
             4                0.290114
             2                0.164858
             5                0.142187
             1                0.040188
Name: proportion, dtype: float64

**Observation 4 — Age and Acuity**<br>
The differences are negligible (< 0.5 years)

**Does mental_status_triage strongly separate acuity?**

In [17]:
df_full.groupby("mental_status_triage")["triage_acuity"].value_counts(normalize=True)

mental_status_triage  triage_acuity
agitated              3                0.391716
                      2                0.344548
                      4                0.189011
                      1                0.055283
                      5                0.019442
alert                 4                0.398511
                      3                0.342205
                      5                0.228837
                      2                0.029603
                      1                0.000844
confused              3                0.515641
                      2                0.278606
                      4                0.162013
                      5                0.033172
                      1                0.010568
drowsy                2                0.445336
                      3                0.313043
                      4                0.128667
                      1                0.087243
                      5                0.025711
unre

**Observation 5 — Mental Status vs Acuity**<br>
Mental status is a near-deterministic feature for high acuity.
This is clinically accurate.

In [21]:
df_full["mental_status_triage"].unique()

array(['drowsy', 'alert', 'unresponsive', 'agitated', 'confused'],
      dtype=object)

**NEWS2 by Mental Status**

In [22]:
df_full.groupby("mental_status_triage")["news2_score"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mental_status_triage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
agitated,5915.0,5.378529,4.823701,0.0,1.0,4.0,10.0,17.0
alert,46212.0,1.549186,2.217391,0.0,0.0,1.0,2.0,17.0
confused,14289.0,4.410246,4.248766,0.0,1.0,3.0,7.0,17.0
drowsy,9101.0,6.582793,4.974118,0.0,2.0,6.0,11.0,17.0
unresponsive,4483.0,10.665403,4.17436,0.0,9.0,12.0,14.0,17.0


**GCS by Mental Status**

In [23]:
df_full.groupby("mental_status_triage")["gcs_total"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mental_status_triage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
agitated,5915.0,13.478614,2.564621,3.0,12.0,15.0,15.0,15.0
alert,46212.0,14.905869,0.660869,3.0,15.0,15.0,15.0,15.0
confused,14289.0,14.065155,1.908612,3.0,15.0,15.0,15.0,15.0
drowsy,9101.0,12.934073,2.848374,3.0,11.0,15.0,15.0,15.0
unresponsive,4483.0,10.020968,3.788296,3.0,7.0,10.0,14.0,15.0


**Conclusion From This Step**
Mental_status_triage:

Strong predictive signal

Correlated with GCS + NEWS2

But not redundant

Keep as featur

**Does mental_status_triage add signal beyond NEWS2?**

In [24]:


df_full.groupby(["mental_status_triage"])["triage_acuity"].mean()

mental_status_triage
agitated        2.772781
alert           3.824894
confused        2.928616
drowsy          2.560268
unresponsive    1.701539
Name: triage_acuity, dtype: float64

In [25]:
df_full.groupby(["news2_score"])["triage_acuity"].mean().head(20)

news2_score
0     4.234965
1     3.782159
2     3.373719
3     3.141378
4     3.005169
5     2.872325
6     2.646621
7     2.311531
8     2.075875
9     1.993364
10    1.931382
11    1.897521
12    1.816759
13    1.683645
14    1.564084
15    1.388831
16    1.238095
17    1.147368
Name: triage_acuity, dtype: float64

In [26]:
df_full.head()

Unnamed: 0,patient_id,site_id,triage_nurse_id,arrival_mode,arrival_hour,arrival_day,arrival_month,arrival_season,shift,age,age_group,sex,language,insurance_type,transport_origin,pain_location,mental_status_triage,chief_complaint_system,num_prior_ed_visits_12m,num_prior_admissions_12m,num_active_medications,num_comorbidities,systolic_bp,diastolic_bp,mean_arterial_pressure,pulse_pressure,heart_rate,respiratory_rate,temperature_c,spo2,gcs_total,pain_score,weight_kg,height_cm,bmi,shock_index,news2_score,disposition,ed_los_hours,triage_acuity,hx_hypertension,hx_diabetes_type2,hx_diabetes_type1,hx_asthma,hx_copd,hx_heart_failure,hx_atrial_fibrillation,hx_ckd,hx_liver_disease,hx_malignancy,hx_obesity,hx_depression,hx_anxiety,hx_dementia,hx_epilepsy,hx_hypothyroidism,hx_hyperthyroidism,hx_hiv,hx_coagulopathy,hx_immunosuppressed,hx_pregnant,hx_substance_use_disorder,hx_coronary_artery_disease,hx_stroke_prior,hx_peripheral_vascular_disease,chief_complaint_raw
0,TG-UXRGA9UCO,SITE-TMP-01,NURSE-0033,walk-in,6,Monday,5,spring,morning,43,middle_aged,M,Finnish,public,public_space,extremity,drowsy,neurological,0,0,4,8,79.0,57.5,64.7,21.5,57.3,17.9,37.0,92.1,14,7,52.3,165.4,19.1,0.725,8,discharged,7.35,2,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,"thunderclap headache, worsening with movement"
1,TG-B19DBBS2G,SITE-HEL-01,NURSE-0001,walk-in,6,Thursday,4,spring,morning,72,elderly,F,Russian,military,home,extremity,alert,genitourinary,0,0,10,8,131.7,93.4,106.2,38.3,97.3,17.2,36.9,99.4,15,-1,73.3,164.4,27.1,0.739,1,discharged,0.7,5,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,"contraception advice, intermittent"
2,TG-GZ97W7M6V,SITE-HEL-02,NURSE-0005,walk-in,8,Saturday,4,spring,morning,82,elderly,M,English,none,nursing_home,abdomen,alert,other,0,0,13,14,94.7,83.3,87.1,11.4,75.6,14.7,37.3,100.0,15,3,77.1,183.7,22.8,0.798,2,discharged,0.63,5,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,"general health question, intermittent"
3,TG-THIB2TN9Q,SITE-HEL-02,NURSE-0026,police,7,Sunday,3,spring,morning,50,middle_aged,F,Finnish,private,outdoor,abdomen,alert,dermatological,3,1,4,3,134.2,51.8,79.3,82.4,109.0,17.6,38.2,96.0,15,7,49.6,172.6,16.6,0.812,2,discharged,1.99,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,"erythema migrans tick bite, intermittent"
4,TG-J3U3LQ2QY,SITE-HEL-02,NURSE-0044,walk-in,5,Tuesday,5,spring,night,62,middle_aged,M,Finnish,public,home,multiple,alert,dermatological,2,0,10,17,140.1,75.4,97.0,64.7,113.7,17.6,36.6,99.1,15,4,71.9,173.4,23.9,0.812,2,transferred,3.58,3,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,"cellulitis localised, intermittent"


In [27]:
df_full.groupby(["news2_score", "mental_status_triage"])["triage_acuity"].mean().unstack()

mental_status_triage,agitated,alert,confused,drowsy,unresponsive
news2_score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,3.828125,4.327416,3.82818,3.876738,3.0
1,3.486207,3.914573,3.413025,3.494382,2.983193
2,3.192429,3.495379,3.14852,3.194588,2.991304
3,3.066667,3.220921,3.036398,3.025157,2.894309
4,2.906736,3.064235,2.971818,2.922727,2.720721
5,2.795833,2.976263,2.867153,2.684932,2.473684
6,2.468571,2.918486,2.59751,2.359736,2.191919
7,2.206186,2.716931,2.271394,2.102493,2.021277
8,2.061611,2.362319,2.090713,2.013921,1.917391
9,2.0,2.12069,2.023091,1.983986,1.85623


In [28]:
df_full[df_full["news2_score"] == 0]["mental_status_triage"].value_counts(normalize=True)

mental_status_triage
alert           0.814800
confused        0.097464
drowsy          0.046451
agitated        0.038572
unresponsive    0.002712
Name: proportion, dtype: float64

What This Confirms

1️ NEWS2 strongly aligns with mental status<br>
2️ But mental status still shifts triage within same NEWS2<br>
3️ Triage is not pure physiological mapping<br>
4️ Dataset is internally coherent

In [29]:
df_full["total_hx"] = df_full[
    [col for col in df_full.columns if col.startswith("hx_")]
].sum(axis=1)

df_full.groupby(["news2_score"])["total_hx"].mean().head(20)

news2_score
0     4.985966
1     5.161035
2     5.066823
3     5.077593
4     5.048539
5     5.188561
6     5.597365
7     5.994606
8     6.285344
9     6.458908
10    6.451056
11    6.586749
12    6.363453
13    6.621392
14    6.368573
15    6.436401
16    6.532020
17    6.757895
Name: total_hx, dtype: float64

In [30]:
df_full.groupby(["news2_score", "total_hx"])["triage_acuity"].mean().head(20)

news2_score  total_hx
0            0           4.240157
             1           4.249489
             2           4.225436
             3           4.220334
             4           4.232011
             5           4.255796
             6           4.241675
             7           4.234030
             8           4.223118
             9           4.214870
             10          4.227000
             11          4.294304
             12          4.243553
             13          4.204188
             14          4.207792
             15          4.233333
             16          4.750000
             17          5.000000
1            0           3.772816
             1           3.803226
Name: triage_acuity, dtype: float64

**Now We Move To Critical Missing Piece**

In [31]:
df_full.groupby("chief_complaint_system")["triage_acuity"].mean().sort_values()

chief_complaint_system
endocrine           3.283875
gastrointestinal    3.297488
other               3.301666
ENT                 3.307923
respiratory         3.312026
trauma              3.313657
cardiovascular      3.331410
infectious          3.332061
psychiatric         3.335970
ophthalmic          3.336641
musculoskeletal     3.338043
dermatological      3.338311
genitourinary       3.354647
neurological        3.355474
Name: triage_acuity, dtype: float64

**Do certain words correlate strongly with high acuity?**

In [32]:
df_full[df_full["triage_acuity"] == 1]["chief_complaint_raw"].str.contains("severe").mean()

np.float64(0.13594040968342644)

In [33]:
df_full[df_full["triage_acuity"] == 5]["chief_complaint_raw"].str.contains("severe").mean()

np.float64(0.0)

**So:**

For ESI 1 (most urgent)
≈ 13.6% of complaints contain the word "severe".

In [34]:
df_full[df_full["triage_acuity"] == 5]["chief_complaint_raw"].str.contains("severe").mean()

np.float64(0.0)

In [35]:
df_full["chief_complaint_raw"].str.contains("severe").mean()

np.float64(0.0541)

**Check a few more critical words:**

In [36]:
keywords = ["acute", "massive", "thunderclap", "chest pain", "rigors", "cardiac arrest"]

for word in keywords:
    print(word)
    print("ESI 1:", df_full[df_full["triage_acuity"] == 1]["chief_complaint_raw"].str.contains(word).mean())
    print("ESI 5:", df_full[df_full["triage_acuity"] == 5]["chief_complaint_raw"].str.contains(word).mean())
    print()

acute
ESI 1: 0.12259466170080695
ESI 5: 0.0

massive
ESI 1: 0.06610800744878957
ESI 5: 0.0

thunderclap
ESI 1: 0.0
ESI 5: 0.0

chest pain
ESI 1: 0.0
ESI 5: 0.0

rigors
ESI 1: 0.06455617628801986
ESI 5: 0.07150377259168275

cardiac arrest
ESI 1: 0.010552451893234015
ESI 5: 0.0



**Checking Coorelation**

In [37]:
df_full[["news2_score", "triage_acuity"]].corr()

Unnamed: 0,news2_score,triage_acuity
news2_score,1.0,-0.814804
triage_acuity,-0.814804,1.0


**Correlation = -0.8148**

This is extremely strong<br>
Meaning:

~66% of variance in triage is explained by NEWS2 alone
(because 0.81² ≈ 0.66)

**Residual error from NEWS2-only model.**

In [38]:
# Simple regression from NEWS2
import numpy as np

news2_pred = np.round(
    np.interp(
        df_full["news2_score"],
        sorted(df_full["news2_score"].unique()),
        df_full.groupby("news2_score")["triage_acuity"].mean().sort_index().values
    )
)

residual = df_full["triage_acuity"] - news2_pred

residual.describe()

count    80000.000000
mean         0.059475
std          0.610485
min         -2.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          2.000000
Name: triage_acuity, dtype: float64

In [39]:
df_full.groupby("mental_status_triage")[residual.name].mean()

mental_status_triage
agitated        2.772781
alert           3.824894
confused        2.928616
drowsy          2.560268
unresponsive    1.701539
Name: triage_acuity, dtype: float64

In [40]:
df_full["news2_pred"] = df_full.groupby("news2_score")["triage_acuity"].transform("mean").round()

df_full["residual"] = df_full["triage_acuity"] - df_full["news2_pred"]

df_full.groupby("mental_status_triage")["residual"].mean()

mental_status_triage
agitated       -0.112088
alert           0.205293
confused       -0.106516
drowsy         -0.115262
unresponsive   -0.333482
Name: residual, dtype: float64

**Now We Ask Bigger Question**

Is anyone systematically undertriaged?

In [41]:
df_full.groupby("age_group")["residual"].mean()

age_group
elderly        0.050755
middle_aged    0.061709
pediatric      0.067324
young_adult    0.062607
Name: residual, dtype: float64

In [42]:
df_full.groupby("sex")["residual"].mean()

sex
F        0.061652
M        0.057665
Other    0.049325
Name: residual, dtype: float64

**The simulation did not encode obvious demographic bias.**

**Does chief complaint severity modify triage beyond NEWS2?**

In [43]:
df_full["has_severe_word"] = df_full["chief_complaint_raw"].str.contains("severe|acute|massive", regex=True)

df_full.groupby("has_severe_word")["residual"].mean()

has_severe_word
False    0.089690
True    -0.187479
Name: residual, dtype: float64

Residual analysis demonstrates that lexical severity markers (e.g., ‘severe’, ‘acute’, ‘massive’) are associated with systematic upweighting of triage acuity beyond physiological severity scoring, suggesting that clinician judgment integrates semantic complaint framing into urgency assessment

In [44]:
df_full["triage_acuity"].value_counts(normalize=True)

triage_acuity
3    0.361513
4    0.287750
2    0.167988
5    0.142475
1    0.040275
Name: proportion, dtype: float64