In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
import joblib
import os

# Load the emergency-department dataset into a DataFrame.
# NOTE(review): the file name ends in ".csv" but it is parsed with pd.read_excel.
# Presumably the file is really an Excel workbook saved under a .csv name; if it
# is plain comma-separated text, this call should be pd.read_csv instead.
# TODO confirm the actual on-disk format and make the name and reader agree.
df = pd.read_excel('simulated_mimic_ed_balanced_750.csv')

def map_flag(row):
    """Derive the triage flag for one record from its outcome indicators.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys 'icu_admit', 'mortality' and
        'hospital_admit' (e.g. a DataFrame row passed by ``df.apply(axis=1)``).

    Returns
    -------
    str
        'Red' when ``icu_admit`` or ``mortality`` equals 1, otherwise
        'Yellow' when ``hospital_admit`` equals 1, otherwise 'Green'.
    """
    # The most severe outcomes take precedence; 'mortality' is only looked up
    # when 'icu_admit' is not 1, and 'hospital_admit' only when neither is.
    is_critical = row['icu_admit'] == 1 or row['mortality'] == 1
    if is_critical:
        return 'Red'
    return 'Yellow' if row['hospital_admit'] == 1 else 'Green'

# --- Target construction ---
# Collapse the three outcome indicators into a single Red/Yellow/Green label.
df['flag'] = df.apply(map_flag, axis=1)

# The outcome columns define the label, so they must not remain as features.
df = df.drop(columns=['icu_admit', 'hospital_admit', 'mortality'])

# --- Categorical encoding ---
# Each encoder is kept in its own variable because it is persisted later and
# is needed to transform new records the same way.
le_gender = LabelEncoder()
df['gender_encoded'] = le_gender.fit_transform(df['gender'])

le_complaint = LabelEncoder()
df['complaint_encoded'] = le_complaint.fit_transform(df['chief_complaint'])

df = df.drop(columns=['gender', 'chief_complaint'])

target_encoder = LabelEncoder()
df['flag_encoded'] = target_encoder.fit_transform(df['flag'])

# --- Feature matrix / target vector ---
X = df.drop(columns=['flag', 'flag_encoded'])
y = df['flag_encoded']

# Stratified 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Missing-value imputation (after the split) ---
# Medians are computed from the training rows only and then applied to both
# partitions, so the held-out rows never influence the imputation values.
# DataFrame.fillna with a Series fills each column named in the Series index
# and leaves every other column untouched. `df` and `X` keep their original
# missing values; only the train/test partitions are imputed.
num_cols = ['heart_rate', 'resp_rate', 'spo2', 'temperature', 'sbp', 'dbp']
train_medians = X_train[num_cols].median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Multi-class gradient-boosted classifier over the three encoded flag classes.
# `use_label_encoder` is deliberately not passed: the XGBoost version this
# notebook was run with reports it as an unused parameter, so it only
# produced a warning.
xgb = XGBClassifier(
    objective='multi:softmax',
    num_class=3,
    eval_metric='mlogloss',
    random_state=42,
    n_estimators=200,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.9
)

# Fit on the training partition and predict encoded labels for the held-out rows.
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

# Report held-out performance. The report rows are labelled with the original
# flag names taken from the fitted target encoder.
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=target_encoder.classes_))

# Persist the fitted model and the three encoders so that new records can be
# encoded and scored, and predictions decoded, without retraining.
# All artifacts land in the current working directory: two are written via
# relative file names, two via paths joined onto os.getcwd().
joblib.dump(xgb, 'model.pkl')
current_dir = os.getcwd()
gender_encoder_path = os.path.join(current_dir, "gender_encoder.pkl")
joblib.dump(le_gender, gender_encoder_path)
complaint_encoder_path = os.path.join(current_dir, "complaint_encoder.pkl")
# Use the path computed above instead of repeating the file name as a literal,
# so the location is defined in exactly one place.
joblib.dump(le_complaint, complaint_encoder_path)
joblib.dump(target_encoder, 'flag_label_encoder.pkl')

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.8533
Confusion Matrix:
[[36  6  8]
 [ 2 48  0]
 [ 6  0 44]]
Classification Report:
              precision    recall  f1-score   support

       Green       0.82      0.72      0.77        50
         Red       0.89      0.96      0.92        50
      Yellow       0.85      0.88      0.86        50

    accuracy                           0.85       150
   macro avg       0.85      0.85      0.85       150
weighted avg       0.85      0.85      0.85       150



['flag_label_encoder.pkl']