In [1]:
# =====================================
# TRIAGE AI NIGERIA
# Patient Deterioration Prediction Model
# =====================================

import numpy as np
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


In [2]:
# Synthetic cohort generation.
# Every column below is simulated; no real patient data is involved.
# NOTE: with the fixed seed, the generated values depend on the ORDER of the
# np.random calls, so keep the call sequence unchanged to reproduce the data.
np.random.seed(42)
N = 40000  # number of synthetic patients

# Demographics
age = np.random.randint(0, 95, N)  # upper bound is exclusive -> ages 0..94
sex = np.random.choice(["Male", "Female"], N)

# Admission Info
ward_type = np.random.choice(["General", "Surgical", "Medical"], N)

# Current Vital Signs: normal draws clipped to the [min, max] range given
systolic_bp = np.clip(np.random.normal(120, 25, N), 70, 220)
heart_rate = np.clip(np.random.normal(85, 20, N), 40, 180)
respiratory_rate = np.clip(np.random.normal(18, 5, N), 10, 40)
temperature = np.clip(np.random.normal(37.2, 1.2, N), 35, 41.5)
spo2 = np.clip(np.random.normal(97, 3, N), 70, 100)

# Lab Values (common Nigerian hospital labs)
wbc = np.clip(np.random.normal(7, 3, N), 1, 25)  # White Blood Cells
hemoglobin = np.clip(np.random.normal(12, 3, N), 4, 20)
creatinine = np.clip(np.random.normal(1.0, 0.5, N), 0.3, 6)
blood_glucose = np.clip(np.random.normal(110, 40, N), 40, 400)

# Comorbidities: hypertension/diabetes are only possible above an age cut-off
# (Bernoulli draw with p=0.3 / p=0.2); asthma is age-independent (p=0.08).
hypertension = np.where(age > 40, np.random.binomial(1, 0.3, N), 0)
diabetes = np.where(age > 45, np.random.binomial(1, 0.2, N), 0)
asthma = np.random.binomial(1, 0.08, N)

# Severity Logic (simulate deterioration risk)
# Each boolean flag contributes its integer weight; the label is therefore a
# deterministic function of the features (no label noise is added).
severity_score = (
    (spo2 < 92)*3 +
    (systolic_bp < 90)*3 +
    (temperature > 38.5)*2 +
    (wbc > 15)*2 +
    (creatinine > 2)*2 +
    (blood_glucose > 250)*1 +
    (age > 70)*1 +
    hypertension*1 +
    diabetes*1
)

# Binary target: 1 when the weighted score reaches 6 or more, else 0.
deterioration = np.where(severity_score >= 6, 1, 0)

df = pd.DataFrame({
    "age": age,
    "sex": sex,
    "ward_type": ward_type,
    "systolic_bp": systolic_bp,
    "heart_rate": heart_rate,
    "respiratory_rate": respiratory_rate,
    "temperature": temperature,
    "spo2": spo2,
    "wbc": wbc,
    "hemoglobin": hemoglobin,
    "creatinine": creatinine,
    "blood_glucose": blood_glucose,
    "hypertension": hypertension,
    "diabetes": diabetes,
    "asthma": asthma,
    "deterioration": deterioration
})

# Create the output directory first: to_csv raises OSError when the parent
# directory is missing, which breaks Restart & Run All on a fresh checkout.
output_path = "data/synthetic_deterioration_data.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)

df.head()


Unnamed: 0,age,sex,ward_type,systolic_bp,heart_rate,respiratory_rate,temperature,spo2,wbc,hemoglobin,creatinine,blood_glucose,hypertension,diabetes,asthma,deterioration
0,51,Male,Medical,151.884602,67.988376,17.807113,35.391542,96.524602,3.36558,8.66987,0.825679,89.439472,1,0,0,0
1,92,Female,Medical,109.659151,88.880461,16.351127,38.849698,97.979628,6.015655,14.165822,1.547345,126.341041,0,0,1,0
2,14,Female,General,109.653047,125.824483,19.103442,38.338181,93.318505,8.671012,12.379543,0.873322,146.013227,0,0,0,0
3,71,Male,Medical,101.608642,65.609246,10.0,38.583824,97.783682,4.968828,8.684583,0.445512,145.684221,0,1,0,0
4,60,Female,General,148.867405,72.112705,12.447164,39.505924,99.062643,11.880904,12.320357,1.411321,114.015659,1,0,0,0


In [3]:
# Target vector: the binary deterioration label.
y = df["deterioration"]
# Feature matrix: every remaining column of the synthetic table.
X = df.drop(columns=["deterioration"])


In [4]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions of the target the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Column groups, by the preprocessing each one receives.
binary_features = ["hypertension", "diabetes", "asthma"]

categorical_features = ["sex", "ward_type"]

numeric_features = [
    "age",
    "systolic_bp",
    "heart_rate",
    "respiratory_rate",
    "temperature",
    "spo2",
    "wbc",
    "hemoglobin",
    "creatinine",
    "blood_glucose",
]

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Numeric columns: fill gaps with the median, then standardise.
numeric_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Route each column group to its transformer; the 0/1 comorbidity flags are
# passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipeline, numeric_features),
        ("cat", categorical_pipeline, categorical_features),
        ("bin", "passthrough", binary_features),
    ]
)


In [6]:
# End-to-end estimator: preprocessing followed by a random forest, so the
# fitted object accepts raw feature frames directly.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        (
            "classifier",
            RandomForestClassifier(
                random_state=42,
                class_weight="balanced",
                max_depth=12,
                n_estimators=300,
            ),
        ),
    ]
)


In [7]:
# Fit the preprocessing steps and the classifier on the training partition only.
model.fit(X=X_train, y=y_train)


0,1,2
,steps,"[('preprocessor', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,12
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [8]:
# Hard class predictions and positive-class probabilities on the held-out set.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Per-class precision / recall / F1.
report = classification_report(y_test, y_pred)
print("Classification Report:", report, sep="\n")

# Rows are true classes, columns are predicted classes.
matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", matrix, sep="\n")

# ROC-AUC is computed from the probabilities, not the hard labels.
auc = roc_auc_score(y_test, y_prob)
print("ROC-AUC Score:", auc)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7861
           1       0.96      0.78      0.86       139

    accuracy                           1.00      8000
   macro avg       0.98      0.89      0.93      8000
weighted avg       1.00      1.00      1.00      8000

Confusion Matrix:
[[7857    4]
 [  31  108]]
ROC-AUC Score: 0.9982666455564718


In [9]:
# Persist the fitted pipeline (preprocessing + classifier) to disk.
# Create the target directory first: joblib.dump fails with FileNotFoundError
# when "models/" does not exist, e.g. on a fresh checkout.
model_path = "models/deterioration_model.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
print("Deterioration model saved successfully.")


Deterioration model saved successfully.


In [10]:
# Smoke test: score one hand-written patient record that trips most of the
# severity flags used to build the synthetic label.
sample_record = {
    "age": 75,
    "sex": "Male",
    "ward_type": "Medical",
    "systolic_bp": 85,
    "heart_rate": 120,
    "respiratory_rate": 28,
    "temperature": 39.0,
    "spo2": 89,
    "wbc": 18,
    "hemoglobin": 10,
    "creatinine": 2.5,
    "blood_glucose": 300,
    "hypertension": 1,
    "diabetes": 1,
    "asthma": 0,
}
# One-row frame with the same column names as the training features.
sample_patient = pd.DataFrame([sample_record])

# Predicted class label, and the probability assigned to class 1.
predicted_labels = model.predict(sample_patient)
class_probabilities = model.predict_proba(sample_patient)
prediction = predicted_labels[0]
probability = class_probabilities[0][1]

print("Deterioration Risk:", prediction)
print("Risk Probability:", round(probability, 3))


Deterioration Risk: 1
Risk Probability: 0.547
