In [14]:
# ===============================
# TRIAGE AI NIGERIA
# Intelligent Triage Risk Model
# ===============================

import numpy as np
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


synthetic data generation

In [15]:
np.random.seed(42)
N = 50000

# Demographics
age = np.random.randint(0, 95, N)
sex = np.random.choice(["Male", "Female"], N)

pregnant = [
    1 if (sex[i] == "Female" and 15 <= age[i] <= 45 and np.random.rand() < 0.1) else 0
    for i in range(N)
]

# Vital Signs
systolic_bp = np.clip(np.random.normal(120, 25, N), 70, 220)
diastolic_bp = np.clip(np.random.normal(80, 15, N), 40, 140)
heart_rate = np.clip(np.random.normal(85, 20, N), 40, 180)
respiratory_rate = np.clip(np.random.normal(18, 5, N), 10, 40)
temperature = np.clip(np.random.normal(37.2, 1.2, N), 35, 41.5)
spo2 = np.clip(np.random.normal(97, 3, N), 70, 100)

# Symptoms
def binary(prob):
    return np.random.binomial(1, prob, N)

fever = binary(0.3)
chest_pain = binary(0.1)
difficulty_breathing = binary(0.12)
seizure = binary(0.02)
trauma = binary(0.07)

# Severity Logic
severity_score = (
    (spo2 < 92)*3 +
    (systolic_bp < 90)*3 +
    (temperature > 38.5)*2 +
    seizure*3 +
    difficulty_breathing*2 +
    chest_pain*1 +
    (age > 65)*1
)

triage_level = np.where(severity_score >= 6, "Emergency",
                 np.where(severity_score >= 3, "Urgent", "Routine"))

df = pd.DataFrame({
    "age": age,
    "sex": sex,
    "pregnant": pregnant,
    "systolic_bp": systolic_bp,
    "diastolic_bp": diastolic_bp,
    "heart_rate": heart_rate,
    "respiratory_rate": respiratory_rate,
    "temperature": temperature,
    "spo2": spo2,
    "fever": fever,
    "chest_pain": chest_pain,
    "difficulty_breathing": difficulty_breathing,
    "seizure": seizure,
    "trauma": trauma,
    "triage_level": triage_level
})

df.to_csv("data/synthetic_triage_data.csv", index=False)

df.head()


Unnamed: 0,age,sex,pregnant,systolic_bp,diastolic_bp,heart_rate,respiratory_rate,temperature,spo2,fever,chest_pain,difficulty_breathing,seizure,trauma,triage_level
0,51,Male,0,124.961135,92.485586,69.768278,15.356809,36.712008,94.132714,0,0,0,0,0,Routine
1,92,Female,0,158.389438,76.530537,78.018368,10.0,35.74531,93.010282,0,0,0,0,1,Routine
2,14,Male,0,101.210323,98.659025,74.246709,25.236065,39.61179,100.0,0,0,0,0,0,Routine
3,71,Male,0,70.0,59.969011,56.356364,19.363002,35.920333,100.0,0,0,1,0,0,Emergency
4,60,Male,0,95.808704,114.401771,116.745505,22.831728,37.425124,96.966508,1,0,0,0,0,Routine


In [16]:
X = df.drop("triage_level", axis=1)
y = df["triage_level"]


In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [18]:
numeric_features = [
    "age", "systolic_bp", "diastolic_bp",
    "heart_rate", "respiratory_rate",
    "temperature", "spo2"
]

categorical_features = ["sex"]
binary_features = [
    "pregnant", "fever", "chest_pain",
    "difficulty_breathing", "seizure", "trauma"
]

numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

categorical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

preprocessor = ColumnTransformer([
    ("num", numeric_pipeline, numeric_features),
    ("cat", categorical_pipeline, categorical_features),
    ("bin", "passthrough", binary_features)
])


In [19]:
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(
        n_estimators=200,
        max_depth=10,
        random_state=42
    ))
])


In [20]:
model.fit(X_train, y_train)


0,1,2
,steps,"[('preprocessor', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [21]:
y_pred = model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

   Emergency       1.00      0.96      0.98       268
     Routine       1.00      1.00      1.00      7397
      Urgent       1.00      1.00      1.00      2335

    accuracy                           1.00     10000
   macro avg       1.00      0.99      0.99     10000
weighted avg       1.00      1.00      1.00     10000

Confusion Matrix:
[[ 257    0   11]
 [   0 7397    0]
 [   0    1 2334]]


In [22]:
joblib.dump(model, "models/triage_model.pkl")
print("Model saved successfully.")


Model saved successfully.


In [23]:
sample_patient = pd.DataFrame([{
    "age": 70,
    "sex": "Male",
    "pregnant": 0,
    "systolic_bp": 85,
    "diastolic_bp": 60,
    "heart_rate": 120,
    "respiratory_rate": 30,
    "temperature": 39.5,
    "spo2": 88,
    "fever": 1,
    "chest_pain": 1,
    "difficulty_breathing": 1,
    "seizure": 0,
    "trauma": 0
}])

prediction = model.predict(sample_patient)
print("Predicted Triage Level:", prediction[0])


Predicted Triage Level: Emergency
