In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
import joblib
import os

In [12]:
# Read the emergency-triage records from the CSV under ./data into a DataFrame.
# The path is relative, so the notebook must be run from the project root.
dataset_csv = "./data/emergency_triage_dataset.csv"
data = pd.read_csv(dataset_csv)

In [13]:
# Column configuration for the model.
# Numeric columns are fed to the classifier unchanged.
numeric_features = [
    "Age",
    "Heart_Rate",
    "Respiratory_Rate",
    "Temperature",
    "Blood_Pressure",
    "Oxygen_Saturation",
]
# Categorical columns are one-hot encoded before training.
categorical_features = [
    "Symptom",
    "Pre_Existing_Conditions",
]
# Name of the label column the classifier predicts.
target = "Triage_Level"

In [14]:
# Select the model inputs (numeric columns first, then categorical) and the label.
feature_columns = [*numeric_features, *categorical_features]
X = data[feature_columns]
y = data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Preprocessing, one transformer per column group:
#   - numeric columns go through a FunctionTransformer with no function,
#     i.e. they are forwarded unchanged;
#   - categorical columns are one-hot encoded, and categories not seen during
#     fitting are ignored at prediction time instead of raising.
numeric_identity = FunctionTransformer(validate=False)
categorical_encoder = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_identity, numeric_features),
        ("cat", categorical_encoder, categorical_features),
    ]
)

# Random forest with 100 trees; the fixed seed makes training repeatable.
forest = RandomForestClassifier(n_estimators=100, random_state=42)

# Bundle preprocessing and the classifier so the exact same transformations
# are applied at both fit and predict time.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", forest),
    ]
)

# Fit the whole pipeline on the training split.
pipeline.fit(X_train, y_train)

In [17]:
# Score the fitted pipeline on the held-out split (mean accuracy).
accuracy = pipeline.score(X_test, y_test)
accuracy_message = f"Model accuracy on test set: {accuracy:.2f}"
print(accuracy_message)

# Persist the fitted pipeline (preprocessing + classifier) to the working
# directory so it can be reloaded for inference without retraining.
model_path = os.path.join("triage_model_with_encoding3.joblib")
joblib.dump(pipeline, model_path)
saved_message = f"Model saved to '{model_path}'"
print(saved_message)

Model accuracy on test set: 0.88
Model saved to 'triage_model_with_encoding3.joblib'


In [19]:
# load_and_predict.py
#
# Reload the persisted triage pipeline and classify a single example patient.
# The imports are repeated here so this cell can be lifted out as a standalone
# script.

import pandas as pd
import joblib
import os

# Load the saved model.
# This must be the same file the training cell writes
# ("triage_model_with_encoding3.joblib"); loading a differently named file
# would silently use a stale artifact, or fail on a fresh checkout.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
model_path = os.path.join("triage_model_with_encoding3.joblib")
pipeline = joblib.load(model_path)

# Example input (replace with real user data in production).
# Keys must match the feature column names the pipeline was trained on; each
# value is a one-element list so the DataFrame below has exactly one row.
new_input = {
    "Age": [28],
    "Heart_Rate": [78],
    "Respiratory_Rate": [31],
    "Temperature": [37],
    "Blood_Pressure": [127],
    "Oxygen_Saturation": [87],
    "Symptom": ["Confusion"],
    "Pre_Existing_Conditions": ["Diabetes"]
}

# Convert to DataFrame (the pipeline's ColumnTransformer selects columns by name).
input_df = pd.DataFrame(new_input)

# Predict: take the single prediction for the single input row.
prediction = pipeline.predict(input_df)[0]
print("Predicted Triage Level:", prediction)


Predicted Triage Level: Red
