In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import joblib

In [6]:
# Read the triage dataset CSV (path is relative to the notebook's working directory)
dataset_path = "./data/emergency_triage_dataset.csv"
data = pd.read_csv(dataset_path)

In [7]:
# Columns fed to the model, grouped by how the preprocessing step treats them.
# Both groups stay plain lists because they are concatenated with `+` when
# the feature frame is selected.
numeric_features = [
    "Age",
    "Heart_Rate",
    "Respiratory_Rate",
    "Temperature",
    "Blood_Pressure",
    "Oxygen_Saturation",
]
categorical_features = [
    "Symptom",
    "Pre_Existing_Conditions",
]

# Name of the label column the classifier learns to predict
target = "Triage_Level"

In [8]:
# Select the model inputs (numeric columns first, then categorical) and the labels
feature_columns = numeric_features + categorical_features
X = data[feature_columns]
y = data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Preprocessing: numeric columns pass through untouched, categorical columns are
# one-hot encoded. handle_unknown="ignore" keeps transform from raising on
# categories that were not present when the encoder was fitted.
categorical_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[
        ("num", "passthrough", numeric_features),
        ("cat", categorical_encoder, categorical_features),
    ]
)

# Chain preprocessing and a seeded 100-tree random forest into one estimator,
# so the same encoding is applied at fit time and at predict time
forest = RandomForestClassifier(n_estimators=100, random_state=42)
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", forest),
    ]
)

# Fit on the training split only; the test split is kept for evaluation
pipeline.fit(X_train, y_train)

In [10]:
# Score the fitted pipeline on the held-out split and report it to two decimals
accuracy = pipeline.score(X_test, y_test)
print(f"Model accuracy on test set: {accuracy:.2f}")

# Persist the whole pipeline so preprocessing and classifier are stored together
model_path = "triage_model_with_encoding.joblib"
joblib.dump(pipeline, model_path)
print("Model saved as 'triage_model_with_encoding.joblib'")

Model accuracy on test set: 0.88
Model saved as 'triage_model_with_encoding.joblib'
