In [6]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import pickle

# Load the dataset
df = pd.read_csv("dataset/health_risk_dataset.csv")  # Replace with your actual dataset path
print(df.head())

# Target column
target_column = "RiskLevel"

# Split into features and target
features = df.drop(columns=[target_column])
target = df[target_column]

# Encode the target labels
le = LabelEncoder()
target_encoded = le.fit_transform(target)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target_encoded,
    test_size=0.2,
    stratify=target_encoded,
    random_state=42
)

# Train the XGBoost model
model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)

y_pred_labels = le.inverse_transform(y_pred)
y_test_labels = le.inverse_transform(y_test)

cm = confusion_matrix(y_test_labels, y_pred_labels)
print("\nConfusion Matrix:\n", cm)

cr = classification_report(y_test_labels, y_pred_labels)
print("\nClassification Report:\n", cr)

# Save the Model + Label Encoder together
os.makedirs("models", exist_ok=True)

maternal_risk_bundle = {
    "model": model,
    "label_encoder": le
}

with open("models/maternal_risk_model_bundle.pkl", "wb") as f:
    pickle.dump(maternal_risk_bundle, f)

print("\n✅ Model and Label Encoder saved as 'model/maternal_risk_model_bundle.pkl'.")


   Age  SystolicBP  DiastolicBP    BS  BodyTemp  HeartRate  RiskLevel
0   25         130           80  15.0      98.0         86  high risk
1   35         140           90  13.0      98.0         70  high risk
2   29          90           70   8.0     100.0         80  high risk
3   30         140           85   7.0      98.0         70  high risk
4   35         120           60   6.1      98.0         76   low risk


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Confusion Matrix:
 [[50  0  5]
 [ 2 65 14]
 [ 2  6 59]]

Classification Report:
               precision    recall  f1-score   support

   high risk       0.93      0.91      0.92        55
    low risk       0.92      0.80      0.86        81
    mid risk       0.76      0.88      0.81        67

    accuracy                           0.86       203
   macro avg       0.87      0.86      0.86       203
weighted avg       0.87      0.86      0.86       203


✅ Model and Label Encoder saved as 'model/maternal_risk_model_bundle.pkl'.
