In [None]:
# train_model_a_xgb.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
import joblib
import os

# -------------------------------
# Load dataset
# -------------------------------
df = pd.read_csv("data/Maternal Health Risk Data Set.csv")

# -------------------------------
# Encode target
# -------------------------------
le = LabelEncoder()
df["RiskLevelEncoded"] = le.fit_transform(df["RiskLevel"])

# -------------------------------
# Add derived feature
# -------------------------------
df["PulsePressure"] = df["SystolicBP"] - df["DiastolicBP"]

# -------------------------------
# Features & target
# -------------------------------
X = df.drop(columns=["RiskLevel", "RiskLevelEncoded"])
y = df["RiskLevelEncoded"]

# -------------------------------
# Scale features
# -------------------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert scaled array back to DataFrame with column names ✅
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

# -------------------------------
# Handle imbalance with SMOTE
# -------------------------------
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_scaled_df, y)

# -------------------------------
# Train/test split
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=42, stratify=y_res
)

# -------------------------------
# Train XGBoost
# -------------------------------
xgb = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5,
    use_label_encoder=False,
    eval_metric="mlogloss",
    random_state=42
)
xgb.fit(X_train, y_train)  # ✅ X_train is a DataFrame with column names

# -------------------------------
# Evaluate
# -------------------------------
y_pred = xgb.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"XGBoost Accuracy: {acc*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# -------------------------------
# Save model artifacts
# -------------------------------
os.makedirs("artifacts", exist_ok=True)

joblib.dump(xgb, "artifacts/model_a_xgb.joblib")
joblib.dump(scaler, "artifacts/scaler_model_a.joblib")
joblib.dump(le, "artifacts/label_encoder_model_a.joblib")

print("\n✅ Saved model artifacts in 'artifacts/' folder:")
print("   - model_a_xgb.joblib")
print("   - scaler_model_a.joblib")
print("   - label_encoder_model_a.joblib")

# -------------------------------
# Verify feature names are stored ✅
# -------------------------------
try:
    print("\n✅ XGB feature names:", xgb.get_booster().feature_names)
except Exception as e:
    print("⚠️ Couldn't fetch feature names:", e)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Accuracy: 85.25%

Classification Report:
               precision    recall  f1-score   support

   high risk       0.89      0.95      0.92        81
    low risk       0.89      0.80      0.84        81
    mid risk       0.79      0.80      0.80        82

    accuracy                           0.85       244
   macro avg       0.85      0.85      0.85       244
weighted avg       0.85      0.85      0.85       244


Confusion Matrix:
 [[77  0  4]
 [ 2 65 14]
 [ 8  8 66]]

✅ Saved model artifacts in 'artifacts/' folder:
   - model_a_xgb.joblib
   - scaler_model_a.joblib
   - label_encoder_model_a.joblib

✅ XGB feature names: ['Age', 'SystolicBP', 'DiastolicBP', 'BS', 'BodyTemp', 'HeartRate', 'PulsePressure']


In [None]:
from google.colab import files
files.download("artifacts/model_a_xgb.joblib")
files.download("artifacts/scaler_model_a.joblib")
files.download("artifacts/label_encoder_model_a.joblib")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>