In [3]:
# ---------------------------------------------
# 1. Import Libraries
# ---------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
import shap
import pickle

# ---------------------------------------------
# 2. Load Dataset
# ---------------------------------------------
df = pd.read_excel("dataset.xlsx")

# ---------------------------------------------
# 3. Encode Target Column ('Attrition')
# ---------------------------------------------
df["Attrition"] = df["Attrition"].map({"No": 0, "Yes": 1})

# ---------------------------------------------
# 4. Split Features and Target
# ---------------------------------------------
X = df.drop(columns=["Attrition"])
y = df["Attrition"]

# ---------------------------------------------
# 5. Identify Column Types
# ---------------------------------------------
num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_cols = X.select_dtypes(include=["object"]).columns.tolist()

# ---------------------------------------------
# 6. Preprocessing and Model Pipeline
# ---------------------------------------------
# Handles scaling for numeric and one-hot encoding for categorical
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ]
)

model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        (
            "classifier",
            XGBClassifier(
                use_label_encoder=False,
                eval_metric="logloss",
                random_state=42,
                n_estimators=300,
                learning_rate=0.1,
                max_depth=5,
                subsample=0.8,
                colsample_bytree=0.8,
            ),
        ),
    ]
)

# ---------------------------------------------
# 7. Train Model
# ---------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
model.fit(X_train, y_train)

# ---------------------------------------------
# 8. SHAP Verification (Optional)
# ---------------------------------------------
# This is just to ensure SHAP compatibility — not saved in the pickle
X_train_transformed = model.named_steps["preprocessor"].transform(X_train)
explainer = shap.Explainer(model.named_steps["classifier"])
shap_values = explainer(X_train_transformed[:100])  # sample 100 rows only

print("✅ SHAP verification complete (no shape mismatch).")

# ---------------------------------------------
# 9. Save Model to Pickle
# ---------------------------------------------
with open("employee_attrition_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("✅ Model training complete and saved as employee_attrition_model.pkl")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ SHAP verification complete (no shape mismatch).
✅ Model training complete and saved as employee_attrition_model.pkl
