In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Read the pre-encoded churn table from disk.
df = pd.read_csv("../data/encoded_churn.csv")

# The "Churn" column is the target; every remaining column is a feature.
y = df["Churn"]
X = df.drop(columns="Churn")

# Force every feature column to a numeric dtype. Values that cannot be parsed
# become NaN, and all missing values are then filled with 0.
X = X.apply(pd.to_numeric, errors="coerce").fillna(0)

# Hold out 20% of the rows for testing; the split is seeded and stratified on
# the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

# Hyperparameters for the final XGBoost classifier, gathered in one place so
# they can be read (or logged) independently of the estimator itself.
xgb_params = {
    "n_estimators": 300,
    "max_depth": 5,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "random_state": 42,
}

# Build the classifier from those settings and fit it on the training split.
final_model = XGBClassifier(**xgb_params)
final_model.fit(X_train, y_train)

# -------------------------
# SAVE MODEL
# -------------------------
# Single definition of the artifact location, used for both saving and loading
# so the two can never drift apart.
MODEL_PATH = Path("../models/churn_xgboost_model.joblib")

# joblib.dump does not create missing directories; make sure the target folder
# exists so the cell also works when ../models has not been created yet.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(final_model, MODEL_PATH)

print("MODEL SAVED SUCCESSFULLY")

# -------------------------
# LOAD MODEL (TEST)
# -------------------------
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
loaded_model = joblib.load(MODEL_PATH)

# Round-trip check: the reloaded model must reproduce the in-memory model's
# predictions on the whole test split, otherwise the artifact is not usable.
assert (loaded_model.predict(X_test) == final_model.predict(X_test)).all(), (
    "Reloaded model predictions differ from the in-memory model"
)

# Show a few predictions from the reloaded model as a quick visual sanity check.
sample_pred = loaded_model.predict(X_test.iloc[:5])
print("SAMPLE PREDICTIONS:", sample_pred)

print("\nDAY 20 COMPLETED: Final model saved and verified")


MODEL SAVED SUCCESSFULLY
SAMPLE PREDICTIONS: [0 1 0 0 0]

DAY 20 COMPLETED: Final model saved and verified
