In [11]:
!pip install xgboost




In [12]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import pickle

df = pd.read_csv("C:/Users/Rhitik9579/Desktop/churn-prediction/data/processed/processed_churn.csv")
df.head()


Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,total_services,is_long_term,gender_Male,Partner_Yes,Dependents_Yes,...,Contract_One year,Contract_Two year,PaperlessBilling_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_12-24,tenure_group_24-48,tenure_group_48+,tenure_group_6-12
0,0,1,29.85,29.85,0,3,0,False,True,False,...,False,False,True,False,True,False,False,False,False,False
1,0,34,56.95,1889.5,0,4,1,True,False,False,...,True,False,False,False,False,True,False,True,False,False
2,0,2,53.85,108.15,1,4,0,True,False,False,...,False,False,True,False,False,True,False,False,False,False
3,0,45,42.3,1840.75,0,5,1,True,False,False,...,True,False,False,False,False,False,False,True,False,False
4,0,2,70.7,151.65,1,2,0,False,False,False,...,False,False,True,False,True,False,False,False,False,False


In [13]:
X = df.drop("Churn", axis=1)
y = df["Churn"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

X_train.shape, X_test.shape


((5634, 36), (1409, 36))

In [14]:
numeric_cols = ["tenure", "MonthlyCharges", "TotalCharges", "total_services"]

scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])


In [15]:
log_model = LogisticRegression(max_iter=500)
log_model.fit(X_train, y_train)

log_pred = log_model.predict(X_test)
log_prob = log_model.predict_proba(X_test)[:,1]


In [16]:
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)
rf_prob = rf_model.predict_proba(X_test)[:,1]


In [17]:
xgb_model = XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=42
)

xgb_model.fit(X_train, y_train)

xgb_pred = xgb_model.predict(X_test)
xgb_prob = xgb_model.predict_proba(X_test)[:,1]


In [18]:
def evaluate_model(name, y_test, y_pred, y_prob):
    print(f"----- {name} -----")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))
    print("ROC-AUC:", roc_auc_score(y_test, y_prob))
    print("\n")


In [19]:
evaluate_model("Logistic Regression", y_test, log_pred, log_prob)
evaluate_model("Random Forest", y_test, rf_pred, rf_prob)
evaluate_model("XGBoost", y_test, xgb_pred, xgb_prob)


----- Logistic Regression -----
Accuracy: 0.8005677785663591
Precision: 0.658703071672355
Recall: 0.516042780748663
F1 Score: 0.5787106446776612
ROC-AUC: 0.845113539486941


----- Random Forest -----
Accuracy: 0.7885024840312278
Precision: 0.6292517006802721
Recall: 0.4946524064171123
F1 Score: 0.5538922155688623
ROC-AUC: 0.8245175540571961


----- XGBoost -----
Accuracy: 0.8005677785663591
Precision: 0.6485623003194888
Recall: 0.5427807486631016
F1 Score: 0.5909752547307132
ROC-AUC: 0.8409297579374306




In [24]:
model_scores = {
    "logistic": roc_auc_score(y_test, log_prob),
    "random_forest": roc_auc_score(y_test, rf_prob),
    "xgboost": roc_auc_score(y_test, xgb_prob)
}

best_model_name = max(model_scores, key=model_scores.get)
best_model_name
best_model = {
    "logistic": log_model,
    "random_forest": rf_model,
    "xgboost": xgb_model
}[best_model_name]

pickle.dump(best_model, open("C:/Users/Rhitik9579/Desktop/churn-prediction/models/best_model.pkl", "wb"))

print("Model saved successfully!")


Model saved successfully!
