In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

In [4]:
df = pd.read_csv('Bank Customer Churn Prediction.csv')
df.head()

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customer_id       10000 non-null  int64  
 1   credit_score      10000 non-null  int64  
 2   country           10000 non-null  object 
 3   gender            10000 non-null  object 
 4   age               10000 non-null  int64  
 5   tenure            10000 non-null  int64  
 6   balance           10000 non-null  float64
 7   products_number   10000 non-null  int64  
 8   credit_card       10000 non-null  int64  
 9   active_member     10000 non-null  int64  
 10  estimated_salary  10000 non-null  float64
 11  churn             10000 non-null  int64  
dtypes: float64(2), int64(8), object(2)
memory usage: 937.6+ KB


In [11]:
df.isnull().sum()

customer_id         0
credit_score        0
country             0
gender              0
age                 0
tenure              0
balance             0
products_number     0
credit_card         0
active_member       0
estimated_salary    0
churn               0
dtype: int64

In [12]:
# Drop customer_id
df = df.drop(columns=['customer_id'])

In [14]:
# Encode gender
le_gender = LabelEncoder()
df['gender'] = le_gender.fit_transform(df['gender'])

In [16]:
# One-hot encode 'country'
df = pd.get_dummies(df, columns=['country'], drop_first=True)

In [17]:
df.head()

Unnamed: 0,credit_score,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,country_Germany,country_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,False,False
1,608,0,41,1,83807.86,1,0,1,112542.58,0,False,True
2,502,0,42,8,159660.8,3,1,0,113931.57,1,False,False
3,699,0,39,1,0.0,2,0,0,93826.63,0,False,False
4,850,0,43,2,125510.82,1,1,1,79084.1,0,False,True


In [18]:
# Features and target
X = df.drop('churn', axis=1)
y = df['churn']

In [21]:
# Feature Scaling
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [28]:
# Define all models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "SVM (RBF Kernel)": SVC(probability=True),
    "Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

In [29]:
# Train and evaluate each model
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_prob)
    f1 = f1_score(y_test, y_pred)

    results.append({
        "Model": name,
        "Accuracy": round(accuracy * 100, 2),
        "ROC-AUC": round(roc, 4),
        "F1-Score": round(f1, 4)
    })

# Create a DataFrame of results
results_df = pd.DataFrame(results).sort_values(by="ROC-AUC", ascending=False)
print(results_df)

                 Model  Accuracy  ROC-AUC  F1-Score
3    Gradient Boosting     86.75   0.8729    0.5917
2        Random Forest     86.60   0.8576    0.5732
4             AdaBoost     85.85   0.8550    0.5557
8              XGBoost     86.95   0.8502    0.6255
5     SVM (RBF Kernel)     85.60   0.8247    0.5102
6          Naive Bayes     83.35   0.8044    0.4585
0  Logistic Regression     81.10   0.7789    0.2948
7  K-Nearest Neighbors     82.95   0.7592    0.4613
1        Decision Tree     78.15   0.6757    0.4741


# Interpretation:
Best Pick: XGBoost
Why?
- Highest Accuracy among all models: 86.95%
- Highest F1-Score: Best balance between Precision & Recall
- Strong ROC-AUC: 0.8502 (very good class separation)
- Performs well even in slightly imbalanced datasets
- Robust and fast for deployment in real-world systems


In [31]:
# Pick the best model
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)


In [34]:
# Save model and preprocessing objects
import pickle
pickle.dump(xgb_model, open("xgb_model.pkl", "wb"))
pickle.dump(scaler, open("scaler.pkl", "wb"))
pickle.dump(le_gender, open("gender_encoder.pkl", "wb"))

print("✅ XGBoost model and encoders saved successfully.")

✅ XGBoost model and encoders saved successfully.
