## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

### Loading Dataset

In [2]:
# Load the CSV and drop the RowNumber, CustomerId and Surname columns in one step.
df = pd.read_csv("Churn_Modelling.csv").drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)

### Preprocessing

In [3]:
# Replace Gender with integer codes, then one-hot encode Geography
# (drop_first=True omits the first dummy column).
df = (
    df.assign(Gender=LabelEncoder().fit_transform(df['Gender']))
      .pipe(pd.get_dummies, columns=['Geography'], drop_first=True)
)

### Splitting the dataset using train_test_split

In [None]:
from sklearn.model_selection import train_test_split

In [4]:
# 'Exited' is the target column; every other column is kept as a feature.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Feature Scaling

In [6]:
# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Model Fitting

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

## 1. Logistic Regression

In [7]:
# Train a default-configured logistic regression on the scaled training data
# and predict class labels for the test features.
log_model = LogisticRegression().fit(X_train, y_train)
log_preds = log_model.predict(X_test)

## 2. Random Forest

In [8]:
# A random forest is stochastic (bootstrap samples, random feature subsets).
# Fixing random_state makes the fitted model, and hence the evaluation
# printed for it, reproducible after a kernel restart.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

## 3. Gradient Boosting

In [9]:
# Fix random_state so the per-tree seeds and the feature permutation used at
# each split are the same on every run, making the reported metrics reproducible.
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)
gb_preds = gb_model.predict(X_test)

# Model Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [10]:
def evaluate_model(name, y_true, y_pred, y_score=None):
    """Print the confusion matrix, classification report and ROC-AUC for one model.

    Parameters
    ----------
    name
        Label printed in the report header.
    y_true
        Ground-truth class labels.
    y_pred
        Hard class predictions; used for the confusion matrix and the
        classification report.
    y_score : optional
        Continuous scores for the positive class (for example the second
        column of a classifier's ``predict_proba`` output). When given,
        ROC-AUC is computed from these scores. When omitted, ROC-AUC falls
        back to ``y_pred``, which evaluates the ROC curve at a single
        operating point only.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # ROC-AUC measures ranking quality, so prefer continuous scores when the
    # caller has them; keep the hard-label fallback for existing call sites.
    auc_input = y_pred if y_score is None else y_score

    print(f"\n{name} Evaluation:")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))
    print("ROC-AUC:", roc_auc_score(y_true, auc_input))

In [11]:
# Report test-set metrics for the logistic regression predictions.
evaluate_model(name="Logistic Regression", y_true=y_test, y_pred=log_preds)


Logistic Regression Evaluation:
[[1543   64]
 [ 314   79]]
              precision    recall  f1-score   support

           0       0.83      0.96      0.89      1607
           1       0.55      0.20      0.29       393

    accuracy                           0.81      2000
   macro avg       0.69      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000

ROC-AUC: 0.5805960247074267


In [12]:
# Report test-set metrics for the random forest predictions.
evaluate_model(name="Random Forest", y_true=y_test, y_pred=rf_preds)


Random Forest Evaluation:
[[1551   56]
 [ 206  187]]
              precision    recall  f1-score   support

           0       0.88      0.97      0.92      1607
           1       0.77      0.48      0.59       393

    accuracy                           0.87      2000
   macro avg       0.83      0.72      0.76      2000
weighted avg       0.86      0.87      0.86      2000

ROC-AUC: 0.7204897150032222


In [13]:
# Report test-set metrics for the gradient boosting predictions.
evaluate_model(name="Gradient Boosting", y_true=y_test, y_pred=gb_preds)


Gradient Boosting Evaluation:
[[1543   64]
 [ 201  192]]
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.49      0.59       393

    accuracy                           0.87      2000
   macro avg       0.82      0.72      0.76      2000
weighted avg       0.86      0.87      0.86      2000

ROC-AUC: 0.7243619280153146


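### Overall: "Random Forest" and "Gradient Boosting" were effectively tied for best accuracy (≈0.87 each, with "Gradient Boosting" slightly ahead on class-1 (Exited) recall and ROC-AUC), and "Logistic Regression" was worst (accuracy 0.81, class-1 recall 0.20)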