# _Bank Customer Churn Prediction Model_

Importing required libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

Load dataset

In [None]:
# Read the churn CSV (path relative to the working directory) into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer="Bank Customer Churn Prediction.csv")

# Feature Description:

1. Customer ID - Unique ID given to identify a particular customer.
2. Credit Score - It is the score which determines the creditworthiness of a customer.
3. Country - The country where customer lives.
4. Gender - The Sex of customer.
5. Age - The age of customer.
6. Tenure - Number of years the customer has held an account with the bank.
7. Balance - Amount of money in the customer's bank account.
8. Products Number - Number of Products from that Bank.
9. Credit Card - Does the customer own a credit card of that Bank.
10. Active Member - Whether the customer is an active member of that Bank.
11. Estimated Salary - Total Income of the Customer.
12. Churn - Churn (Loss of existing customers) of the Bank.

In [None]:
# Preview the first five rows (5 is the default for head()).
dataset.head(n=5)

Drop the 'customer_id' column as it's unique for each customer and doesn't provide useful information

In [None]:
# Work on a copy without the identifier column; `dataset` itself is untouched.
df = dataset.drop(columns="customer_id")

# Graphical representation of the dataset
Providing information about the data distribution in the dataset

In [None]:
# One 60-bin histogram per column that DataFrame.hist can plot,
# laid out on a 20x10 inch figure.
df.hist(figsize=(20, 10), bins=60)

Check for missing values

In [None]:
# Print a header line followed by the per-column count of null entries.
print("Missing values in dataset:", df.isna().sum(), sep="\n")

Drop rows with missing values

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

 Convert categorical variables into numerical using factorization

In [None]:
# Overwrite each categorical column with the integer codes from pd.factorize
# (element [0] of its result); the accompanying uniques are discarded.
for _column_name in ('gender', 'country'):
    df[_column_name] = pd.factorize(df[_column_name])[0]

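Drop the 'country' and 'gender' columns. Note that the factorization step above wrote the encoded values back into these same columns, so this step removes the encoded features from the model inputs.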

In [None]:
# Remove the 'country' and 'gender' columns from the working frame.
# NOTE(review): the preceding cell factorized these columns in place, so this
# drop discards the encoded features themselves — confirm that is intended.
df = df.drop(columns=['country', 'gender'])

Split data into features and target variable
1. X is feature variable
2. Y is target variable

In [None]:
# Target vector: the churn label column.
# NOTE(review): the feature description lists the target as "Churn"; confirm
# the CSV column is really named 'customer_churned'.
Y = df['customer_churned']
# Feature matrix: every remaining column.
X = df.drop(columns='customer_churned')

Split data into train and test sets
split ratio = train : test
60 : 40

In [None]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.4,
)

Initialize LazyClassifier

In [None]:
# Build the LazyClassifier used to benchmark many models at once:
# verbosity 0, warnings ignored, and no custom metric.
clf = LazyClassifier(
    custom_metric=None,
    ignore_warnings=True,
    verbose=0,
)

 Fit LazyClassifier

In [None]:
# Fit the LazyClassifier on the train/test split. `clf.fit` returns two
# values, bound here to `models` and `predictions`; later cells read `models`.
try:
    models, predictions = clf.fit(X_train, X_test, Y_train, Y_test)
except Exception as e:
    print("Error occurred during fitting the LazyClassifier:", str(e))
    # Re-raise instead of swallowing: if fitting failed, `models` and
    # `predictions` are unbound, and continuing would only surface a
    # misleading NameError in the cells that follow.
    raise

# models
Displays models and their performance

In [None]:
# Display the `models` object returned by clf.fit as the cell output.
models

# Sort models DataFrame by 'Accuracy' column

In [None]:
# Reorder `models` in place, highest 'Accuracy' first.
models.sort_values('Accuracy', ascending=False, inplace=True)

Create line plot

In [None]:
# Line plot of each model's accuracy, in the current row order of `models`.
plt.figure(figsize=(10, 6))
plt.plot(
    models.index,
    models['Accuracy'],
    linestyle='-',
    color='red',
    marker='o',
)
plt.title('Accuracy vs Model')
plt.ylabel('Accuracy')
plt.xlabel('Model')
plt.grid(False)
# Model names are long, so turn the tick labels vertical.
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

# Sort models DataFrame by 'Time Taken' column

In [None]:
# Reorder `models` in place, largest 'Time Taken' first.
models.sort_values('Time Taken', ascending=False, inplace=True)

Plot Time Taken vs Model

In [None]:
# Line plot of each model's 'Time Taken', in the current row order of `models`.
plt.figure(figsize=(10, 6))
plt.plot(
    models.index,
    models['Time Taken'],
    linestyle='-',
    color='blue',
    marker='o',
)
plt.title('Time Taken vs Model')
plt.ylabel('Time Taken')
plt.xlabel('Model')
plt.grid(False)
# Model names are long, so turn the tick labels vertical.
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

# ID3

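Initialize a Decision Tree Classifier that approximates ID3 by splitting on the entropy (information gain) criterion. Note that scikit-learn's decision trees are an optimized CART implementation, so this is not a literal ID3 tree.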

In [None]:
# Depth-3 decision tree that splits on entropy, fitted on the training split.
# random_state is pinned to the same seed used for train_test_split so the
# fitted tree (and the plot drawn from it) is reproducible between runs.
dt_id3 = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)
dt_id3.fit(X_train, Y_train)

Visualize the decision tree built using ID3 algorithm

In [None]:
# Draw the fitted entropy-based tree with colour-filled nodes.
plt.figure(figsize=(20, 10))
plot_tree(
    dt_id3,
    filled=True,
    class_names=['Not Churned', 'Churned'],
    feature_names=list(X.columns),
)
plt.show()

# C4.5

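Initialize the Decision Tree Classifier for the C4.5 section. Note that scikit-learn implements an optimized version of CART rather than C4.5; C4.5 splits on gain ratio, which scikit-learn does not provide, so this tree is only a stand-in for C4.5.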

In [None]:
# Depth-3 decision tree that splits on Gini impurity, fitted on the training
# split. random_state is pinned to the same seed used for train_test_split so
# the fitted tree is reproducible between runs.
# NOTE(review): criterion='gini' with max_depth=3 is the same configuration
# as the CART cell's `dt_clf`, so the two trees are expected to match —
# confirm whether a different criterion was intended here.
dt_c45 = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)
dt_c45.fit(X_train, Y_train)

Visualize the decision tree built using C4.5 algorithm

In [None]:
# Draw the fitted `dt_c45` tree with colour-filled nodes.
plt.figure(figsize=(20, 10))
plot_tree(
    dt_c45,
    filled=True,
    class_names=['Not Churned', 'Churned'],
    feature_names=list(X.columns),
)
plt.show()

# CART

Initialize Decision Tree Classifier with the CART algorithm

In [None]:
# Depth-3 decision tree that splits on Gini impurity (not yet fitted).
# random_state is pinned to the same seed used for train_test_split so the
# tree, and the confusion matrix derived from it, are reproducible.
dt_clf = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)

Fit the model on the training data

In [None]:
# Train the Gini-based tree on the training features and labels.
dt_clf.fit(X=X_train, y=Y_train)

Plot the decision tree

In [None]:
# Draw the fitted `dt_clf` tree with colour-filled nodes.
plt.figure(figsize=(20, 10))
# Pass the column names as a plain list, matching the other plot_tree calls
# in this notebook; some scikit-learn releases reject a pandas Index here.
plot_tree(
    dt_clf,
    feature_names=list(X.columns),
    class_names=['Not Churned', 'Churned'],
    filled=True,
)
plt.show()

# Confusion Matrix

Generating confusion matrix

In [None]:
# Predict labels for the held-out rows with the `dt_clf` tree, then tabulate
# actual versus predicted labels.
Y_pred = dt_clf.predict(X_test)
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)

Plotting confusion matrix

In [None]:
# Render the confusion matrix as an annotated heatmap: integer cell labels,
# blue colour map, no colour bar.
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    cbar=False,
    cmap='Blues',
    fmt='d',
    annot=True,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual Value')
plt.xlabel('Predicted Value')
plt.show()
