<a href="https://www.kaggle.com/code/gaganajayprajapati/bank-customer-churn-model?scriptVersionId=182755503" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Predicting Which Customers Will Leave the Bank

### Churn indicates whether a customer has left the bank

In [None]:
import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv('https://github.com/YBI-Foundation/Dataset/raw/main/Bank%20Churn%20Modelling.csv')

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.duplicated('CustomerId').sum()

In [None]:
df = df.set_index('CustomerId')

In [None]:
df.head()

In [None]:
df.columns

In [None]:
df['Geography'].value_counts()

In [None]:
df.replace({'Geography':{'France':2, 'Germany':1, 'Spain':0}}, inplace=True)

In [None]:
df['Gender'].value_counts()

In [None]:
df.replace({'Gender':{'Male':0, 'Female':1}}, inplace = True)

In [None]:
df.drop(['Surname'], axis=1, inplace = True)

In [None]:
df.head()

In [None]:
df['Num Of Products'].value_counts()

In [None]:
# Collapse the product count into a binary flag: 1 -> 0, and 2, 3 or 4 -> 1.
# Without inplace=True (or an assignment) replace() only returns a modified
# copy, so df itself was never changed; inplace=True matches how the
# Geography and Gender columns are encoded.
# NOTE: run this cell only once -- a second run would map the new 1s to 0.
df.replace({'Num Of Products': {1: 0, 2: 1, 3: 1, 4: 1}}, inplace=True)

In [None]:
df['Has Credit Card'].value_counts()

In [None]:
df['Is Active Member'].value_counts()

In [None]:
# Chacking Number Of Customer have zero balance account and left the bank 

df.loc[(df['Balance']==0), 'Churn'].value_counts()

500 customers with a zero balance left the bank.

In [None]:
# Add a binary balance indicator and plot how it is distributed.
# NOTE: despite its name, 'Zero Balance' is 1 when Balance is greater than 0
# and 0 otherwise.
has_positive_balance = df['Balance'] > 0
df['Zero Balance'] = np.where(has_positive_balance, 1, 0)
df['Zero Balance'].hist()

# Undersampling and Oversampling

![Undersampling and Oversampling](https://miro.medium.com/v2/resize:fit:725/1*7xf9e1EaoK5n05izIFBouA.png)

# 1. Undersampling

In [None]:
df['Churn'].value_counts()

In [None]:
sns.countplot(x = 'Churn', data=df);

In [None]:
y = df['Churn']

In [None]:
# Feature matrix: every column except the target and Geography.
# 'CustomerId' has already been moved to the index and 'Surname' has already
# been dropped, so a strict drop() raises KeyError for them; errors='ignore'
# makes the cell work whether or not those labels are still columns.
X = df.drop(columns=['CustomerId', 'Churn', 'Surname', 'Geography'], errors='ignore')

In [None]:
X.shape, y.shape

In [None]:
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=2529)

In [None]:
X_rus, y_rus = rus.fit_resample(X,y)

In [None]:
X_rus.shape, y_rus.shape, X.shape, y.shape

In [None]:
y.value_counts()

In [None]:
y_rus.value_counts()

In [None]:
y_rus.plot(kind='hist')

# 2. Oversampling

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Seed the sampler so the set of duplicated rows -- and therefore every metric
# computed downstream -- is reproducible from run to run.
ros = RandomOverSampler(random_state=2529)

In [None]:
X_ros, y_ros = ros.fit_resample(X,y)

In [None]:
X_ros.shape, y_ros.shape, X.shape, y.shape, X_rus.shape, y_rus.shape

In [None]:
y.value_counts()

In [None]:
y_ros.value_counts()

In [None]:
y_ros.plot(kind='hist')

### Splitting the Original Data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# A fixed seed makes the split reproducible; stratifying on y keeps the churn
# ratio the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=2529, stratify=y
)

### Splitting the Random Under-Sampled Data

In [None]:
# Seeded, stratified split of the under-sampled data so results are
# reproducible and both partitions keep the same class ratio.
# NOTE(review): the data is resampled before it is split, so this test set is
# artificially balanced rather than representative of the original data.
X_train_rus, X_test_rus, y_train_rus, y_test_rus = train_test_split(
    X_rus, y_rus, random_state=2529, stratify=y_rus
)

### Splitting the Random Over-Sampled Data

In [None]:
# Seeded, stratified split of the over-sampled data so results are
# reproducible and both partitions keep the same class ratio.
# NOTE(review): over-sampling duplicates rows *before* this split, so copies of
# the same row can land in both train and test and inflate the test scores.
# Splitting first and resampling only the training part would avoid that.
X_train_ros, X_test_ros, y_train_ros, y_test_ros = train_test_split(
    X_ros, y_ros, random_state=2529, stratify=y_ros
)

### Now Standardize the Data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Create a scaler *instance*. Binding the bare StandardScaler class to `sc`
# left `scaler` -- the name the standardization cells call -- undefined.
scaler = StandardScaler()
sc = scaler  # original name, kept as an alias

### Standardize the Original Data

In [None]:
# Columns that are passed through the scaler.
columns_to_standardize = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'Estimated Salary',
]

In [None]:
# Fresh scaler for the original split (this also guarantees that `scaler` is
# a real StandardScaler instance inside this cell).
scaler = StandardScaler()

# Work on explicit copies: assigning columns on the frames returned by
# train_test_split can otherwise trigger pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Learn mean and standard deviation from the training rows only ...
X_train[columns_to_standardize] = scaler.fit_transform(X_train[columns_to_standardize])

# ... and apply those same statistics to the test rows. Calling fit_transform
# here would re-fit on the test set and scale it differently from the data
# the model is trained on.
X_test[columns_to_standardize] = scaler.transform(X_test[columns_to_standardize])

### Standardize the Random Under-Sampled Data

In [None]:
# Fresh scaler for the under-sampled split (this also guarantees that
# `scaler` is a real StandardScaler instance inside this cell).
scaler = StandardScaler()

# Work on explicit copies: assigning columns on the frames returned by
# train_test_split can otherwise trigger pandas' SettingWithCopyWarning.
X_train_rus = X_train_rus.copy()
X_test_rus = X_test_rus.copy()

# Learn mean and standard deviation from the training rows only ...
X_train_rus[columns_to_standardize] = scaler.fit_transform(X_train_rus[columns_to_standardize])

# ... and apply those same statistics to the test rows instead of re-fitting
# the scaler on the test set.
X_test_rus[columns_to_standardize] = scaler.transform(X_test_rus[columns_to_standardize])

### Standardize the Random Over-Sampled Data

In [None]:
# Fresh scaler for the over-sampled split (this also guarantees that `scaler`
# is a real StandardScaler instance inside this cell).
scaler = StandardScaler()

# Work on explicit copies: assigning columns on the frames returned by
# train_test_split can otherwise trigger pandas' SettingWithCopyWarning.
X_train_ros = X_train_ros.copy()
X_test_ros = X_test_ros.copy()

# Learn mean and standard deviation from the training rows only ...
X_train_ros[columns_to_standardize] = scaler.fit_transform(X_train_ros[columns_to_standardize])

# ... and apply those same statistics to the test rows instead of re-fitting
# the scaler on the test set.
X_test_ros[columns_to_standardize] = scaler.transform(X_test_ros[columns_to_standardize])

# Applying Support Vector Classifier to the Original Dataset

In [None]:
from sklearn.svm import SVC

In [None]:
svc = SVC()

In [None]:
svc.fit(X_train, y_train)

In [None]:
y_pred = svc.predict(X_test)

### Model Accuracy

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
confusion_matrix(y_test, y_pred)

In [None]:
print(classification_report(y_test, y_pred))

### Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
param_grid = {  'C':[0.1, 1, 10],
                'gamma':[1, 0.1, 0.01],
                'kernel':['rbf'],
                'class_weight':['balanced']}

In [None]:
grid = GridSearchCV(SVC(), param_grid,
        refit=True, verbose=2, cv=2)

In [None]:
grid.fit(X_train, y_train)

In [None]:
print(grid.best_estimator_)

In [None]:
grid_prediction = grid.predict(X_test)

In [None]:
confusion_matrix(y_test, grid_prediction)

In [None]:
print(classification_report(y_test, grid_prediction))

# Applying Support Vector Classifier to the RUS Dataset

In [None]:
svc.fit(X_train_rus, y_train_rus)

In [None]:
y_pred_rus = svc.predict(X_test_rus)

In [None]:
confusion_matrix(y_test_rus, y_pred_rus)

In [None]:
print(classification_report(y_test_rus, y_pred_rus))

In [None]:
grid.fit(X_train_rus, y_train_rus)

In [None]:
print(grid.best_estimator_)

In [None]:
grid_prediction = grid.predict(X_test_rus)

In [None]:
confusion_matrix(y_test_rus, grid_prediction)

In [None]:
print(classification_report(y_test_rus, grid_prediction))

# Applying Support Vector Classifier to the ROS Dataset

In [None]:
svc.fit(X_train_ros, y_train_ros)

In [None]:
y_pred_ros = svc.predict(X_test_ros)

In [None]:
confusion_matrix(y_test_ros, y_pred_ros)

In [None]:
print(classification_report(y_test_ros, y_pred_ros))

In [None]:
grid.fit(X_train_ros, y_train_ros)

In [None]:
print(grid.best_estimator_)

In [None]:
grid_prediction = grid.predict(X_test_ros)

In [None]:
confusion_matrix(y_test_ros, grid_prediction)

In [None]:
print(classification_report(y_test_ros, grid_prediction))