<a href="https://colab.research.google.com/github/Khmariya16/Churn_Model/blob/main/bankCustChurning_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Bank Customer Churn Prediction Using Machine Learning***
**SOURCE**
https://colab.research.google.com/drive/1DXv9WKPlNdfn30H_rcMOWfWiROlQwATm#scrollTo=1l5VwPYPcAGG

# **OBJECTIVES**

1. DATA ENCODING

2. FEATURE SCALING

3. HANDLING IMBALANCED DATA

   a. RANDOM UNDERSAMPLING  
   b. RANDOM OVERSAMPLING

4. SUPPORT VECTOR MACHINE CLASSIFIER

5. GRID SEARCH FOR HYPERPARAMETER TUNING

# **INTRODUCTION**
*Customer Churn* prediction means knowing which customers are likely to leave or unsubscribe from your service. For many companies, this is an important prediction. This is because acquiring new customers often costs more than retaining existing ones. Once you’ve identified customers at risk of churn, you need to know exactly what marketing efforts you should make with each customer to maximize their likelihood of staying.
# **Benefits of Analyzing Customer Churn Prediction**
***Increase profits*** - The ultimate goal of churn analysis is to reduce churn and increase profits.

***Improve the customer experience*** - By understanding why customers churn, you can better understand their priorities, identify your weaknesses, and improve the overall customer experience.
# **How does Customer Churn Prediction Work?**
We first need to analyze the dataset and then fit a machine-learning classification algorithm to it.

# **IMPORT LIBRARY**

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
# ``plt`` conventionally refers to the pyplot interface; the bare ``matplotlib``
# package does not provide plt.plot / plt.show and friends.
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

# **IMPORT DATASET**

In [None]:
# Load the bank churn CSV directly from the YBIFoundation/Dataset GitHub repository (requires network access).
df = pd.read_csv('https://github.com/YBIFoundation/Dataset/raw/main/Bank%20Churn%20Modelling.csv')

# **DESCRIBE DATA**

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# Number of rows whose CustomerId repeats an earlier row.
df.duplicated('CustomerId').sum()

In [None]:
# Move CustomerId into the index, which keeps it out of the feature columns selected later.
df = df.set_index('CustomerId')

In [None]:
# Re-check the structure after re-indexing.
df.info()

In [None]:
df['Geography'].value_counts

In [None]:
df.replace({'Geography' : {'France' : 2, 'Germany' : 1, 'Spain' : 0}}, inplace=True)

In [None]:
df['Gender'].value_counts

In [None]:
df.replace({'Gender': {'Male':0 , 'Female': 1}}, inplace=True)

In [None]:
df['Num Of Products'].value_counts

In [None]:
df.replace({'Num Of Products': {1:0 , 2:1 , 3:1, 4:1}})

In [None]:
df['Has Credit Card'].value_counts

In [None]:
df['Is Active Member'].value_counts

In [None]:
df.loc[(df['Balance']==0), 'Churn'].value_counts

In [None]:
# Binary balance indicator derived from Balance.
# NOTE(review): despite the column name, the flag is 1 when Balance > 0 and 0
# otherwise — confirm the intended polarity.
df['Zero Balance'] = np.where(df['Balance']>0,1,0)

In [None]:
# Distribution of the new flag.
df['Zero Balance'].hist()

In [None]:
# Non-null counts of every column per (Churn, Geography) group.
df.groupby(['Churn','Geography']).count()

# **Define Target Variable (y) and Feature Variables (X)**

In [None]:
# List the available columns before choosing target and features.
df.columns

In [None]:
# Target: the Churn column.
y = df['Churn']

In [None]:
# Features: every remaining column except Surname and the target itself.
x = df.drop(['Surname','Churn'] , axis=1)

In [None]:
# Sanity check: x and y should have the same number of rows.
x.shape , y.shape

# **RANDOM UNDER SAMPLING**

In [None]:
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Fixed random_state so the sampler is reproducible across runs.
rus = RandomUnderSampler(random_state=2529)

In [None]:
# NOTE(review): resampling is applied to the full dataset, before the
# train/test split further down, so the test split of this variant is
# resampled too — confirm this is intended.
x_rus,y_rus =rus.fit_resample(x,y)

In [None]:
# Compare shapes before and after undersampling.
x.shape, y.shape , x_rus.shape, y_rus.shape

In [None]:
# Class balance of the original target.
y.value_counts()

In [None]:
# Class balance after undersampling.
y_rus.value_counts()

In [None]:
y_rus.plot(kind = 'hist')

# **RANDOM OVER SAMPLING**

In [None]:
from imblearn.over_sampling import RandomOverSampler

In [None]:
# Fixed random_state so the sampler is reproducible across runs.
ros = RandomOverSampler(random_state=2529)

In [None]:
# NOTE(review): resampling is applied to the full dataset, before the
# train/test split further down; with oversampling this may place copies of
# the same row in both train and test — confirm this is intended.
x_ros,y_ros =ros.fit_resample(x,y)

In [None]:
# Compare shapes after and before oversampling.
x_ros.shape , y_ros.shape , x.shape , y.shape

In [None]:
# Class balance of the original target.
y.value_counts()

In [None]:
# Class balance after oversampling.
y_ros.value_counts()

In [None]:
y_ros.plot(kind = 'hist')

# **Train Test Split**

In [None]:
from sklearn.model_selection import train_test_split

***split original data***

In [None]:
# train_test_split(x, y) returns (x_train, x_test, y_train, y_test) in that
# order; unpack accordingly so features and labels are not swapped.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2529)

***split under sampling***

In [None]:
# train_test_split returns (train features, test features, train labels,
# test labels); unpack in that order. random_state matches the split of the
# original data so the partition is reproducible.
x_train_rus, x_test_rus, y_train_rus, y_test_rus = train_test_split(x_rus, y_rus, random_state=2529)

***split over sampling***

In [None]:
# train_test_split returns (train features, test features, train labels,
# test labels); unpack in that order. random_state matches the split of the
# original data so the partition is reproducible.
x_train_ros, x_test_ros, y_train_ros, y_test_ros = train_test_split(x_ros, y_ros, random_state=2529)

# **STANDARDIZED FEATURES**

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# A single scaler instance; it is refit on each training split in the cells below.
sc = StandardScaler()

# **STANDARDIZED ORIGINAL DATA**

In [None]:
x_train[['CreditScore','Age','Tenure','Balance' , 'Estimated Salary']]= sc.fit_transform(x_train[['CreditScore','Age','Tenure','Balance' , 'Estimated Salary']])

In [None]:
x_test[['CreditScore','Age','Tenure','Balance','Estimated Salary']]=sc.fit_transform(x_test[['CreditScore','Age','Tenure','Balance','Estimated Salary']])

# **STANDARDIZED RANDOM UNDERSAMPLING**

In [None]:
x_train_rus[['CreditScore','Age','Tenure','Balance','Estimated Salary']]=sc.fit_transform(x_train_rus[['CreditScore','Age','Tenure','Balance','Estimated Salary']])

In [None]:
x_test_rus[['CreditScore','Age','Tenure','Balance','Estimated Salary']]=sc.fit_transform(x_test_rus[['CreditScore','Age','Tenure','Balance','Estimated Salary']])

# **STANDARDIZED OVERSAMPLING**

In [None]:
x_train_ros[['CreditScore','Age','Tenure','Balance','Estimated Salary']]=sc.fit_transform(x_train_ros[['CreditScore','Age','Tenure','Balance','Estimated Salary']])

In [None]:
x_test_ros[['CreditScore','Age','Tenure','Balance','Estimated Salary']]=sc.fit_transform(x_test_ros[['CreditScore','Age','Tenure','Balance','Estimated Salary']])

# **SUPPORT VECTOR MACHINE CLASSIFIER**

In [None]:
from sklearn.svm import SVC

In [None]:
# Baseline support vector classifier with scikit-learn's default settings.
svc = SVC()

In [None]:
# Train on the split of the original (non-resampled) data.
svc.fit(x_train,y_train)

# **DATA PREDICTION**

In [None]:
# Predict churn labels for the held-out rows.
y_pred=svc.predict(x_test)

In [None]:
# Display the predicted labels.
y_pred

# **MODEL ACCURACY**

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test,y_pred)

In [None]:
# Per-class precision / recall / F1 for the baseline model.
print (classification_report(y_test,y_pred))

# **HYPERPARAMETER TUNING**

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space for the RBF-kernel SVC.
# class_weight must be the lowercase string 'balanced'; scikit-learn rejects
# 'Balanced' as an invalid parameter value.
param_grid={'C':[0.1,1,10],
            'gamma':[1,0.1,0.01],
            'kernel':['rbf'],
            'class_weight':['balanced']}

In [None]:
# Exhaustive search over param_grid with 2-fold cross-validation.
# refit=True retrains the best combination on the whole training split, which
# is what makes grid.predict usable below.
grid=GridSearchCV(SVC(),param_grid,refit=True,verbose=2,cv=2)

In [None]:
grid.fit(x_train,y_train)

In [None]:
# Best estimator found by the search.
print(grid.best_estimator_)

In [None]:
# Predict with the refit best estimator.
grid_predictions=grid.predict(x_test)

In [None]:
confusion_matrix(y_test,grid_predictions)

In [None]:
print(classification_report(y_test,grid_predictions))

# **MODEL WITH RANDOM UNDER SAMPLING**

In [None]:
svc_rus=SVC()

In [None]:
svc.fit(x_train_rus,y_train_rus)

In [None]:
y_pred_rus=svc_rus.fit(x_test_rus)

# **MODEL ACCURACY**

In [None]:
# Confusion matrix for the undersampled model (rows: true, columns: predicted).
confusion_matrix(y_test_rus,y_pred_rus)

In [None]:
# Per-class precision / recall / F1 for the undersampled model.
print(classification_report(y_test_rus,y_pred_rus))

***HYPERPARAMETER TUNING***

In [None]:
# Search space for the RBF-kernel SVC on the undersampled data.
# class_weight must be the lowercase string 'balanced'; scikit-learn rejects
# 'Balanced' as an invalid parameter value.
param_grid={'C':[0.1,1,10],
            'gamma':[1,0.1,0.01],
            'kernel':['rbf'],
            'class_weight':['balanced']}

In [None]:
grid=GridSearchCV(SVC(),param_grid,refit=True,verbose=2,cv=2)

In [None]:
grid.fit(x_train_rus,y_train_rus)

In [None]:
print(grid_rus.best_estimator_)

In [None]:
grid_rus_predictions=grid_rus.predict(x_testr_rus)

In [None]:
confusion_matrix(y_test_rus,grid_predictions_rus)

In [None]:
print(classification_report(y_test_rus,grid_predictions_rus))

# **MODEL OVER SAMPLING**

In [None]:
svc_ros = SVC()

In [None]:
svc_ros.fit(x_tarin_ros,y_train_ros)

In [None]:
y_pred_ros=svc_ros.fit(x_test_ros)

***MODEL ACCURACY***

In [None]:
# Confusion matrix for the oversampled model (rows: true, columns: predicted).
confusion_matrix(y_test_ros,y_pred_ros)

In [None]:
# Per-class precision / recall / F1 for the oversampled model.
print(classification_report(y_test_ros,y_pred_ros))

In [None]:
# Search space for the RBF-kernel SVC on the oversampled data.
# class_weight must be the lowercase string 'balanced'; scikit-learn rejects
# 'Balanced' as an invalid parameter value.
param_grid={'C':[0.1,1,10],
            'gamma':[1,0.1,0.01],
            'kernel':['rbf'],
            'class_weight':['balanced']}

In [None]:
grid=GridSearchCV(SVC(),param_grid,refit=True,verbose=2,cv=2)

In [None]:
grid.fit(x_train_ros,y_train_ros)

In [None]:
print(grid_ros.best_estimator_)

In [None]:
grid_ros_predictions=grid_rus.predict(x_testr_ros)

In [None]:
confusion_matrix(y_test_ros,grid_predictions_ros)

In [None]:
print(classification_report(y_test_ros,grid_predictions_ros))