In [2]:
# Connect the colab notebook with drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics

In [4]:
dataset =  pd.read_csv("/content/drive/MyDrive/Customer_Churn/Cleaned_data_24_10_22.csv")
dataset.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [5]:
df = dataset[['Geography', 'Gender','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','Exited']].copy()
df.head(5)

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,0,0,42,2,0.0,1,1,1,1
1,2,0,41,1,83807.86,1,0,1,0
2,0,0,42,8,159660.8,3,1,0,1
3,0,0,39,1,0.0,2,0,0,0
4,2,0,43,2,125510.82,1,1,1,0


In [6]:
df.shape

(15682, 9)

In [7]:
X = dataset.drop(['Exited'],axis=1)
y = dataset['Exited']

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=1)

In [10]:
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 
from sklearn.svm import SVC as svc 
from sklearn.metrics import make_scorer, roc_auc_score
from scipy import stats


# DEFINE MODEL AND PERFORMANCE MEASURE
mdl = svc(probability = True, random_state = 1)
auc = make_scorer(roc_auc_score)

# RANDOM SEARCH FOR 20 COMBINATIONS OF PARAMETERS
rand_list = {"C": stats.uniform(2, 10),
             "gamma": stats.uniform(0.1, 1)}
              
svm_model = RandomizedSearchCV(mdl, param_distributions = rand_list, n_iter = 20, n_jobs = 4, cv = 3, random_state = 2017, scoring = auc) 
svm_model.fit(X_train, y_train) 
svm_model.cv_results_

{'mean_fit_time': array([33.86560583, 20.30289642, 34.87735025, 21.02432664, 21.80916762,
        33.49926448, 25.35011435, 24.39572104, 22.50952586, 32.5433983 ,
        24.68548385, 25.37755791, 33.47780577, 18.01540565, 34.46299346,
        17.92319679, 32.87443956, 34.82060663, 18.31672955, 24.48517434]),
 'std_fit_time': array([0.73464183, 0.55589986, 1.22019285, 0.63465009, 0.5717135 ,
        1.03591069, 0.85242266, 0.59525669, 0.25710494, 0.09557481,
        0.86039938, 0.9775984 , 0.87093821, 0.28578636, 0.91328991,
        0.86765485, 0.88607364, 1.14529843, 0.32196761, 1.75551933]),
 'mean_score_time': array([2.70429349, 1.45467806, 2.40677047, 1.71003556, 1.77986455,
        2.43948452, 1.86574666, 1.69837125, 1.47640045, 2.48046915,
        1.88012815, 1.81895439, 2.47130934, 1.42002877, 2.30503488,
        1.53655299, 2.01323732, 2.55480011, 1.32464377, 0.94045909]),
 'std_score_time': array([0.13727293, 0.05680079, 0.03159248, 0.04327246, 0.03266409,
        0.01762543, 

In [11]:
from sklearn import metrics

# Predict values using the training data
svm_cv_predict_train = svm_model.predict(X_train)

# View the accuracy of the model against the training data.  Y_train are the known class values,
# and lr_cv_predict_train are the predicted class values for the same features.
print("Accuracy against training data: {0:.4f}".format(metrics.accuracy_score(y_train, svm_cv_predict_train)))
print()

Accuracy against training data: 0.8799



In [12]:
# Predict values using the test data
svm_cv_predict_test = svm_model.predict(X_test)

# View the accuracy of the model against the test data.  Y_test are the known class values,
# and lr_cv_predict_test are the predicted class values for the same features.
print("Accuracy against test data: {0:.4f}".format(metrics.accuracy_score(y_test, svm_cv_predict_test)))
print()

Accuracy against test data: 0.8346



In [13]:
print("Confusion Matrix")
print(metrics.confusion_matrix(y_test, svm_cv_predict_test))
print()

Confusion Matrix
[[3231  669]
 [ 628 3313]]



In [14]:
print("Classification Report")
print(metrics.classification_report(y_test, svm_cv_predict_test))
print()

Classification Report
              precision    recall  f1-score   support

           0       0.84      0.83      0.83      3900
           1       0.83      0.84      0.84      3941

    accuracy                           0.83      7841
   macro avg       0.83      0.83      0.83      7841
weighted avg       0.83      0.83      0.83      7841


