In [2]:
# Connect the colab notebook with drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics

In [4]:
dataset =  pd.read_csv("/content/drive/MyDrive/Customer_Churn/Cleaned_data_24_10_22.csv")
dataset.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [5]:
df = dataset[['Geography', 'Gender','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','Exited']].copy()
df.head(5)

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,0,0,42,2,0.0,1,1,1,1
1,2,0,41,1,83807.86,1,0,1,0
2,0,0,42,8,159660.8,3,1,0,1
3,0,0,39,1,0.0,2,0,0,0
4,2,0,43,2,125510.82,1,1,1,0


In [6]:
df.shape

(15682, 9)

In [7]:
X = dataset.drop(['Exited'],axis=1)
y = dataset['Exited']

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=1, stratify=y)

In [10]:
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 
from sklearn.svm import SVC as svc 
from sklearn.metrics import make_scorer, roc_auc_score
from scipy import stats


# DEFINE MODEL AND PERFORMANCE MEASURE
mdl = svc(probability = True, random_state = 1)
auc = make_scorer(roc_auc_score)

# RANDOM SEARCH FOR 20 COMBINATIONS OF PARAMETERS
rand_list = {"C": stats.uniform(2, 10),
             "gamma": stats.uniform(0.1, 1)}
              
svm_model = RandomizedSearchCV(mdl, param_distributions = rand_list, n_iter = 20, n_jobs = 4, cv = 3, random_state = 2017, scoring = auc) 
svm_model.fit(X_train, y_train) 
svm_model.cv_results_

{'mean_fit_time': array([36.59196043, 22.58481471, 38.12794574, 23.155322  , 24.2693429 ,
        34.97766026, 29.03958392, 27.14534322, 25.04085183, 36.17367212,
        24.91599019, 26.25191879, 36.37703792, 19.69132336, 35.61779229,
        18.71656529, 33.58759586, 39.3879168 , 23.59219662, 30.42482694]),
 'std_fit_time': array([0.61505184, 0.38609939, 1.04002681, 0.47055645, 0.62465659,
        1.56178006, 1.1074068 , 0.58247422, 0.51222246, 0.58431125,
        0.21281789, 0.67774846, 0.86589273, 0.38560925, 1.45934655,
        0.68243908, 0.18918635, 1.5747248 , 1.50672633, 3.39873987]),
 'mean_score_time': array([2.6532642 , 1.63488849, 2.35073741, 1.79646794, 1.79787548,
        2.40379755, 1.89499633, 1.77956335, 1.59990581, 2.37642868,
        1.94093593, 1.77882306, 2.4781909 , 1.46860186, 2.32912223,
        1.54684933, 2.02535502, 3.57969856, 1.37530033, 0.91139531]),
 'std_score_time': array([0.032952  , 0.22314534, 0.13426968, 0.03435799, 0.15234888,
        0.05728767, 

In [11]:
from sklearn import metrics

# Predict values using the training data
svm_cv_predict_train = svm_model.predict(X_train)

# View the accuracy of the model against the training data.  Y_train are the known class values,
# and lr_cv_predict_train are the predicted class values for the same features.
print("Accuracy against training data: {0:.4f}".format(metrics.accuracy_score(y_train, svm_cv_predict_train)))
print()

Accuracy against training data: 0.8750



In [12]:
# Predict values using the test data
svm_cv_predict_test = svm_model.predict(X_test)

# View the accuracy of the model against the test data.  Y_test are the known class values,
# and lr_cv_predict_test are the predicted class values for the same features.
print("Accuracy against test data: {0:.4f}".format(metrics.accuracy_score(y_test, svm_cv_predict_test)))
print()

Accuracy against test data: 0.8411



In [13]:
print("Confusion Matrix")
print(metrics.confusion_matrix(y_test, svm_cv_predict_test))
print()

Confusion Matrix
[[3320  600]
 [ 646 3275]]



In [14]:
print("Classification Report")
print(metrics.classification_report(y_test, svm_cv_predict_test))
print()

Classification Report
              precision    recall  f1-score   support

           0       0.84      0.85      0.84      3920
           1       0.85      0.84      0.84      3921

    accuracy                           0.84      7841
   macro avg       0.84      0.84      0.84      7841
weighted avg       0.84      0.84      0.84      7841


