In [None]:
# Connect the colab notebook with drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics

In [None]:
dataset =  pd.read_csv("/content/drive/MyDrive/Customer_Churn/Cleaned_data_24_10_22.csv")
dataset.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [None]:
df = dataset[['Geography', 'Gender','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','Exited']].copy()
df.head(5)

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,0,0,42,2,0.0,1,1,1,1
1,2,0,41,1,83807.86,1,0,1,0
2,0,0,42,8,159660.8,3,1,0,1
3,0,0,39,1,0.0,2,0,0,0
4,2,0,43,2,125510.82,1,1,1,0


In [None]:
df.shape

(15682, 9)

In [None]:
X = dataset.drop(['Exited'],axis=1)
y = dataset['Exited']

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 
from sklearn.svm import SVC as svc 
from sklearn.metrics import make_scorer, roc_auc_score
from scipy import stats


# DEFINE MODEL AND PERFORMANCE MEASURE
mdl = svc(probability = True, random_state = 1)
auc = make_scorer(roc_auc_score)

# RANDOM SEARCH FOR 20 COMBINATIONS OF PARAMETERS
rand_list = {"C": stats.uniform(2, 10),
             "gamma": stats.uniform(0.1, 1)}
              
svm_model = RandomizedSearchCV(mdl, param_distributions = rand_list, n_iter = 20, n_jobs = 4, cv = 3, random_state = 2017, scoring = auc) 
svm_model.fit(X_train, y_train) 
svm_model.cv_results_

{'mean_fit_time': array([104.70864248,  57.14202444, 102.05607049,  58.10350649,
         60.09546844, 100.92626921,  65.8136971 ,  66.93136287,
         70.57025735, 101.79966434,  62.15354633,  64.21361065,
        103.85781852,  50.99034365, 100.8116521 ,  47.66683666,
         92.01516438, 100.44147992,  51.38369934,  51.62232653]),
 'std_fit_time': array([ 3.01228555,  0.83816642,  5.87537761,  1.55745397,  0.45520978,
         3.69146364,  1.32233408,  3.73242151,  0.15882506,  2.93265964,
         2.15766985,  0.5566445 ,  2.13123822,  0.75562736,  1.20326289,
         0.70424668,  0.47456156,  2.06143183,  0.42959106, 11.26846271]),
 'mean_score_time': array([6.15346686, 3.51251038, 5.26760713, 4.11153523, 4.05087296,
        6.63172372, 4.20812217, 4.35385601, 3.51983356, 5.79394817,
        4.28032494, 4.2193017 , 5.59971992, 3.93672363, 5.12178787,
        3.54560463, 4.7813015 , 5.95039837, 3.31668488, 2.14598497]),
 'std_score_time': array([0.10170917, 0.07988338, 0.198163

In [None]:
from sklearn import metrics

# Predict values using the training data
svm_cv_predict_train = svm_model.predict(X_train)

# View the accuracy of the model against the training data.  Y_train are the known class values,
# and lr_cv_predict_train are the predicted class values for the same features.
print("Accuracy against training data: {0:.4f}".format(metrics.accuracy_score(y_train, svm_cv_predict_train)))
print()

Accuracy against training data: 0.8687



In [None]:
# Predict values using the test data
svm_cv_predict_test = svm_model.predict(X_test)

# View the accuracy of the model against the test data.  Y_test are the known class values,
# and lr_cv_predict_test are the predicted class values for the same features.
print("Accuracy against test data: {0:.4f}".format(metrics.accuracy_score(y_test, svm_cv_predict_test)))
print()

Accuracy against test data: 0.8381



In [None]:
print("Confusion Matrix")
print(metrics.confusion_matrix(y_test, svm_cv_predict_test))
print()

Confusion Matrix
[[1303  258]
 [ 250 1326]]



In [None]:
print("Classification Report")
print(metrics.classification_report(y_test, svm_cv_predict_test))
print()

Classification Report
              precision    recall  f1-score   support

           0       0.84      0.83      0.84      1561
           1       0.84      0.84      0.84      1576

    accuracy                           0.84      3137
   macro avg       0.84      0.84      0.84      3137
weighted avg       0.84      0.84      0.84      3137


