In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from numpy import arange
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import make_scorer, accuracy_score, f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from fpdf import FPDF 
from sklearn.pipeline import Pipeline
import pickle
import datetime
from datetime import datetime as dt
from datetime import timedelta
from keras.models import model_from_json

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

#from imblearn.over_sampling import SMOTE

In [29]:
%matplotlib inline
plt.style.use('fivethirtyeight')
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [30]:
# Load the churn training frame from its pickle file.
# NOTE(security): unpickling can execute arbitrary code — only load files from a trusted source.
df_churn = pd.read_pickle('./data/training/churn.pickle')

In [31]:
# Preview the first rows to sanity-check the loaded columns and values.
df_churn.head()

Unnamed: 0,TENURE,MONTHLY_CHARGES,CHURN_STATUS,GENDER_F,GENDER_M
0,1,29.85,0,1,0
1,34,56.95,0,0,1
2,2,53.85,1,0,1
3,45,42.3,0,0,1
4,2,70.7,1,1,0


In [32]:
# Work on a copy so the originally loaded frame (df_churn) stays untouched.
df = df_churn.copy()

In [33]:
# (rows, columns) of the working frame.
df.shape

(7043, 5)

In [34]:
# Column dtypes and non-null counts — used to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7043 entries, 0 to 7042
Data columns (total 5 columns):
TENURE             7043 non-null int64
MONTHLY_CHARGES    7043 non-null float64
CHURN_STATUS       7043 non-null int64
GENDER_F           7043 non-null uint8
GENDER_M           7043 non-null uint8
dtypes: float64(1), int64(2), uint8(2)
memory usage: 233.8 KB


In [35]:
# List the column names; CHURN_STATUS is the target used below.
df.columns

Index(['TENURE', 'MONTHLY_CHARGES', 'CHURN_STATUS', 'GENDER_F', 'GENDER_M'], dtype='object')

In [36]:
# Summary statistics per column (the mean of CHURN_STATUS gives the positive-class rate).
df.describe()

Unnamed: 0,TENURE,MONTHLY_CHARGES,CHURN_STATUS,GENDER_F,GENDER_M
count,7043.0,7043.0,7043.0,7043.0,7043.0
mean,32.371149,64.761692,0.26537,0.495244,0.504756
std,24.559481,30.090047,0.441561,0.500013,0.500013
min,0.0,18.25,0.0,0.0,0.0
25%,9.0,35.5,0.0,0.0,0.0
50%,29.0,70.35,0.0,0.0,1.0
75%,55.0,89.85,1.0,1.0,1.0
max,72.0,118.75,1.0,1.0,1.0


In [37]:
# Separate the predictors (every column except the label) from the label vector.
X = df.drop(columns=['CHURN_STATUS'])
y = df['CHURN_STATUS'].to_numpy()

In [38]:
# 60 / 20 / 20 split: hold out 20% for test, then carve 25% of the remaining
# 80% off as the validation set. Both splits use the same fixed seed.
# NOTE(review): the splits are not stratified on y — consider stratify= if the
# class ratio must be preserved across the three sets.
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.25, random_state=0
)

In [43]:
# Grid-search an RBF-kernel SVC on the training split (5-fold CV, ROC-AUC),
# persist the fitted search object, then score it on the validation split.
tstart = dt.now()

# NOTE(review): the SVC receives unscaled features even though StandardScaler is
# imported at the top of the notebook. RBF SVMs are sensitive to feature scale;
# consider adding a scaler step here (and re-tuning gamma). Left unchanged so the
# model itself is the same as before.
pipe = Pipeline(steps=[
    ('svc', SVC())
])

param_grid = [{'svc__kernel': ['rbf'], 'svc__gamma': [1e-3, 1e-4],
               'svc__C': [1, 10, 100, 500]},
              ]
model = GridSearchCV(estimator=pipe,
                     param_grid=param_grid,
                     scoring='roc_auc',
                     n_jobs=-1,
                     pre_dispatch='2*n_jobs',
                     cv=5,
                     verbose=1,
                     return_train_score=False)

model.fit(X_train, y_train)

# Persist the whole fitted GridSearchCV object (includes the refit best estimator).
pkl_filename = "./models/svc.pkl"
with open(pkl_filename, 'wb') as fh:
    pickle.dump(model, fh)

# Kept for any later cell that expects the raw array.
X_val_np = X_val.to_numpy()
# Predict on the DataFrame itself: the model was fitted on a DataFrame, and passing
# a bare array makes newer scikit-learn warn about missing feature names.
predicted = model.predict(X_val)
#proba = model.predict_proba(X_val_np)

accuracy = accuracy_score(y_val, predicted)

# labels=[0, 1] pins the matrix to 2x2 even if one class is absent from the
# validation predictions; ravel() yields the cells in (TN, FP, FN, TP) order.
CM = confusion_matrix(y_val, predicted, labels=[0, 1])
TN, FP, FN, TP = CM.ravel()

# Guard every ratio: an empty denominator (e.g. no positive predictions) would
# otherwise produce NaN plus a RuntimeWarning. 0.0 is reported in that case.
FPR = FP / (FP + TN) if (FP + TN) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
f1 = 2 * (recall * precision) / (recall + precision) if (recall + precision) else 0.0

# Runtime covers the search, the pickling, and the validation scoring above.
tend = dt.now()
delta = tend - tstart


print("Best Params-")
print(model.best_params_)

print("\n")
print("Algorithm: ", 'SVC')
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1-score: ", f1)
print("FPR: ", FPR)
print("Runtime: ", delta)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Params-
{'svc__C': 1, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}


Algorithm:  SVC
Accuracy:  0.7750177430801988
Recall:  0.44931506849315067
F1-support:  0.5085271317829457
FPR:  0.1111111111111111
Runtime:  0:12:11.920172
Best Params-
{'svc__C': 1, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}
