# Build SVMs with different kernels

# Dataset

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('churn_ibm.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


Pre processing

In [6]:
y = df['Churn']
X = df.drop(['Churn','customerID'],axis=1)

for column in X.columns:
    if X[column].dtype == np.object:
        X = pd.concat([X,pd.get_dummies(X[column],prefix=column, drop_first=True)],axis=1).drop([column],axis=1)

X = StandardScaler().fit_transform(X)
y = pd.get_dummies(y,prefix='churn',drop_first=True)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if X[column].dtype == np.object:


# Modelling

In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

# The gamma parameter is the kernel coefficient for kernels rbf/poly/sigmoid
svm = SVC(gamma='auto', probability=True)

svm.fit(X_train,y_train.values.ravel())
prediction = svm.predict(X_test)
prediction_prob = svm.predict_proba(X_test)
print('Accuracy:', accuracy_score(y_test,prediction))
print('AUC:',roc_auc_score(y_test,prediction_prob[:,1]))

Accuracy: 0.8184834123222748
AUC: 0.8217889066773396


# Different parameters

We can also change the parameters. Now, let's build SVMs with different kernel functions and cost values 

In [10]:
def svm_grid_search(parameters, cv):

    means = []
    stds = []
    params = []
    gs = GridSearchCV(estimator=SVC(gamma='auto'), param_grid=parameters,scoring='roc_auc',cv=cv)
    gs = gs.fit(X_train,y_train.values.ravel())
    means = gs.cv_results_['mean_test_score']
    stddevs = gs.cv_results_['std_test_score']
    
    for i in range(len(means)):
        params.append({'C':gs.cv_results_['param_C'].data[i],'kernel':gs.cv_results_['param_kernel'].data[i]})
    
    return means, stddevs, params

In [11]:
from sklearn.model_selection import GridSearchCV

parameters = {'kernel':['linear','poly','rbf'],'C':[0.2,0.5,1.0]}
means, stddevs, params = svm_grid_search(parameters, 10)

print('Mean AUC (+/- standard deviation), for parameters')
for mean, std, params in zip(means, stddevs, params):
    print("%0.3f (+/- %0.03f) for %r"
          % (mean, std, params))

Mean AUC (+/- standard deviation), for parameters
0.829 (+/- 0.022) for {'C': 0.2, 'kernel': 'linear'}
0.818 (+/- 0.021) for {'C': 0.2, 'kernel': 'poly'}
0.804 (+/- 0.026) for {'C': 0.2, 'kernel': 'rbf'}
0.829 (+/- 0.022) for {'C': 0.5, 'kernel': 'linear'}
0.814 (+/- 0.021) for {'C': 0.5, 'kernel': 'poly'}
0.798 (+/- 0.026) for {'C': 0.5, 'kernel': 'rbf'}
0.829 (+/- 0.022) for {'C': 1.0, 'kernel': 'linear'}
0.808 (+/- 0.020) for {'C': 1.0, 'kernel': 'poly'}
0.795 (+/- 0.026) for {'C': 1.0, 'kernel': 'rbf'}


1.0