In [53]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.combine import SMOTEENN

In [7]:
df = pd.read_csv("./../churn_data.csv")

# df = pd.get_dummies(df, columns=['internationalplan', 'voicemailplan'], prefix = ['internationalplan', 'voicemailplan'])
df = df.dropna()

df['churn'] = df['churn'].map({'No': 0, 'Yes': 1})

X = df.drop('internationalplan', axis=1)
X = X.drop('voicemailplan', axis=1)
X = X.drop('churn', axis=1).to_numpy()
y = df.loc[:, 'churn'].to_numpy()
count_no = np.sum(y == 0)
count_yes = np.sum(y == 1)
print(f"Array before: yes {count_yes}, no {count_no}")

# Balancing the classes
sm = ClusterCentroids(random_state=42)

X, y = sm.fit_resample(X, y)

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

count_no = np.sum(y == 0)
count_yes = np.sum(y == 1)
print(f"Array after: yes {count_yes}, no {count_no}")

Array before: yes 590, no 3617




Array after: yes 590, no 590


In [37]:
def evaluate(model, model_name):
    predictions = model.predict(Xtest)

    cm = metrics.confusion_matrix(ytest, predictions)

    tp = cm[1][1]
    tn = cm[0][0]
    fp = cm[0][1]
    fn = cm[1][0]
    total = tp + tn + fp + fn
    recall = tp/(tp+fn)
    precision = tp/(tp+fp)

    print(f"{model_name} Kernel SVM:")
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")

    print(f"Accuracy: {(tp+tn)/total}")
    print(f"Error: {1-(tp+tn)/total}")
    print(f"Recall: {recall}")
    print(f"Specificity: {tn/(tn+fp)}")
    print(f"Precision: {precision}")
    print(f"F1: {2*recall*precision/(recall+precision)}")


In [35]:
svmLinear = SVC(kernel='linear', C=100)
svmLinear.fit(Xtrain, ytrain)

In [38]:
evaluate(svmLinear, "Linear")

Linear Kernel SVM:
TP: 84, TN: 93, FP: 22, FN: 37
Accuracy: 0.75
Error: 0.25
Recall: 0.6942148760330579
Specificity: 0.808695652173913
Precision: 0.7924528301886793
F1: 0.7400881057268722


In [39]:
svmPoly = SVC(kernel='poly', C=100)
svmPoly.fit(Xtrain, ytrain)

In [40]:
evaluate(svmPoly, "Polinomial")

Polinomial Kernel SVM:
TP: 84, TN: 105, FP: 10, FN: 37
Accuracy: 0.8008474576271186
Error: 0.19915254237288138
Recall: 0.6942148760330579
Specificity: 0.9130434782608695
Precision: 0.8936170212765957
F1: 0.7813953488372092


In [32]:
svmRBF = SVC(kernel='rbf', C=100)
svmRBF.fit(Xtrain, ytrain)

In [41]:
evaluate(svmRBF, "RBF")

RBF Kernel SVM:
TP: 79, TN: 110, FP: 5, FN: 42
Accuracy: 0.8008474576271186
Error: 0.19915254237288138
Recall: 0.6528925619834711
Specificity: 0.9565217391304348
Precision: 0.9404761904761905
F1: 0.7707317073170733


In [42]:
svmSigmoid = SVC(kernel="sigmoid", C=100)
svmSigmoid.fit(Xtrain, ytrain)

In [43]:
evaluate(svmSigmoid, "Sigmoid")

Sigmoid Kernel SVM:
TP: 60, TN: 70, FP: 45, FN: 61
Accuracy: 0.5508474576271186
Error: 0.4491525423728814
Recall: 0.49586776859504134
Specificity: 0.6086956521739131
Precision: 0.5714285714285714
F1: 0.5309734513274337


In [49]:
for i in range(100, 111):
    print("------------------------------------------------------")
    print(f"C = {i}")

    svmPoly = SVC(kernel='poly', C=i)
    svmPoly.fit(Xtrain, ytrain)
    evaluate(svmPoly, "Poly")

------------------------------------------------------
C = 100
Poly Kernel SVM:
TP: 84, TN: 105, FP: 10, FN: 37
Accuracy: 0.8008474576271186
Error: 0.19915254237288138
Recall: 0.6942148760330579
Specificity: 0.9130434782608695
Precision: 0.8936170212765957
F1: 0.7813953488372092
------------------------------------------------------
C = 101
Poly Kernel SVM:
TP: 85, TN: 105, FP: 10, FN: 36
Accuracy: 0.8050847457627118
Error: 0.19491525423728817
Recall: 0.7024793388429752
Specificity: 0.9130434782608695
Precision: 0.8947368421052632
F1: 0.787037037037037
------------------------------------------------------
C = 102
Poly Kernel SVM:
TP: 85, TN: 105, FP: 10, FN: 36
Accuracy: 0.8050847457627118
Error: 0.19491525423728817
Recall: 0.7024793388429752
Specificity: 0.9130434782608695
Precision: 0.8947368421052632
F1: 0.787037037037037
------------------------------------------------------
C = 103
Poly Kernel SVM:
TP: 84, TN: 105, FP: 10, FN: 37
Accuracy: 0.8008474576271186
Error: 0.199152542372

In [52]:
svmPoly = SVC(kernel='poly', C=200)
svmPoly.fit(Xtrain, ytrain)
evaluate(svmPoly, "Poly")

Poly Kernel SVM:
TP: 88, TN: 106, FP: 9, FN: 33
Accuracy: 0.8220338983050848
Error: 0.17796610169491522
Recall: 0.7272727272727273
Specificity: 0.9217391304347826
Precision: 0.9072164948453608
F1: 0.8073394495412843
