In [10]:
from sklearn.naive_bayes import GaussianNB, ComplementNB
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.combine import SMOTEENN

In [7]:
df = pd.read_csv("./../churn_data.csv")

df = pd.get_dummies(df, columns=['internationalplan', 'voicemailplan'], prefix = ['internationalplan', 'voicemailplan'])
df = df.dropna()

df['churn'] = df['churn'].map({'No': 0, 'Yes': 1})

X = df.drop('churn', axis=1).to_numpy()
y = df.loc[:, 'churn'].to_numpy()
count_no = np.sum(y == 0)
count_yes = np.sum(y == 1)
print(f"Array before: yes {count_yes}, no {count_no}")

# Balancing the classes
#sm = SMOTE(random_state=42)
sm = SMOTEENN(random_state=42)

X, y = sm.fit_resample(X, y)

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

count_no = np.sum(y == 0)
count_yes = np.sum(y == 1)
print(f"Array after: yes {count_yes}, no {count_no}")

Array before: yes 606, no 3690
Array after: yes 3520, no 2127


In [5]:
def evaluate(model, model_name):
    predictions = model.predict(Xtest)

    cm = metrics.confusion_matrix(ytest, predictions)

    tp = cm[1][1]
    tn = cm[0][0]
    fp = cm[0][1]
    fn = cm[1][0]
    total = tp + tn + fp + fn
    recall = tp/(tp+fn)
    precision = tp/(tp+fp)

    print(f"{model_name} Kernel SVM:")
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")

    print(f"Accuracy: {(tp+tn)/total}")
    print(f"Error: {1-(tp+tn)/total}")
    print(f"Recall: {recall}")
    print(f"Specificity: {tn/(tn+fp)}")
    print(f"Precision: {precision}")
    print(f"F1: {2*recall*precision/(recall+precision)}")

In [8]:
gnb=GaussianNB()
gnb.fit(Xtrain,ytrain)

In [9]:
evaluate(gnb, "Gaussian")

Gaussian Kernel SVM:
TP: 584, TN: 262, FP: 178, FN: 106
Accuracy: 0.7486725663716814
Error: 0.25132743362831855
Recall: 0.8463768115942029
Specificity: 0.5954545454545455
Precision: 0.7664041994750657
F1: 0.8044077134986225


In [11]:
cnb=ComplementNB()
cnb.fit(Xtrain, ytrain)

In [13]:
evaluate(cnb, "Complement")

Complement Kernel SVM:
TP: 461, TN: 217, FP: 223, FN: 229
Accuracy: 0.6
Error: 0.4
Recall: 0.6681159420289855
Specificity: 0.49318181818181817
Precision: 0.6739766081871345
F1: 0.6710334788937409
