In [1]:
import pandas as pd
from numpy import random
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [2]:
SEED = 12345

random.seed(SEED)

dados = pd.read_csv('Customer-Churn.csv')

traducao = {'Sim': 1, 'Nao': 0}

In [3]:
dados1 = dados[['Conjuge', 'Dependentes', 'TelefoneFixo', 'PagamentoOnline', 'Churn']].replace(traducao)

dummie = pd.get_dummies(dados.drop(['Conjuge', 'Dependentes', 'TelefoneFixo', 'PagamentoOnline', 'Churn'],axis=1))

dados_fn = pd.concat([dados1, dummie], axis=1)

In [4]:
x = dados_fn.drop('Churn', axis = 1)
y = dados_fn['Churn']

smt = SMOTE(random_state=SEED)

x, y = smt.fit_resample(x, y)

dados_fn = pd.concat([x, y], axis=1)

In [5]:
x = dados_fn.drop('Churn', axis = 1)
y = dados_fn['Churn']

norm = StandardScaler()

x = norm.fit_transform(x)

In [6]:
treinox, testex, treinoy, testey = train_test_split(x, y, test_size=0.3)

In [7]:
def algoritmo_KNeighborsClassifier(x,y,teste):
    knn = KNeighborsClassifier(metric='euclidean')
    knn.fit(x, y)
    valor_knn = knn.predict(teste)
    return valor_knn

def algoritmo_randomforest(x,y,teste):
    rfc = RandomForestClassifier(n_estimators = 200)
    rfc.fit(x, y)
    valor_rfc = rfc.predict(teste)
    return valor_rfc

def algoritmo_arvore(x,y,teste):
    dtc = DecisionTreeClassifier(criterion='entropy', random_state=42)
    dtc.fit(x, y)
    valor_dtc = dtc.predict(teste)
    return valor_dtc

def algoritmo_bernoulli(x,y,teste):
    bnb = BernoulliNB(binarize=np.median(x))
    bnb.fit(x, y)
    valor_bnb = bnb.predict(teste)
    return valor_bnb

def algoritmo_ReLogi(x,y,teste):
    clf = LogisticRegression(random_state=SEED, max_iter=10000)
    clf.fit(x, y)
    valor_clf = clf.predict(teste)
    return valor_clf

In [8]:
modelos = ["KNeighborsClassifier","RandomForestClassifier","DecisionTreeClassifier","BernoulliNB","LogisticRegression"]

In [9]:
valor_knn = algoritmo_KNeighborsClassifier(treinox,treinoy,testex)
valor_rfc = algoritmo_randomforest(treinox,treinoy,testex)
valor_dtc = algoritmo_arvore(treinox,treinoy,testex)
valor_bnb = algoritmo_bernoulli(treinox,treinoy,testex)
valor_clf = algoritmo_ReLogi(treinox,treinoy,testex)

In [10]:
print("Accuracy:")

print(round(accuracy_score(testey, valor_knn)*100,2), "%", f"Modelo {modelos[0]}")
print(round(accuracy_score(testey, valor_rfc)*100,2), "%", f"Modelo {modelos[1]}")
print(round(accuracy_score(testey, valor_dtc)*100,2), "%", f"Modelo {modelos[2]}")
print(round(accuracy_score(testey, valor_bnb)*100,2), "%", f"Modelo {modelos[3]}")
print(round(accuracy_score(testey, valor_clf)*100,2), "%", f"Modelo {modelos[4]}")

Accuracy:
81.51 % Modelo KNeighborsClassifier
85.09 % Modelo RandomForestClassifier
80.35 % Modelo DecisionTreeClassifier
75.81 % Modelo BernoulliNB
84.51 % Modelo LogisticRegression


In [11]:
print("Precision:")

print(round(precision_score(testey, valor_knn)*100,2), "%", f"Modelo {modelos[0]}")
print(round(precision_score(testey, valor_rfc)*100,2), "%", f"Modelo {modelos[1]}")
print(round(precision_score(testey, valor_dtc)*100,2), "%", f"Modelo {modelos[2]}")
print(round(precision_score(testey, valor_bnb)*100,2), "%", f"Modelo {modelos[3]}")
print(round(precision_score(testey, valor_clf)*100,2), "%", f"Modelo {modelos[4]}")


Precision:
80.61 % Modelo KNeighborsClassifier
86.18 % Modelo RandomForestClassifier
80.56 % Modelo DecisionTreeClassifier
72.24 % Modelo BernoulliNB
85.27 % Modelo LogisticRegression


In [12]:
print("Recall:")

print(round(recall_score(testey, valor_knn)*100,2), "%", f"Modelo {modelos[0]}")
print(round(recall_score(testey, valor_rfc)*100,2), "%", f"Modelo {modelos[1]}")
print(round(recall_score(testey, valor_dtc)*100,2), "%", f"Modelo {modelos[2]}")
print(round(recall_score(testey, valor_bnb)*100,2), "%", f"Modelo {modelos[3]}")
print(round(recall_score(testey, valor_clf)*100,2), "%", f"Modelo {modelos[4]}")


Recall:
83.78 % Modelo KNeighborsClassifier
84.16 % Modelo RandomForestClassifier
80.86 % Modelo DecisionTreeClassifier
85.11 % Modelo BernoulliNB
84.03 % Modelo LogisticRegression
