# Testing models precision

In [3]:
import pickle
with open('customer.pkl', 'rb') as f:
    X_customer_balanced, Y_customer_balanced = pickle.load(f)

# *SVM - 74%*

In [17]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

In [5]:
X_customer_balanced.shape, Y_customer_balanced.shape

((3918, 10), (3918,))

In [4]:
model = SVC(kernel='rbf', random_state=42, C=2.0)
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
scores = cross_val_score(model, X_customer_balanced, Y_customer_balanced, cv=kf, scoring='accuracy')

In [6]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores}")
print(f"Score médio: {np.mean(scores)}")
print(f"Desvio padrão: {np.std(scores)}")

Scores de cada fold: [0.7372449  0.75       0.75255102 0.73469388 0.72704082 0.69897959
 0.73469388 0.78316327 0.72890026 0.76982097]
Score médio: 0.7417088574560259
Desvio padrão: 0.022401792602462955


# ANN - Artificial Neural Network - 74,5%

In [7]:
from sklearn.neural_network import MLPClassifier

In [8]:
model_ann = MLPClassifier(max_iter=1500, verbose=True, tol=0.000000, solver='adam', activation='relu', hidden_layer_sizes=(10,10))
kf_ann = KFold(n_splits=10, shuffle=True, random_state=42)

In [9]:
scores_ann = cross_val_score(model_ann, X_customer_balanced, Y_customer_balanced, cv=kf_ann, scoring='accuracy')

Iteration 1, loss = 0.68006787
Iteration 2, loss = 0.66744869
Iteration 3, loss = 0.65479591
Iteration 4, loss = 0.64228197
Iteration 5, loss = 0.63008621
Iteration 6, loss = 0.61811929
Iteration 7, loss = 0.60715317
Iteration 8, loss = 0.59721503
Iteration 9, loss = 0.58852141
Iteration 10, loss = 0.58094276
Iteration 11, loss = 0.57488409
Iteration 12, loss = 0.56991148
Iteration 13, loss = 0.56559369
Iteration 14, loss = 0.56233973
Iteration 15, loss = 0.55988566
Iteration 16, loss = 0.55770978
Iteration 17, loss = 0.55579933
Iteration 18, loss = 0.55432800
Iteration 19, loss = 0.55299230
Iteration 20, loss = 0.55148849
Iteration 21, loss = 0.55028195
Iteration 22, loss = 0.54907014
Iteration 23, loss = 0.54780897
Iteration 24, loss = 0.54664640
Iteration 25, loss = 0.54547789
Iteration 26, loss = 0.54447886
Iteration 27, loss = 0.54354144
Iteration 28, loss = 0.54242787
Iteration 29, loss = 0.54150461
Iteration 30, loss = 0.54038880
Iteration 31, loss = 0.53925920
Iteration 32, los

In [10]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_ann}")
print(f"Score médio: {np.mean(scores_ann)}")
print(f"Desvio padrão: {np.std(scores_ann)}")

Scores de cada fold: [0.73469388 0.74744898 0.76020408 0.72704082 0.70663265 0.71428571
 0.75765306 0.77295918 0.74168798 0.74936061]
Score médio: 0.7411966960697323
Desvio padrão: 0.019769875644670528


# Decision Tree - 66%(Normal) 72%(Boosted)

In [8]:
from sklearn.tree import DecisionTreeClassifier

In [10]:
# Normal
model_tree = DecisionTreeClassifier(criterion='entropy', random_state=42)

In [11]:
# Boosted
model_tree_boosted = DecisionTreeClassifier(criterion='gini', splitter='random', min_samples_leaf=10, min_samples_split=2, random_state=42)

In [12]:
kf_tree = KFold(n_splits=10, shuffle=True, random_state=42)

In [13]:
scores_tree = cross_val_score(model_tree, X_customer_balanced, Y_customer_balanced, cv=kf_tree, scoring='accuracy')

In [14]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_tree}")
print(f"Score médio: {np.mean(scores_tree)}")
print(f"Desvio padrão: {np.std(scores_tree)}")

Scores de cada fold: [0.6505102  0.65306122 0.66071429 0.67091837 0.62755102 0.63010204
 0.68367347 0.69642857 0.69309463 0.68030691]
Score médio: 0.6646360718200324
Desvio padrão: 0.023205420177937815


In [15]:
scores_tree_boosted = cross_val_score(model_tree_boosted, X_customer_balanced, Y_customer_balanced, cv=kf_tree, scoring='accuracy')

In [16]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_tree_boosted}")
print(f"Score médio: {np.mean(scores_tree_boosted)}")
print(f"Desvio padrão: {np.std(scores_tree_boosted)}")

Scores de cada fold: [0.70918367 0.70153061 0.73979592 0.73469388 0.72193878 0.69897959
 0.70918367 0.7627551  0.73401535 0.73657289]
Score médio: 0.7248649459783915
Desvio padrão: 0.019211770606043753


In [6]:
# Params
parametros = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

In [9]:
# Finding Best Params
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=parametros)
grid_search.fit(X_customer_balanced, Y_customer_balanced)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'gini', 'min_samples_leaf': 10, 'min_samples_split': 2, 'splitter': 'random'}
0.7309914770505903


In [19]:
# # Calculando desvio padrão
# std_dev = np.std(scores_tree_boosted)

# # Plotando o desvio padrão
# plt.bar('Desvio Padrão', std_dev)
# plt.ylabel('Valor')
# plt.title('Desvio Padrão dos Scores')

# plt.show()

# KNN - 70%

In [15]:
from sklearn.neighbors import KNeighborsClassifier

In [16]:
model_knn = KNeighborsClassifier(n_neighbors=10, metric='minkowski', p = 2)
kf_knn = KFold(n_splits=10, shuffle=True, random_state=42)

In [17]:
scores_knn = cross_val_score(model_knn, X_customer_balanced, Y_customer_balanced, cv=kf_knn, scoring='accuracy')

In [18]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_knn}")
print(f"Score médio: {np.mean(scores_knn)}")
print(f"Desvio padrão: {np.std(scores_knn)}")

Scores de cada fold: [0.70153061 0.70408163 0.72704082 0.69132653 0.68877551 0.66071429
 0.68367347 0.75510204 0.70588235 0.71611253]
Score médio: 0.703423978286967
Desvio padrão: 0.02444292091659416


# Logistic Regression - 70%

In [19]:
from sklearn.linear_model import LogisticRegression

In [20]:
model_logistic = LogisticRegression(random_state=42, max_iter=150)
kf_logistic = KFold(n_splits=10, shuffle=True, random_state=42)

In [21]:
scores_logistic = cross_val_score(model_logistic, X_customer_balanced, Y_customer_balanced, cv=kf_logistic, scoring='accuracy')

In [22]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_logistic}")
print(f"Score médio: {np.mean(scores_logistic)}")
print(f"Desvio padrão: {np.std(scores_logistic)}")

Scores de cada fold: [0.67602041 0.70663265 0.71938776 0.71683673 0.66326531 0.67091837
 0.72959184 0.75255102 0.71355499 0.74680307]
Score médio: 0.7095562137898638
Desvio padrão: 0.029277793572685756


# Naive Bayes - 72%

In [23]:
from sklearn.naive_bayes import GaussianNB

In [24]:
model_naive = GaussianNB()
kf_naive = KFold(n_splits=10, shuffle=True, random_state=42)

In [25]:
scores_naive = cross_val_score(model_naive, X_customer_balanced, Y_customer_balanced, cv=kf_naive, scoring='accuracy')

In [26]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_naive}")
print(f"Score médio: {np.mean(scores_naive)}")
print(f"Desvio padrão: {np.std(scores_naive)}")

Scores de cada fold: [0.68877551 0.7244898  0.70918367 0.73214286 0.68877551 0.68112245
 0.72193878 0.77295918 0.72634271 0.75959079]
Score médio: 0.7205321258938358
Desvio padrão: 0.028564427059807447


# Random Forest - 75%

In [27]:
from sklearn.ensemble import RandomForestClassifier

In [28]:
model_forest = RandomForestClassifier(n_estimators=80, criterion='entropy', random_state=42)
kf_forest = KFold(n_splits=10, shuffle=True, random_state=42)

In [29]:
scores_forest = cross_val_score(model_forest, X_customer_balanced, Y_customer_balanced, cv=kf_forest, scoring='accuracy')

In [30]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_forest}")
print(f"Score médio: {np.mean(scores_forest)}")
print(f"Desvio padrão: {np.std(scores_forest)}")

Scores de cada fold: [0.75       0.75765306 0.7627551  0.75       0.7372449  0.72193878
 0.73469388 0.78316327 0.76470588 0.76982097]
Score médio: 0.7531975833811785
Desvio padrão: 0.01735613718267566
