# Testing models precision

In [22]:
import pickle
with open('customer.pkl', 'rb') as f:
    X_customer_balanced, Y_customer_balanced = pickle.load(f)

# *SVM - 74,17%(Normal) 74,96%(Boosted)*

In [23]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

In [24]:
X_customer_balanced.shape, Y_customer_balanced.shape

((3918, 10), (3918,))

In [25]:
model = SVC(kernel='rbf', random_state=42, C=2.0)

In [26]:
model_boosted = SVC(kernel='rbf', random_state=42, C=10.0, tol=0.001, gamma=0.01)

In [27]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [28]:
scores = cross_val_score(model, X_customer_balanced, Y_customer_balanced, cv=kf, scoring='accuracy')

In [29]:
scores_boosted = cross_val_score(model_boosted, X_customer_balanced, Y_customer_balanced, cv=kf, scoring='accuracy')

In [30]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores}")
print(f"Score médio: {np.mean(scores)}")
print(f"Desvio padrão: {np.std(scores)}")

Scores de cada fold: [0.7372449  0.75       0.75255102 0.73469388 0.72704082 0.69897959
 0.73469388 0.78316327 0.72890026 0.76982097]
Score médio: 0.7417088574560259
Desvio padrão: 0.022401792602462955


In [31]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_boosted}")
print(f"Score médio: {np.mean(scores_boosted)}")
print(f"Desvio padrão: {np.std(scores_boosted)}")

Scores de cada fold: [0.73979592 0.75       0.76785714 0.75255102 0.71938776 0.70918367
 0.75       0.78316327 0.75191816 0.77237852]
Score médio: 0.749623545070202
Desvio padrão: 0.02153010197017537


In [32]:
parametros = {
    'C': [0.1, 1.0, 10.0],  # Regularização
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],  # Kernels mais comuns
    'gamma': ['scale', 'auto', 0.01, 0.001],  # Para 'rbf' kernel
    'tol': [0.001, 0.0001]  # Tolerância de otimização
}

In [33]:
grid_search = GridSearchCV(estimator=SVC(), param_grid=parametros)
grid_search.fit(X_customer_balanced, Y_customer_balanced)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'C': 10.0, 'gamma': 0.01, 'kernel': 'rbf', 'tol': 0.001}
0.7468110615893867


# ANN - Artificial Neural Network - 74,5%

In [34]:
from sklearn.neural_network import MLPClassifier

In [35]:
model_ann = MLPClassifier(max_iter=1500, verbose=True, tol=0.000000, solver='adam', activation='relu', hidden_layer_sizes=(10,10))
kf_ann = KFold(n_splits=10, shuffle=True, random_state=42)

In [36]:
scores_ann = cross_val_score(model_ann, X_customer_balanced, Y_customer_balanced, cv=kf_ann, scoring='accuracy')

Iteration 1, loss = 0.73863469
Iteration 2, loss = 0.71626398
Iteration 3, loss = 0.69898458
Iteration 4, loss = 0.68418740
Iteration 5, loss = 0.67120495
Iteration 6, loss = 0.65907086
Iteration 7, loss = 0.64748423
Iteration 8, loss = 0.63641259
Iteration 9, loss = 0.62541587
Iteration 10, loss = 0.61502725
Iteration 11, loss = 0.60497337
Iteration 12, loss = 0.59606421
Iteration 13, loss = 0.58805333
Iteration 14, loss = 0.58129999
Iteration 15, loss = 0.57499346
Iteration 16, loss = 0.56989449
Iteration 17, loss = 0.56546591
Iteration 18, loss = 0.56181457
Iteration 19, loss = 0.55838251
Iteration 20, loss = 0.55552057
Iteration 21, loss = 0.55296423
Iteration 22, loss = 0.55061383
Iteration 23, loss = 0.54860011
Iteration 24, loss = 0.54653116
Iteration 25, loss = 0.54468255
Iteration 26, loss = 0.54284336
Iteration 27, loss = 0.54066644
Iteration 28, loss = 0.53881459
Iteration 29, loss = 0.53694524
Iteration 30, loss = 0.53503057
Iteration 31, loss = 0.53364282
Iteration 32, los

In [37]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_ann}")
print(f"Score médio: {np.mean(scores_ann)}")
print(f"Desvio padrão: {np.std(scores_ann)}")

Scores de cada fold: [0.74234694 0.76020408 0.73979592 0.75       0.69897959 0.71938776
 0.74489796 0.77295918 0.7314578  0.7544757 ]
Score médio: 0.7414504932407746
Desvio padrão: 0.01998265962323871


# Decision Tree - 66%(Normal) 72%(Boosted)

In [38]:
from sklearn.tree import DecisionTreeClassifier

In [39]:
# Normal
model_tree = DecisionTreeClassifier(criterion='entropy', random_state=42)

In [40]:
# Boosted
model_tree_boosted = DecisionTreeClassifier(criterion='gini', splitter='random', min_samples_leaf=10, min_samples_split=2, random_state=42)

In [41]:
kf_tree = KFold(n_splits=10, shuffle=True, random_state=42)

In [42]:
scores_tree = cross_val_score(model_tree, X_customer_balanced, Y_customer_balanced, cv=kf_tree, scoring='accuracy')

In [43]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_tree}")
print(f"Score médio: {np.mean(scores_tree)}")
print(f"Desvio padrão: {np.std(scores_tree)}")

Scores de cada fold: [0.6505102  0.65306122 0.66071429 0.67091837 0.62755102 0.63010204
 0.68367347 0.69642857 0.69309463 0.68030691]
Score médio: 0.6646360718200324
Desvio padrão: 0.023205420177937815


In [44]:
scores_tree_boosted = cross_val_score(model_tree_boosted, X_customer_balanced, Y_customer_balanced, cv=kf_tree, scoring='accuracy')

In [45]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_tree_boosted}")
print(f"Score médio: {np.mean(scores_tree_boosted)}")
print(f"Desvio padrão: {np.std(scores_tree_boosted)}")

Scores de cada fold: [0.70918367 0.70153061 0.73979592 0.73469388 0.72193878 0.69897959
 0.70918367 0.7627551  0.73401535 0.73657289]
Score médio: 0.7248649459783915
Desvio padrão: 0.019211770606043753


In [46]:
# Params
parametros = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

In [47]:
# Finding Best Params
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=parametros)
grid_search.fit(X_customer_balanced, Y_customer_balanced)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'entropy', 'min_samples_leaf': 10, 'min_samples_split': 2, 'splitter': 'random'}
0.7253707613313524


In [48]:
# # Calculando desvio padrão
# std_dev = np.std(scores_tree_boosted)

# # Plotando o desvio padrão
# plt.bar('Desvio Padrão', std_dev)
# plt.ylabel('Valor')
# plt.title('Desvio Padrão dos Scores')

# plt.show()

# KNN - 70%

In [49]:
from sklearn.neighbors import KNeighborsClassifier

In [50]:
model_knn = KNeighborsClassifier(n_neighbors=10, metric='minkowski', p = 2)
kf_knn = KFold(n_splits=10, shuffle=True, random_state=42)

In [51]:
scores_knn = cross_val_score(model_knn, X_customer_balanced, Y_customer_balanced, cv=kf_knn, scoring='accuracy')

In [52]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_knn}")
print(f"Score médio: {np.mean(scores_knn)}")
print(f"Desvio padrão: {np.std(scores_knn)}")

Scores de cada fold: [0.70153061 0.70408163 0.72704082 0.69132653 0.68877551 0.66071429
 0.68367347 0.75510204 0.70588235 0.71611253]
Score médio: 0.703423978286967
Desvio padrão: 0.02444292091659416


# Logistic Regression - 70%

In [53]:
from sklearn.linear_model import LogisticRegression

In [54]:
model_logistic = LogisticRegression(random_state=42, max_iter=150)
kf_logistic = KFold(n_splits=10, shuffle=True, random_state=42)

In [55]:
scores_logistic = cross_val_score(model_logistic, X_customer_balanced, Y_customer_balanced, cv=kf_logistic, scoring='accuracy')

In [56]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_logistic}")
print(f"Score médio: {np.mean(scores_logistic)}")
print(f"Desvio padrão: {np.std(scores_logistic)}")

Scores de cada fold: [0.67602041 0.70663265 0.71938776 0.71683673 0.66326531 0.67091837
 0.72959184 0.75255102 0.71355499 0.74680307]
Score médio: 0.7095562137898638
Desvio padrão: 0.029277793572685756


# Naive Bayes - 72%

In [57]:
from sklearn.naive_bayes import GaussianNB

In [58]:
model_naive = GaussianNB()
kf_naive = KFold(n_splits=10, shuffle=True, random_state=42)

In [59]:
scores_naive = cross_val_score(model_naive, X_customer_balanced, Y_customer_balanced, cv=kf_naive, scoring='accuracy')

In [60]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_naive}")
print(f"Score médio: {np.mean(scores_naive)}")
print(f"Desvio padrão: {np.std(scores_naive)}")

Scores de cada fold: [0.68877551 0.7244898  0.70918367 0.73214286 0.68877551 0.68112245
 0.72193878 0.77295918 0.72634271 0.75959079]
Score médio: 0.7205321258938358
Desvio padrão: 0.028564427059807447


# Random Forest - 75%

In [61]:
from sklearn.ensemble import RandomForestClassifier

In [62]:
model_forest = RandomForestClassifier(n_estimators=80, criterion='entropy', random_state=42)
kf_forest = KFold(n_splits=10, shuffle=True, random_state=42)

In [63]:
scores_forest = cross_val_score(model_forest, X_customer_balanced, Y_customer_balanced, cv=kf_forest, scoring='accuracy')

In [64]:
# Exibindo os resultados
print(f"Scores de cada fold: {scores_forest}")
print(f"Score médio: {np.mean(scores_forest)}")
print(f"Desvio padrão: {np.std(scores_forest)}")

Scores de cada fold: [0.75       0.75765306 0.7627551  0.75       0.7372449  0.72193878
 0.73469388 0.78316327 0.76470588 0.76982097]
Score médio: 0.7531975833811785
Desvio padrão: 0.01735613718267566
