# Avaliação dos algoritmos

* Naive Bayes: 93.80%
* Árvore de Decisão: 98.20% -> 98.30%
* Random Forest: 98.40% -> 98.70%
* Regras: 97.40%
* kNN: 98.60% -> 98.00%
* Regressão Logística: 94.60% -> 94.84%
* SVM: 98.60% -> 98.30%
* Redes Neurais: 99.60% -> 99.64%

In [17]:
from sklearn.model_selection import GridSearchCV

from sklearn.tree            import DecisionTreeClassifier
from sklearn.ensemble        import RandomForestClassifier
from sklearn.neighbors       import KNeighborsClassifier
from sklearn.linear_model    import LogisticRegression
from sklearn.svm             import SVC
from sklearn.neural_network  import MLPClassifier

import pickle
import numpy as np

In [28]:
# Restore the four pre-split credit arrays from the pickled tuple.
# NOTE: pickle can execute arbitrary code while loading, so this must only be
# pointed at a file produced by this project.
with open('../data/credit.pkl', 'rb') as credit_file:
    (x_credit_train, y_credit_train,
     x_credit_test, y_credit_test) = pickle.load(credit_file)

In [29]:
# Stack the train and test partitions back into a single dataset (rows are
# appended along the first axis, which is np.concatenate's default).
x_credit = np.concatenate([x_credit_train, x_credit_test])
y_credit = np.concatenate([y_credit_train, y_credit_test])

## Decision Tree

In [31]:
# Decision-tree search space: 2 * 2 * 3 * 3 = 36 candidate combinations.
parameters = {
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 5, 10],
}

In [32]:
# Exhaustive search over the decision-tree grid held in `parameters`;
# cross-validation settings are left at GridSearchCV's defaults.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Keep the winning combination and its score, then print one per line.
param, score = grid_search.best_params_, grid_search.best_score_
print(param, score, sep='\n')

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


## Random Forest

In [34]:
# Random-forest search space: 2 * 4 * 3 * 3 = 72 candidate combinations.
parameters = {
    "criterion": ["gini", "entropy"],
    "n_estimators": [10, 40, 100, 150],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 5, 10],
}

In [35]:
# Exhaustive search over the random-forest grid held in `parameters`;
# cross-validation settings are left at GridSearchCV's defaults.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Keep the winning combination and its score, then print one per line.
param, score = grid_search.best_params_, grid_search.best_score_
print(param, score, sep='\n')

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}
0.9870000000000001


## kNN

In [36]:
# kNN search space: 4 neighbourhood sizes * 2 values of p = 8 combinations.
parameters = {
    "n_neighbors": [3, 5, 10, 20],
    "p": [1, 2],
}

In [37]:
# Exhaustive search over the kNN grid held in `parameters`;
# cross-validation settings are left at GridSearchCV's defaults.
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Keep the winning combination and its score, then print one per line.
param, score = grid_search.best_params_, grid_search.best_score_
print(param, score, sep='\n')

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


## Logistic Regression

In [38]:
# Logistic-regression search space: 3 * 3 * 4 = 36 distinct combinations.
# Every solver is listed exactly once: a repeated entry makes GridSearchCV
# re-fit identical candidates without exploring anything new, so the fourth
# slot holds 'saga' rather than a second 'sag'.
parameters = {
    'tol': [0.0001, 0.00001, 0.000001],
    'C': [1.0, 1.5, 2.0],
    'solver': ['lbfgs', 'liblinear', 'sag', 'saga'],
}

In [39]:
# Exhaustive search over the logistic-regression grid held in `parameters`;
# cross-validation settings are left at GridSearchCV's defaults.
grid_search = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Keep the winning combination and its score, then print one per line.
param, score = grid_search.best_params_, grid_search.best_score_
print(param, score, sep='\n')

{'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
0.9484999999999999


## SVM

In [40]:
# SVM search space: 3 * 4 * 3 = 36 candidate combinations.
parameters = {
    "C": [1.0, 1.5, 2.0],
    "kernel": ["rbf", "linear", "poly", "sigmoid"],
    "tol": [1e-4, 1e-5, 1e-6],
}

In [41]:
# Exhaustive search over the SVM grid held in `parameters`;
# cross-validation settings are left at GridSearchCV's defaults.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Keep the winning combination and its score, then print one per line.
param, score = grid_search.best_params_, grid_search.best_score_
print(param, score, sep='\n')

{'C': 1.5, 'kernel': 'rbf', 'tol': 0.0001}
0.9829999999999999


## Neural Network

In [44]:
#parameters = {'activation': ['relu', 'logistic', 'tanh'],
#              'solver': ['adam', 'sgd'],
#              'batch_size': [10, 56]}

In [45]:
#grid_search = GridSearchCV(estimator=MLPClassifier(), param_grid=parameters)
#grid_search.fit(x_credit, y_credit)
#param = grid_search.best_params_
#score = grid_search.best_score_

In [46]:
#print(param)
#print(score)