<a href="https://colab.research.google.com/github/RenataRSBriet/ciencia-de-dados-e-sistemas-inteligentes/blob/master/AvaliacaoAlgoritmos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Avaliação dos algoritmos

Árvores de decisao: 98.20
Random Forest: 98.40
KNN: 98.60
Regressão Logística: 96.40
SVM: 98.80
Rdes Neurais: 99.60




In [44]:
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [11]:
from google.colab import drive
drive.mount('/content/drive')
caminho = "/content/drive/MyDrive/Aula/MaterialApoio/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
with open(caminho+'credit.pkl', 'rb') as f:
  x_credit_treinamento, y_credit_treinamento, x_credit_teste, y_credit_teste = pickle.load(f)

In [15]:
x_credit_treinamento.shape, y_credit_treinamento.shape

((1500, 3), (1500,))

In [16]:
x_credit = np.concatenate((x_credit_treinamento, x_credit_teste), axis = 0)
x_credit.shape

(2000, 3)

In [17]:
y_credit = np.concatenate((y_credit_treinamento, y_credit_teste), axis = 0)
y_credit.shape

(2000,)

## Árvore de Decisao

In [20]:
parametros = {'criterion': ['gini', 'entropy'],
              'splitter': ['best', 'random'],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [21]:
gri_search = GridSearchCV(estimator = DecisionTreeClassifier(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


## Random Forest

In [22]:
parametros = {'criterion': ['gini', 'entropy'],
              'n_estimators': [10, 40, 100, 150],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [25]:
gri_search = GridSearchCV(estimator = RandomForestClassifier(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 150}
0.9860000000000001


## KNN

In [26]:
parametros = {'n_neighbors': [3, 5, 10, 20],
              'p': [1, 2]}

In [27]:
gri_search = GridSearchCV(estimator = KNeighborsClassifier(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


## Regressao Logistica

In [28]:
parametros = {'tol': [0.0001, 0.00001, 0.000001],
              'C': [1.0, 1.5, 2.0],
              'solver': ['lbfgs', 'sag', 'saga']}

In [30]:
gri_search = GridSearchCV(estimator = LogisticRegression(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'C': 1.0, 'solver': 'lbfgs', 'tol': 1e-05}
0.9484999999999999


## SVM

In [33]:
parametros = {'tol': [0.001, 0.0001, 0.00001],
              'C': [1.0, 1.5, 2.0],
              'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}

In [34]:
gri_search = GridSearchCV(estimator = SVC(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'C': 2.0, 'kernel': 'rbf', 'tol': 0.001}
0.983


## Redes Neurais

In [37]:
parametros = {'activation': ['relu', 'logistic', 'tanh'],
              'solver': ['adam', 'sgd'],
              'batch_size': [10, 56]}

In [38]:
gri_search = GridSearchCV(estimator = MLPClassifier(), param_grid = parametros)
gri_search.fit(x_credit, y_credit)
melhores_parametros = gri_search.best_params_
melhor_resultado = gri_search.best_score_
print(melhores_parametros)
print(melhor_resultado)



{'activation': 'relu', 'batch_size': 56, 'solver': 'adam'}
0.9970000000000001




In [39]:
print(melhores_parametros)
print(melhor_resultado)

{'activation': 'relu', 'batch_size': 56, 'solver': 'adam'}
0.9970000000000001


## Validação Cruzada

In [48]:
resultados_arvore = []
resultados_random_forest = []
resultados_knn = []
resultados_logistica = []
resultados_svm = []
resultados_rede_neural = []

for i in range(30):
  print(i)
  kfold = KFold(n_splits = 10, shuffle = True, random_state = i)

  arvore = DecisionTreeClassifier(criterion = 'entropy', min_samples_leaf = 1, min_samples_split = 5, splitter = 'best')
  scores = cross_val_score(arvore, x_credit, y_credit, cv = kfold)
  resultados_arvore.append(scores.mean())

  random_forest = RandomForestClassifier(criterion = 'entropy', min_samples_leaf=1, min_samples_split=2, n_estimators=100)
  scores = cross_val_score(random_forest, x_credit, y_credit, cv = kfold)
  resultados_random_forest.append(scores.mean())

  knn = KNeighborsClassifier(n_neighbors=20, p=1)
  scores = cross_val_score(knn, x_credit, y_credit, cv = kfold)
  resultados_knn.append(scores.mean())

  logistica = LogisticRegression(C=0.1, solver='lbfgs', tol=0.00001)
  scores = cross_val_score(logistica, x_credit, y_credit, cv = kfold)
  resultados_logistica.append(scores.mean())

  svm = SVC(C=2.0, kernel='rbf', tol=0.001)
  scores = cross_val_score(svm, x_credit, y_credit, cv = kfold)
  resultados_svm.append(scores.mean())

  rede_neural = MLPClassifier(activation='relu', batch_size=10, solver='adam')
  scores = cross_val_score(rede_neural, x_credit, y_credit, cv = kfold)
  resultados_rede_neural.append(scores.mean())


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [49]:
resultados = pd.DataFrame({'Arvore': resultados_arvore,
                           'Random Forest': resultados_random_forest,
                           'KNN': resultados_knn,
                           'Logistica': resultados_logistica,
                           'SVM': resultados_svm,
                           'Rede Neural': resultados_rede_neural})

In [50]:
resultados.describe()

Unnamed: 0,Arvore,Random Forest,KNN,Logistica,SVM,Rede Neural
count,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.98725,0.987133,0.980267,0.940533,0.985,0.99645
std,0.001654,0.001508,0.001264,0.001033,0.001145,0.000968
min,0.984,0.984,0.9775,0.9385,0.983,0.995
25%,0.9865,0.98575,0.98,0.94,0.984125,0.995625
50%,0.987,0.9875,0.98,0.9405,0.985,0.9965
75%,0.988375,0.988,0.981,0.9415,0.9855,0.997
max,0.9905,0.99,0.9835,0.943,0.9875,0.998


In [51]:
resultados.var()

Unnamed: 0,0
Arvore,2.737069e-06
Random Forest,2.274713e-06
KNN,1.598851e-06
Logistica,1.067816e-06
SVM,1.310345e-06
Rede Neural,9.37069e-07


In [52]:
pickle.dump(rede_neural, open(caminho+'rede_neural_finalizada.sav', 'wb'))

In [None]:
rede_neural = pickle.load(open(caminho+'rede_neural_finalizado.sav', 'rb'))