# Turning Parametros e Validação Cruzada

In [5]:
# Modelos de aprendizagem

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, KFold

In [6]:
import pickle
with open('credit.pkl', 'rb') as f:
  X_treinamento, Y_treinamento, X_teste, Y_teste = pickle.load(f)

In [7]:
X_treinamento.shape, Y_treinamento.shape

((1500, 3), (1500,))

In [8]:
X_teste.shape, Y_teste.shape

((500, 3), (500,))

In [9]:
import numpy as np

X_credit = np.concatenate((X_treinamento, X_teste), axis=0)
X_credit.shape

(2000, 3)

In [10]:
Y_credit = np.concatenate((Y_treinamento, Y_teste), axis=0)
Y_credit.shape

(2000,)

# Arvore de Decisão

In [11]:
parametros = {'criterion': ['gini', 'entropy'],
              'splitter': ['best', 'random'],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [12]:
grid_Search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_
print(melhor_parametros)
print(melhor_resultados)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


In [13]:
resultado_arvore = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  arvore = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=5, splitter='best')
  scores = cross_val_score(arvore, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_arvore.append(scores.mean())
print(resultado_arvore)

[0.9869999999999999, 0.986, 0.9905000000000002, 0.9875, 0.9879999999999999, 0.9890000000000001, 0.9880000000000001, 0.9875, 0.9870000000000001, 0.9869999999999999, 0.9860000000000001, 0.9894999999999999, 0.9889999999999999, 0.9864999999999998, 0.9845, 0.9854999999999998, 0.9855, 0.9904999999999999, 0.9875, 0.9869999999999999, 0.985, 0.9870000000000001, 0.9889999999999999, 0.9875, 0.9870000000000001, 0.9880000000000001, 0.9879999999999999, 0.986, 0.9864999999999998, 0.9880000000000001]


# Ramdom Forest

In [14]:
parametros = {'criterion': ['gini', 'entropy'],
              'n_estimators': [10,40,100,150],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [15]:
grid_Search = GridSearchCV(estimator=RandomForestClassifier(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_
print(melhor_parametros)
print(melhor_resultados)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 40}
0.9880000000000001


In [16]:
resultado_RandomForest = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  random_forest = RandomForestClassifier(criterion= 'gini', min_samples_leaf= 1, min_samples_split= 2, n_estimators= 100)
  scores = cross_val_score(random_forest, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_RandomForest.append(scores.mean())
print(resultado_RandomForest)

[0.985, 0.9879999999999999, 0.9894999999999999, 0.9865, 0.9875, 0.9879999999999999, 0.991, 0.9889999999999999, 0.9875, 0.9884999999999999, 0.9875, 0.9894999999999999, 0.9884999999999999, 0.9879999999999999, 0.9894999999999999, 0.985, 0.9855, 0.9869999999999999, 0.9894999999999999, 0.9880000000000001, 0.986, 0.9880000000000001, 0.9880000000000001, 0.9879999999999999, 0.9894999999999999, 0.9894999999999999, 0.9874999999999998, 0.9854999999999998, 0.9854999999999998, 0.9874999999999998]


# KNN

In [17]:
parametros = {'n_neighbors': [3,5,10,20],
              'p': [1,2]}

In [18]:
grid_Search = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_
print(melhor_parametros)
print(melhor_resultados)

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


In [19]:
resultado_KNN = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  knn = KNeighborsClassifier(n_neighbors= 20, p= 1)
  scores = cross_val_score(knn, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_KNN.append(scores.mean())
print(resultado_KNN)

[0.9800000000000001, 0.9800000000000001, 0.9799999999999999, 0.9799999999999999, 0.9809999999999999, 0.9785, 0.9799999999999999, 0.9804999999999999, 0.9809999999999999, 0.9809999999999999, 0.978, 0.9804999999999999, 0.9809999999999999, 0.9799999999999999, 0.9795, 0.9795, 0.9804999999999999, 0.983, 0.9800000000000001, 0.9804999999999999, 0.9799999999999999, 0.9810000000000001, 0.9810000000000001, 0.9800000000000001, 0.9835, 0.9814999999999999, 0.9795, 0.9795, 0.9775, 0.978]


# Regressão Lógica

In [20]:
parametros = {'tol': [0.0001, 0.00001, 0.000001],
              'C': [1.0, 1.5, 2.0],
              'solver': ['lbfgs', 'sag', 'saga']}

In [21]:
grid_Search = GridSearchCV(estimator=LogisticRegression(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_
print(melhor_parametros)
print(melhor_resultados)

{'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
0.9484999999999999


In [22]:
resultado_RegrecaoLogica = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  rl = LogisticRegression(C= 1.0, solver= 'lbfgs', tol= 0.0001)
  scores = cross_val_score(rl, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_RegrecaoLogica.append(scores.mean())
print(resultado_RegrecaoLogica)

[0.9475, 0.9465, 0.9470000000000001, 0.946, 0.9465, 0.9465, 0.9469999999999998, 0.9480000000000001, 0.9465, 0.9465, 0.9475, 0.9479999999999998, 0.9475, 0.9475, 0.9484999999999999, 0.9475, 0.946, 0.9470000000000001, 0.9465, 0.9464999999999998, 0.9465, 0.9469999999999998, 0.9455, 0.9465, 0.9470000000000001, 0.9469999999999998, 0.9475, 0.9465, 0.9480000000000001, 0.9465]


# SVM

In [23]:
parametros = {'tol': [0.001, 0.0001, 0.00001],
              'C':[1.0, 1.5, 2.0],
              'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}

In [24]:
grid_Search = GridSearchCV(estimator=SVC(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_
print(melhor_parametros)
print(melhor_resultados)

{'C': 1.5, 'kernel': 'rbf', 'tol': 0.001}
0.9829999999999999


In [25]:
resultado_SVM = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  svc = SVC(C= 1.5, kernel= 'rbf', tol= 0.001)
  scores = cross_val_score(svc, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_SVM.append(scores.mean())
print(resultado_SVM)

[0.9835, 0.984, 0.9834999999999999, 0.9844999999999999, 0.9855, 0.9835, 0.9839999999999998, 0.9845, 0.982, 0.9829999999999999, 0.9824999999999999, 0.9825000000000002, 0.9844999999999999, 0.984, 0.9834999999999999, 0.9845, 0.984, 0.9844999999999999, 0.985, 0.9839999999999998, 0.9835, 0.985, 0.9844999999999999, 0.9870000000000001, 0.9839999999999998, 0.982, 0.9825000000000002, 0.9850000000000001, 0.983, 0.986]


# Rede Neural

In [26]:
parametros = {'activation': ['relu', 'logistic', 'tahn'],
              'solver': ['adam', 'sgd'],
              'batch_size': [10, 56]}

In [27]:
grid_Search = GridSearchCV(estimator=MLPClassifier(), param_grid=parametros)
grid_Search.fit(X_credit, Y_credit)
melhor_parametros = grid_Search.best_params_
melhor_resultados = grid_Search.best_score_

In [None]:
print(melhor_parametros)
print(melhor_resultados)

{'activation': 'relu', 'batch_size': 56, 'solver': 'adam'}
0.9964999999999999


In [None]:
resultado_RedeNeural = []

for x in range(30):
  kfold = KFold(n_splits=10, shuffle=True, random_state=x)
  # Criterios gerados pelo GridSearch CV vão para o modelo
  rn = MLPClassifier(activation= 'relu', batch_size= 56, solver= 'adam')
  scores = cross_val_score(rn, X_credit, Y_credit, cv = kfold)
  # print(scores.mean())
  resultado_RedeNeural.append(scores.mean())
print(resultado_RedeNeural)



# Resultados

In [None]:
import pandas as pd
resultados = pd.DataFrame({'arvore': resultado_arvore, 'Random Forest': resultado_RandomForest, 'KNN': resultado_KNN, 'Logistica': resultado_RegrecaoLogica, 'SVM': resultado_SVM, 'RedeNeural': resultado_RedeNeural})

resultados

KeyboardInterrupt: 

# Teste de resultados

In [None]:
alpha = 0.05

In [None]:
from scipy.stats import shapiro

In [None]:
shapiro(resultado_arvore), shapiro(resultado_RandomForest), shapiro(resultado_KNN), shapiro(resultado_RegrecaoLogica), shapiro(resultado_SVM), shapiro(resultado_RedeNeural) 

NameError: name 'resultado_arvore' is not defined