# *CROSS-VALIDATION*

In [6]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd

In [2]:
import pickle

# Load the four pre-split credit arrays (train/test features and labels)
# that were serialized together as a single tuple in 'credit.pkl'.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must come from a trusted source.
with open('credit.pkl', 'rb') as f:
    (
        X_credit_treinamento,
        Y_credit_treinamento,
        X_credit_teste,
        Y_credit_teste,
    ) = pickle.load(f)

In [3]:
# Stack the training and test features back into a single matrix (rows are
# appended along the default axis 0); the cell then displays its shape.
X_credit = np.concatenate([X_credit_treinamento, X_credit_teste])
X_credit.shape

(2000, 3)

In [4]:
# Join the training and test labels in the same order used for the features
# (training first, then test), along the default axis 0, and display the shape.
Y_credit = np.concatenate([Y_credit_treinamento, Y_credit_teste])
Y_credit.shape

(2000,)

In [5]:
# Repeated 10-fold cross-validation.
# Each of the 30 repetitions reshuffles the data with a different seed and
# stores, for every classifier, the mean accuracy over the 10 folds. The
# result is 30 scores per classifier, collected in the lists below.
resultados_arvore = []
resultados_random_forest = []
resultados_knn = []
resultados_logistica = []
resultados_svm = []
resultados_neural = []

for i in range(30):
    # Same fold assignment for every classifier within a repetition, so the
    # models are compared on identical splits.
    kfold = KFold(n_splits=10, shuffle=True, random_state=i)

    # The stochastic estimators are seeded with the repetition index so that a
    # fresh "Restart & Run All" reproduces the same scores; without a
    # random_state they draw from the global RNG and results drift between runs.

    # Decision tree
    arvore = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=5,
                                    splitter='best', random_state=i)

    # Random forest
    random_forest = RandomForestClassifier(criterion='entropy', min_samples_leaf=1, min_samples_split=5,
                                           n_estimators=10, random_state=i)

    # k-nearest neighbours (deterministic, no seed needed)
    knn = KNeighborsClassifier()

    # Logistic regression (the lbfgs solver does not use a random state)
    logistica = LogisticRegression(C=1.0, solver='lbfgs', tol=0.0001)

    # Support vector machine (no randomness involved with probability=False)
    svm = SVC(kernel='rbf', C=2.0)

    # Neural network (weight initialisation and batch shuffling are random)
    rede_neural = MLPClassifier(activation='relu', batch_size=56, solver='adam', random_state=i)

    # Evaluate every model on this repetition's folds. cross_val_score clones
    # the estimator for each fold, so the objects above remain unfitted.
    for modelo, resultados_modelo in (
        (arvore, resultados_arvore),
        (random_forest, resultados_random_forest),
        (knn, resultados_knn),
        (logistica, resultados_logistica),
        (svm, resultados_svm),
        (rede_neural, resultados_neural),
    ):
        scores = cross_val_score(modelo, X_credit, Y_credit, cv=kfold)
        resultados_modelo.append(scores.mean())
    
    



In [10]:
# Gather the per-repetition mean accuracies into one table:
# one column per classifier, one row per cross-validation repetition.
resultados = pd.DataFrame(
    data={
        'Arvore': resultados_arvore,
        'Random Forest': resultados_random_forest,
        'KNN': resultados_knn,
        'Logistica': resultados_logistica,
        'SVM': resultados_svm,
        'ANN': resultados_neural,
    }
)

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the
# cross-validation scores, computed column-wise for each classifier.
resultados.describe()

Unnamed: 0,Arvore,Random Forest,KNN,Logistica,SVM,ANN
count,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.987383,0.98345,0.98005,0.947,0.985083,0.997067
std,0.001695,0.002065,0.001533,0.000743,0.00128,0.000553
min,0.983,0.979,0.977,0.9455,0.982,0.996
25%,0.986625,0.98175,0.979,0.9465,0.984125,0.9965
50%,0.98725,0.9835,0.98,0.947,0.985,0.997
75%,0.988375,0.9845,0.981,0.9475,0.986375,0.9975
max,0.9905,0.987,0.9825,0.9485,0.9875,0.998


In [12]:
# Variance of each classifier's scores across repetitions. pandas computes the
# sample variance (ddof=1) by default, i.e. the square of the std shown by describe().
resultados.var()

Arvore           2.873851e-06
Random Forest    4.264655e-06
KNN              2.350862e-06
Logistica        5.517241e-07
SVM              1.639368e-06
ANN              3.057471e-07
dtype: float64

In [14]:
# Coefficient of variation per classifier, in percent: the standard deviation
# of the scores relative to their mean (lower means more stable results).
resultados.std().div(resultados.mean()).mul(100)

Arvore           0.171691
Random Forest    0.209986
KNN              0.156446
Logistica        0.078435
SVM              0.129977
ANN              0.055457
dtype: float64