In [1]:
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Load the comma-separated Monkeys dataset into a DataFrame.
# NOTE(review): the first row shown in the saved output (Class=7, A1=8, ...) does not
# look like the other rows (Class 0/1, attributes 1-4) -- confirm it is real data.
monkeys = pd.read_csv('monkeys.data')
print("\nDataset Monkeys:")
# Bare last expression so the notebook renders the frame.
monkeys


Dataset Monkeys:


Unnamed: 0,Class,A1,A2,A3,A4,A5,A6
0,7,8,9,6,4,5,3
1,1,1,1,1,1,1,1
2,1,1,1,1,1,1,2
3,1,1,1,1,1,2,1
4,1,1,1,1,1,2,2
...,...,...,...,...,...,...,...
1708,0,3,3,2,2,2,2
1709,0,3,3,2,2,3,2
1710,0,3,3,2,3,1,1
1711,0,3,3,2,3,3,2


In [3]:
# Display the column labels; the following cells slice the frame by position
# (column 0 for the class, columns 1-6 for the features).
monkeys.columns

Index(['Class', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6'], dtype='object')

In [4]:
# Feature matrix: positional columns 1..6 of the frame, as a NumPy array.
monkeysValues = monkeys.iloc[:, 1:7].values
# Single print call; the newline separator reproduces the two-call layout.
print("\n Monkeys features:\n", monkeysValues, sep="\n")



 Monkeys features:

[[8 9 6 4 5 3]
 [1 1 1 1 1 1]
 [1 1 1 1 1 2]
 ...
 [3 3 2 3 1 1]
 [3 3 2 3 3 2]
 [3 3 2 3 4 2]]


In [5]:
# Target vector: positional column 0 of the frame, as a NumPy array.
monkeysClasses = monkeys.iloc[:, 0].values
# One print call, newline-separated, emitting the labels followed by their shape.
print(
    "\nMonkeys classes:\n",
    monkeysClasses,
    "\nMonkeys classes shape:",
    monkeysClasses.shape,
    sep="\n",
)


Monkeys classes:

[7 1 1 ... 0 0 0]

Monkeys classes shape:
(1713,)


In [6]:
# Fixed seed for the shuffled splitters: with random_state=None every call to
# split() draws different folds, so neither the hyper-parameter searches nor the
# final evaluation could be reproduced from a fresh kernel.
SEED = 42

# 5-fold stratified split used for the final evaluation of the tuned classifiers.
kfold_treinamento = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
# 3-fold stratified split shared by every GridSearchCV hyper-parameter search.
kfold_ajuste_parametros = model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)

In [7]:
# Grid search over decision-tree hyper-parameters; the winning combination is kept
# in decisionTreeBestParams for the final evaluation.
arvore_decisao = DecisionTreeClassifier()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Arvore de decisao no dataset Monkeys ....\n")
param_dist = {
    'max_depth': list(range(1, 10)),
    # An integer min_samples_split must be >= 2; scikit-learn rejects 1 at fit time,
    # so the grid starts at 2.
    'min_samples_split': list(range(2, 10)),
    'criterion': ['entropy'],
    'splitter': ['best', 'random'],
}
# refit=False: only the best parameter set is needed here, not a fitted model.
grid_search = GridSearchCV(arvore_decisao, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(monkeysValues, monkeysClasses)
decisionTreeBestParams = grid_search.best_params_
print("Decision Tree: %s \n\n" % decisionTreeBestParams)


Decision Tree: {'max_depth': 3, 'splitter': 'best', 'criterion': 'entropy'} 




In [8]:
# Grid search over k-nearest-neighbours hyper-parameters; the winning combination
# is kept in knnBestParams for the final evaluation.
knn_vizinhos_proximos = KNeighborsClassifier()
# The progress message now names the dataset actually being searched.
print("====== Iniciando busca pelos melhores parâmetros do algoritmo KNN no dataset Monkeys ....\n")
# Parameters under test.
param_dist = {
    'n_neighbors': [3, 5, 7, 9],
    'metric': ['euclidean', 'manhattan'],
    'weights': ['uniform', 'distance'],
}
# refit=False: only the best parameter set is needed here, not a fitted model.
grid_search = GridSearchCV(knn_vizinhos_proximos, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(monkeysValues, monkeysClasses)
knnBestParams = grid_search.best_params_
print("KNN: %s \n\n" % knnBestParams)


KNN: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'} 




In [9]:
# Grid search over logistic-regression hyper-parameters; the winning combination
# is kept in regressao_logistica_param for the final evaluation.
regressao_logistica = LogisticRegression()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Regressao logistica no dataset Monkeys ....\n")
# Parameters under test. The grid is split per penalty because the 'sag' solver
# only supports the l2 penalty; a single cartesian grid would include the invalid
# (l1, sag) pair and those fits would fail.
param_dist = [
    {'penalty': ['l2'], 'max_iter': [200, 300, 400, 500], 'solver': ['liblinear', 'sag', 'saga']},
    {'penalty': ['l1'], 'max_iter': [200, 300, 400, 500], 'solver': ['liblinear', 'saga']},
]
# refit=False: only the best parameter set is needed here, not a fitted model.
grid_search = GridSearchCV(regressao_logistica, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(monkeysValues, monkeysClasses)
regressao_logistica_param = grid_search.best_params_
print("Regressão logistica: %s \n\n" % regressao_logistica_param)


Regressão logistica: {'solver': 'lbfgs', 'max_iter': 300, 'tol': 0.0001, 'penalty': 'l2'} 






In [None]:
# Grid search over MLP hyper-parameters; the winning combination is kept in
# redes_neurais_param for the final evaluation.
rede_neural = MLPClassifier()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Redes neurais no dataset Monkeys ....\n")
param_dist = {
    # Single hidden layer with 10..19 units.
    'hidden_layer_sizes': list(range(10, 20)),
    'activation': ['logistic', 'identity', 'tanh'],
    'max_iter': [100, 200, 300, 400],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'solver': ['sgd', 'adam'],
}
# 10 * 3 * 4 * 4 * 2 = 960 candidates, each fitted on 3 folds: run the fits in
# parallel on all cores (n_jobs=-1) so the search can finish in reasonable time.
# refit=False: only the best parameter set is needed here, not a fitted model.
grid_search = GridSearchCV(rede_neural, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False, n_jobs=-1)
grid_search.fit(monkeysValues, monkeysClasses)
redes_neurais_param = grid_search.best_params_
print("Redes neurais: %s \n\n" % redes_neurais_param)






In [None]:
# One zero-filled prediction buffer per classifier, the same shape as the label
# vector; the cross-validation loop fills each buffer fold by fold.
predicted_classes = {
    nome: np.zeros(monkeysClasses.shape)
    for nome in (
        'arvore_decisao',
        'vizinhos_proximos',
        'naive_bayes_gaussian',
        'regressao_logistica',
        'rede_neural',
    )
}

# Estimators configured with the best parameters found by the grid searches.
# GaussianNB was not tuned, so it keeps its default settings.
arvore_decisao_best_param = DecisionTreeClassifier(**decisionTreeBestParams)
knn_vizinhos_proximos_best_param = KNeighborsClassifier(**knnBestParams)
naive_bayes_gaussian = GaussianNB()
regressao_logistica_best_param = LogisticRegression(**regressao_logistica_param)
rede_neural_best_param = MLPClassifier(**redes_neurais_param)

In [None]:
# Map each slot of predicted_classes to the estimator built from the tuned
# hyper-parameters. The previous version fitted the untuned default estimators
# left over from the search cells and referenced an undefined `vizinhos_proximos`.
classificadores = {
    'arvore_decisao': arvore_decisao_best_param,
    'vizinhos_proximos': knn_vizinhos_proximos_best_param,
    'naive_bayes_gaussian': naive_bayes_gaussian,
    'regressao_logistica': regressao_logistica_best_param,
    'rede_neural': rede_neural_best_param,
}

# Out-of-fold evaluation: every sample is predicted exactly once, by models
# trained on the folds that do not contain it.
for train, test in kfold_treinamento.split(monkeysValues, monkeysClasses):
    data_train, target_train = monkeysValues[train], monkeysClasses[train]
    data_test = monkeysValues[test]

    for nome, modelo in classificadores.items():
        # fit() retrains the estimator from scratch on this fold's training split.
        modelo.fit(data_train, target_train)
        predicted_classes[nome][test] = modelo.predict(data_test)

In [None]:
# Print, for each classifier, the classification report and confusion matrix of
# its out-of-fold predictions against the true labels.
for classificador, predicoes in predicted_classes.items():
    print("================================================================================================")
    relatorio = metrics.classification_report(monkeysClasses, predicoes)
    print("Resultados do classificador %s\n%s\n" % (classificador, relatorio))
    matriz_confusao = metrics.confusion_matrix(monkeysClasses, predicoes)
    print("Matriz de confusão: \n%s\n\n\n" % matriz_confusao)