# Importando as bibliotecas

In [21]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
from collections import Counter
import os
import sys
sys.path.insert(0, os.path.abspath('../Util'))
from dados import ProcessarDados
import time

# Dataset

In [54]:
procData = ProcessarDados("../dataset/bin_norm_10_features_m73_n4.csv", sep = ',')

# Funções

In [60]:
# funcção que treina e testa o modelo armazenando as métricas
# retorna um dicionário cotendo os valores das métricas de cada rodada
def treinamento_teste(epocas = 10, k_folds = 5, exibir_matriz_confusao=False, exibir_metricas=False):
    
    #array para armazenar as das métricas de cada rodada
    resultados_accuracy = []
    resultados_precision = []
    resultados_recall = []
    resultados_specificity = [] # taxa de verdadeiros negativos ou especificidade
    resultados_f2 = []
    resultados_parametros = []
    resultados_time_train = []
    resultados_time_test = []
    resultados_time_total = []
    
    #dicionário das métricas
    resultados_gerais = {}
    
    max_iter = 400

    for i in range(epocas):
        # divisão os dados 
        seed = i
        X_train, X_test, y_train, y_test = procData.holdout2(0.2, seed)
        #print(Counter(y_test))

        # realizando o grid search para encontrar a melhor combinação de parametros, 
        # considerando a acurácia (taxa de acerto)
        # aqui o método GridSearchCV é configurado para subdividir os dados de treino em k_folds
        
        
        clf = MLPClassifier(random_state = seed)
        grid_mlp = GridSearchCV(clf, param_grid, cv=k_folds, scoring='accuracy', verbose=0, n_jobs=-1)
        
        start_time_train = time.clock()
        grid_mlp.fit(X_train, y_train)

        # Treinando do modelo com os melhores parametros encontrados
        activation_best = grid_mlp.best_estimator_.activation
        alpha_best = grid_mlp.best_estimator_.alpha
        solver_best = grid_mlp.best_estimator_.solver
        hidden_layer_size_best = grid_mlp.best_estimator_.hidden_layer_sizes

        MLP = MLPClassifier(random_state = seed, activation = activation_best, alpha = alpha_best, solver = solver_best, hidden_layer_sizes = hidden_layer_size_best, max_iter = max_iter)
        MLP.fit(X_train, y_train)
        
        time_train = time.clock() - start_time_train

        #testando o modelo
        start_time_test = time.clock()
        y_pred = MLP.predict(X_test)
        time_test = time.clock() - start_time_test
        cm  = confusion_matrix(y_test, y_pred)
        if exibir_matriz_confusao:
            print(cm)

        # calculado as metricas
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(y_test, y_pred)
        recall = metrics.recall_score(y_test, y_pred)
        # f2-score
        # Fbeta = ((1 + beta^2) * Precision * Recall) / (beta^2 * Precision + Recall)
        beta = 0.5
        f2_score = (1 + beta**2) * (precision * recall) / (beta**2 * precision + recall)
        tn, fp, fn, tp = cm.ravel()
        specificity = tn / (tn+fp)

        # armazenando as métricas
        resultados_accuracy.append(accuracy)
        resultados_precision.append(precision)
        resultados_recall.append(recall)
        resultados_specificity.append(specificity)
        resultados_f2.append(f2_score)

        best_parametros = {'Activation': activation_best, 'Alpha': alpha_best, 'Solver': solver_best ,'hidden_layer_sizes': hidden_layer_size_best};
        resultados_parametros.append(best_parametros)
        
        resultados_time_train.append(time_train)
        resultados_time_test.append(time_test) 
        resultados_time_total.append(time_train+time_test)


        if exibir_metricas:
            print("Rodada: #",i)
            print(best_parametros)
            print("Accuracy:",accuracy)
            print("Precision:",precision)
            print("Recall:",recall)
            print("Specificity:",specificity)
            print("f2-Score:",f2_score)
            print("Time Train (s):",time_train)
            print("Time Test (s):",time_test)
            print("Time Total (s):",time_train+time_test)
            print("\n")

            
    resultados_gerais['accuracy'] = resultados_accuracy
    resultados_gerais['precision'] = resultados_precision
    resultados_gerais['recall'] = resultados_recall
    resultados_gerais['specificity'] = resultados_specificity
    resultados_gerais['f2'] = resultados_f2
    resultados_gerais['params'] = resultados_parametros
    resultados_gerais['time_train'] = resultados_time_train
    resultados_gerais['time_test'] = resultados_time_test
    resultados_gerais['time_total'] = resultados_time_total
    
    return resultados_gerais

In [61]:
import csv
def writeResultsCsv(dict_results, name_file):
    keys = list(dict_results.keys())
    keys.insert(0,'Rodada')
    with open(name_file, 'w') as f:  # You will need 'wb' mode in Python 2.x
        w = csv.writer(f)
        #w.writeheader()
        row = []
        w.writerow(keys)
        for i in range(epocas):
            row = []
            acc = dict_results.get('accuracy')[i]
            prec = dict_results.get('precision')[i]
            sen = dict_results.get('recall')[i]
            spe = dict_results.get('specificity')[i]
            fscore = dict_results.get('f2')[i]
            param = dict_results.get('params')[i]
            time_train = dict_results.get('time_train')[i]
            time_test = dict_results.get('time_test')[i]
            time_total = dict_results.get('time_total')[i]

            row.append(i+1)
            row.append(acc)
            row.append(prec)
            row.append(sen)
            row.append(spe)
            row.append(fscore)
            row.append(param)
            row.append(time_train)
            row.append(time_test)
            row.append(time_total)

            w.writerow(row)
    

In [62]:
def tabelaMetricas(nome_modelo, dict_metricas, rodadas=False, salvarResultados=True):
        
    print ("============================================== "+nome_modelo+" =================================================")
    print ("=================================== TABELA DE MÉTRICAS DO MODELO ===================================")
    
    if(rodadas==False):
        print ("\t Accuracy \t|\t Precision \t|\t Recall \t|\tSpecificity \t|\t fb-Score")
        print ("      %.4f +- %.4f" % (np.mean(dict_metricas['accuracy'], axis=0), np.std(dict_metricas['accuracy'], axis=0)),end=' ')
        print ("      %.4f +- %.4f" % (np.mean(dict_metricas['precision'], axis=0), np.std(dict_metricas['precision'], axis=0)),end='  ')
        print ("      %.4f +- %.4f" % (np.mean(dict_metricas['recall'], axis=0), np.std(dict_metricas['recall'], axis=0)),end=' ')
        print ("      %.4f +- %.4f" % (np.mean(dict_metricas['specificity'], axis=0), np.std(dict_metricas['specificity'], axis=0)),end='   ')
        print ("      %.4f +- %.4f" % (np.mean(dict_metricas['f2'], axis=0), np.std(dict_metricas['f2'], axis=0)))
        print ("====================================================================================================")
        
    if(salvarResultados):
        # save to npy file
        np.save('../resultados/resultados_'+nome_modelo+'.npy', dict_metricas)
        writeResultsCsv(dict_metricas, 'resultados_mlp.csv')

# Definindo os parâmetros

In [63]:
activations = ['relu']
alphas = 10.0 ** -np.arange(1, 4)
solvers = ['lbfgs']
hidden_layer_sizes = [100, 150, 200]
param_grid = {'hidden_layer_sizes': hidden_layer_sizes, 'activation' : activations, 'alpha': alphas, 'solver': solvers}

epocas = 50
k_folds = 5
exibir_matriz_confusao = True
exibir_metricas = True
salvarResultados = True
rodadas=False

# Treinando e obtendo as métricas do modelo

In [64]:
# treinando o modelo
dict_metricas = treinamento_teste(epocas, k_folds, exibir_matriz_confusao, exibir_metricas)
tabelaMetricas('MLP',dict_metricas, rodadas, salvarResultados)

[[18  2]
 [ 4 46]]
Rodada: # 0
{'Activation': 'relu', 'Alpha': 0.001, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.9142857142857143
Precision: 0.9583333333333334
Recall: 0.92
Specificity: 0.9
f2-Score: 0.9504132231404958
Time Train (s): 11.617704600001161
Time Test (s): 0.0005501000014191959
Time Total (s): 11.61825470000258




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[18  2]
 [ 2 48]]
Rodada: # 1
{'Activation': 'relu', 'Alpha': 0.01, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.9428571428571428
Precision: 0.96
Recall: 0.96
Specificity: 0.9
f2-Score: 0.96
Time Train (s): 7.354846800000814
Time Test (s): 0.0006501999996544328
Time Total (s): 7.355497000000469




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[16  4]
 [ 1 49]]
Rodada: # 2
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.9285714285714286
Precision: 0.9245283018867925
Recall: 0.98
Specificity: 0.8
f2-Score: 0.935114503816794
Time Train (s): 9.254570199998852
Time Test (s): 0.0008138000011967961
Time Total (s): 9.25538400000005




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[19  1]
 [ 4 46]]
Rodada: # 3
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.9285714285714286
Precision: 0.9787234042553191
Recall: 0.92
Specificity: 0.95
f2-Score: 0.9663865546218486
Time Train (s): 10.64466999999786
Time Test (s): 0.0014832999986538198
Time Total (s): 10.646153299996513




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[18  2]
 [ 7 43]]
Rodada: # 4
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.8714285714285714
Precision: 0.9555555555555556
Recall: 0.86
Specificity: 0.9
f2-Score: 0.9347826086956523
Time Train (s): 12.873818899999605
Time Test (s): 0.0013121000010869466
Time Total (s): 12.875131000000692




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[16  4]
 [ 6 44]]
Rodada: # 5
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.8571428571428571
Precision: 0.9166666666666666
Recall: 0.88
Specificity: 0.8
f2-Score: 0.9090909090909091
Time Train (s): 8.690267400001176
Time Test (s): 0.0006643000015174039
Time Total (s): 8.690931700002693




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[19  1]
 [ 0 50]]
Rodada: # 6
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 150}
Accuracy: 0.9857142857142858
Precision: 0.9803921568627451
Recall: 1.0
Specificity: 0.95
f2-Score: 0.9842519685039369
Time Train (s): 8.833703699998296
Time Test (s): 0.0007624999998370185
Time Total (s): 8.834466199998133




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[17  3]
 [ 4 46]]
Rodada: # 7
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 200}
Accuracy: 0.9
Precision: 0.9387755102040817
Recall: 0.92
Specificity: 0.85
f2-Score: 0.9349593495934959
Time Train (s): 8.542535500000668
Time Test (s): 0.0007378999980574008
Time Total (s): 8.543273399998725




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[17  3]
 [ 4 46]]
Rodada: # 8
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 100}
Accuracy: 0.9
Precision: 0.9387755102040817
Recall: 0.92
Specificity: 0.85
f2-Score: 0.9349593495934959
Time Train (s): 7.115892800000438
Time Test (s): 0.0008516999987477902
Time Total (s): 7.116744499999186




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[13  7]
 [ 6 44]]
Rodada: # 9
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 100}
Accuracy: 0.8142857142857143
Precision: 0.8627450980392157
Recall: 0.88
Specificity: 0.65
f2-Score: 0.8661417322834646
Time Train (s): 8.316074500002287
Time Test (s): 0.0005596999981207773
Time Total (s): 8.316634200000408




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[[20  0]
 [ 2 48]]
Rodada: # 10
{'Activation': 'relu', 'Alpha': 0.1, 'Solver': 'lbfgs', 'hidden_layer_sizes': 200}
Accuracy: 0.9714285714285714
Precision: 1.0
Recall: 0.96
Specificity: 1.0
f2-Score: 0.9917355371900827
Time Train (s): 10.707804000001488
Time Test (s): 0.0007149000011850148
Time Total (s): 10.708518900002673




KeyboardInterrupt: 