#### Validação Cruzada

Validação cruzada é você usar diversas divisões de base, usando diversas possibilidades pra encontrar quais são os dados previsores que melhor funcionam

#### Etapa 1: Importação das bibliotecas

In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import seaborn as sns
from skorch import NeuralNetBinaryClassifier #pytorch nao tem validacao cruzada
from sklearn.model_selection import cross_val_score

#### Etapa 2: Base de dados

In [2]:
np.random.seed(123)
torch.manual_seed(123) # Mesmos pesos

<torch._C.Generator at 0x21e910237d0>

In [3]:
previsores = pd.read_csv('Arquivos/entradas_breast.csv')
classes = pd.read_csv('Arquivos/saidas_breast.csv')

In [4]:
classes.shape

(569, 1)

In [5]:
previsores = np.array(previsores, dtype = 'float32')
classes = np.array(classes, dtype='float32').squeeze(1)

In [6]:
classes.shape

(569,)

#### Etapa 3: Classe para estrutura da rede neural

In [7]:
class classificador_torch(nn.Module):
    def __init__(self): ## Construtor
        super().__init__() ## Ta pegando todas as características da classe nn.Module

        # 30 -> 16 -> 16 -> 1
        # Atributos
        self.dense0 = nn.Linear(30,16)
        torch.nn.init.uniform_(self.dense0.weight) # Inicializa Pesos uniformes
        self.activation0 = nn.ReLU()
        self.dense1 = nn.Linear(16,16)
        torch.nn.init.uniform_(self.dense1.weight)
        self.activation1 = nn.ReLU()
        self.dense2 = nn.Linear(16,1)
        torch.nn.init.uniform_(self.dense2.weight)
        self.output = nn.Sigmoid()

    def forward(self, X): # X são as entradas
        X = self.dense0(X)
        X = self.activation0(X)
        X = self.dense1(X)
        X = self.activation1(X)
        X = self.dense2(X)
        X = self.output(X)

        return X

#### Etapa 4: Skorch 

In [8]:
classificador_sklearn = NeuralNetBinaryClassifier(module=classificador_torch, 
                                                  criterion=torch.nn.BCELoss, 
                                                  optimizer = torch.optim.Adam, 
                                                  lr = 0.001, 
                                                  optimizer__weight_decay = 0.0001,
                                                  max_epochs=100,
                                                  batch_size = 10,
                                                  train_split=False) # Train split é falso porque faremos a validação cruzada


#### Etapa 5: Validação cruzada

In [9]:
resultados = cross_val_score(classificador_sklearn, previsores, classes, cv = 10, scoring = 'accuracy') # Cross validation = cv

     89       37.3047  0.0660
     90       37.3047  0.0720
     91       37.3047  0.1040
     92       37.3047  0.0730
     93       37.3047  0.0790
     94       37.3047  0.0750
     95       37.3047  0.0680
     96       37.3047  0.0680
     97       37.3047  0.0650
     98       37.3047  0.0690
     99       37.3047  0.0840
    100       37.3047  0.0780
  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.2320[0m  0.0680
      2       37.2320  0.0630
      3       37.2320  0.0620
      4       37.2320  0.0650
      5       37.2320  0.0680
      6       37.2320  0.0700
      7       37.2320  0.0670
      8       37.2320  0.0620
      9       37.2320  0.0625
     10       37.2320  0.0620
     11       37.2320  0.0600
     12       37.2320  0.0630
     13       37.2320  0.0620
     14       37.2320  0.0610
     15       37.2320  0.0620
     16       37.2320  0.0650
     17       37.2320  0.0640
     18       37.2320  0.0730
     19       37.2320  0.0660
 

In [10]:
media = resultados.mean()
desvio = resultados.std()
media, desvio

(0.7835526315789474, 0.13066955478793235)

In [11]:
resultados.shape

(10,)

In [12]:
resultados

array([0.84210526, 0.61403509, 0.89473684, 0.63157895, 0.89473684,
       0.89473684, 0.87719298, 0.92982456, 0.63157895, 0.625     ])

#### Etapa 6: Dropout

Previne Overfitting fazendo com que alguns neurônios sejam desligados

In [13]:
class classificador_torch(nn.Module):
    def __init__(self): ## Construtor
        super().__init__() ## Ta pegando todas as características da classe nn.Module

        # 30 -> 16 -> 16 -> 1
        # Atributos
        self.dense0 = nn.Linear(30,16)
        torch.nn.init.uniform_(self.dense0.weight) # Inicializa Pesos uniformes
        self.activation0 = nn.ReLU()
        self.dropout0 = nn.Dropout(0.2)
        self.dense1 = nn.Linear(16,16)
        torch.nn.init.uniform_(self.dense1.weight)
        self.activation1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.dense2 = nn.Linear(16,1)
        torch.nn.init.uniform_(self.dense2.weight)
        self.output = nn.Sigmoid()

    def forward(self, X): # X são as entradas
        X = self.dense0(X)
        X = self.activation0(X)
        X = self.dropout0(X)
        X = self.dense1(X)
        X = self.activation1(X)
        X = self.dropout1(X)
        X = self.dense2(X)
        X = self.output(X)

        return X

In [14]:
classificador_sklearn = NeuralNetBinaryClassifier(module=classificador_torch, 
                                                  criterion=torch.nn.BCELoss, 
                                                  optimizer = torch.optim.Adam, 
                                                  lr = 0.001, 
                                                  optimizer__weight_decay = 0.0001,
                                                  max_epochs=100,
                                                  batch_size = 10,
                                                  train_split=False) # Train split é falso porque faremos a validação cruzada


In [15]:
resultados = cross_val_score(classificador_sklearn, previsores, classes, cv = 10, scoring = 'accuracy') # Cross validation = cv

     13       37.1094  0.0740
     14       37.1094  0.0780
     15       37.1094  0.0780
     16       37.1094  0.0910
     17       37.1094  0.0870
     18       37.1094  0.0900
     19       37.1094  0.0870
     20       37.1094  0.0750
     21       [36m36.9392[0m  0.0730
     22       37.1149  0.0700
     23       36.9493  0.0710
     24       [36m13.8035[0m  0.0680
     25        [36m0.6964[0m  0.0710
     26        0.6974  0.0680
     27        [36m0.6398[0m  0.0670
     28        [36m0.4511[0m  0.0670
     29        0.4559  0.0680
     30        [36m0.4056[0m  0.0720
     31        [36m0.4035[0m  0.0720
     32        [36m0.3692[0m  0.0670
     33        0.3894  0.0720
     34        [36m0.3629[0m  0.0710
     35        [36m0.3445[0m  0.0760
     36        [36m0.3443[0m  0.0740
     37        [36m0.3421[0m  0.0700
     38        [36m0.3141[0m  0.0710
     39        0.3162  0.0800
     40        0.3161  0.0700
     41        0.3206  0.0670
     42      

In [16]:
media = resultados.mean()
desvio = resultados.std()
media, desvio

(0.8804824561403507, 0.044266207799744314)

In [17]:
resultados

array([0.85964912, 0.8245614 , 0.84210526, 0.94736842, 0.89473684,
       0.92982456, 0.84210526, 0.94736842, 0.84210526, 0.875     ])