# Deep Learning A Z Classificação Binária - Breast Cancer Cruzada

In [2]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\guilh\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import tensorflow as tf
import sklearn
import scikeras

In [4]:
pd.__version__, tf.__version__, sklearn.__version__, scikeras.__version__

('2.2.2', '2.17.0', '1.5.1', '0.13.0')

In [5]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as k

In [6]:
X = pd.read_csv('entradas_breast.csv') # features da base de dados
y = pd.read_csv('saidas_breast.csv') # classes (respostas) da base de dados

Artigo dropout: https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf

In [7]:
def criar_rede():
  k.clear_session() # limpa a memória de execuções anteriores
  rede_neural = Sequential([
      tf.keras.layers.InputLayer(shape=(30,)), # camada de entrada com 30 neurônios
      tf.keras.layers.Dense(units=16, activation='relu', kernel_initializer='random_uniform'), # primeira camada oculta com 16 neurônios
      # tf.keras.layers.Dropout(rate = 0.2),
      tf.keras.layers.Dense(units=16, activation='relu', kernel_initializer='random_uniform'), # segunda camada oculta com 16 neurônios
      # tf.keras.layers.Dropout(rate = 0.2),
      tf.keras.layers.Dense(units=1, activation = 'sigmoid')]) # camada de saída com 1 neurônio
  otimizador = tf.keras.optimizers.Adam(learning_rate = 0.001, clipvalue = 0.5)
  rede_neural.compile(optimizer = otimizador, loss = 'binary_crossentropy', metrics = ['binary_accuracy'])
  return rede_neural

# Aplicando o Dropout

O dropout consiste em zerar alguns neurônios de alguma camada, a fim de evitar o overfitting

In [15]:
def criar_rede():
  k.clear_session() # limpa a memória de execuções anteriores
  rede_neural = Sequential([
      tf.keras.layers.InputLayer(shape=(30,)), # camada de entrada com 30 neurônios
      tf.keras.layers.Dense(units=16, activation='relu', kernel_initializer='random_uniform'), # primeira camada oculta com 16 neurônios
      tf.keras.layers.Dropout(rate = 0.2), # dropout de 20% do total de neurônios da camada
      tf.keras.layers.Dense(units=16, activation='relu', kernel_initializer='random_uniform'), # segunda camada oculta com 16 neurônios
      tf.keras.layers.Dropout(rate = 0.2),
      tf.keras.layers.Dense(units=1, activation = 'sigmoid')]) # camada de saída com 1 neurônio
  otimizador = tf.keras.optimizers.Adam(learning_rate = 0.001, clipvalue = 0.5)
  rede_neural.compile(optimizer = otimizador, loss = 'binary_crossentropy', metrics = ['binary_accuracy'])
  return rede_neural

In [16]:
rede_neural = KerasClassifier(model = criar_rede, epochs = 100, batch_size = 10)

In [17]:
resultados = cross_val_score(estimator = rede_neural,
                             X = X, # features
                             y = y, # classes
                             cv = 10, # número de folds (divisões) da base de dados
                             scoring = 'accuracy' # métrica de avaliação
                             )

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - binary_accuracy: 0.5379 - loss: 1.5436
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.5964 - loss: 0.7192
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6285 - loss: 0.6664
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.6344 - loss: 0.5797
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.5934 - loss: 0.6508
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6526 - loss: 0.6002
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6836 - loss: 0.5659
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.6634 - loss:

In [11]:
# Sem dropout
resultados # É possível perceber que houve grande variação nos resultados

array([0.78947368, 0.85964912, 0.89473684, 0.68421053, 0.59649123,
       0.85964912, 0.8245614 , 0.84210526, 0.80701754, 0.82142857])

In [12]:
# Sem dropout
resultados.mean()

0.7979323308270676

In [14]:
# Sem dropout
resultados.std() # Distância dos valores em relação à média

0.08594575237499714

In [18]:
# Com dropout
resultados # Obtivemos um ganho considerável de acurácia com a utilização do dropout

array([0.84210526, 0.85964912, 0.89473684, 0.94736842, 0.87719298,
       0.85964912, 0.85964912, 0.94736842, 0.85964912, 0.91071429])

In [19]:
# Com dropout
resultados.mean()

0.8858082706766919

In [20]:
# Com dropout
resultados.std()

0.03605650194875498