<a href="https://colab.research.google.com/github/Kaiziferr/Deep_Learning_Workshop/blob/master/w11_workshop_dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout
# Se utiliza para que los pesos no pase mayor a 3
from keras.constraints import maxnorm
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
#load data
url = 'https://raw.githubusercontent.com/Kaiziferr/Deep_Learning_Workshop/master/workshop_clasification_binary_multiclass/sonar_csv.csv'
data = pd.read_csv(url)
data_values = data.values

In [3]:
X = data_values[:,0:60].astype('float64')
y = data_values[:,60]

In [4]:
#Encoder
encoder = LabelEncoder()
encoder.fit(y)
encoded_y = encoder.transform(y)

In [5]:
def baseline_model():
  model = Sequential()
  model.add(Dense(60, input_dim = 60, activation='relu'))
  model.add(Dense(30, activation = 'relu'))
  model.add(Dense(1, activation='sigmoid'))
  #lr tasa de aprendizaje
  sgd = SGD(lr=0.01, momentum=0.8)
  model.compile(loss = 'binary_crossentropy', optimizer = sgd, metrics = ['accuracy'])

  return model

estimator = []
estimator.append(('standarize', StandardScaler()))
estimator.append(('MLP', KerasClassifier(build_fn=baseline_model, epochs=300, batch_size = 16, verbose=0)))

pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True)

results = cross_val_score(pipeline, X, encoded_y,cv=kfold)
print(" Modelo de linea base: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

 Modelo de linea base: 85.98% (6.54%)


# Dropout Capa entrada

In [6]:
#Tasa de dropout del 20%
# Se impone una restricción en los pesos para cada capa oculta con la norma maxima de los pesos para que no exceda 3, estableciendo el argumento de kernel_constraint en la clase Dense
# Aumentar la tasa de aprendizaje y el momentum

def input_dropout_model():
  model = Sequential()
  model.add(Dropout(0.2, input_shape = (60,)))
  model.add(Dense(60, activation='relu', kernel_constraint=maxnorm(3)))
  model.add(Dense(30, activation='relu', kernel_constraint=maxnorm(3)))
  model.add(Dense(1, activation='sigmoid'))

  sgd = SGD(lr=0.01, momentum=0.9)
  model.compile(loss = 'binary_crossentropy', optimizer=sgd, metrics = ['accuracy'])

  return model

In [7]:
estimator = []
estimator.append(('standarize',StandardScaler()))
estimator.append(('MLP', KerasClassifier(build_fn=input_dropout_model, epochs = 300, batch_size=16,verbose=0)))

pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True)

results = cross_val_score(pipeline, X, encoded_y, cv=kfold)
print(" Modelo de Dropout en la entrada: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

 Modelo de Dropout en la entrada: 86.93% (7.72%)


# Dropout Capa oculta
- Dropout se aplica entre las dos capas ocultas y entre la ultima capa oculta y capa de salida

In [16]:
def dropout_hidden_model():
  model = Sequential()
  model.add(Dense(60, input_dim = 60, activation='relu', kernel_constraint=maxnorm(1)))
  model.add(Dropout(0.2))
  model.add(Dense(30, activation='relu', kernel_constraint=maxnorm(1)))
  model.add(Dropout(0.2))
  model.add(Dense(1, activation='sigmoid'))

  sgd = SGD(lr=0.1, momentum=0.9)
  model.compile(loss = 'binary_crossentropy', optimizer=sgd, metrics = ['accuracy'])

  return model

In [17]:
estimator = []
estimator.append(('standarize',StandardScaler()))
estimator.append(('MLP', KerasClassifier(build_fn=dropout_hidden_model, epochs = 300, batch_size=16,verbose=0)))

pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True)

results = cross_val_score(pipeline, X, encoded_y, cv=kfold)
print(" Modelo de Dropout en la capa oculta: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

 Modelo de Dropout en la capa oculta: 83.14% (4.03%)


# Recomendaciones del profesor

1. Utilizar por lo general un valor de Dropout del 20% - 50%
2. Utilizar drooupt en la entrada(Capa visible) y capas ocultas
3. Utilizar una gran tasa de aprendizaje (aumentar el factor de 0.01 a 10% o 100%) y un momentum de 0.9 a 0.99
4. Restrinja el tamaño de los pesos de la red. **Una gran tasa de aprendizaje puede resultar en pesos de red muy grandes**
