# STEP #3
Dropout Regularization in Keras

In [1]:
import numpy
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


In [2]:
seed = 7
numpy.random.seed(seed)
# load dataset
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)


In [4]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))

    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    
    return model


In [5]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 86.52% (5.21%)


# STEP #4
Using Dropout on the Visible Layer

In [3]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [6]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 84.61% (6.45%)


# STEP #5
Trying to Improve Performance

In [3]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [4]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 86.06% (6.94%)


# STEP #6
Using Dropout on Hidden Layers

In [5]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [6]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 84.59% (5.29%)


# STEP #7
Trying to Improve Performance

In [7]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(20, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.02, momentum=0.9, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [8]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 84.61% (5.11%)


# STEP #8
Tips For Using Dropout

# STEP #8.1
Try Different Dropout values

In [10]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.30))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.30))
    model.add(Dense(20, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.30))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [11]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 83.64% (7.22%)


# STEP #8.2
Try using a Larger network

In [3]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(20, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [4]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 82.61% (6.38%)


# STEP #8.3
Try using Dropout on both visible and hidden units

In [14]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [15]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 83.14% (6.55%)


# STEP #8.4
Try using large learning rate with decay and larger momentum

In [16]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.20))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.30, momentum=0.99, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [17]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 53.38% (1.23%)


# STEP #8.5
Try constraining the size of the network weights

In [18]:
def create_baseline():

    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(Dense(40, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.4))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dropout(0.4))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    sgd = SGD(lr=0.30, momentum=0.99, decay=0.0, nesterov=False)
    
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    
    return model

In [19]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 53.38% (1.23%)
