# Dealing with Overfitting: Dropout

We'll use Boston Housing dataset to see the effect of Dropout in our model

In [6]:
from keras.datasets import boston_housing

In [7]:
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

In [8]:
# Using Feature-wise Normalization
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

test_data -= mean
test_data /= std

In [9]:
from keras import layers
from keras import models

In [23]:
def base_model():
    base = models.Sequential()
    base.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    base.add(layers.Dense(64, activation='relu'))
    base.add(layers.Dense(1))
    base.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return base

def dropout_model(dropout):
    drop = models.Sequential()
    drop.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    drop.add(layers.Dropout(dropout))
    drop.add(layers.Dense(64, activation='relu'))
    drop.add(layers.Dropout(dropout))
    drop.add(layers.Dense(1))
    drop.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return drop

In [15]:
import numpy as np

In [20]:
k = 4
num_val_samples = len(train_data) // k
num_epochs = 200
all_scores_base = []
all_scores_dp1 = []
all_scores_dp2 = []
all_scores_dp3 = []
histbase = []
histdp1 = []
histdp2 = []
histdp3 = []

In [21]:
## Using Base model
print("Using Base model")
for i in range(k):
    print(f"Processing fold #{i}")
    val_data = train_data[i*num_val_samples:(i+1)*num_val_samples]
    val_targets = train_targets[i*num_val_samples:(i+1)*num_val_samples]
    partial_train_data = np.concatenate([
        train_data[:i*num_val_samples],
        train_data[(i+1)*num_val_samples:]
    ], axis=0)
    partial_train_targets = np.concatenate([
        train_targets[:i*num_val_samples],
        train_targets[(i+1)*num_val_samples:]
    ], axis=0)
    base = base_model()
    hist_base = base.fit(partial_train_data, partial_train_targets,
             epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    
#     val_mse, val_mae = base.evaluate(val_data, val_targets, verbose=0)
#     all_scores_base.append(val_mae)
    histbase.append(hist_base)

Using Base model
Processing fold #0
Processing fold #1
Processing fold #2
Processing fold #3


In [50]:
## Using Dropout of 0.2
print("Using Dropout of 0.2")
for i in range(k):
    print(f"Processing fold #{i}")
    val_data = train_data[i*num_val_samples:(i+1)*num_val_samples]
    val_targets = train_targets[i*num_val_samples:(i+1)*num_val_samples]
    partial_train_data = np.concatenate([
        train_data[:i*num_val_samples],
        train_data[(i+1)*num_val_samples:]
    ], axis=0)
    partial_train_targets = np.concatenate([
        train_targets[:i*num_val_samples],
        train_targets[(i+1)*num_val_samples:]
    ], axis=0)
    drop1 = dropout_model(0.2)
    hist_dp1 = drop1.fit(partial_train_data, partial_train_targets,
             epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    
#     val_mse, val_mae = drop1.evaluate(val_data, val_targets, verbose=0)
#     all_scores_dp1.append(val_mae)
    histdp1.append(hist_dp1)

Using Dropout of 0.2
Processing fold #0
Processing fold #1
Processing fold #2
Processing fold #3


In [51]:
## Using Dropout of 0.5
print("Using Dropout of 0.5")
for i in range(k):
    print(f"Processing fold #{i}")
    val_data = train_data[i*num_val_samples:(i+1)*num_val_samples]
    val_targets = train_targets[i*num_val_samples:(i+1)*num_val_samples]
    partial_train_data = np.concatenate([
        train_data[:i*num_val_samples],
        train_data[(i+1)*num_val_samples:]
    ], axis=0)
    partial_train_targets = np.concatenate([
        train_targets[:i*num_val_samples],
        train_targets[(i+1)*num_val_samples:]
    ], axis=0)
    drop2 = dropout_model(0.5)
    hist_dp2=drop2.fit(partial_train_data, partial_train_targets,
             epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    
#     val_mse, val_mae = drop2.evaluate(val_data, val_targets, verbose=0)
#     all_scores_dp2.append(val_mae)
    histdp2.append(hist_dp2)

Using Dropout of 0.5
Processing fold #0
Processing fold #1
Processing fold #2
Processing fold #3


In [52]:
## Using Dropout of 0.8
print("Using Dropout of 0.8")
for i in range(k):
    print(f"Processing fold #{i}")
    val_data = train_data[i*num_val_samples:(i+1)*num_val_samples]
    val_targets = train_targets[i*num_val_samples:(i+1)*num_val_samples]
    partial_train_data = np.concatenate([
        train_data[:i*num_val_samples],
        train_data[(i+1)*num_val_samples:]
    ], axis=0)
    partial_train_targets = np.concatenate([
        train_targets[:i*num_val_samples],
        train_targets[(i+1)*num_val_samples:]
    ], axis=0)
    drop3 = dropout_model(0.8)
    hist_dp3=drop3.fit(partial_train_data, partial_train_targets,
             epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    
#     val_mse, val_mae = drop3.evaluate(val_data, val_targets, verbose=0)
#     all_scores_dp3.append(val_mae)
    histdp3.append(hist_dp3)

Using Dropout of 0.8
Processing fold #0
Processing fold #1
Processing fold #2
Processing fold #3


In [53]:
all_scores_dp2 = all_scores_dp2[-4:]
print("All scores base: ", all_scores_base)
print("All scores base mean: ", np.mean(all_scores_base))
print("All scores dropout 0.2: ", all_scores_dp1)
print("All scores dropout 0.2 mean: ", np.mean(all_scores_dp1))
print("All scores dropout 0.5: ", all_scores_dp2)
print("All scores dropout 0.5 mean: ", np.mean(all_scores_dp2))
print("All scores dropout 0.8: ", all_scores_dp3)
print("All scores dropout 0.8 mean: ", np.mean(all_scores_dp3))

All scores base:  [2.233460284695767, 3.1001296893204793, 2.5625649489978755, 2.3723219810145917]
All scores base mean:  2.5671192260071782
All scores dropout 0.2:  [1.9605656638003812, 2.085175308850732, 2.7937788337764173, 2.3428971861848735]
All scores dropout 0.2 mean:  2.295604248153101
All scores dropout 0.5:  [2.686643671281267, 2.4491220087108045, 2.7358158673390305, 2.8191304336680045]
All scores dropout 0.5 mean:  2.6726779952497766
All scores dropout 0.8:  [3.378339403926736, 3.7507787647813853, 3.561897084264472, 4.369218269197067]
All scores dropout 0.8 mean:  3.765058380542415


In [39]:
average_mae_history_base = [np.mean([x[i] for x in hist_base]) for i in range(num_epochs)]

KeyError: 'val_mean_absolute_error'

In [47]:
[[x.history['val_mean_absolute_error'][i] for x in histbase] for i in range(num_epochs)]

KeyError: 'val_mean_absolute_error'

In [49]:
histbase[0].history.keys()

dict_keys(['loss', 'mean_absolute_error'])