# Regularisation in NNs

## 1. Set up the environment

In [None]:
# Import statements
from tensorflow import keras as kr
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Plotting style: 'dark_background' combined with 'bmh', no axes face colour,
# and a wide 16x4 default figure size
plt.style.use(['dark_background', 'bmh'])
plt.rcParams['axes.facecolor'] = 'none'
plt.rcParams['figure.figsize'] = (16, 4)

In [None]:
# Set random seeds (NumPy and TensorFlow) for reproducibility
np.random.seed(0)
# TensorFlow 2.x exposes the global seed as tf.random.set_seed, whereas
# TensorFlow 1.x only provides tf.set_random_seed; use whichever exists so
# the notebook runs on both.
if hasattr(tf, 'random') and hasattr(tf.random, 'set_seed'):
    tf.random.set_seed(0)
else:
    tf.set_random_seed(0)

In [None]:
# Short aliases for the Keras components used in the rest of the notebook
imdb = kr.datasets.imdb
Tokeniser = kr.preprocessing.text.Tokenizer
models, layers = kr.models, kr.layers
regularisers, constraints = kr.regularizers, kr.constraints
EarlyStopping, ModelCheckpoint = (
    kr.callbacks.EarlyStopping,
    kr.callbacks.ModelCheckpoint,
)

## 2. Loading the data set

In [None]:
# Number of features; passed as `num_words` to both the loader and the tokeniser
features_nb = 1000

# Load the movie review data and target vectors (training and test splits)
(train_data, train_target), (test_data, test_target) = imdb.load_data(
    num_words=features_nb,
)

# Turn each split of reviews into a binary feature matrix (mode='binary')
tokeniser = Tokeniser(num_words=features_nb)
train_features, test_features = (
    tokeniser.sequences_to_matrix(reviews, mode='binary')
    for reviews in (train_data, test_data)
)

### 2.1 Exploring the data set

In [None]:
# Report the shape of every data/target array
for label, array in (
    ('train_data.shape:', train_data),
    ('train_target.shape:', train_target),
    ('test_data.shape:', test_data),
    ('test_target.shape:', test_target),
):
    print(label, array.shape)

In [None]:
# Report the Python type of the first training sample and of its target
for label, value in (
    ('type(train_data[0]):', train_data[0]),
    ('type(train_target[0]):', train_target[0]),
):
    print(label, type(value))

In [None]:
# Lengths of the first 10 training reviews, followed by their targets
print('Reviews length:', list(map(len, train_data[:10])))
print('Review sentiment (bad/good):', train_target[:10])

In [None]:
# Show first review - machine format (the raw sequence of word ids)
print(train_data[0])

In [None]:
# Data set text visualisation helper function
def show_text(sample, word_index=None):
    """Print a review, given as a sequence of word ids, as readable text.

    Parameters
    ----------
    sample : iterable of int
        Word ids of one review, e.g. ``train_data[0]``.
    word_index : dict, optional
        Mapping ``word -> index`` in the format returned by
        ``imdb.get_word_index()``. When omitted, it is loaded with
        ``imdb.get_word_index()`` on every call; pass a pre-loaded mapping
        to avoid reloading it when showing several reviews.

    Returns
    -------
    None
        The decoded text is printed, words separated by single spaces.
        Ids without a known word are shown as ``<UNK>`` instead of raising
        ``KeyError``.
    """
    if word_index is None:
        word_index = imdb.get_word_index()

    # Ids in the samples are offset by 3 with respect to the word index;
    # ids 0, 1 and 2 are reserved for the special tokens below.
    id_to_word = {index + 3: word for word, index in word_index.items()}
    id_to_word.update({0: "<PAD>", 1: "<START>", 2: "<UNK>"})

    print(' '.join(id_to_word.get(id_, "<UNK>") for id_ in sample))

In [None]:
# Show first review - human format (word ids decoded back into words)
show_text(train_data[0])

In [None]:
# Show first review - neural net format (its row of the binary feature matrix)
print(train_features[0])

In [None]:
# Show first review - neural net format - explanation
# Multiplying every entry by its own column index leaves the index wherever
# the entry is non-zero and 0 everywhere else
print(train_features[0] * np.arange(len(train_features[0])))

## 3. Exploring regularisation of NN

Play with the code, especially the lines marked `# toggle`.  
Start from `# toggle 0`, and then, one at a time, move on to `# toggle 1` through `# toggle 5`.

In [None]:
# Start neural network
network = models.Sequential()

# Add a Dropout layer in front of the first Dense layer (only with toggle 4)
# network.add(layers.Dropout(0.2))  # toggle 4

# Add fully connected layer with a ReLU activation function
# Optional weight regularisation: L2 penalty (toggle 1), L1 penalty (toggle 2)
# or max-norm constraint (toggle 3)
network.add(layers.Dense(
    units=16, 
    activation='relu', 
#     kernel_regularizer=regularisers.l2(0.005),  # toggle 1
#     kernel_regularizer=regularisers.l1(0.001),  # toggle 2
#     kernel_constraint=constraints.max_norm(1),  # toggle 3
    input_shape=(features_nb,)
))

# Add fully connected layer with a ReLU activation function
# Optional weight regularisation: L2 penalty (toggle 1) or max-norm
# constraint (toggle 3); there is no L1 option on this layer
network.add(layers.Dense(
    units=16, 
#     kernel_regularizer=regularisers.l2(0.005),  # toggle 1
#     kernel_constraint=constraints.max_norm(1),  # toggle 3
    activation='relu'
))

# Add a Dropout layer before the output layer (only with toggle 4)
# network.add(layers.Dropout(0.5))  # toggle 4

# Add fully connected layer with a sigmoid activation function (single output unit)
network.add(layers.Dense(units=1, activation='sigmoid'))

# Compile network
network.compile(
    loss='binary_crossentropy',  # Cross-entropy
    optimizer='rmsprop',  # Root Mean Square Propagation
    metrics=['accuracy']   # Accuracy performance metric
)

In [None]:
# Train neural network; the `.history` dict of the returned object is read
# by the plotting cell further down
# With toggle 5, training monitors `val_loss` for early stopping and the best
# model is saved to 'best_model.h5'
history = network.fit(
    train_features,  # Features
    train_target,  # Target vector
    epochs=25,  # Number of epochs
    verbose=0,  # No output
    batch_size=100,  # Number of observations per batch
    validation_data=(test_features, test_target),  # Data for evaluation (here: the test split)
#     callbacks=[                                                                             # toggle 5
#         EarlyStopping(monitor='val_loss', patience=2),                                      # toggle 5
#         ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)  # toggle 5
#     ],                                                                                      # toggle 5
)

In [None]:
# ! ls  # toggle 5

In [None]:
# Get training and test loss histories
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Create count of the number of epochs
epoch = range(1, len(train_loss) + 1)

# Visualize loss history
plt.figure()

# Each curve carries its own label, so the legend always matches the curves
# actually drawn, whether or not the `# toggle 0` lines are enabled
plt.plot(epoch, train_loss, label='Train loss')
plt.plot(epoch, test_loss, label='Test loss')
# plt.plot(no_reg['epoch'], no_reg['train_loss'], label='Train no-reg')  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_loss'], label='Test no-reg')  # toggle 0

plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss score')

# Get training and test accuracy histories
# Older Keras versions store the metric under 'acc', newer ones under
# 'accuracy'; pick whichever key is present
accuracy_key = 'acc' if 'acc' in history.history else 'accuracy'
train_accuracy = history.history[accuracy_key]
test_accuracy = history.history['val_' + accuracy_key]

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_accuracy, label='Train accuracy')
plt.plot(epoch, test_accuracy, label='Test accuracy')
# plt.plot(no_reg['epoch'], no_reg['train_accuracy'], label='Train no-reg')  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_accuracy'], label='Test no-reg')  # toggle 0

plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Accuracy Score')

# Keep the results of this run as the no-regularisation reference curves
# (every line of the dict, including the closing brace, is marked so the
# whole statement can be toggled together)
no_reg = {                             # toggle 0
    'epoch': epoch,                    # toggle 0
    'train_loss': train_loss,          # toggle 0
    'test_loss': test_loss,            # toggle 0
    'train_accuracy': train_accuracy,  # toggle 0
    'test_accuracy': test_accuracy,    # toggle 0
}                                      # toggle 0

In [None]:
# Backup the kernel weights of the first layer under a name matching the
# regulariser that was active for this run:
# toggle 1 enables the L2 penalty, toggle 2 the L1 penalty, toggle 3 max-norm
weights = network.layers[0].get_weights()[0]  # toggle 0
# weights_L2 = network.layers[0].get_weights()[0]  # toggle 1
# weights_L1 = network.layers[0].get_weights()[0]  # toggle 2
# weights_max = network.layers[0].get_weights()[0]  # toggle 3

Once you have worked through `# toggle 3` (so that `weights`, `weights_L1`, `weights_L2` and `weights_max` have all been backed up), execute the following code.

In [None]:
# Compare the flattened first-layer weights of the four runs in one
# histogram (49 bins over the interval [-0.5, 0.5])
plt.hist(
    tuple(
        kernel.reshape(-1)
        for kernel in (weights, weights_L1, weights_L2, weights_max)
    ),
    bins=49,
    range=(-.5, .5),
    label=('No-reg', 'L1', 'L2', 'Max'),
)
plt.legend();