# Regularisation in NNs

## 1. Set up the environment

In [29]:
# Import statements
from tensorflow import keras as kr 
import numpy as np
import matplotlib.pyplot as plt

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler

In [31]:
# Set my plotting style
plt.style.use(('dark_background', 'bmh'))
plt.rc('axes', facecolor='none')
plt.rc('figure', figsize=(16, 4))

In [32]:
# Set random seed for reproducibility
np.random.seed(0)
torch.manual_seed(1)

<torch._C.Generator at 0x113bd0df0>

In [33]:
# Shortcuts
imdb = kr.datasets.imdb
Tokeniser = kr.preprocessing.text.Tokenizer

## 2. Loading the data set

In [34]:
# Set the number of features we want
features_nb = 1000

# Load data and target vector from movie review data
(train_data, train_target), (test_data, test_target) = imdb.load_data(num_words=features_nb)

# Convert movie review data to a one-hot encoded feature matrix
tokeniser = Tokeniser(num_words=features_nb)
train_features = tokeniser.sequences_to_matrix(train_data, mode='binary')
test_features = tokeniser.sequences_to_matrix(test_data, mode='binary')

### 2.1 Exploring the data set

In [35]:
# Check data set sizes
print('train_data.shape:', train_data.shape)
print('train_target.shape:', train_target.shape)
print('test_data.shape:', test_data.shape)
print('test_target.shape:', test_target.shape)

train_data.shape: (25000,)
train_target.shape: (25000,)
test_data.shape: (25000,)
test_target.shape: (25000,)


In [36]:
# Check format of first training sample
print('type(train_data[0]):', type(train_data[0]))
print('type(train_target[0]):', type(train_target[0]))

type(train_data[0]): <class 'list'>
type(train_target[0]): <class 'numpy.int64'>


In [37]:
# Check size of first 10 training samples and corresponding target
print('Reviews length:', [len(sample) for sample in train_data[:10]])
print('Review sentiment (bad/good):', train_target[:10])

Reviews length: [218, 189, 141, 550, 147, 43, 123, 562, 233, 130]
Review sentiment (bad/good): [1 0 0 1 0 0 1 0 1 0]


In [38]:
# Show first review - machine format
print(train_data[0])

[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]


In [39]:
# Data set text visualisation helper function
def show_text(sample):
    word_to_id = imdb.get_word_index()
    word_to_id = {k:(v+3) for k,v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value:key for key,value in word_to_id.items()}
    print(' '.join(id_to_word[id_] for id_ in sample))

In [40]:
# Show first review - human format
show_text(train_data[0])

<START> this film was just brilliant casting <UNK> <UNK> story direction <UNK> really <UNK> the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same <UNK> <UNK> as myself so i loved the fact there was a real <UNK> with this film the <UNK> <UNK> throughout the film were great it was just brilliant so much that i <UNK> the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the <UNK> <UNK> was amazing really <UNK> at the end it was so sad and you know what they say if you <UNK> at a film it must have been good and this definitely was also <UNK> to the two little <UNK> that played the <UNK> of <UNK> and paul they were just brilliant children are often left out of the <UNK> <UNK> i think because the stars that play them all <UNK> up are such a big <UNK> for the whole film but these children are amazing and should be <UNK> for what they have done don't yo

In [41]:
# Show first review - neural net format
print(train_features[0])

[0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0.
 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [42]:
# Show first review - neural net format - explanation
print(train_features[0] * np.arange(len(train_features[0])))

[  0.   1.   2.   0.   4.   5.   6.   7.   8.   9.   0.   0.  12.  13.
  14.  15.  16.  17.  18.  19.   0.  21.  22.   0.   0.  25.  26.   0.
  28.   0.  30.   0.  32.  33.   0.  35.  36.   0.  38.  39.   0.   0.
   0.  43.   0.   0.  46.   0.  48.   0.  50.  51.  52.   0.   0.   0.
  56.   0.   0.   0.   0.   0.  62.   0.   0.  65.  66.   0.   0.   0.
   0.  71.   0.   0.   0.   0.  76.  77.   0.   0.   0.   0.  82.   0.
   0.   0.   0.  87.  88.   0.   0.   0.  92.   0.   0.   0.   0.   0.
  98.   0. 100.   0.   0. 103. 104.   0. 106. 107.   0.   0.   0.   0.
 112. 113.   0.   0.   0. 117.   0.   0.   0.   0.   0.   0. 124.   0.
   0.   0.   0.   0. 130.   0.   0.   0. 134. 135.   0.   0.   0.   0.
   0. 141.   0.   0. 144.   0.   0. 147.   0.   0. 150.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 167.
   0.   0.   0.   0. 172. 173.   0.   0.   0.   0. 178.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 192.   0. 194.   0.
   0. 

## 3. Exploring regularisation of NN

Play with the code, especially the one marked `# toggle`.  
Start from `# toggle 0`, and then, one at the time, `# toggle 1` to `5`.

In [43]:
class ThreeLayerDense(nn.Module):

    def __init__(self, input_size, units_size):

        super(ThreeLayerDense, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, units_size) #features_nb, 16
        self.linear2 = torch.nn.Linear(units_size, units_size)
        #self.dropout
        self.linear3 = torch.nn.Linear(units_size, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x) 
        x = self.linear2(x) 
        x = F.relu(x)
        #Add dropout regularization
        #x = F.dropout(x, training=self.training)   
        return nn.Sigmoid()(self.linear3(x))

In [46]:
epochs = 2 #25
log_interval = 10
batch_size = 100

model = ThreeLayerDense(features_nb, 16)

criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)

#L1 regularization. Apply only to the first layer
l1_regularization_factor = 0.0001
#The model.params() would return a generator over all layers and activations. We would only need to retain 1st
params = next(model.parameters())

#l2 regularization can be added in the same fasjion as L1, but it can also be added via a weighT_decay parameter 
#directly in the optimizer (for all the layers)
#l2_regularization_factor = 0.0005
#optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay = l2_regularization_factor)

In [47]:
epoch = 0
train_data_gen = zip(train_features, train_target)
train_size = len(train_target)

while epoch < epochs:
    predictions = []
    truth_values = []

    for batch_idx, (xs, y) in enumerate(train_data_gen):
        xs, y = torch.from_numpy(xs).float(), torch.FloatTensor([y])

        y_pred = model(xs)
        loss = criterion(y_pred, y)
        #uncomment to enable L1 regularisation
        #loss += l1_regularization_factor * torch.abs(params).sum()

        optimizer.zero_grad()
        loss.backward()
        #nn.utils.clip_grad_norm(model.parameters(), 0.5)
        optimizer.step()

        predictions.append(y_pred.cpu().data.numpy().ravel())
        truth_values.append(y)

        if batch_idx % log_interval == 0:
            print('Train Epoch: {}, mini-batch {} of {}, training loss: {:.6f}'.format(
                epoch, batch_idx, train_size, loss.item()))

    epoch += 1

Train Epoch: 0, mini-batch 0 of 25000, training loss: 0.458598
Train Epoch: 0, mini-batch 10 of 25000, training loss: 0.474120
Train Epoch: 0, mini-batch 20 of 25000, training loss: 0.973176
Train Epoch: 0, mini-batch 30 of 25000, training loss: 0.946634
Train Epoch: 0, mini-batch 40 of 25000, training loss: 0.513123
Train Epoch: 0, mini-batch 50 of 25000, training loss: 0.922678
Train Epoch: 0, mini-batch 60 of 25000, training loss: 0.871077
Train Epoch: 0, mini-batch 70 of 25000, training loss: 0.538530
Train Epoch: 0, mini-batch 80 of 25000, training loss: 0.526957
Train Epoch: 0, mini-batch 90 of 25000, training loss: 0.870298
Train Epoch: 0, mini-batch 100 of 25000, training loss: 0.784882
Train Epoch: 0, mini-batch 110 of 25000, training loss: 0.758884
Train Epoch: 0, mini-batch 120 of 25000, training loss: 0.569724
Train Epoch: 0, mini-batch 130 of 25000, training loss: 0.732693
Train Epoch: 0, mini-batch 140 of 25000, training loss: 0.606772
Train Epoch: 0, mini-batch 150 of 25

Train Epoch: 0, mini-batch 1300 of 25000, training loss: 0.725728
Train Epoch: 0, mini-batch 1310 of 25000, training loss: 0.749498
Train Epoch: 0, mini-batch 1320 of 25000, training loss: 0.707980
Train Epoch: 0, mini-batch 1330 of 25000, training loss: 0.954869
Train Epoch: 0, mini-batch 1340 of 25000, training loss: 0.768683
Train Epoch: 0, mini-batch 1350 of 25000, training loss: 0.720416
Train Epoch: 0, mini-batch 1360 of 25000, training loss: 0.783634
Train Epoch: 0, mini-batch 1370 of 25000, training loss: 0.783574
Train Epoch: 0, mini-batch 1380 of 25000, training loss: 0.709728
Train Epoch: 0, mini-batch 1390 of 25000, training loss: 0.699666
Train Epoch: 0, mini-batch 1400 of 25000, training loss: 0.781675
Train Epoch: 0, mini-batch 1410 of 25000, training loss: 0.370301
Train Epoch: 0, mini-batch 1420 of 25000, training loss: 0.756619
Train Epoch: 0, mini-batch 1430 of 25000, training loss: 0.334019
Train Epoch: 0, mini-batch 1440 of 25000, training loss: 0.479502
Train Epoc

Train Epoch: 0, mini-batch 2590 of 25000, training loss: 1.264931
Train Epoch: 0, mini-batch 2600 of 25000, training loss: 0.548043
Train Epoch: 0, mini-batch 2610 of 25000, training loss: 0.796529
Train Epoch: 0, mini-batch 2620 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 2630 of 25000, training loss: 0.336446
Train Epoch: 0, mini-batch 2640 of 25000, training loss: 0.313294
Train Epoch: 0, mini-batch 2650 of 25000, training loss: 1.041120
Train Epoch: 0, mini-batch 2660 of 25000, training loss: 0.693148
Train Epoch: 0, mini-batch 2670 of 25000, training loss: 0.694972
Train Epoch: 0, mini-batch 2680 of 25000, training loss: 0.693221
Train Epoch: 0, mini-batch 2690 of 25000, training loss: 0.995658
Train Epoch: 0, mini-batch 2700 of 25000, training loss: 0.693162
Train Epoch: 0, mini-batch 2710 of 25000, training loss: 0.717583
Train Epoch: 0, mini-batch 2720 of 25000, training loss: 0.314227
Train Epoch: 0, mini-batch 2730 of 25000, training loss: 0.466932
Train Epoc

Train Epoch: 0, mini-batch 3940 of 25000, training loss: 0.314655
Train Epoch: 0, mini-batch 3950 of 25000, training loss: 0.645391
Train Epoch: 0, mini-batch 3960 of 25000, training loss: 0.314891
Train Epoch: 0, mini-batch 3970 of 25000, training loss: 0.693148
Train Epoch: 0, mini-batch 3980 of 25000, training loss: 0.314775
Train Epoch: 0, mini-batch 3990 of 25000, training loss: 1.294976
Train Epoch: 0, mini-batch 4000 of 25000, training loss: 1.269227
Train Epoch: 0, mini-batch 4010 of 25000, training loss: 0.831059
Train Epoch: 0, mini-batch 4020 of 25000, training loss: 0.317323
Train Epoch: 0, mini-batch 4030 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 4040 of 25000, training loss: 0.693149
Train Epoch: 0, mini-batch 4050 of 25000, training loss: 0.313780
Train Epoch: 0, mini-batch 4060 of 25000, training loss: 0.615510
Train Epoch: 0, mini-batch 4070 of 25000, training loss: 0.690235
Train Epoch: 0, mini-batch 4080 of 25000, training loss: 0.315242
Train Epoc

Train Epoch: 0, mini-batch 5200 of 25000, training loss: 0.768198
Train Epoch: 0, mini-batch 5210 of 25000, training loss: 0.725890
Train Epoch: 0, mini-batch 5220 of 25000, training loss: 0.477106
Train Epoch: 0, mini-batch 5230 of 25000, training loss: 0.313272
Train Epoch: 0, mini-batch 5240 of 25000, training loss: 0.320443
Train Epoch: 0, mini-batch 5250 of 25000, training loss: 0.601229
Train Epoch: 0, mini-batch 5260 of 25000, training loss: 0.315955
Train Epoch: 0, mini-batch 5270 of 25000, training loss: 0.696177
Train Epoch: 0, mini-batch 5280 of 25000, training loss: 0.314741
Train Epoch: 0, mini-batch 5290 of 25000, training loss: 0.694664
Train Epoch: 0, mini-batch 5300 of 25000, training loss: 0.316883
Train Epoch: 0, mini-batch 5310 of 25000, training loss: 0.313267
Train Epoch: 0, mini-batch 5320 of 25000, training loss: 0.313264
Train Epoch: 0, mini-batch 5330 of 25000, training loss: 0.314618
Train Epoch: 0, mini-batch 5340 of 25000, training loss: 0.562337
Train Epoc

Train Epoch: 0, mini-batch 6530 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 6540 of 25000, training loss: 1.310228
Train Epoch: 0, mini-batch 6550 of 25000, training loss: 0.672894
Train Epoch: 0, mini-batch 6560 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 6570 of 25000, training loss: 0.699295
Train Epoch: 0, mini-batch 6580 of 25000, training loss: 0.693574
Train Epoch: 0, mini-batch 6590 of 25000, training loss: 0.315006
Train Epoch: 0, mini-batch 6600 of 25000, training loss: 0.695169
Train Epoch: 0, mini-batch 6610 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 6620 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 6630 of 25000, training loss: 0.798449
Train Epoch: 0, mini-batch 6640 of 25000, training loss: 0.314794
Train Epoch: 0, mini-batch 6650 of 25000, training loss: 0.698241
Train Epoch: 0, mini-batch 6660 of 25000, training loss: 0.698362
Train Epoch: 0, mini-batch 6670 of 25000, training loss: 0.691758
Train Epoc

Train Epoch: 0, mini-batch 7920 of 25000, training loss: 0.313263
Train Epoch: 0, mini-batch 7930 of 25000, training loss: 0.694253
Train Epoch: 0, mini-batch 7940 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 7950 of 25000, training loss: 0.314351
Train Epoch: 0, mini-batch 7960 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 7970 of 25000, training loss: 0.316683
Train Epoch: 0, mini-batch 7980 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 7990 of 25000, training loss: 0.313374
Train Epoch: 0, mini-batch 8000 of 25000, training loss: 0.700687
Train Epoch: 0, mini-batch 8010 of 25000, training loss: 0.426761
Train Epoch: 0, mini-batch 8020 of 25000, training loss: 0.313269
Train Epoch: 0, mini-batch 8030 of 25000, training loss: 0.693191
Train Epoch: 0, mini-batch 8040 of 25000, training loss: 0.313910
Train Epoch: 0, mini-batch 8050 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 8060 of 25000, training loss: 1.309915
Train Epoc

Train Epoch: 0, mini-batch 9220 of 25000, training loss: 0.747015
Train Epoch: 0, mini-batch 9230 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 9240 of 25000, training loss: 0.313262
Train Epoch: 0, mini-batch 9250 of 25000, training loss: 1.310220
Train Epoch: 0, mini-batch 9260 of 25000, training loss: 0.313264
Train Epoch: 0, mini-batch 9270 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 9280 of 25000, training loss: 0.313371
Train Epoch: 0, mini-batch 9290 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 9300 of 25000, training loss: 0.313776
Train Epoch: 0, mini-batch 9310 of 25000, training loss: 0.315462
Train Epoch: 0, mini-batch 9320 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 9330 of 25000, training loss: 0.346407
Train Epoch: 0, mini-batch 9340 of 25000, training loss: 0.693174
Train Epoch: 0, mini-batch 9350 of 25000, training loss: 0.695245
Train Epoch: 0, mini-batch 9360 of 25000, training loss: 0.621040
Train Epoc

Train Epoch: 0, mini-batch 10480 of 25000, training loss: 0.400885
Train Epoch: 0, mini-batch 10490 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 10500 of 25000, training loss: 0.313322
Train Epoch: 0, mini-batch 10510 of 25000, training loss: 0.643779
Train Epoch: 0, mini-batch 10520 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 10530 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 10540 of 25000, training loss: 0.313354
Train Epoch: 0, mini-batch 10550 of 25000, training loss: 0.313657
Train Epoch: 0, mini-batch 10560 of 25000, training loss: 0.692804
Train Epoch: 0, mini-batch 10570 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 10580 of 25000, training loss: 1.255968
Train Epoch: 0, mini-batch 10590 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 10600 of 25000, training loss: 0.313513
Train Epoch: 0, mini-batch 10610 of 25000, training loss: 0.691176
Train Epoch: 0, mini-batch 10620 of 25000, training loss: 0.32

Train Epoch: 0, mini-batch 11730 of 25000, training loss: 0.313266
Train Epoch: 0, mini-batch 11740 of 25000, training loss: 0.313335
Train Epoch: 0, mini-batch 11750 of 25000, training loss: 0.934611
Train Epoch: 0, mini-batch 11760 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 11770 of 25000, training loss: 1.311633
Train Epoch: 0, mini-batch 11780 of 25000, training loss: 0.817650
Train Epoch: 0, mini-batch 11790 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 11800 of 25000, training loss: 0.693162
Train Epoch: 0, mini-batch 11810 of 25000, training loss: 0.314426
Train Epoch: 0, mini-batch 11820 of 25000, training loss: 0.321722
Train Epoch: 0, mini-batch 11830 of 25000, training loss: 0.313264
Train Epoch: 0, mini-batch 11840 of 25000, training loss: 0.321037
Train Epoch: 0, mini-batch 11850 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 11860 of 25000, training loss: 0.315384
Train Epoch: 0, mini-batch 11870 of 25000, training loss: 0.69

Train Epoch: 0, mini-batch 13150 of 25000, training loss: 0.313266
Train Epoch: 0, mini-batch 13160 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 13170 of 25000, training loss: 0.313876
Train Epoch: 0, mini-batch 13180 of 25000, training loss: 1.303073
Train Epoch: 0, mini-batch 13190 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 13200 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 13210 of 25000, training loss: 0.313385
Train Epoch: 0, mini-batch 13220 of 25000, training loss: 0.693149
Train Epoch: 0, mini-batch 13230 of 25000, training loss: 0.693310
Train Epoch: 0, mini-batch 13240 of 25000, training loss: 0.313840
Train Epoch: 0, mini-batch 13250 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 13260 of 25000, training loss: 0.693128
Train Epoch: 0, mini-batch 13270 of 25000, training loss: 0.320654
Train Epoch: 0, mini-batch 13280 of 25000, training loss: 0.693763
Train Epoch: 0, mini-batch 13290 of 25000, training loss: 0.69

Train Epoch: 0, mini-batch 14550 of 25000, training loss: 0.313547
Train Epoch: 0, mini-batch 14560 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 14570 of 25000, training loss: 0.313712
Train Epoch: 0, mini-batch 14580 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 14590 of 25000, training loss: 0.313636
Train Epoch: 0, mini-batch 14600 of 25000, training loss: 0.693095
Train Epoch: 0, mini-batch 14610 of 25000, training loss: 0.322233
Train Epoch: 0, mini-batch 14620 of 25000, training loss: 0.729401
Train Epoch: 0, mini-batch 14630 of 25000, training loss: 0.693150
Train Epoch: 0, mini-batch 14640 of 25000, training loss: 0.313550
Train Epoch: 0, mini-batch 14650 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 14660 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 14670 of 25000, training loss: 0.317588
Train Epoch: 0, mini-batch 14680 of 25000, training loss: 0.693329
Train Epoch: 0, mini-batch 14690 of 25000, training loss: 0.31

Train Epoch: 0, mini-batch 15830 of 25000, training loss: 0.313459
Train Epoch: 0, mini-batch 15840 of 25000, training loss: 0.313935
Train Epoch: 0, mini-batch 15850 of 25000, training loss: 0.315693
Train Epoch: 0, mini-batch 15860 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 15870 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 15880 of 25000, training loss: 0.313365
Train Epoch: 0, mini-batch 15890 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 15900 of 25000, training loss: 0.336603
Train Epoch: 0, mini-batch 15910 of 25000, training loss: 0.315877
Train Epoch: 0, mini-batch 15920 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 15930 of 25000, training loss: 0.313286
Train Epoch: 0, mini-batch 15940 of 25000, training loss: 0.313311
Train Epoch: 0, mini-batch 15950 of 25000, training loss: 0.693146
Train Epoch: 0, mini-batch 15960 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 15970 of 25000, training loss: 1.05

Train Epoch: 0, mini-batch 17190 of 25000, training loss: 1.002028
Train Epoch: 0, mini-batch 17200 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17210 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17220 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17230 of 25000, training loss: 0.314420
Train Epoch: 0, mini-batch 17240 of 25000, training loss: 0.313677
Train Epoch: 0, mini-batch 17250 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17260 of 25000, training loss: 0.693154
Train Epoch: 0, mini-batch 17270 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17280 of 25000, training loss: 0.690536
Train Epoch: 0, mini-batch 17290 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17300 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 17310 of 25000, training loss: 1.232199
Train Epoch: 0, mini-batch 17320 of 25000, training loss: 0.313895
Train Epoch: 0, mini-batch 17330 of 25000, training loss: 1.30

Train Epoch: 0, mini-batch 18500 of 25000, training loss: 0.693063
Train Epoch: 0, mini-batch 18510 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18520 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18530 of 25000, training loss: 0.314565
Train Epoch: 0, mini-batch 18540 of 25000, training loss: 0.313494
Train Epoch: 0, mini-batch 18550 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18560 of 25000, training loss: 0.314774
Train Epoch: 0, mini-batch 18570 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18580 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18590 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18600 of 25000, training loss: 1.310143
Train Epoch: 0, mini-batch 18610 of 25000, training loss: 0.313276
Train Epoch: 0, mini-batch 18620 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18630 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 18640 of 25000, training loss: 0.31

Train Epoch: 0, mini-batch 19990 of 25000, training loss: 0.343864
Train Epoch: 0, mini-batch 20000 of 25000, training loss: 0.756494
Train Epoch: 0, mini-batch 20010 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20020 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20030 of 25000, training loss: 0.316977
Train Epoch: 0, mini-batch 20040 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20050 of 25000, training loss: 0.313589
Train Epoch: 0, mini-batch 20060 of 25000, training loss: 0.313613
Train Epoch: 0, mini-batch 20070 of 25000, training loss: 0.313505
Train Epoch: 0, mini-batch 20080 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20090 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20100 of 25000, training loss: 0.315158
Train Epoch: 0, mini-batch 20110 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 20120 of 25000, training loss: 0.347269
Train Epoch: 0, mini-batch 20130 of 25000, training loss: 1.07

Train Epoch: 0, mini-batch 21330 of 25000, training loss: 0.313265
Train Epoch: 0, mini-batch 21340 of 25000, training loss: 0.313508
Train Epoch: 0, mini-batch 21350 of 25000, training loss: 0.632001
Train Epoch: 0, mini-batch 21360 of 25000, training loss: 0.332183
Train Epoch: 0, mini-batch 21370 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 21380 of 25000, training loss: 0.313404
Train Epoch: 0, mini-batch 21390 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 21400 of 25000, training loss: 0.313262
Train Epoch: 0, mini-batch 21410 of 25000, training loss: 1.296899
Train Epoch: 0, mini-batch 21420 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 21430 of 25000, training loss: 0.314083
Train Epoch: 0, mini-batch 21440 of 25000, training loss: 0.317991
Train Epoch: 0, mini-batch 21450 of 25000, training loss: 0.313715
Train Epoch: 0, mini-batch 21460 of 25000, training loss: 0.693417
Train Epoch: 0, mini-batch 21470 of 25000, training loss: 0.69

Train Epoch: 0, mini-batch 22660 of 25000, training loss: 0.313262
Train Epoch: 0, mini-batch 22670 of 25000, training loss: 0.313765
Train Epoch: 0, mini-batch 22680 of 25000, training loss: 0.633067
Train Epoch: 0, mini-batch 22690 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 22700 of 25000, training loss: 0.313337
Train Epoch: 0, mini-batch 22710 of 25000, training loss: 0.313306
Train Epoch: 0, mini-batch 22720 of 25000, training loss: 0.313278
Train Epoch: 0, mini-batch 22730 of 25000, training loss: 0.695371
Train Epoch: 0, mini-batch 22740 of 25000, training loss: 0.313324
Train Epoch: 0, mini-batch 22750 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 22760 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 22770 of 25000, training loss: 0.313355
Train Epoch: 0, mini-batch 22780 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 22790 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 22800 of 25000, training loss: 0.69

Train Epoch: 0, mini-batch 24040 of 25000, training loss: 0.316286
Train Epoch: 0, mini-batch 24050 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24060 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24070 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24080 of 25000, training loss: 0.313277
Train Epoch: 0, mini-batch 24090 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24100 of 25000, training loss: 0.313269
Train Epoch: 0, mini-batch 24110 of 25000, training loss: 0.313508
Train Epoch: 0, mini-batch 24120 of 25000, training loss: 0.742773
Train Epoch: 0, mini-batch 24130 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24140 of 25000, training loss: 0.313281
Train Epoch: 0, mini-batch 24150 of 25000, training loss: 0.693147
Train Epoch: 0, mini-batch 24160 of 25000, training loss: 0.313357
Train Epoch: 0, mini-batch 24170 of 25000, training loss: 0.750347
Train Epoch: 0, mini-batch 24180 of 25000, training loss: 0.31

In [None]:
# Get training and test accuracy histories
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Create count of the number of epochs
epoch = range(1, len(train_loss) + 1)

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_loss)
plt.plot(epoch, test_loss)
# plt.plot(no_reg['epoch'], no_reg['train_loss'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_loss'])  # toggle 0

plt.legend(['Train loss', 'Test loss', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Loss score')

# Get training and test accuracy histories
train_accuracy = history.history['acc']
test_accuracy = history.history['val_acc']

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_accuracy)
plt.plot(epoch, test_accuracy)
# plt.plot(no_reg['epoch'], no_reg['train_accuracy'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_accuracy'])  # toggle 0

plt.legend(['Train accuracy', 'Test accuracy', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy Score')

no_reg = {                             # toggle 0
    'epoch': epoch,                    # toggle 0
    'train_loss': train_loss,          # toggle 0
    'test_loss': test_loss,            # toggle 0
    'train_accuracy': train_accuracy,  # toggle 0
    'test_accuracy': test_accuracy,    # toggle 0
}

In [None]:
# Backup weights
weights = network.layers[0].get_weights()[0]  # toggle 0
# weights_L1 = network.layers[0].get_weights()[0]  # toggle 1
# weights_L2 = network.layers[0].get_weights()[0]  # toggle 2
# weights_max = network.layers[0].get_weights()[0]  # toggle 3

After you got to toggle `# toggle 3`, execute the following code.

In [None]:
# Show weight distribution
plt.hist((
    weights.reshape(-1),
    weights_L1.reshape(-1),
    weights_L2.reshape(-1),
    weights_max.reshape(-1),
), 49, range=(-.5, .5), label=(
    'No-reg',
    'L1',
    'L2',
    'Max',
))
plt.legend();