# Regularisation in NNs

## 1. Set up the environment

In [1]:
# Import statements
from tensorflow import keras as kr 
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
# Set my plotting style
plt.style.use(('dark_background', 'bmh'))
plt.rc('axes', facecolor='none')
plt.rc('figure', figsize=(16, 4))

In [4]:
# Set random seed for reproducibility
np.random.seed(0)
torch.manual_seed(1)

<torch._C.Generator at 0x112be9e90>

In [5]:
# Shortcuts
imdb = kr.datasets.imdb
Tokeniser = kr.preprocessing.text.Tokenizer

## 2. Loading the data set

In [6]:
# Set the number of features we want
features_nb = 1000

# Load data and target vector from movie review data
(train_data, train_target), (test_data, test_target) = imdb.load_data(num_words=features_nb)

# Convert movie review data to a one-hot encoded feature matrix
tokeniser = Tokeniser(num_words=features_nb)
train_features = tokeniser.sequences_to_matrix(train_data, mode='binary')
test_features = tokeniser.sequences_to_matrix(test_data, mode='binary')

### 2.1 Exploring the data set

In [7]:
# Check data set sizes
print('train_data.shape:', train_data.shape)
print('train_target.shape:', train_target.shape)
print('test_data.shape:', test_data.shape)
print('test_target.shape:', test_target.shape)

train_data.shape: (25000,)
train_target.shape: (25000,)
test_data.shape: (25000,)
test_target.shape: (25000,)


In [8]:
# Check format of first training sample
print('type(train_data[0]):', type(train_data[0]))
print('type(train_target[0]):', type(train_target[0]))

type(train_data[0]): <class 'list'>
type(train_target[0]): <class 'numpy.int64'>


In [9]:
# Check size of first 10 training samples and corresponding target
print('Reviews length:', [len(sample) for sample in train_data[:10]])
print('Review sentiment (bad/good):', train_target[:10])

Reviews length: [218, 189, 141, 550, 147, 43, 123, 562, 233, 130]
Review sentiment (bad/good): [1 0 0 1 0 0 1 0 1 0]


In [10]:
# Show first review - machine format
print(train_data[0])

[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]


In [11]:
# Data set text visualisation helper function
def show_text(sample):
    word_to_id = imdb.get_word_index()
    word_to_id = {k:(v+3) for k,v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value:key for key,value in word_to_id.items()}
    print(' '.join(id_to_word[id_] for id_ in sample))

In [12]:
# Show first review - human format
show_text(train_data[0])

<START> this film was just brilliant casting <UNK> <UNK> story direction <UNK> really <UNK> the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same <UNK> <UNK> as myself so i loved the fact there was a real <UNK> with this film the <UNK> <UNK> throughout the film were great it was just brilliant so much that i <UNK> the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the <UNK> <UNK> was amazing really <UNK> at the end it was so sad and you know what they say if you <UNK> at a film it must have been good and this definitely was also <UNK> to the two little <UNK> that played the <UNK> of <UNK> and paul they were just brilliant children are often left out of the <UNK> <UNK> i think because the stars that play them all <UNK> up are such a big <UNK> for the whole film but these children are amazing and should be <UNK> for what they have done don't yo

In [13]:
# Show first review - neural net format
print(train_features[0])

[0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0.
 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [14]:
# Show first review - neural net format - explanation
print(train_features[0] * np.arange(len(train_features[0])))

[  0.   1.   2.   0.   4.   5.   6.   7.   8.   9.   0.   0.  12.  13.
  14.  15.  16.  17.  18.  19.   0.  21.  22.   0.   0.  25.  26.   0.
  28.   0.  30.   0.  32.  33.   0.  35.  36.   0.  38.  39.   0.   0.
   0.  43.   0.   0.  46.   0.  48.   0.  50.  51.  52.   0.   0.   0.
  56.   0.   0.   0.   0.   0.  62.   0.   0.  65.  66.   0.   0.   0.
   0.  71.   0.   0.   0.   0.  76.  77.   0.   0.   0.   0.  82.   0.
   0.   0.   0.  87.  88.   0.   0.   0.  92.   0.   0.   0.   0.   0.
  98.   0. 100.   0.   0. 103. 104.   0. 106. 107.   0.   0.   0.   0.
 112. 113.   0.   0.   0. 117.   0.   0.   0.   0.   0.   0. 124.   0.
   0.   0.   0.   0. 130.   0.   0.   0. 134. 135.   0.   0.   0.   0.
   0. 141.   0.   0. 144.   0.   0. 147.   0.   0. 150.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 167.
   0.   0.   0.   0. 172. 173.   0.   0.   0.   0. 178.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 192.   0. 194.   0.
   0. 

## 3. Exploring regularisation of NN

Play with the code, especially the one marked `# toggle`.  
Start from `# toggle 0`, and then, one at the time, `# toggle 1` to `5`.

In [40]:
class ThreeLayerDense(nn.Module):

    def __init__(self, input_size, units_size):

        super(ThreeLayerDense, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, units_size) #features_nb, 16
        self.linear2 = torch.nn.Linear(units_size, units_size)
        #self.dropout
        self.linear3 = torch.nn.Linear(units_size, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x) 
        x = self.linear2(x) 
        x = F.relu(x)
        #Add dropout regularization
        #x = F.dropout(x, training=self.training)   
        return nn.Sigmoid()(self.linear3(x))

In [41]:
epochs = 25
log_interval = 10
batch_size = 100

model = ThreeLayerDense(features_nb, 16)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)

In [42]:
epochs = 25
log_interval = 10
batch_size = 100

model = ThreeLayerDense(features_nb, 16)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)

#l2 regularization
#l2_regularization_factor = 0.0005
#optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay = l2_regularization_factor)

In [43]:
epoch = 0
train_data_gen = zip(train_features, train_target)
train_size = len(train_target)

while epoch < epochs:
    predictions = []
    truth_values = []

    for batch_idx, (xs, y) in enumerate(train_data_gen):
        xs, y = torch.from_numpy(xs).float(), torch.FloatTensor([y])

        y_pred = model(xs)
        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        #nn.utils.clip_grad_norm(model.parameters(), 0.5)
        optimizer.step()

        predictions.append(y_pred.cpu().data.numpy().ravel())
        truth_values.append(y)

        if batch_idx % log_interval == 0:
            print('Train Epoch: {}, mini-batch {} of {}, training loss: {:.6f}'.format(
                epoch, batch_idx, train_size, loss.item()))

    epoch += 1

Train Epoch: 0, mini-batch 0 of 25000, training loss: 0.663057
Train Epoch: 0, mini-batch 10 of 25000, training loss: 0.672497
Train Epoch: 0, mini-batch 20 of 25000, training loss: 0.683663
Train Epoch: 0, mini-batch 30 of 25000, training loss: 0.657638
Train Epoch: 0, mini-batch 40 of 25000, training loss: 0.678395
Train Epoch: 0, mini-batch 50 of 25000, training loss: 0.675862
Train Epoch: 0, mini-batch 60 of 25000, training loss: 0.629715
Train Epoch: 0, mini-batch 70 of 25000, training loss: 0.678230
Train Epoch: 0, mini-batch 80 of 25000, training loss: 0.712157
Train Epoch: 0, mini-batch 90 of 25000, training loss: 0.608832
Train Epoch: 0, mini-batch 100 of 25000, training loss: 0.509348
Train Epoch: 0, mini-batch 110 of 25000, training loss: 0.481726
Train Epoch: 0, mini-batch 120 of 25000, training loss: 0.809702
Train Epoch: 0, mini-batch 130 of 25000, training loss: 0.432869
Train Epoch: 0, mini-batch 140 of 25000, training loss: 0.912524
Train Epoch: 0, mini-batch 150 of 25

Train Epoch: 0, mini-batch 1510 of 25000, training loss: 0.072857
Train Epoch: 0, mini-batch 1520 of 25000, training loss: 0.108400
Train Epoch: 0, mini-batch 1530 of 25000, training loss: 0.207838
Train Epoch: 0, mini-batch 1540 of 25000, training loss: 0.362455
Train Epoch: 0, mini-batch 1550 of 25000, training loss: 0.268761
Train Epoch: 0, mini-batch 1560 of 25000, training loss: 0.715087
Train Epoch: 0, mini-batch 1570 of 25000, training loss: 1.614928
Train Epoch: 0, mini-batch 1580 of 25000, training loss: 1.338564
Train Epoch: 0, mini-batch 1590 of 25000, training loss: 0.396873
Train Epoch: 0, mini-batch 1600 of 25000, training loss: 0.024909
Train Epoch: 0, mini-batch 1610 of 25000, training loss: 0.099906
Train Epoch: 0, mini-batch 1620 of 25000, training loss: 0.372154
Train Epoch: 0, mini-batch 1630 of 25000, training loss: 0.033038
Train Epoch: 0, mini-batch 1640 of 25000, training loss: 0.078775
Train Epoch: 0, mini-batch 1650 of 25000, training loss: 0.682308
Train Epoc

Train Epoch: 0, mini-batch 3050 of 25000, training loss: 0.356812
Train Epoch: 0, mini-batch 3060 of 25000, training loss: 0.044164
Train Epoch: 0, mini-batch 3070 of 25000, training loss: 0.388022
Train Epoch: 0, mini-batch 3080 of 25000, training loss: 0.037784
Train Epoch: 0, mini-batch 3090 of 25000, training loss: 0.050311
Train Epoch: 0, mini-batch 3100 of 25000, training loss: 1.693717
Train Epoch: 0, mini-batch 3110 of 25000, training loss: 0.216080
Train Epoch: 0, mini-batch 3120 of 25000, training loss: 0.026501
Train Epoch: 0, mini-batch 3130 of 25000, training loss: 0.306336
Train Epoch: 0, mini-batch 3140 of 25000, training loss: 0.096358
Train Epoch: 0, mini-batch 3150 of 25000, training loss: 0.018375
Train Epoch: 0, mini-batch 3160 of 25000, training loss: 1.069773
Train Epoch: 0, mini-batch 3170 of 25000, training loss: 0.424782
Train Epoch: 0, mini-batch 3180 of 25000, training loss: 0.084638
Train Epoch: 0, mini-batch 3190 of 25000, training loss: 0.513798
Train Epoc

Train Epoch: 0, mini-batch 4550 of 25000, training loss: 1.632124
Train Epoch: 0, mini-batch 4560 of 25000, training loss: 0.137966
Train Epoch: 0, mini-batch 4570 of 25000, training loss: 0.108778
Train Epoch: 0, mini-batch 4580 of 25000, training loss: 0.541218
Train Epoch: 0, mini-batch 4590 of 25000, training loss: 0.527060
Train Epoch: 0, mini-batch 4600 of 25000, training loss: 0.017891
Train Epoch: 0, mini-batch 4610 of 25000, training loss: 0.006248
Train Epoch: 0, mini-batch 4620 of 25000, training loss: 0.056907
Train Epoch: 0, mini-batch 4630 of 25000, training loss: 0.262572
Train Epoch: 0, mini-batch 4640 of 25000, training loss: 0.962585
Train Epoch: 0, mini-batch 4650 of 25000, training loss: 0.288561
Train Epoch: 0, mini-batch 4660 of 25000, training loss: 0.045156
Train Epoch: 0, mini-batch 4670 of 25000, training loss: 0.089328
Train Epoch: 0, mini-batch 4680 of 25000, training loss: 0.297483
Train Epoch: 0, mini-batch 4690 of 25000, training loss: 0.159139
Train Epoc

Train Epoch: 0, mini-batch 5850 of 25000, training loss: 0.271334
Train Epoch: 0, mini-batch 5860 of 25000, training loss: 0.320831
Train Epoch: 0, mini-batch 5870 of 25000, training loss: 0.517711
Train Epoch: 0, mini-batch 5880 of 25000, training loss: 0.570382
Train Epoch: 0, mini-batch 5890 of 25000, training loss: 0.024826
Train Epoch: 0, mini-batch 5900 of 25000, training loss: 0.016204
Train Epoch: 0, mini-batch 5910 of 25000, training loss: 0.212834
Train Epoch: 0, mini-batch 5920 of 25000, training loss: 0.058024
Train Epoch: 0, mini-batch 5930 of 25000, training loss: 0.157301
Train Epoch: 0, mini-batch 5940 of 25000, training loss: 1.177568
Train Epoch: 0, mini-batch 5950 of 25000, training loss: 0.001837
Train Epoch: 0, mini-batch 5960 of 25000, training loss: 1.190034
Train Epoch: 0, mini-batch 5970 of 25000, training loss: 0.060796
Train Epoch: 0, mini-batch 5980 of 25000, training loss: 0.038264
Train Epoch: 0, mini-batch 5990 of 25000, training loss: 0.116352
Train Epoc

Train Epoch: 0, mini-batch 7430 of 25000, training loss: 0.108693
Train Epoch: 0, mini-batch 7440 of 25000, training loss: 0.057230
Train Epoch: 0, mini-batch 7450 of 25000, training loss: 0.573595
Train Epoch: 0, mini-batch 7460 of 25000, training loss: 0.161639
Train Epoch: 0, mini-batch 7470 of 25000, training loss: 1.975173
Train Epoch: 0, mini-batch 7480 of 25000, training loss: 0.363071
Train Epoch: 0, mini-batch 7490 of 25000, training loss: 0.302737
Train Epoch: 0, mini-batch 7500 of 25000, training loss: 0.437858
Train Epoch: 0, mini-batch 7510 of 25000, training loss: 0.262641
Train Epoch: 0, mini-batch 7520 of 25000, training loss: 0.063130
Train Epoch: 0, mini-batch 7530 of 25000, training loss: 0.592833
Train Epoch: 0, mini-batch 7540 of 25000, training loss: 0.506257
Train Epoch: 0, mini-batch 7550 of 25000, training loss: 0.053897
Train Epoch: 0, mini-batch 7560 of 25000, training loss: 0.659386
Train Epoch: 0, mini-batch 7570 of 25000, training loss: 0.075486
Train Epoc

Train Epoch: 0, mini-batch 8890 of 25000, training loss: 1.183868
Train Epoch: 0, mini-batch 8900 of 25000, training loss: 0.241556
Train Epoch: 0, mini-batch 8910 of 25000, training loss: 0.792867
Train Epoch: 0, mini-batch 8920 of 25000, training loss: 0.043702
Train Epoch: 0, mini-batch 8930 of 25000, training loss: 0.223766
Train Epoch: 0, mini-batch 8940 of 25000, training loss: 0.399048
Train Epoch: 0, mini-batch 8950 of 25000, training loss: 0.019720
Train Epoch: 0, mini-batch 8960 of 25000, training loss: 0.015890
Train Epoch: 0, mini-batch 8970 of 25000, training loss: 0.029978
Train Epoch: 0, mini-batch 8980 of 25000, training loss: 0.359792
Train Epoch: 0, mini-batch 8990 of 25000, training loss: 0.014356
Train Epoch: 0, mini-batch 9000 of 25000, training loss: 0.848552
Train Epoch: 0, mini-batch 9010 of 25000, training loss: 0.046054
Train Epoch: 0, mini-batch 9020 of 25000, training loss: 0.840518
Train Epoch: 0, mini-batch 9030 of 25000, training loss: 0.010565
Train Epoc

Train Epoch: 0, mini-batch 10190 of 25000, training loss: 0.171108
Train Epoch: 0, mini-batch 10200 of 25000, training loss: 0.062050
Train Epoch: 0, mini-batch 10210 of 25000, training loss: 0.302470
Train Epoch: 0, mini-batch 10220 of 25000, training loss: 0.845729
Train Epoch: 0, mini-batch 10230 of 25000, training loss: 0.123123
Train Epoch: 0, mini-batch 10240 of 25000, training loss: 0.055507
Train Epoch: 0, mini-batch 10250 of 25000, training loss: 0.014978
Train Epoch: 0, mini-batch 10260 of 25000, training loss: 2.682865
Train Epoch: 0, mini-batch 10270 of 25000, training loss: 0.119537
Train Epoch: 0, mini-batch 10280 of 25000, training loss: 0.148235
Train Epoch: 0, mini-batch 10290 of 25000, training loss: 0.056917
Train Epoch: 0, mini-batch 10300 of 25000, training loss: 0.048143
Train Epoch: 0, mini-batch 10310 of 25000, training loss: 0.033685
Train Epoch: 0, mini-batch 10320 of 25000, training loss: 1.293919
Train Epoch: 0, mini-batch 10330 of 25000, training loss: 0.06

Train Epoch: 0, mini-batch 11610 of 25000, training loss: 1.663843
Train Epoch: 0, mini-batch 11620 of 25000, training loss: 0.002781
Train Epoch: 0, mini-batch 11630 of 25000, training loss: 0.079943
Train Epoch: 0, mini-batch 11640 of 25000, training loss: 0.172699
Train Epoch: 0, mini-batch 11650 of 25000, training loss: 0.000084
Train Epoch: 0, mini-batch 11660 of 25000, training loss: 0.498539
Train Epoch: 0, mini-batch 11670 of 25000, training loss: 0.016300
Train Epoch: 0, mini-batch 11680 of 25000, training loss: 4.620917
Train Epoch: 0, mini-batch 11690 of 25000, training loss: 0.007803
Train Epoch: 0, mini-batch 11700 of 25000, training loss: 0.332415
Train Epoch: 0, mini-batch 11710 of 25000, training loss: 0.073644
Train Epoch: 0, mini-batch 11720 of 25000, training loss: 0.045147
Train Epoch: 0, mini-batch 11730 of 25000, training loss: 0.013823
Train Epoch: 0, mini-batch 11740 of 25000, training loss: 0.050757
Train Epoch: 0, mini-batch 11750 of 25000, training loss: 0.25

Train Epoch: 0, mini-batch 13030 of 25000, training loss: 0.007072
Train Epoch: 0, mini-batch 13040 of 25000, training loss: 0.840688
Train Epoch: 0, mini-batch 13050 of 25000, training loss: 0.381166
Train Epoch: 0, mini-batch 13060 of 25000, training loss: 0.014791
Train Epoch: 0, mini-batch 13070 of 25000, training loss: 0.122918
Train Epoch: 0, mini-batch 13080 of 25000, training loss: 0.040537
Train Epoch: 0, mini-batch 13090 of 25000, training loss: 0.199675
Train Epoch: 0, mini-batch 13100 of 25000, training loss: 0.628531
Train Epoch: 0, mini-batch 13110 of 25000, training loss: 0.330659
Train Epoch: 0, mini-batch 13120 of 25000, training loss: 0.155867
Train Epoch: 0, mini-batch 13130 of 25000, training loss: 0.085374
Train Epoch: 0, mini-batch 13140 of 25000, training loss: 0.488998
Train Epoch: 0, mini-batch 13150 of 25000, training loss: 0.009565
Train Epoch: 0, mini-batch 13160 of 25000, training loss: 0.021618
Train Epoch: 0, mini-batch 13170 of 25000, training loss: 0.05

Train Epoch: 0, mini-batch 14300 of 25000, training loss: 0.906031
Train Epoch: 0, mini-batch 14310 of 25000, training loss: 0.003332
Train Epoch: 0, mini-batch 14320 of 25000, training loss: 0.660801
Train Epoch: 0, mini-batch 14330 of 25000, training loss: 0.096784
Train Epoch: 0, mini-batch 14340 of 25000, training loss: 0.000538
Train Epoch: 0, mini-batch 14350 of 25000, training loss: 0.059953
Train Epoch: 0, mini-batch 14360 of 25000, training loss: 1.561834
Train Epoch: 0, mini-batch 14370 of 25000, training loss: 0.071790
Train Epoch: 0, mini-batch 14380 of 25000, training loss: 0.017299
Train Epoch: 0, mini-batch 14390 of 25000, training loss: 0.005296
Train Epoch: 0, mini-batch 14400 of 25000, training loss: 0.421634
Train Epoch: 0, mini-batch 14410 of 25000, training loss: 0.067129
Train Epoch: 0, mini-batch 14420 of 25000, training loss: 0.675174
Train Epoch: 0, mini-batch 14430 of 25000, training loss: 0.005837
Train Epoch: 0, mini-batch 14440 of 25000, training loss: 0.01

Train Epoch: 0, mini-batch 15660 of 25000, training loss: 0.032134
Train Epoch: 0, mini-batch 15670 of 25000, training loss: 0.008799
Train Epoch: 0, mini-batch 15680 of 25000, training loss: 0.167831
Train Epoch: 0, mini-batch 15690 of 25000, training loss: 0.049764
Train Epoch: 0, mini-batch 15700 of 25000, training loss: 0.002557
Train Epoch: 0, mini-batch 15710 of 25000, training loss: 0.022602
Train Epoch: 0, mini-batch 15720 of 25000, training loss: 0.028805
Train Epoch: 0, mini-batch 15730 of 25000, training loss: 0.121387
Train Epoch: 0, mini-batch 15740 of 25000, training loss: 0.103959
Train Epoch: 0, mini-batch 15750 of 25000, training loss: 0.024588
Train Epoch: 0, mini-batch 15760 of 25000, training loss: 0.070786
Train Epoch: 0, mini-batch 15770 of 25000, training loss: 0.035964
Train Epoch: 0, mini-batch 15780 of 25000, training loss: 0.136340
Train Epoch: 0, mini-batch 15790 of 25000, training loss: 0.006177
Train Epoch: 0, mini-batch 15800 of 25000, training loss: 4.90

Train Epoch: 0, mini-batch 17030 of 25000, training loss: 0.043910
Train Epoch: 0, mini-batch 17040 of 25000, training loss: 0.009846
Train Epoch: 0, mini-batch 17050 of 25000, training loss: 0.002393
Train Epoch: 0, mini-batch 17060 of 25000, training loss: 0.145874
Train Epoch: 0, mini-batch 17070 of 25000, training loss: 0.578337
Train Epoch: 0, mini-batch 17080 of 25000, training loss: 0.044177
Train Epoch: 0, mini-batch 17090 of 25000, training loss: 0.044597
Train Epoch: 0, mini-batch 17100 of 25000, training loss: 0.051240
Train Epoch: 0, mini-batch 17110 of 25000, training loss: 0.402948
Train Epoch: 0, mini-batch 17120 of 25000, training loss: 3.510507
Train Epoch: 0, mini-batch 17130 of 25000, training loss: 0.014514
Train Epoch: 0, mini-batch 17140 of 25000, training loss: 0.026179
Train Epoch: 0, mini-batch 17150 of 25000, training loss: 0.092106
Train Epoch: 0, mini-batch 17160 of 25000, training loss: 0.013053
Train Epoch: 0, mini-batch 17170 of 25000, training loss: 1.64

Train Epoch: 0, mini-batch 18300 of 25000, training loss: 0.003130
Train Epoch: 0, mini-batch 18310 of 25000, training loss: 0.287468
Train Epoch: 0, mini-batch 18320 of 25000, training loss: 0.548827
Train Epoch: 0, mini-batch 18330 of 25000, training loss: 0.256612
Train Epoch: 0, mini-batch 18340 of 25000, training loss: 0.039735
Train Epoch: 0, mini-batch 18350 of 25000, training loss: 0.399009
Train Epoch: 0, mini-batch 18360 of 25000, training loss: 0.006139
Train Epoch: 0, mini-batch 18370 of 25000, training loss: 1.448768
Train Epoch: 0, mini-batch 18380 of 25000, training loss: 0.016089
Train Epoch: 0, mini-batch 18390 of 25000, training loss: 0.185790
Train Epoch: 0, mini-batch 18400 of 25000, training loss: 0.092042
Train Epoch: 0, mini-batch 18410 of 25000, training loss: 0.558647
Train Epoch: 0, mini-batch 18420 of 25000, training loss: 1.066081
Train Epoch: 0, mini-batch 18430 of 25000, training loss: 0.503614
Train Epoch: 0, mini-batch 18440 of 25000, training loss: 0.10

Train Epoch: 0, mini-batch 19640 of 25000, training loss: 0.034479
Train Epoch: 0, mini-batch 19650 of 25000, training loss: 0.037970
Train Epoch: 0, mini-batch 19660 of 25000, training loss: 0.047217
Train Epoch: 0, mini-batch 19670 of 25000, training loss: 0.116568
Train Epoch: 0, mini-batch 19680 of 25000, training loss: 1.234512
Train Epoch: 0, mini-batch 19690 of 25000, training loss: 1.420160
Train Epoch: 0, mini-batch 19700 of 25000, training loss: 0.319989
Train Epoch: 0, mini-batch 19710 of 25000, training loss: 0.017221
Train Epoch: 0, mini-batch 19720 of 25000, training loss: 0.565980
Train Epoch: 0, mini-batch 19730 of 25000, training loss: 0.595008
Train Epoch: 0, mini-batch 19740 of 25000, training loss: 0.203936
Train Epoch: 0, mini-batch 19750 of 25000, training loss: 0.019414
Train Epoch: 0, mini-batch 19760 of 25000, training loss: 0.064930
Train Epoch: 0, mini-batch 19770 of 25000, training loss: 0.028954
Train Epoch: 0, mini-batch 19780 of 25000, training loss: 0.12

Train Epoch: 0, mini-batch 20930 of 25000, training loss: 0.250753
Train Epoch: 0, mini-batch 20940 of 25000, training loss: 0.216835
Train Epoch: 0, mini-batch 20950 of 25000, training loss: 0.367369
Train Epoch: 0, mini-batch 20960 of 25000, training loss: 0.206165
Train Epoch: 0, mini-batch 20970 of 25000, training loss: 0.150257
Train Epoch: 0, mini-batch 20980 of 25000, training loss: 0.001044
Train Epoch: 0, mini-batch 20990 of 25000, training loss: 0.013733
Train Epoch: 0, mini-batch 21000 of 25000, training loss: 0.109545
Train Epoch: 0, mini-batch 21010 of 25000, training loss: 2.128257
Train Epoch: 0, mini-batch 21020 of 25000, training loss: 0.007414
Train Epoch: 0, mini-batch 21030 of 25000, training loss: 0.063479
Train Epoch: 0, mini-batch 21040 of 25000, training loss: 1.159721
Train Epoch: 0, mini-batch 21050 of 25000, training loss: 0.457270
Train Epoch: 0, mini-batch 21060 of 25000, training loss: 0.605487
Train Epoch: 0, mini-batch 21070 of 25000, training loss: 0.00

Train Epoch: 0, mini-batch 22410 of 25000, training loss: 0.004585
Train Epoch: 0, mini-batch 22420 of 25000, training loss: 0.178204
Train Epoch: 0, mini-batch 22430 of 25000, training loss: 0.027846
Train Epoch: 0, mini-batch 22440 of 25000, training loss: 0.031034
Train Epoch: 0, mini-batch 22450 of 25000, training loss: 0.113693
Train Epoch: 0, mini-batch 22460 of 25000, training loss: 0.002285
Train Epoch: 0, mini-batch 22470 of 25000, training loss: 0.025713
Train Epoch: 0, mini-batch 22480 of 25000, training loss: 0.000117
Train Epoch: 0, mini-batch 22490 of 25000, training loss: 0.175566
Train Epoch: 0, mini-batch 22500 of 25000, training loss: 0.015178
Train Epoch: 0, mini-batch 22510 of 25000, training loss: 0.016775
Train Epoch: 0, mini-batch 22520 of 25000, training loss: 0.171407
Train Epoch: 0, mini-batch 22530 of 25000, training loss: 0.609514
Train Epoch: 0, mini-batch 22540 of 25000, training loss: 0.141514
Train Epoch: 0, mini-batch 22550 of 25000, training loss: 0.36

Train Epoch: 0, mini-batch 23690 of 25000, training loss: 0.051803
Train Epoch: 0, mini-batch 23700 of 25000, training loss: 0.284521
Train Epoch: 0, mini-batch 23710 of 25000, training loss: 0.118840
Train Epoch: 0, mini-batch 23720 of 25000, training loss: 1.698763
Train Epoch: 0, mini-batch 23730 of 25000, training loss: 0.001330
Train Epoch: 0, mini-batch 23740 of 25000, training loss: 0.001398
Train Epoch: 0, mini-batch 23750 of 25000, training loss: 0.005917
Train Epoch: 0, mini-batch 23760 of 25000, training loss: 0.099938
Train Epoch: 0, mini-batch 23770 of 25000, training loss: 0.457634
Train Epoch: 0, mini-batch 23780 of 25000, training loss: 0.005029
Train Epoch: 0, mini-batch 23790 of 25000, training loss: 0.180113
Train Epoch: 0, mini-batch 23800 of 25000, training loss: 0.020613
Train Epoch: 0, mini-batch 23810 of 25000, training loss: 0.200635
Train Epoch: 0, mini-batch 23820 of 25000, training loss: 0.049420
Train Epoch: 0, mini-batch 23830 of 25000, training loss: 0.58

In [None]:
# ! ls  # toggle 5

In [None]:
# Get training and test accuracy histories
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Create count of the number of epochs
epoch = range(1, len(train_loss) + 1)

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_loss)
plt.plot(epoch, test_loss)
# plt.plot(no_reg['epoch'], no_reg['train_loss'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_loss'])  # toggle 0

plt.legend(['Train loss', 'Test loss', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Loss score')

# Get training and test accuracy histories
train_accuracy = history.history['acc']
test_accuracy = history.history['val_acc']

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_accuracy)
plt.plot(epoch, test_accuracy)
# plt.plot(no_reg['epoch'], no_reg['train_accuracy'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_accuracy'])  # toggle 0

plt.legend(['Train accuracy', 'Test accuracy', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy Score')

no_reg = {                             # toggle 0
    'epoch': epoch,                    # toggle 0
    'train_loss': train_loss,          # toggle 0
    'test_loss': test_loss,            # toggle 0
    'train_accuracy': train_accuracy,  # toggle 0
    'test_accuracy': test_accuracy,    # toggle 0
}

In [None]:
# Backup weights
weights = network.layers[0].get_weights()[0]  # toggle 0
# weights_L1 = network.layers[0].get_weights()[0]  # toggle 1
# weights_L2 = network.layers[0].get_weights()[0]  # toggle 2
# weights_max = network.layers[0].get_weights()[0]  # toggle 3

After you got to toggle `# toggle 3`, execute the following code.

In [None]:
# Show weight distribution
plt.hist((
    weights.reshape(-1),
    weights_L1.reshape(-1),
    weights_L2.reshape(-1),
    weights_max.reshape(-1),
), 49, range=(-.5, .5), label=(
    'No-reg',
    'L1',
    'L2',
    'Max',
))
plt.legend();