# Regularisation in NNs

## 1. Set up the environment

In [3]:
# Import statements
from tensorflow import keras as kr 
import numpy as np
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Set my plotting style
plt.style.use(('dark_background', 'bmh'))
plt.rc('axes', facecolor='none')
plt.rc('figure', figsize=(16, 4))

In [5]:
# Set random seed for reproducibility
np.random.seed(0)
torch.manual_seed(1)

<torch._C.Generator at 0x115273510>

In [6]:
# Shortcuts
imdb = kr.datasets.imdb
Tokeniser = kr.preprocessing.text.Tokenizer

## 2. Loading the data set

In [7]:
# Set the number of features we want
features_nb = 1000

# Load data and target vector from movie review data
(train_data, train_target), (test_data, test_target) = imdb.load_data(num_words=features_nb)

# Convert movie review data to a one-hot encoded feature matrix
tokeniser = Tokeniser(num_words=features_nb)
train_features = tokeniser.sequences_to_matrix(train_data, mode='binary')
test_features = tokeniser.sequences_to_matrix(test_data, mode='binary')

### 2.1 Exploring the data set

In [8]:
# Check data set sizes
print('train_data.shape:', train_data.shape)
print('train_target.shape:', train_target.shape)
print('test_data.shape:', test_data.shape)
print('test_target.shape:', test_target.shape)

train_data.shape: (25000,)
train_target.shape: (25000,)
test_data.shape: (25000,)
test_target.shape: (25000,)


In [9]:
# Check format of first training sample
print('type(train_data[0]):', type(train_data[0]))
print('type(train_target[0]):', type(train_target[0]))

type(train_data[0]): <class 'list'>
type(train_target[0]): <class 'numpy.int64'>


In [10]:
# Check size of first 10 training samples and corresponding target
print('Reviews length:', [len(sample) for sample in train_data[:10]])
print('Review sentiment (bad/good):', train_target[:10])

Reviews length: [218, 189, 141, 550, 147, 43, 123, 562, 233, 130]
Review sentiment (bad/good): [1 0 0 1 0 0 1 0 1 0]


In [11]:
# Show first review - machine format
print(train_data[0])

[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]


In [12]:
# Data set text visualisation helper function
def show_text(sample):
    word_to_id = imdb.get_word_index()
    word_to_id = {k:(v+3) for k,v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value:key for key,value in word_to_id.items()}
    print(' '.join(id_to_word[id_] for id_ in sample))

In [13]:
# Show first review - human format
show_text(train_data[0])

<START> this film was just brilliant casting <UNK> <UNK> story direction <UNK> really <UNK> the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same <UNK> <UNK> as myself so i loved the fact there was a real <UNK> with this film the <UNK> <UNK> throughout the film were great it was just brilliant so much that i <UNK> the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the <UNK> <UNK> was amazing really <UNK> at the end it was so sad and you know what they say if you <UNK> at a film it must have been good and this definitely was also <UNK> to the two little <UNK> that played the <UNK> of <UNK> and paul they were just brilliant children are often left out of the <UNK> <UNK> i think because the stars that play them all <UNK> up are such a big <UNK> for the whole film but these children are amazing and should be <UNK> for what they have done don't yo

In [14]:
# Show first review - neural net format
print(train_features[0])

[0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0.
 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [15]:
# Show first review - neural net format - explanation
print(train_features[0] * np.arange(len(train_features[0])))

[  0.   1.   2.   0.   4.   5.   6.   7.   8.   9.   0.   0.  12.  13.
  14.  15.  16.  17.  18.  19.   0.  21.  22.   0.   0.  25.  26.   0.
  28.   0.  30.   0.  32.  33.   0.  35.  36.   0.  38.  39.   0.   0.
   0.  43.   0.   0.  46.   0.  48.   0.  50.  51.  52.   0.   0.   0.
  56.   0.   0.   0.   0.   0.  62.   0.   0.  65.  66.   0.   0.   0.
   0.  71.   0.   0.   0.   0.  76.  77.   0.   0.   0.   0.  82.   0.
   0.   0.   0.  87.  88.   0.   0.   0.  92.   0.   0.   0.   0.   0.
  98.   0. 100.   0.   0. 103. 104.   0. 106. 107.   0.   0.   0.   0.
 112. 113.   0.   0.   0. 117.   0.   0.   0.   0.   0.   0. 124.   0.
   0.   0.   0.   0. 130.   0.   0.   0. 134. 135.   0.   0.   0.   0.
   0. 141.   0.   0. 144.   0.   0. 147.   0.   0. 150.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 167.
   0.   0.   0.   0. 172. 173.   0.   0.   0.   0. 178.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 192.   0. 194.   0.
   0. 

## 3. Exploring regularisation of NN

Play with the code, especially the one marked `# toggle`.  
Start from `# toggle 0`, and then, one at the time, `# toggle 1` to `5`.

In [16]:
class ThreeLayerDense(nn.Module):

    def __init__(self, input_size, units_size):

        super(ThreeLayerDense, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, units_size) #features_nb, 16
        self.linear2 = torch.nn.Linear(units_size, units_size)
        #self.dropout
        self.linear3 = torch.nn.Linear(units_size, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x) 
        x = self.linear2(x) 
        x = F.relu(x)
        return nn.Sigmoid()(self.linear3(x))

In [17]:
epochs = 25
log_interval = 10
batch_size = 100

# Add fully connected layer with a sigmoid activation function
model = ThreeLayerDense(features_nb,16)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)

In [None]:
epoch = 0
train_data_gen = zip(train_features,train_target)
train_size = len(train_target)
while epoch < epochs:
    predictions = []
    truth_values = []

    for batch_idx, (xs, y) in enumerate(train_data_gen):
        xs, y = torch.from_numpy(xs).float(), torch.FloatTensor([y])

        y_pred = model(xs)
        loss = criterion(y_pred, y)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        #nn.utils.clip_grad_norm(model.parameters(), 0.5)
        optimizer.step()

        predictions.append(y_pred.cpu().data.numpy().ravel())
        truth_values.append(y)

        if batch_idx % log_interval == 0:
            print('Train Epoch: {}, mini-batch {} of {}, training loss: {:.6f}'.format(
                epoch, batch_idx, train_size, loss.item()))

    epoch += 1

Train Epoch: 0, mini-batch 0 of 25000, training loss: 0.741060
Train Epoch: 0, mini-batch 10 of 25000, training loss: 0.716671
Train Epoch: 0, mini-batch 20 of 25000, training loss: 0.641829
Train Epoch: 0, mini-batch 30 of 25000, training loss: 0.634434
Train Epoch: 0, mini-batch 40 of 25000, training loss: 0.748209
Train Epoch: 0, mini-batch 50 of 25000, training loss: 0.631697
Train Epoch: 0, mini-batch 60 of 25000, training loss: 0.634775
Train Epoch: 0, mini-batch 70 of 25000, training loss: 0.631318
Train Epoch: 0, mini-batch 80 of 25000, training loss: 0.687322
Train Epoch: 0, mini-batch 90 of 25000, training loss: 0.690306
Train Epoch: 0, mini-batch 100 of 25000, training loss: 0.622003
Train Epoch: 0, mini-batch 110 of 25000, training loss: 0.595997
Train Epoch: 0, mini-batch 120 of 25000, training loss: 0.777372
Train Epoch: 0, mini-batch 130 of 25000, training loss: 0.550216
Train Epoch: 0, mini-batch 140 of 25000, training loss: 0.828558
Train Epoch: 0, mini-batch 150 of 25

Train Epoch: 0, mini-batch 1370 of 25000, training loss: 0.589652
Train Epoch: 0, mini-batch 1380 of 25000, training loss: 0.125836
Train Epoch: 0, mini-batch 1390 of 25000, training loss: 0.172541
Train Epoch: 0, mini-batch 1400 of 25000, training loss: 0.669641
Train Epoch: 0, mini-batch 1410 of 25000, training loss: 0.342450
Train Epoch: 0, mini-batch 1420 of 25000, training loss: 0.390328
Train Epoch: 0, mini-batch 1430 of 25000, training loss: 0.126586
Train Epoch: 0, mini-batch 1440 of 25000, training loss: 0.506219
Train Epoch: 0, mini-batch 1450 of 25000, training loss: 0.118727
Train Epoch: 0, mini-batch 1460 of 25000, training loss: 0.251418
Train Epoch: 0, mini-batch 1470 of 25000, training loss: 0.076189
Train Epoch: 0, mini-batch 1480 of 25000, training loss: 0.016424
Train Epoch: 0, mini-batch 1490 of 25000, training loss: 0.156109
Train Epoch: 0, mini-batch 1500 of 25000, training loss: 0.043361
Train Epoch: 0, mini-batch 1510 of 25000, training loss: 0.101089
Train Epoc

Train Epoch: 0, mini-batch 2710 of 25000, training loss: 0.432132
Train Epoch: 0, mini-batch 2720 of 25000, training loss: 0.009024
Train Epoch: 0, mini-batch 2730 of 25000, training loss: 0.371265
Train Epoch: 0, mini-batch 2740 of 25000, training loss: 0.578765
Train Epoch: 0, mini-batch 2750 of 25000, training loss: 0.077705
Train Epoch: 0, mini-batch 2760 of 25000, training loss: 0.277328
Train Epoch: 0, mini-batch 2770 of 25000, training loss: 0.450903
Train Epoch: 0, mini-batch 2780 of 25000, training loss: 0.019599
Train Epoch: 0, mini-batch 2790 of 25000, training loss: 0.147169
Train Epoch: 0, mini-batch 2800 of 25000, training loss: 0.181916
Train Epoch: 0, mini-batch 2810 of 25000, training loss: 0.348726
Train Epoch: 0, mini-batch 2820 of 25000, training loss: 1.184747
Train Epoch: 0, mini-batch 2830 of 25000, training loss: 0.094559
Train Epoch: 0, mini-batch 2840 of 25000, training loss: 0.043091
Train Epoch: 0, mini-batch 2850 of 25000, training loss: 0.182814
Train Epoc

Train Epoch: 0, mini-batch 4240 of 25000, training loss: 0.375515
Train Epoch: 0, mini-batch 4250 of 25000, training loss: 0.118049
Train Epoch: 0, mini-batch 4260 of 25000, training loss: 0.075126
Train Epoch: 0, mini-batch 4270 of 25000, training loss: 0.578917
Train Epoch: 0, mini-batch 4280 of 25000, training loss: 0.103251
Train Epoch: 0, mini-batch 4290 of 25000, training loss: 0.177213
Train Epoch: 0, mini-batch 4300 of 25000, training loss: 0.086183
Train Epoch: 0, mini-batch 4310 of 25000, training loss: 0.114434
Train Epoch: 0, mini-batch 4320 of 25000, training loss: 0.006419
Train Epoch: 0, mini-batch 4330 of 25000, training loss: 0.808411
Train Epoch: 0, mini-batch 4340 of 25000, training loss: 1.847954
Train Epoch: 0, mini-batch 4350 of 25000, training loss: 0.055592
Train Epoch: 0, mini-batch 4360 of 25000, training loss: 0.068986
Train Epoch: 0, mini-batch 4370 of 25000, training loss: 0.694376
Train Epoch: 0, mini-batch 4380 of 25000, training loss: 0.534204
Train Epoc

Train Epoch: 0, mini-batch 5630 of 25000, training loss: 0.035715
Train Epoch: 0, mini-batch 5640 of 25000, training loss: 0.007826
Train Epoch: 0, mini-batch 5650 of 25000, training loss: 0.055135
Train Epoch: 0, mini-batch 5660 of 25000, training loss: 0.030104
Train Epoch: 0, mini-batch 5670 of 25000, training loss: 0.057552
Train Epoch: 0, mini-batch 5680 of 25000, training loss: 0.167198
Train Epoch: 0, mini-batch 5690 of 25000, training loss: 0.440067
Train Epoch: 0, mini-batch 5700 of 25000, training loss: 0.168357
Train Epoch: 0, mini-batch 5710 of 25000, training loss: 0.633475
Train Epoch: 0, mini-batch 5720 of 25000, training loss: 0.122149
Train Epoch: 0, mini-batch 5730 of 25000, training loss: 0.058213
Train Epoch: 0, mini-batch 5740 of 25000, training loss: 0.122332
Train Epoch: 0, mini-batch 5750 of 25000, training loss: 0.037699
Train Epoch: 0, mini-batch 5760 of 25000, training loss: 0.031356
Train Epoch: 0, mini-batch 5770 of 25000, training loss: 0.068774
Train Epoc

Train Epoch: 0, mini-batch 6960 of 25000, training loss: 0.229888
Train Epoch: 0, mini-batch 6970 of 25000, training loss: 0.005206
Train Epoch: 0, mini-batch 6980 of 25000, training loss: 0.159398
Train Epoch: 0, mini-batch 6990 of 25000, training loss: 0.022556
Train Epoch: 0, mini-batch 7000 of 25000, training loss: 0.277373
Train Epoch: 0, mini-batch 7010 of 25000, training loss: 0.174773
Train Epoch: 0, mini-batch 7020 of 25000, training loss: 1.450884
Train Epoch: 0, mini-batch 7030 of 25000, training loss: 1.103633
Train Epoch: 0, mini-batch 7040 of 25000, training loss: 0.477950
Train Epoch: 0, mini-batch 7050 of 25000, training loss: 0.135970
Train Epoch: 0, mini-batch 7060 of 25000, training loss: 0.463283
Train Epoch: 0, mini-batch 7070 of 25000, training loss: 0.073511
Train Epoch: 0, mini-batch 7080 of 25000, training loss: 0.109352
Train Epoch: 0, mini-batch 7090 of 25000, training loss: 0.989030
Train Epoch: 0, mini-batch 7100 of 25000, training loss: 0.038590
Train Epoc

Train Epoch: 0, mini-batch 8250 of 25000, training loss: 0.021264
Train Epoch: 0, mini-batch 8260 of 25000, training loss: 0.557723
Train Epoch: 0, mini-batch 8270 of 25000, training loss: 0.078777
Train Epoch: 0, mini-batch 8280 of 25000, training loss: 0.031971
Train Epoch: 0, mini-batch 8290 of 25000, training loss: 0.105045
Train Epoch: 0, mini-batch 8300 of 25000, training loss: 0.051605
Train Epoch: 0, mini-batch 8310 of 25000, training loss: 0.089248
Train Epoch: 0, mini-batch 8320 of 25000, training loss: 0.012993
Train Epoch: 0, mini-batch 8330 of 25000, training loss: 0.089026
Train Epoch: 0, mini-batch 8340 of 25000, training loss: 0.266008
Train Epoch: 0, mini-batch 8350 of 25000, training loss: 0.131792
Train Epoch: 0, mini-batch 8360 of 25000, training loss: 0.400798
Train Epoch: 0, mini-batch 8370 of 25000, training loss: 3.762007
Train Epoch: 0, mini-batch 8380 of 25000, training loss: 0.749384
Train Epoch: 0, mini-batch 8390 of 25000, training loss: 0.241829
Train Epoc

Train Epoch: 0, mini-batch 9680 of 25000, training loss: 0.891989
Train Epoch: 0, mini-batch 9690 of 25000, training loss: 0.937710
Train Epoch: 0, mini-batch 9700 of 25000, training loss: 0.249375
Train Epoch: 0, mini-batch 9710 of 25000, training loss: 0.082014
Train Epoch: 0, mini-batch 9720 of 25000, training loss: 1.655181
Train Epoch: 0, mini-batch 9730 of 25000, training loss: 0.014215
Train Epoch: 0, mini-batch 9740 of 25000, training loss: 0.328966
Train Epoch: 0, mini-batch 9750 of 25000, training loss: 0.009532
Train Epoch: 0, mini-batch 9760 of 25000, training loss: 0.016776
Train Epoch: 0, mini-batch 9770 of 25000, training loss: 0.315282
Train Epoch: 0, mini-batch 9780 of 25000, training loss: 0.003763
Train Epoch: 0, mini-batch 9790 of 25000, training loss: 2.079187
Train Epoch: 0, mini-batch 9800 of 25000, training loss: 0.010080
Train Epoch: 0, mini-batch 9810 of 25000, training loss: 0.017785
Train Epoch: 0, mini-batch 9820 of 25000, training loss: 0.007470
Train Epoc

Train Epoch: 0, mini-batch 11140 of 25000, training loss: 0.000677
Train Epoch: 0, mini-batch 11150 of 25000, training loss: 0.211772
Train Epoch: 0, mini-batch 11160 of 25000, training loss: 0.047258
Train Epoch: 0, mini-batch 11170 of 25000, training loss: 1.258221
Train Epoch: 0, mini-batch 11180 of 25000, training loss: 0.126435
Train Epoch: 0, mini-batch 11190 of 25000, training loss: 0.368660
Train Epoch: 0, mini-batch 11200 of 25000, training loss: 0.480867
Train Epoch: 0, mini-batch 11210 of 25000, training loss: 0.136391
Train Epoch: 0, mini-batch 11220 of 25000, training loss: 0.041511
Train Epoch: 0, mini-batch 11230 of 25000, training loss: 0.323613
Train Epoch: 0, mini-batch 11240 of 25000, training loss: 0.227131
Train Epoch: 0, mini-batch 11250 of 25000, training loss: 0.174507
Train Epoch: 0, mini-batch 11260 of 25000, training loss: 0.570316
Train Epoch: 0, mini-batch 11270 of 25000, training loss: 0.490969
Train Epoch: 0, mini-batch 11280 of 25000, training loss: 0.24

Train Epoch: 0, mini-batch 12430 of 25000, training loss: 0.416376
Train Epoch: 0, mini-batch 12440 of 25000, training loss: 2.390151
Train Epoch: 0, mini-batch 12450 of 25000, training loss: 0.561809
Train Epoch: 0, mini-batch 12460 of 25000, training loss: 0.168254
Train Epoch: 0, mini-batch 12470 of 25000, training loss: 0.242876
Train Epoch: 0, mini-batch 12480 of 25000, training loss: 0.722814
Train Epoch: 0, mini-batch 12490 of 25000, training loss: 0.793184
Train Epoch: 0, mini-batch 12500 of 25000, training loss: 0.519394
Train Epoch: 0, mini-batch 12510 of 25000, training loss: 0.140334
Train Epoch: 0, mini-batch 12520 of 25000, training loss: 0.107285
Train Epoch: 0, mini-batch 12530 of 25000, training loss: 0.020514
Train Epoch: 0, mini-batch 12540 of 25000, training loss: 0.189991
Train Epoch: 0, mini-batch 12550 of 25000, training loss: 0.256083
Train Epoch: 0, mini-batch 12560 of 25000, training loss: 0.193469
Train Epoch: 0, mini-batch 12570 of 25000, training loss: 0.71

Train Epoch: 0, mini-batch 13970 of 25000, training loss: 2.581089
Train Epoch: 0, mini-batch 13980 of 25000, training loss: 0.272787
Train Epoch: 0, mini-batch 13990 of 25000, training loss: 1.861854
Train Epoch: 0, mini-batch 14000 of 25000, training loss: 0.189956
Train Epoch: 0, mini-batch 14010 of 25000, training loss: 0.202847
Train Epoch: 0, mini-batch 14020 of 25000, training loss: 0.050837
Train Epoch: 0, mini-batch 14030 of 25000, training loss: 0.022166
Train Epoch: 0, mini-batch 14040 of 25000, training loss: 0.509929
Train Epoch: 0, mini-batch 14050 of 25000, training loss: 0.575498
Train Epoch: 0, mini-batch 14060 of 25000, training loss: 0.049877
Train Epoch: 0, mini-batch 14070 of 25000, training loss: 0.714975
Train Epoch: 0, mini-batch 14080 of 25000, training loss: 0.635127
Train Epoch: 0, mini-batch 14090 of 25000, training loss: 0.335302
Train Epoch: 0, mini-batch 14100 of 25000, training loss: 1.168481
Train Epoch: 0, mini-batch 14110 of 25000, training loss: 0.34

Train Epoch: 0, mini-batch 15440 of 25000, training loss: 0.025377
Train Epoch: 0, mini-batch 15450 of 25000, training loss: 0.098988
Train Epoch: 0, mini-batch 15460 of 25000, training loss: 0.003546
Train Epoch: 0, mini-batch 15470 of 25000, training loss: 0.055610
Train Epoch: 0, mini-batch 15480 of 25000, training loss: 0.019093
Train Epoch: 0, mini-batch 15490 of 25000, training loss: 0.740690
Train Epoch: 0, mini-batch 15500 of 25000, training loss: 0.010398
Train Epoch: 0, mini-batch 15510 of 25000, training loss: 0.261165
Train Epoch: 0, mini-batch 15520 of 25000, training loss: 0.024269
Train Epoch: 0, mini-batch 15530 of 25000, training loss: 0.021735
Train Epoch: 0, mini-batch 15540 of 25000, training loss: 0.043500
Train Epoch: 0, mini-batch 15550 of 25000, training loss: 0.281395
Train Epoch: 0, mini-batch 15560 of 25000, training loss: 1.268614
Train Epoch: 0, mini-batch 15570 of 25000, training loss: 0.767259
Train Epoch: 0, mini-batch 15580 of 25000, training loss: 0.00

Train Epoch: 0, mini-batch 16870 of 25000, training loss: 0.003095
Train Epoch: 0, mini-batch 16880 of 25000, training loss: 0.000108
Train Epoch: 0, mini-batch 16890 of 25000, training loss: 0.065827
Train Epoch: 0, mini-batch 16900 of 25000, training loss: 0.004856
Train Epoch: 0, mini-batch 16910 of 25000, training loss: 0.078855
Train Epoch: 0, mini-batch 16920 of 25000, training loss: 0.016270
Train Epoch: 0, mini-batch 16930 of 25000, training loss: 1.082990
Train Epoch: 0, mini-batch 16940 of 25000, training loss: 0.085360
Train Epoch: 0, mini-batch 16950 of 25000, training loss: 0.039076
Train Epoch: 0, mini-batch 16960 of 25000, training loss: 0.115687
Train Epoch: 0, mini-batch 16970 of 25000, training loss: 0.576850
Train Epoch: 0, mini-batch 16980 of 25000, training loss: 0.117479
Train Epoch: 0, mini-batch 16990 of 25000, training loss: 0.082589
Train Epoch: 0, mini-batch 17000 of 25000, training loss: 0.148266
Train Epoch: 0, mini-batch 17010 of 25000, training loss: 0.06

Train Epoch: 0, mini-batch 18110 of 25000, training loss: 0.305988
Train Epoch: 0, mini-batch 18120 of 25000, training loss: 0.144220
Train Epoch: 0, mini-batch 18130 of 25000, training loss: 0.060948
Train Epoch: 0, mini-batch 18140 of 25000, training loss: 0.151573
Train Epoch: 0, mini-batch 18150 of 25000, training loss: 0.056079
Train Epoch: 0, mini-batch 18160 of 25000, training loss: 0.005527
Train Epoch: 0, mini-batch 18170 of 25000, training loss: 0.980717
Train Epoch: 0, mini-batch 18180 of 25000, training loss: 0.021968
Train Epoch: 0, mini-batch 18190 of 25000, training loss: 1.337979
Train Epoch: 0, mini-batch 18200 of 25000, training loss: 0.005598
Train Epoch: 0, mini-batch 18210 of 25000, training loss: 0.009470
Train Epoch: 0, mini-batch 18220 of 25000, training loss: 0.055478
Train Epoch: 0, mini-batch 18230 of 25000, training loss: 1.284391
Train Epoch: 0, mini-batch 18240 of 25000, training loss: 0.003926
Train Epoch: 0, mini-batch 18250 of 25000, training loss: 0.01

Train Epoch: 0, mini-batch 19370 of 25000, training loss: 0.007493
Train Epoch: 0, mini-batch 19380 of 25000, training loss: 0.414980
Train Epoch: 0, mini-batch 19390 of 25000, training loss: 0.032032
Train Epoch: 0, mini-batch 19400 of 25000, training loss: 1.750396
Train Epoch: 0, mini-batch 19410 of 25000, training loss: 0.166618
Train Epoch: 0, mini-batch 19420 of 25000, training loss: 0.035464
Train Epoch: 0, mini-batch 19430 of 25000, training loss: 0.125427
Train Epoch: 0, mini-batch 19440 of 25000, training loss: 0.009154
Train Epoch: 0, mini-batch 19450 of 25000, training loss: 0.001513
Train Epoch: 0, mini-batch 19460 of 25000, training loss: 0.029878
Train Epoch: 0, mini-batch 19470 of 25000, training loss: 0.257645
Train Epoch: 0, mini-batch 19480 of 25000, training loss: 0.014918
Train Epoch: 0, mini-batch 19490 of 25000, training loss: 0.032626
Train Epoch: 0, mini-batch 19500 of 25000, training loss: 0.033073
Train Epoch: 0, mini-batch 19510 of 25000, training loss: 0.00

Train Epoch: 0, mini-batch 20810 of 25000, training loss: 0.123088
Train Epoch: 0, mini-batch 20820 of 25000, training loss: 0.217426
Train Epoch: 0, mini-batch 20830 of 25000, training loss: 0.301322
Train Epoch: 0, mini-batch 20840 of 25000, training loss: 0.003360
Train Epoch: 0, mini-batch 20850 of 25000, training loss: 0.011562
Train Epoch: 0, mini-batch 20860 of 25000, training loss: 0.101807
Train Epoch: 0, mini-batch 20870 of 25000, training loss: 0.112275
Train Epoch: 0, mini-batch 20880 of 25000, training loss: 1.341494
Train Epoch: 0, mini-batch 20890 of 25000, training loss: 0.018845
Train Epoch: 0, mini-batch 20900 of 25000, training loss: 1.957750
Train Epoch: 0, mini-batch 20910 of 25000, training loss: 0.020309
Train Epoch: 0, mini-batch 20920 of 25000, training loss: 0.005219
Train Epoch: 0, mini-batch 20930 of 25000, training loss: 0.256992
Train Epoch: 0, mini-batch 20940 of 25000, training loss: 0.129655
Train Epoch: 0, mini-batch 20950 of 25000, training loss: 0.24

Train Epoch: 0, mini-batch 22190 of 25000, training loss: 0.023039
Train Epoch: 0, mini-batch 22200 of 25000, training loss: 0.013306
Train Epoch: 0, mini-batch 22210 of 25000, training loss: 0.363100
Train Epoch: 0, mini-batch 22220 of 25000, training loss: 0.009641
Train Epoch: 0, mini-batch 22230 of 25000, training loss: 0.540001
Train Epoch: 0, mini-batch 22240 of 25000, training loss: 0.028538
Train Epoch: 0, mini-batch 22250 of 25000, training loss: 0.026374
Train Epoch: 0, mini-batch 22260 of 25000, training loss: 1.303832
Train Epoch: 0, mini-batch 22270 of 25000, training loss: 0.061830
Train Epoch: 0, mini-batch 22280 of 25000, training loss: 2.132433
Train Epoch: 0, mini-batch 22290 of 25000, training loss: 0.016168
Train Epoch: 0, mini-batch 22300 of 25000, training loss: 0.008035
Train Epoch: 0, mini-batch 22310 of 25000, training loss: 0.007714
Train Epoch: 0, mini-batch 22320 of 25000, training loss: 0.008672
Train Epoch: 0, mini-batch 22330 of 25000, training loss: 0.07

Train Epoch: 0, mini-batch 23660 of 25000, training loss: 0.060520
Train Epoch: 0, mini-batch 23670 of 25000, training loss: 0.145529
Train Epoch: 0, mini-batch 23680 of 25000, training loss: 0.247305
Train Epoch: 0, mini-batch 23690 of 25000, training loss: 0.039215
Train Epoch: 0, mini-batch 23700 of 25000, training loss: 0.254025
Train Epoch: 0, mini-batch 23710 of 25000, training loss: 0.114241
Train Epoch: 0, mini-batch 23720 of 25000, training loss: 1.582356
Train Epoch: 0, mini-batch 23730 of 25000, training loss: 0.003452
Train Epoch: 0, mini-batch 23740 of 25000, training loss: 0.003625
Train Epoch: 0, mini-batch 23750 of 25000, training loss: 0.009783
Train Epoch: 0, mini-batch 23760 of 25000, training loss: 0.092677
Train Epoch: 0, mini-batch 23770 of 25000, training loss: 0.550467
Train Epoch: 0, mini-batch 23780 of 25000, training loss: 0.002972
Train Epoch: 0, mini-batch 23790 of 25000, training loss: 0.235835
Train Epoch: 0, mini-batch 23800 of 25000, training loss: 0.01

In [None]:
# ! ls  # toggle 5

In [None]:
# Get training and test accuracy histories
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Create count of the number of epochs
epoch = range(1, len(train_loss) + 1)

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_loss)
plt.plot(epoch, test_loss)
# plt.plot(no_reg['epoch'], no_reg['train_loss'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_loss'])  # toggle 0

plt.legend(['Train loss', 'Test loss', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Loss score')

# Get training and test accuracy histories
train_accuracy = history.history['acc']
test_accuracy = history.history['val_acc']

# Visualize accuracy history
plt.figure()

plt.plot(epoch, train_accuracy)
plt.plot(epoch, test_accuracy)
# plt.plot(no_reg['epoch'], no_reg['train_accuracy'])  # toggle 0
# plt.plot(no_reg['epoch'], no_reg['test_accuracy'])  # toggle 0

plt.legend(['Train accuracy', 'Test accuracy', 'Train no-reg', 'Test no-reg'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy Score')

no_reg = {                             # toggle 0
    'epoch': epoch,                    # toggle 0
    'train_loss': train_loss,          # toggle 0
    'test_loss': test_loss,            # toggle 0
    'train_accuracy': train_accuracy,  # toggle 0
    'test_accuracy': test_accuracy,    # toggle 0
}

In [None]:
# Backup weights
weights = network.layers[0].get_weights()[0]  # toggle 0
# weights_L1 = network.layers[0].get_weights()[0]  # toggle 1
# weights_L2 = network.layers[0].get_weights()[0]  # toggle 2
# weights_max = network.layers[0].get_weights()[0]  # toggle 3

After you got to toggle `# toggle 3`, execute the following code.

In [None]:
# Show weight distribution
plt.hist((
    weights.reshape(-1),
    weights_L1.reshape(-1),
    weights_L2.reshape(-1),
    weights_max.reshape(-1),
), 49, range=(-.5, .5), label=(
    'No-reg',
    'L1',
    'L2',
    'Max',
))
plt.legend();