In [None]:
# %% Deep learning - Section 15.146
#    Xavier and Kaiming initialisations

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [2]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import sklearn.metrics     as skm
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
# Plot configuration: request SVG output for inline figures and apply
# matplotlib's 'default' style sheet
set_matplotlib_formats('svg')
plt.style.use('default')


In [16]:
# %% Model class

class FFN_model(nn.Module):
    """Small feed-forward network used to inspect weight initialisations.

    Layers (all `nn.Linear`): input (100->100), hidden1, hidden2, hidden3
    (100->100 each) and output (100->2). A ReLU follows every layer except
    the output one.
    """

    def __init__(self):
        super().__init__()

        # Architecture
        self.input   = nn.Linear(100,100)
        self.hidden1 = nn.Linear(100,100)
        self.hidden2 = nn.Linear(100,100)
        self.hidden3 = nn.Linear(100,100)
        self.output  = nn.Linear(100,  2)

    # Forward propagation
    def forward(self,x):
        """Propagate `x` through all five layers and return the raw outputs.

        The first layer is `nn.Linear(100,100)`, so the last dimension of `x`
        must be 100; the result has 2 values along its last dimension and no
        final activation is applied.
        """

        x = F.relu(self.input(x))
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        # hidden3 is declared in __init__ and must be part of the forward
        # pass, otherwise its parameters are never used
        x = F.relu(self.hidden3(x))
        x = self.output(x)

        return x


In [None]:
# %% Model instance

# Build a fresh network (parameters keep whatever initialisation nn.Linear
# applies on construction) and print its layer structure
model = FFN_model()
print(model)


In [None]:
# %% Explore weights

# Gather every weight and every bias of the model into two flat vectors.
# Each list is seeded with an empty float64 array so that the concatenated
# result is float64 and an empty category still yields an empty vector.
bias_chunks   = [np.array([])]
weight_chunks = [np.array([])]

for par_name,par in model.named_parameters():

    if 'bias' in par_name:
        bias_chunks.append(par.data.numpy().flatten())
    elif 'weight' in par_name:
        weight_chunks.append(par.data.numpy().flatten())

all_weight = np.concatenate(weight_chunks,axis=0)
all_biases = np.concatenate(bias_chunks,axis=0)

print(f'There are {len(all_biases)} bias parameters.')
print(f'There are {len(all_weight)} weight parameters.')

# Figure with three panels (golden-ratio based width)
phi    = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,3,figsize=(2*phi*6,6))

# Panels 0 and 1: raw 40-bin histograms
hist_panels = ( (ax[0],all_biases,'Histogram of initial biases'),
                (ax[1],all_weight,'Histogram of initial weights') )

for panel,values,title in hist_panels:
    panel.hist(values,40,alpha=0.8)
    panel.set_title(title)

# Panel 2: 30-bin histograms rescaled to proportions and drawn as lines
# through the bin centres
y_bias,x_bias     = np.histogram(all_biases,30)
y_weight,x_weight = np.histogram(all_weight,30)

bias_centres   = (x_bias[1:]+x_bias[:-1])/2
weight_centres = (x_weight[1:]+x_weight[:-1])/2

ax[2].plot(bias_centres,y_bias/np.sum(y_bias),label='Bias')
ax[2].plot(weight_centres,y_weight/np.sum(y_weight),label='Weight')
ax[2].set_title('Density estimate for both')
ax[2].legend()

# Axis labels: counts for the two histograms, proportions for the line plot
for panel,y_label in zip(ax,('Count','Count','Probability')):
    panel.set_xlabel('Initial value')
    panel.set_ylabel(y_label)

# Save, show and download the figure
out_file = 'figure20_xavier_kaiming_inits.png'
plt.savefig(out_file)
plt.show()
files.download(out_file)


In [None]:
# %% Explore weights by layer

# Plot the distribution of the initial biases (left panel) and weights
# (right panel) of every linear layer: one line per layer, one colour per layer
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,2,figsize=(2*phi*6,6))

# Linear layers in registration order. The colour is tied to the layer itself
# instead of a running counter over the parameter list, so it no longer
# depends on each weight being listed before its bias.
linear_layers = [ (name,m) for name,m in model.named_modules() if isinstance(m,nn.Linear) ]
n_layers      = len(linear_layers)
cmap          = plt.cm.plasma(np.linspace(0.2,0.9,n_layers))

for i,(layer_name,layer) in enumerate(linear_layers):

    for axis,kind,par in ( (ax[0],'bias',layer.bias),(ax[1],'weight',layer.weight) ):

        # A linear layer built with bias=False has no bias tensor to plot
        if par is None:
            continue

        # 10-bin histogram rescaled to proportions, drawn through the bin centres
        data = par.data.numpy().flatten()
        y,x  = np.histogram(data,10)

        axis.plot((x[1:]+x[:-1])/2,y/np.sum(y),label='%s %s (N=%g)'%(layer_name,kind,len(data)),color=cmap[i])

ax[0].set_title('Biases per layer')
ax[0].legend()
ax[1].set_title('Weights per layer')
ax[1].legend(bbox_to_anchor=(1,1),loc='upper left')

# Label both panels so the figure can be read on its own
for axis in ax:
    axis.set_xlabel('Initial value')
    axis.set_ylabel('Probability')

# bbox_inches='tight' keeps the legend that sits outside the right-hand axes
# within the saved image
plt.savefig('figure21_xavier_kaiming_inits.png',bbox_inches='tight')
plt.show()
files.download('figure21_xavier_kaiming_inits.png')


In [None]:
# Weird output for the output biases ?
# The output layer is nn.Linear(100,2), so it holds only two bias values;
# print them to look at the raw numbers directly

print(model.output.bias.data)


In [39]:
# Check the docstring of linear layers
# (the trailing `?` is IPython/Jupyter help syntax, not plain Python)

nn.Linear?

# Attributes:
# weight: the learnable weights of the module of shape
#     :math:`(\text{out\_features}, \text{in\_features})`. The values are
#     initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
#     :math:`k = \frac{1}{\text{in\_features}}`
# bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
#         If :attr:`bias` is ``True``, the values are initialized from
#         :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
#         :math:`k = \frac{1}{\text{in\_features}}`


In [None]:
# Test whether the numbers match our prediction from the formula
# (nn.Linear docstring: biases ~ U(-sqrt(k), sqrt(k)) with k = 1/in_features)

# Empirical bias range
bias_range = [ torch.min(model.hidden1.bias.data).item(), torch.max(model.hidden1.bias.data).item() ]
bias_count = len(model.hidden1.bias.data)

# Theoretical bound sqrt(k). k depends on the number of INPUTS of the layer,
# not on the number of biases (= out_features); the two only coincide here
# because hidden1 is a square 100x100 layer. Note that this is the half-width
# of a uniform distribution, not a standard deviation.
sigma = np.sqrt(1/model.hidden1.in_features)

# Print
print('Theoretical range = ' + str([-float(sigma),float(sigma)]))
print('Empirical range = ' + str(bias_range))


In [48]:
# %% Use a Xavier variance initialisation

# As seen above, PyTorch's default for these layers is a Kaiming (uniform)
# initialisation; this cell shows how to switch the weights to a normal
# distribution with Xavier variance parametrisation

model = FFN_model()

# Re-draw the weight matrices only (biases keep the -default- Kaiming values)
weight_tensors = [ par.data for par_name,par in model.named_parameters() if 'weight' in par_name ]

for w in weight_tensors:
    nn.init.xavier_normal_(w)

# Rerun the exploration cells above before continuing, so that the figures
# reflect the re-initialised model


In [None]:
# Test whether the numbers match our prediction from the formula

# Empirical weight variance
weight_var   = torch.var(model.hidden1.weight.data.flatten()).item()
weight_count = len(model.hidden1.weight.data)   # number of rows of the weight matrix

# Theoretical variance for Xavier-normal weights: 2 / (fan_in + fan_out).
# The weight matrix has shape (out_features, in_features), so both fans are
# read from it explicitly instead of assuming a square layer.
fan_out,fan_in = model.hidden1.weight.shape
sigma_x = 2 / (fan_in + fan_out)

# Print (sigma_x is a variance, i.e. sigma squared, hence the label)
print('Theoretical variance = ' + str(sigma_x))
print('Empirical variance = ' + str(weight_var))


In [None]:
# %% Note

# There are several other weight initialization methods available in PyTorch.
# > See https://pytorch.org/docs/stable/nn.init.html


In [55]:
# %% Exercise 1
#    Explore the weight initialization options using PyTorch's functions (nn.init.<method>).
#    For example: apply Xavier-uniform, Kaiming, constant (this is what we did in the first video of this section).

# Truncated normal: standard normal (mean 0, std 1) restricted to [-2, 2]
model = FFN_model()

# Re-initialise the weights only; every other parameter is skipped
for par_name,par in model.named_parameters():
    if 'weight' not in par_name:
        continue
    nn.init.trunc_normal_(par.data,mean=0,std=1,a=-2,b=2)


In [61]:
# %% Exercise 1
#    Continue ...

# Orthogonal initialisation (gain = 4)
model = FFN_model()

# Re-initialise the weights only; every other parameter is skipped
for par_name,par in model.named_parameters():
    if 'weight' not in par_name:
        continue
    nn.init.orthogonal_(par.data,gain=4)


In [65]:
# %% Exercise 1
#    Continue ...

# Sparse initialisation (sparsity = 0.25, std = 0.01)
model = FFN_model()

# Re-initialise the weights only; every other parameter is skipped
for par_name,par in model.named_parameters():
    if 'weight' not in par_name:
        continue
    nn.init.sparse_(par.data,sparsity=0.25,std=0.01)
