# Weight Initializations

In [1]:
import torch
import torch.nn as nn

In [2]:
# Build a small fully connected layer (5 inputs -> 5 outputs) to experiment on,
# then display the weight matrix it was constructed with.
layer = nn.Linear(in_features=5, out_features=5)
layer.weight

Parameter containing:
tensor([[ 0.3913,  0.2667, -0.2969, -0.1375, -0.0343],
        [ 0.4304,  0.0209,  0.4193,  0.0858, -0.1380],
        [-0.2428,  0.2050, -0.3106, -0.2301,  0.2931],
        [ 0.0899,  0.1515,  0.3406, -0.1123, -0.3853],
        [ 0.3848,  0.1945,  0.3751, -0.2213,  0.1515]], requires_grad=True)

In [3]:
# Show the raw weight values as a plain tensor (no `requires_grad`).
# `detach()` replaces the legacy `.data` attribute: both share storage with the
# parameter, but in-place edits made through `.data` are invisible to autograd
# and can silently yield wrong gradients, whereas `detach()` lets autograd
# detect them.
layer.weight.detach()

tensor([[ 0.3913,  0.2667, -0.2969, -0.1375, -0.0343],
        [ 0.4304,  0.0209,  0.4193,  0.0858, -0.1380],
        [-0.2428,  0.2050, -0.3106, -0.2301,  0.2931],
        [ 0.0899,  0.1515,  0.3406, -0.1123, -0.3853],
        [ 0.3848,  0.1945,  0.3751, -0.2213,  0.1515]])

#### Initialize weights from a uniform distribution

In [4]:
# Overwrite the weights in place with samples from a uniform distribution
# between a=0 and b=3; the call returns the updated parameter, which is
# displayed as the cell output.
nn.init.uniform_(layer.weight, a=0.0, b=3.0)

Parameter containing:
tensor([[2.5634, 1.0990, 2.8034, 2.0824, 1.8095],
        [0.6102, 2.0487, 2.5331, 2.2755, 2.2411],
        [0.9018, 1.7530, 1.9782, 0.4400, 2.8729],
        [2.2291, 1.4573, 0.6810, 2.2628, 2.3113],
        [1.1030, 2.7107, 2.4390, 1.3277, 1.2108]], requires_grad=True)

#### Initialize weights from a normal distribution

In [5]:
# Re-draw the weights in place from a standard normal distribution
# (mean 0, standard deviation 1).
nn.init.normal_(layer.weight, mean=0.0, std=1.0)

Parameter containing:
tensor([[-0.3163,  2.6449, -0.7303,  0.9716,  0.2513],
        [ 0.8100,  1.8922,  1.2896, -1.5715,  0.1743],
        [-0.3700, -0.4202, -0.7640, -1.2785, -0.6731],
        [ 0.8440, -0.6178, -0.1466, -0.7210,  1.2774],
        [ 0.3760,  1.1948, -1.1976,  0.1761, -0.9667]], requires_grad=True)

In [6]:
# Same normal initializer, but with a smaller spread (standard deviation 0.2)
# so the sampled weights sit closer to zero than in the previous cell.
nn.init.normal_(layer.weight, mean=0.0, std=0.2)

Parameter containing:
tensor([[-0.3261, -0.0833,  0.2708,  0.6548,  0.1230],
        [-0.0181, -0.2598,  0.3671,  0.0280,  0.1458],
        [ 0.3988, -0.3099,  0.4152, -0.1246, -0.0150],
        [-0.0212, -0.0536, -0.3268, -0.1177,  0.2928],
        [-0.3257, -0.0554, -0.0671, -0.0396, -0.0762]], requires_grad=True)

#### Initialize the bias with a constant value

In [7]:
# Fill every entry of the bias vector with the constant 0 (in place),
# then display the bias to confirm the result.
nn.init.constant_(layer.bias, val=0.0)
layer.bias

Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)

#### Xavier (Glorot) normal and Xavier uniform

In [8]:
# Xavier (Glorot) normal initialization, applied in place with gain 1.
# Per the torch.nn.init docs, values are drawn from N(0, std^2) with
# std = gain * sqrt(2 / (fan_in + fan_out)).
nn.init.xavier_normal_(layer.weight, gain=1.0)

Parameter containing:
tensor([[-0.0613, -0.1214, -0.1923,  0.0713,  0.1960],
        [-0.6523,  0.8566,  0.4492, -0.0956,  0.6262],
        [ 1.2191, -0.3310, -0.4069, -1.1685,  0.5757],
        [ 0.1219, -0.3070,  0.2313, -0.0084, -0.7597],
        [ 0.9070,  0.0607,  0.4432,  0.0219, -0.1574]], requires_grad=True)

In [9]:
# Xavier (Glorot) uniform initialization, applied in place with gain 1.
# Per the torch.nn.init docs, values are drawn from U(-bound, bound) with
# bound = gain * sqrt(6 / (fan_in + fan_out)).
nn.init.xavier_uniform_(layer.weight, gain=1.0)

Parameter containing:
tensor([[-0.3995, -0.5362, -0.3039,  0.7398,  0.4432],
        [-0.7020, -0.7619,  0.1389, -0.3520, -0.2449],
        [ 0.1597, -0.2841, -0.3740,  0.2189,  0.3427],
        [-0.0880,  0.4797,  0.2522,  0.5815,  0.5699],
        [-0.1325, -0.0444, -0.1829, -0.1955,  0.1637]], requires_grad=True)