In [1]:
import torch
import torch.nn as nn

## MSE Loss

In [4]:
# Seed PyTorch's global RNG so the random tensors created in this notebook are
# identical on every Restart & Run All; without it each run gives new numbers.
torch.manual_seed(0)
prediction = torch.randn(4, 5)  # input: raw model outputs, 4 rows x 5 columns
label = torch.randn(4, 5)  # target: real-valued targets of the same shape

In [4]:
# Mean-squared error; reduction="mean" averages the squared differences
# over every element and yields a single scalar tensor.
mse = nn.MSELoss(reduction="mean")
loss = mse(input=prediction, target=label)
loss

tensor(1.8961)

In [6]:
# Sanity check: the same mean-squared error written out by hand.
(prediction - label).pow(2).mean()

tensor(1.8961)

## Binary Cross Entropy Loss

In [26]:
# Binary targets: start from a 4x5 tensor of zeros and overwrite it in place
# with integers drawn from [0, 2), i.e. every entry becomes 0.0 or 1.0.
# This rebinds `label`, replacing the real-valued targets used for the MSE example.
label = torch.zeros((4, 5)).random_(0, 2)
label

tensor([[0., 1., 1., 0., 0.],
        [0., 1., 1., 0., 1.],
        [1., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.]])

In [27]:
# Sigmoid module: squashes raw scores into (0, 1) before they are fed to BCELoss.
sigmoid = nn.Sigmoid()
# Binary cross-entropy averaged over all elements.
bce = nn.BCELoss(reduction="mean")

In [28]:
# Convert the raw predictions to probabilities first, then score them
# against the binary labels.
bce(input=sigmoid(prediction), target=label)

tensor(0.7543)

In [29]:
# BCEWithLogitsLoss takes the raw predictions directly (no explicit sigmoid call);
# the result matches the sigmoid + BCELoss value computed above.
bces = nn.BCEWithLogitsLoss(reduction="mean")
bces(input=prediction, target=label)

tensor(0.7543)

## NumPy implementation
Binary cross-entropy recomputed by hand. NumPy arrays are used because element-wise math on plain Python lists is slow.

In [2]:
import numpy as np

In [36]:
# NumPy arrays of the raw predictions (x) and the binary labels (y)
# for the hand-written implementation.
x, y = prediction.numpy(), label.numpy()

In [32]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))`` computed with NumPy.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s); must support unary minus and ``np.exp``.

    Returns
    -------
    scalar or numpy.ndarray
        Same shape as ``x``, each value in the closed interval [0, 1].
    """
    # For very negative x, exp(-x) overflows to inf. 1 / (1 + inf) is still the
    # correct limit 0.0, so the overflow (and the mirror-image underflow for very
    # positive x) is benign: silence it instead of emitting a RuntimeWarning or
    # raising when the caller has strict floating-point error handling enabled.
    with np.errstate(over="ignore", under="ignore"):
        return 1 / (1 + np.exp(-x))

In [37]:
# Show the raw scores, then the same scores after the sigmoid.
print(x, sigmoid(x), sep="\n")

[[ 0.04552053 -1.1774291  -1.5634993   0.7498118   1.8019177 ]
 [-0.734017   -0.73632747  0.5587094  -0.12531994 -0.5406369 ]
 [ 1.3721094   1.6399989   1.5724233   1.0965109  -0.35268596]
 [-1.4873792  -1.5600015  -0.8833712  -0.22771786  1.1618035 ]]
[[0.51137817 0.23551476 0.17314509 0.67913765 0.8583822 ]
 [0.32431382 0.32380775 0.6361539  0.46871096 0.36803943]
 [0.79772073 0.8375348  0.8281288  0.7496058  0.4127313 ]
 [0.18431541 0.17364642 0.2924797  0.44331524 0.7616603 ]]


In [38]:
# Hand-written binary cross-entropy, vectorised with NumPy.
# Keep the probabilities under their own name instead of overwriting `x`,
# so re-running this cell does not apply the sigmoid a second time.
probs = sigmoid(x)
# Probability assigned to the true class of each element:
# p where the label is 1, (1 - p) where the label is 0.
true_class_probs = np.where(y == 1, probs, 1 - probs)
# Per-element loss is the negative log of that probability.
loss_values = -np.log(true_class_probs)
# Accumulate the mean in float64 so the printed value keeps full precision
# even when the inputs are float32.
print(np.mean(loss_values, dtype=np.float64))

0.7542846074746271


## Weight initialization

In [50]:
# Linear layer with 5 input and 5 output features; print the weight matrix
# it was created with.
layer = nn.Linear(in_features=5, out_features=5)
print(layer.weight)

Parameter containing:
tensor([[-0.2299, -0.0466, -0.3708, -0.1004, -0.4236],
        [-0.1491,  0.2098,  0.4284, -0.1519, -0.2041],
        [-0.1463, -0.2627, -0.2122,  0.0127, -0.3216],
        [ 0.2948, -0.1676, -0.4331, -0.4381, -0.0104],
        [-0.1977, -0.2850, -0.3745,  0.4308, -0.3619]], requires_grad=True)


In [49]:
# Overwrite the weight values in place with draws from the uniform distribution on [0, 1].
# NOTE(review): `.data` is passed here, so the value echoed by the cell is a plain
# tensor rather than the Parameter; the other init cells pass `layer.weight` directly.
nn.init.uniform_(layer.weight.data, a=0.0, b=1.0)

tensor([[0.4007, 0.7538, 0.2810, 0.1080, 0.0250],
        [0.3180, 0.9785, 0.5931, 0.0541, 0.0605],
        [0.5983, 0.2368, 0.8176, 0.5504, 0.5245],
        [0.9410, 0.7079, 0.0486, 0.1650, 0.6996],
        [0.1363, 0.1987, 0.5833, 0.0049, 0.7870]])

In [55]:
# Fill every weight with the constant 1.5.
# Author's note: "Not used" (presumably: constant weight init is not used in practice; confirm).
nn.init.constant_(layer.weight, val=1.5)

Parameter containing:
tensor([[1.5000, 1.5000, 1.5000, 1.5000, 1.5000],
        [1.5000, 1.5000, 1.5000, 1.5000, 1.5000],
        [1.5000, 1.5000, 1.5000, 1.5000, 1.5000],
        [1.5000, 1.5000, 1.5000, 1.5000, 1.5000],
        [1.5000, 1.5000, 1.5000, 1.5000, 1.5000]], requires_grad=True)

In [56]:
# Fill every bias entry with the constant 1.5.
nn.init.constant_(layer.bias, val=1.5)

Parameter containing:
tensor([1.5000, 1.5000, 1.5000, 1.5000, 1.5000], requires_grad=True)

In [53]:
# Re-draw the weights in place from a normal distribution with mean 0 and standard deviation 1.
nn.init.normal_(layer.weight, mean=0.0, std=1.0)

Parameter containing:
tensor([[-0.2610,  0.6897,  0.0051, -1.2690,  0.0798],
        [-0.1847,  0.6756,  0.4114,  0.2486, -0.4882],
        [-0.2873,  0.7859,  1.0290, -2.0959,  0.2280],
        [ 1.0409,  0.3036, -0.3518,  0.3663,  0.3631],
        [-0.4027,  1.5003, -0.3487,  1.5931,  0.3396]], requires_grad=True)

In [57]:
# Xavier (Glorot) normal initialisation of the weights, in place, with gain 1.
nn.init.xavier_normal_(layer.weight, gain=1.0)

Parameter containing:
tensor([[-2.1719e-01, -1.4582e-01, -1.0145e-01, -2.8100e-01, -1.7589e-01],
        [ 2.0184e-01, -1.1840e-01,  1.2264e-01, -5.7026e-01, -2.0965e-02],
        [ 8.7806e-01, -3.6449e-01, -8.7530e-04,  1.8208e-01,  4.2400e-01],
        [-5.3750e-01, -3.5745e-01, -2.1193e-01,  2.4263e-01, -2.8164e-01],
        [ 1.9545e-01, -6.9395e-01,  1.1256e-01,  3.6696e-01, -1.7162e-01]],
       requires_grad=True)