In [1]:
import torch
from torch import nn, optim

In [2]:
# training set
X = torch.tensor([[0., 0.],
              [0., 1.],
              [1., 0.],
              [1., 1.]])

# true labels
y = torch.tensor([0, 1, 1, 0])

In [3]:
X,y

(tensor([[0., 0.],
         [0., 1.],
         [1., 0.],
         [1., 1.]]),
 tensor([0, 1, 1, 0]))

In [4]:
linear_units = 2
model = nn.Sequential(
    nn.Linear(2, linear_units),
    nn.ReLU(),
    nn.Linear(linear_units, 2),
)
lossfun = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.5)

In [5]:
def changeWeights(model):
    for layer in model:
        if hasattr(layer, "weight"):
            layer.weight.data = 0.01 * torch.randn(layer.in_features, layer.out_features)
            layer.bias.data = 0.01 * torch.ones(layer.out_features)

In [6]:
changeWeights(model)
for name,param in model.named_parameters():
    print(name, param.data, param.grad)

0.weight tensor([[-0.0062,  0.0008],
        [-0.0125, -0.0020]]) None
0.bias tensor([0.0100, 0.0100]) None
2.weight tensor([[ 0.0022, -0.0085],
        [ 0.0064,  0.0187]]) None
2.bias tensor([0.0100, 0.0100]) None


In [7]:
y_hat = model(X)
loss = lossfun(y_hat, y)

In [8]:
loss

tensor(0.6932, grad_fn=<NllLossBackward0>)

In [9]:
y_hat

tensor([[0.0099, 0.0103],
        [0.0100, 0.0102],
        [0.0100, 0.0100],
        [0.0100, 0.0100]], grad_fn=<AddmmBackward0>)

In [10]:
nn.Softmax()(y_hat)

  nn.Softmax()(y_hat)


tensor([[0.4999, 0.5001],
        [0.4999, 0.5001],
        [0.5000, 0.5000],
        [0.5000, 0.5000]], grad_fn=<SoftmaxBackward0>)

In [11]:
optimizer.zero_grad()
y_hat.retain_grad()
loss.backward()

In [13]:
print(y_hat.grad)

None


In [14]:
for name,param in model.named_parameters():
    print(name, param.data, param.grad)

0.weight tensor([[-0.0062,  0.0008],
        [-0.0125, -0.0020]]) tensor([[ 8.8476e-09,  7.2643e-08],
        [ 0.0000e+00, -3.4067e-03]])
0.bias tensor([0.0100, 0.0100]) tensor([1.5734e-07, 9.8348e-07])
2.weight tensor([[ 0.0022, -0.0085],
        [ 0.0064,  0.0187]]) tensor([[-3.8376e-07, -2.4651e-04],
        [ 3.8376e-07,  2.4651e-04]])
2.bias tensor([0.0100, 0.0100]) tensor([-3.8259e-05,  3.8251e-05])


In [15]:
optimizer.step()

In [16]:
y_hat = model(X)
loss = lossfun(y_hat, y)

In [17]:
y_hat

tensor([[0.0100, 0.0102],
        [0.0100, 0.0102],
        [0.0100, 0.0100],
        [0.0100, 0.0100]], grad_fn=<AddmmBackward0>)

In [18]:
loss

tensor(0.6931, grad_fn=<NllLossBackward0>)