In [32]:
import torch
from torch import nn
import numpy as np

# (1,1) Linear Neural Net

Create a (1,1) linear "neural net".
It has one input and one output and calculates

$y = weight * x + bias$

In [86]:
# Single-feature linear layer: one input, one output.
net = nn.Linear(in_features=1, out_features=1)
# Show the freshly initialised parameters (weight first, then bias).
net.weight, net.bias

(Parameter containing:
 tensor([[-0.3517]], requires_grad=True),
 Parameter containing:
 tensor([0.9578], requires_grad=True))

Feed some input to the "neural net".
We make a 2 x 1 tensor, i.e. a batch of two different inputs.

In [87]:
# Batch of two samples with one feature each (shape 2 x 1).
x = torch.tensor(
    [
        [1.0],  # first sample
        [2.0],  # second sample
    ]
)

In [88]:
# Calling the module applies the linear layer to every row of x at once.
y = net(x)
y  # bare last expression so the notebook displays the result

tensor([[0.6061],
        [0.2545]], grad_fn=<AddmmBackward>)

Take the weight and bias from above and calculate the output of the "neural net" by hand:

In [91]:
# Hand-check of y = weight * x + bias for the inputs x = 1 and x = 2.
# -0.3517 and 0.9578 are the weight and bias copied from the parameter printout
# above (4-decimal rounding); they only apply to that particular `net` instance.
-0.3517*1+0.9578, -0.3517*2+0.9578

(0.6061, 0.25439999999999996)

# (2,1) Linear Neural Network

Two inputs, one output

$y = weight[0]*x[0] + weight[1]*x[1] + bias$


In [72]:
# Linear layer mapping two input features onto a single output.
net = nn.Linear(in_features=2, out_features=1)
# Show the freshly initialised parameters (weight first, then bias).
net.weight, net.bias

(Parameter containing:
 tensor([[-0.4350,  0.2051]], requires_grad=True),
 Parameter containing:
 tensor([0.1787], requires_grad=True))

For fun, we also calculate the gradient of y with respect to x.
To this end, we need `x.requires_grad_(True)`, and consequently a `detach()` before every `.numpy()` conversion of x further below.

In [78]:
# Single sample with two features; gradient tracking is switched on in place
# (requires_grad_ returns the same tensor) so d(y)/d(x) can be requested later.
x = torch.tensor([1.0, 2.0]).requires_grad_(True)
# Forward pass through the layer.
y = net(x)
y

tensor([0.1540], grad_fn=<AddBackward0>)

In [81]:
# Pull plain NumPy arrays out of the autograd graph before doing arithmetic on them.
weight_np = net.weight.detach().numpy()
x_np = x.detach().numpy()
# Show the weights next to the element-wise products x[i] * weight[i].
weight_np, x_np * weight_np

(array([[-0.43499947,  0.20514582]], dtype=float32),
 array([[-0.43499947,  0.41029164]], dtype=float32))

In [83]:
# Re-compute the layer output by hand: sum of x[i] * weight[i], plus the bias.
x_np = x.detach().numpy()
weight_np = net.weight.detach().numpy()
bias_np = net.bias.detach().numpy()
np.sum(x_np * weight_np) + bias_np

array([0.1539852], dtype=float32)

In [84]:
# Materialise the (name, parameter) pairs that the layer exposes.
[(name, param) for name, param in net.named_parameters()]

[('weight',
  Parameter containing:
  tensor([[-0.4350,  0.2051]], requires_grad=True)),
 ('bias',
  Parameter containing:
  tensor([0.1787], requires_grad=True))]

Now we calculate the gradient of y with respect to x. Because y is linear in x, the gradient is equal to the weights.

In [85]:
# Backpropagate from y; autograd stores dy/dx in x.grad because gradient
# tracking was enabled on x.
y.backward()
print(x.grad)  # expected to match net.weight, since y is linear in x

tensor([-0.4350,  0.2051])


# (2,1) Network with non-linear activation function (tanh)

In [148]:
# Two-stage model: a (2,1) linear layer followed by a tanh activation.
linear_layer = nn.Linear(2, 1)
net = nn.Sequential(linear_layer, nn.Tanh())
# One sample with two features.
x = torch.tensor([1.0, 2.0])
# Forward pass: tanh is applied to the linear layer's output.
y = net(x)
y

tensor([0.3251], grad_fn=<TanhBackward>)

What happens? The neural network calculates

$y = \tanh ( weight[0]*x[0] + weight[1]*x[1] + bias )$

I.e. it takes the output from the first layer (which is a linear network) and applies the second layer (tanh) to it.

In [159]:
# Reproduce the network output by hand from the first layer's parameters.
linear = net[0]
pre_activation = (
    np.sum(x.numpy() * linear.weight.detach().numpy())
    + linear.bias.detach().numpy()
)
# Apply the same non-linearity as the second layer.
np.tanh(pre_activation)

array([0.3251395], dtype=float32)