In [5]:
# Following along with the PyTorch tutorial at
# https://pytorch.org/tutorials/beginner/nlp/deep_learning_tutorial.html

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [6]:
# Check whether this PyTorch build can use cuDNN. The top-level
# `torch.has_cudnn` attribute is deprecated in newer PyTorch releases;
# the backends API is the supported way to ask the same question.
torch.backends.cudnn.is_available()

True

In [7]:
# Pin PyTorch's global RNG so the random tensors drawn in later cells are
# reproducible from run to run.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x275d8ee2b10>

In [9]:
# Affine layer f(x) = x A^T + b taking R^5 -> R^3 (learnable parameters A, b).
lin = nn.Linear(in_features=5, out_features=3)
# A batch of two 5-dimensional rows. A maps 5 -> 3, so can the whole 2x5
# batch be pushed through the layer at once?
data = torch.randn((2, 5))
# Yes: the layer is applied row by row, giving a 2x3 result.
print(lin(data))

tensor([[ 0.1755, -0.3268, -0.5069],
        [-0.6602,  0.2260,  0.1089]], grad_fn=<ThAddmmBackward>)


In [10]:
# Display the random 2x5 input batch that was passed through `lin`.
data

tensor([[-1.1948,  0.0250, -0.7627,  1.3969, -0.3245],
        [ 0.2879,  1.0579,  0.9621,  0.3935,  1.1322]])

In [14]:
# Display the layer's weight matrix A; nn.Linear(5, 3) stores it as 3x5
# (one row per output feature).
lin.weight

Parameter containing:
tensor([[ 0.2304, -0.1974, -0.0867,  0.2099, -0.4210],
        [ 0.2682, -0.0920,  0.2275,  0.0622, -0.0548],
        [ 0.1240,  0.0221,  0.1633, -0.1743, -0.0326]], requires_grad=True)

In [15]:
# Display the layer's bias vector b (one entry per output feature, so length 3).
lin.bias

Parameter containing:
tensor([-0.0403,  0.0648, -0.0018], requires_grad=True)

In [29]:
# Reproduce the linear layer by hand in NumPy: A @ x^T + b.
# The bias is reshaped to a column so it broadcasts across the batch axis;
# the result is therefore laid out (out_features, batch), i.e. transposed
# relative to what lin(data) returns.
mmul = (
    lin.weight.detach().numpy() @ data.numpy().T
    + lin.bias.detach().numpy()[:, None]
)

In [30]:
# Display the hand-computed affine map (3x2: output features by batch rows).
mmul

array([[ 0.17545223, -0.66023815],
       [-0.3268445 ,  0.22604857],
       [-0.5068932 ,  0.1089043 ]], dtype=float32)

In [38]:
# Confirm the hand-rolled NumPy result agrees with the layer's own output.
# `mmul` is (3, 2), so it is transposed to line up with lin(data)'s (2, 3).
# np.allclose is the idiomatic spelling of isclose(...).all() with the same
# default tolerances, and it returns a plain Python bool.
np.allclose(lin(data).detach().numpy(), mmul.T)

True

In [41]:
# ReLU demo: draw a small random matrix, show it, then show it again with
# every negative entry clamped to zero (positives pass through unchanged).
relu_data = torch.randn((2, 2))
print(relu_data, end='\n\n')  # trailing blank line separates the two tensors
print(F.relu(relu_data))

tensor([[ 1.8213, -0.1814],
        [-0.9515,  0.4057]])

tensor([[1.8213, 0.0000],
        [0.0000, 0.4057]])


In [42]:
# softmax (and log_softmax) live in torch.nn.functional as well.
data = torch.randn(5)
# Printed in order, one per line:
#   1. the raw values
#   2. their softmax along dim 0
#   3. the softmax's sum -- 1, because softmax yields a probability distribution
#   4. log_softmax of the same values
print(
    data,
    F.softmax(data, dim=0),
    F.softmax(data, dim=0).sum(),
    F.log_softmax(data, dim=0),
    sep='\n',
)

tensor([-1.5164,  0.7322,  2.2820, -1.2080,  1.1120])
tensor([0.0142, 0.1347, 0.6347, 0.0194, 0.1970])
tensor(1.)
tensor([-4.2530, -2.0044, -0.4546, -3.9446, -1.6246])


In [43]:
# Convert the softmax input to a NumPy array so softmax can be recomputed by hand.
# The original `data.detach().numpy()` rebinds `data` to an ndarray, so running
# the cell a second time fails (ndarrays have no `.detach`). torch.as_tensor
# returns a tensor unchanged and wraps an ndarray, which makes this cell safe
# to re-run while giving the same result on the first run.
data = torch.as_tensor(data).detach().numpy()

In [44]:
# Display the same five values, now held as a float32 NumPy array.
data

array([-1.5164275,  0.732198 ,  2.2819784, -1.2080135,  1.1120269],
      dtype=float32)

In [52]:
# Softmax straight from its definition: exp(x_i) / sum_j exp(x_j).
# The values should match what F.softmax printed for the same input.
np.exp(data) / np.exp(data).sum()

array([0.01422105, 0.13474025, 0.6346853 , 0.01935861, 0.19699487],
      dtype=float32)