In [2]:
import torch

#  Matrix Manipulation

In [9]:
# Start from a 2x3 matrix of float zeros; later cells build on this tensor.
m = torch.zeros(2, 3)
m

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [10]:
# In-place add: mutates the existing tensor rather than creating a new one.
# Not idempotent - re-running this cell adds another 2 to every entry.
m.add_(2)
m

tensor([[2., 2., 2.],
        [2., 2., 2.]])

In [11]:
# Element-wise scaling; out-of-place, so `m` itself is left untouched.
torch.mul(m, 2)

tensor([[4., 4., 4.],
        [4., 4., 4.]])

In [16]:
# Swap dims 0 and 1: (2, 3) -> (3, 2).
m.t()

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])

In [17]:
# matrix product: (2, 3) @ (3, 2) -> (2, 2)
torch.matmul(m, m.T)

tensor([[12., 12.],
        [12., 12.]])

In [21]:
# Dimensions of the tensor as a torch.Size.
m.size()

torch.Size([2, 3])

In [18]:
# With no `dim`, the sum runs over every element and returns a 0-dim tensor.
torch.sum(m)

tensor(12.)

In [23]:
# Re-display `m`: the multiply / transpose / matmul / sum cells are out-of-place,
# so it still holds the values left by the in-place `m += 2`.
m

tensor([[2., 2., 2.],
        [2., 2., 2.]])

In [20]:
# Collapse dim 0 (the rows): one total per column. keepdim=True keeps the
# reduced dimension as size 1, so the result is (1, 3) rather than (3,).
torch.sum(m, dim=0, keepdim=True)

tensor([[4., 4., 4.]])

In [22]:
# Collapse dim 1 (the columns): one total per row. keepdim=True keeps the
# reduced dimension as size 1, so the result is (2, 1) rather than (2,).
torch.sum(m, dim=1, keepdim=True)

tensor([[6.],
        [6.]])

In [24]:
# Row-normalize: the (2, 1) row totals broadcast across the 3 columns of the
# (2, 3) matrix, so every row of the result sums to 1.
torch.div(m, m.sum(dim=1, keepdim=True))

tensor([[0.3333, 0.3333, 0.3333],
        [0.3333, 0.3333, 0.3333]])

In [28]:
# Indexing a single entry still returns a (0-dim) tensor, not a Python number.
m[0][0]

tensor(2.)

In [31]:
# .item() unwraps a one-element tensor into a plain Python scalar.
m[0][0].item()

2.0

In [34]:
# Random 3x2 weight matrix (uniform in [0, 1)); requires_grad=True asks autograd
# to track operations on it.
W = torch.rand(3, 2, requires_grad=True)
W

tensor([[0.3219, 0.7722],
        [0.1214, 0.5174],
        [0.1747, 0.5430]], requires_grad=True)

In [41]:
# .item() returns the entry as a Python float at full precision
# (the tensor repr rounds what it displays).
W[0][0].item()

0.3219485282897949

In [43]:
# `.grad` is only populated on leaf tensors. `W[0, 0]` is the result of an
# indexing op (a non-leaf), so reading its `.grad` gives None plus a UserWarning.
# Read the gradient from the leaf `W` itself: it is None until a backward() call
# has run, after which a single entry is available as `W.grad[0, 0]`.
W.grad


In [45]:
# Shape and total number of elements (3 * 2 = 6).
W.size(), W.numel()

(torch.Size([3, 2]), 6)

In [47]:
# 3x4 tensor of standard-normal samples, used by the reshape examples that follow.
t = torch.randn(3, 4)
t

tensor([[-3.0326, -0.1882,  0.3196,  0.8407],
        [ 0.3396, -1.4239, -0.9254,  1.7809],
        [-1.1648, -0.1799,  0.2390,  1.2073]])

In [51]:
# Reshape tensors with view(): -1 asks PyTorch to infer that dimension from the
# element count, so the 12 elements laid out in 6 rows give 2 columns.
# Elements keep their row-major order.

t.view(6, -1)

tensor([[-3.0326, -0.1882],
        [ 0.3196,  0.8407],
        [ 0.3396, -1.4239],
        [-0.9254,  1.7809],
        [-1.1648, -0.1799],
        [ 0.2390,  1.2073]])

In [48]:
# Flatten to 1-D (all 12 elements, row-major order).
t.reshape(-1)

tensor([-3.0326, -0.1882,  0.3196,  0.8407,  0.3396, -1.4239, -0.9254,  1.7809,
        -1.1648, -0.1799,  0.2390,  1.2073])

In [13]:
# Batched matmul + broadcasting: `@` contracts the last dim of m1 (7) with the
# first dim of m2 (7) and treats m1's leading dims (4, 5) as batch dims; the
# length-10 vector m3 then broadcasts over every leading dim of the product.
m1 = torch.randn(4, 5, 6, 7)
m2 = torch.randn(7, 10)
m3 = torch.randn(10)

m1.shape, m2.shape, (torch.matmul(m1, m2) + m3).shape

(torch.Size([4, 5, 6, 7]), torch.Size([7, 10]), torch.Size([4, 5, 6, 10]))

# NN cheatsheet

In [57]:
# Integer class indices; int64 (long) is the dtype inferred from Python ints.
xs = torch.tensor([1, 2, 3], dtype=torch.long)
xs

tensor([1, 2, 3])

In [64]:
# one-hot encoding: each index i becomes a length-5 row with a 1 at position i

torch.nn.functional.one_hot(xs, 5)

tensor([[0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0]])

In [27]:
# softmax, written out by hand

N = 5  # number of classes

# One row of N raw scores (logits), sampled uniformly from [0, 1).
logits = torch.rand((1, N))
# Exponentiate so every entry is positive ("unnormalized counts").
counts = logits.exp()
# Normalize each row independently: reducing over dim=1 with keepdim=True gives
# a (rows, 1) column that broadcasts against (rows, N). A bare counts.sum()
# would divide by the total over *all* rows, which is only right for one row.
probs = counts / counts.sum(dim=1, keepdim=True)
logits, counts, probs

(tensor([[0.7412, 0.7900, 0.3212, 0.7595, 0.4772]]),
 tensor([[2.0985, 2.2035, 1.3788, 2.1371, 1.6115]]),
 tensor([[0.2225, 0.2337, 0.1462, 0.2266, 0.1709]]))

In [32]:
# Built-in softmax over dim=1: matches the manual exp / normalize result above.
# Prefer it in practice, as it copes with very large logits where a naive exp() would overflow.
logits.softmax(dim=1)

tensor([[0.2225, 0.2337, 0.1462, 0.2266, 0.1709]])

In [36]:
# negative log-likelihood loss function and entropy
# NOTE(review): this adds up -log(p) over *every* class in `probs`. A per-example
# NLL would use only the entry of the correct class (e.g. -probs[0, target].log());
# no label is defined in this notebook, so the computation is left as written - confirm intent.
nll = -torch.log(probs).sum().item()
nll

8.130066871643066

In [42]:
# CrossEntropyLoss is a module: construct it first, then call it with
# (logits, target). Writing `torch.nn.CrossEntropyLoss(logits)` hands the logits
# to the constructor's first parameter (`weight`, the per-class rescaling
# weights) and only builds a loss object - no loss value is computed.
target = torch.tensor([0])  # illustrative class index, one per row of `logits`; substitute the real label
# Takes raw logits (log-softmax is applied internally); for this single row the
# result equals -log(probs[0, target]).
torch.nn.CrossEntropyLoss()(logits, target)

In [43]:
# Rebinds `t` (previously 3x4) to a fresh 2x4 standard-normal sample for the
# axis-reduction example below.
t = torch.randn(2, 4)
t

tensor([[-0.0789, -1.6703, -0.7573, -1.2772],
        [ 1.2356, -0.8805, -1.3549, -0.6466]])

In [46]:
t.sum(dim=0), t.sum(dim=1)

# dim=0: collapses the row dimension -> one sum per column (4 values)
# dim=1: collapses the column dimension -> one sum per row (2 values)

(tensor([ 1.1567, -2.5508, -2.1122, -1.9238]), tensor([-3.7837, -1.6463]))