CSS24N: PyTorch Tutorial

In [2]:
import torch
import torch.nn as nn


import pprint
pp = pprint.PrettyPrinter()

Tensors

In [2]:
list_of_lists = [
    [1,2,3],
    [4,5,6],
]
print(list_of_lists)

[[1, 2, 3], [4, 5, 6]]


In [3]:
#Initializing a tensor
data = torch.tensor([
                    [0,1],
                    [2,3],
                    [4,5]
                   ])

print(data)

tensor([[0, 1],
        [2, 3],
        [4, 5]])


In [4]:
# Initializing a tensor with an explicit data type
# Notice the dots after the numbers, which specify that they are float
data = torch.tensor([
                    [0,1],
                    [2,3],
                    [4,5]
                   ], dtype=torch.float32)

print(data)

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])


In [5]:
# Initializing a tensor with an explicit data type
# Notice the dots after the numbers, which specify that they are float
data = torch.tensor([
                    [0.1111111,1],
                    [2,3],
                    [4,5]
                   ], dtype=torch.float32)

print(data)

tensor([[0.1111, 1.0000],
        [2.0000, 3.0000],
        [4.0000, 5.0000]])


In [6]:
# Initializing a tensor without an explicit data type
# A single float entry (0.1111111) makes the whole tensor print as floats
data = torch.tensor([
                    [0.1111111,1],
                    [2,3],
                    [4,5]
                   ])

print(data)

tensor([[0.1111, 1.0000],
        [2.0000, 3.0000],
        [4.0000, 5.0000]])


In [7]:
zeros = torch.zeros(2,5)    # a tensor of all zeros
print(zeros)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [8]:
ones = torch.ones(3,4)      # a tensor of all ones
print(ones)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


In [9]:
rr = torch.arange(1, 10)
print(rr)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])


In [10]:
rr + 2

tensor([ 3,  4,  5,  6,  7,  8,  9, 10, 11])

In [11]:
rr * 2

tensor([ 2,  4,  6,  8, 10, 12, 14, 16, 18])

In [12]:
a = torch.tensor([[1,2], [2,3], [4,5]])        #(3,2)
b = torch.tensor ([[1,2,3,4], [5,6,7,8]])      #(2,4)

print(a)
print(b)
print(a.matmul(b))
print(a @ b)

tensor([[1, 2],
        [2, 3],
        [4, 5]])
tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])
tensor([[11, 14, 17, 20],
        [17, 22, 27, 32],
        [29, 38, 47, 56]])
tensor([[11, 14, 17, 20],
        [17, 22, 27, 32],
        [29, 38, 47, 56]])


In [13]:
v = torch.tensor([1,2,3])

In [14]:
v.shape

torch.Size([3])

In [15]:
torch.tensor([[1,2,3],[4,5,6]]) @ v  #(2,3) @ (3) -> (2)

tensor([14, 32])

In [16]:
matr_2d = torch.tensor([[1,2,3], [4,5,6]])
print(matr_2d.shape)
print(matr_2d)

torch.Size([2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [17]:
matr_3d = torch.tensor([[[1,2,3,4], [-2,5,6,9]], [[5,6,7,2], [8,9,10,4]], [[-3,2,2,1], [4,6,5,9]]])
print(matr_3d.shape)
print(matr_3d)

torch.Size([3, 2, 4])
tensor([[[ 1,  2,  3,  4],
         [-2,  5,  6,  9]],

        [[ 5,  6,  7,  2],
         [ 8,  9, 10,  4]],

        [[-3,  2,  2,  1],
         [ 4,  6,  5,  9]]])


In [18]:
# Reshaping 

rr = torch.arange(1,16)
print(rr.shape)
print(rr)
print()
rr = rr.view(5,3)
print(rr.shape)
print(rr)

torch.Size([15])
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

torch.Size([5, 3])
tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12],
        [13, 14, 15]])


In [19]:
# Inter-convert tensors with NumPy arrays
import numpy as np 

# numpy.ndarray --> torch.Tensor:
arr = np.array([1,0,5])
data = torch.tensor(arr)
print(data)

# torch.Tensor --> numpy.ndarray:
new_arr = data.numpy()
print(new_arr)


tensor([1, 0, 5], dtype=torch.int32)
[1 0 5]


In [24]:
data = torch.arange(1,36,dtype=torch.float32).reshape(5,7)
print(data)

print(data.sum(dim=0))
print(data.sum(dim=1))
print(data.std(dim=1))

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19., 20., 21.],
        [22., 23., 24., 25., 26., 27., 28.],
        [29., 30., 31., 32., 33., 34., 35.]])
tensor([ 75.,  80.,  85.,  90.,  95., 100., 105.])
tensor([ 28.,  77., 126., 175., 224.])
tensor([2.1602, 2.1602, 2.1602, 2.1602, 2.1602])


In [25]:
data.sum()

tensor(630.)

In [27]:
data = torch.tensor([[1, 2.2, 9.6], [4, -7.2, 6.3]])

row_avg = data.mean(dim = 1)
col_avg = data.mean(dim = 0)

print(row_avg.shape)
print(row_avg)

print(col_avg.shape)
print(col_avg)

torch.Size([2])
tensor([4.2667, 1.0333])
torch.Size([3])
tensor([ 2.5000, -2.5000,  7.9500])


Indexing

In [28]:
#Initialize an example tensor
x = torch.Tensor([
                    [[1,2], [3,4]],
                    [[5,6], [7,8]],
                    [[9,10], [11,12]]                  
                ])
x

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [29]:
x.shape

torch.Size([3, 2, 2])

In [30]:
x[0]

tensor([[1., 2.],
        [3., 4.]])

In [31]:
x[:, 0]

tensor([[ 1.,  2.],
        [ 5.,  6.],
        [ 9., 10.]])

In [32]:
matr = torch.arange(1,16).view(5,3)
print(matr)

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12],
        [13, 14, 15]])


In [33]:
matr[0]

tensor([1, 2, 3])

In [34]:
matr[0, :]

tensor([1, 2, 3])

In [35]:
matr[:, 0]

tensor([ 1,  4,  7, 10, 13])

In [37]:
matr[0:3]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [38]:
matr[:, 0:2]

tensor([[ 1,  2],
        [ 4,  5],
        [ 7,  8],
        [10, 11],
        [13, 14]])

In [39]:
matr[0:3, 0:2]

tensor([[1, 2],
        [4, 5],
        [7, 8]])

In [40]:
matr[0][2]

tensor(3)

In [41]:
matr[0:3, 2]

tensor([3, 6, 9])

In [43]:
matr[[0, 2, 4]]

tensor([[ 1,  2,  3],
        [ 7,  8,  9],
        [13, 14, 15]])

In [44]:
# Get the top left element of each element in our tensor
x[:, 0, 0]

tensor([1., 5., 9.])

In [45]:
x[:, :, :]

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [47]:
i = torch.tensor([0,0,1,1])
x[i]

tensor([[[1., 2.],
         [3., 4.]],

        [[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]],

        [[5., 6.],
         [7., 8.]]])

In [48]:
i = torch.tensor([1,2])
j = torch.tensor([0])
x[i,j]

tensor([[ 5.,  6.],
        [ 9., 10.]])

In [49]:
x[0,0,0]

tensor(1.)

In [50]:
x[0,0,0].item()

1.0

Autograd

In [51]:
# Create an example tensor
# requires_grad parameter tells PyTorch to store gradients
x = torch.tensor([2.], requires_grad=True)

# Print the gradient if it is calculated
# Currently None since no backward pass has been run yet
pp.pprint(x.grad)

None


In [52]:
# Calculating the gradient of y with respect to x
y = x * x * 3  #3x^2
y.backward()
pp.pprint(x.grad)  # d(y)/d(x) = d(3x^2)/d(x) = 6x = 12

tensor([12.])


In [53]:
# Build the same 3x^2 expression again and backpropagate a second time.
# backward() adds to x.grad instead of overwriting it, so the 12 from the
# previous cell plus the 12 from this one gives 24. This is why the training
# loop later in this notebook calls zero_grad() before every backward pass.
z = x * x * 3  #3x^2
z.backward()
pp.pprint(x.grad)

tensor([24.])


Neural Network Module

In [54]:
import torch.nn as nn

Linear Layer

In [9]:
# Create the inputs
input  = torch.ones(2,3,4)
# N * H_in -> N * H_out

# Make a linear layer transforming N,*,H_in dimensional inputs to N,*,H_out
# dimensional outputs
linear = nn.Linear(4,2)
linear_output = linear(input)
linear_output

tensor([[[-0.3182, -1.5017],
         [-0.3182, -1.5017],
         [-0.3182, -1.5017]],

        [[-0.3182, -1.5017],
         [-0.3182, -1.5017],
         [-0.3182, -1.5017]]], grad_fn=<ViewBackward0>)

In [10]:
list(linear.parameters())   # Ax + b

[Parameter containing:
 tensor([[-0.3661, -0.3881,  0.0692,  0.1403],
         [-0.4170, -0.3235, -0.3443, -0.2951]], requires_grad=True),
 Parameter containing:
 tensor([ 0.2265, -0.1218], requires_grad=True)]

Activation Function Layer

In [11]:
linear_output

tensor([[[-0.3182, -1.5017],
         [-0.3182, -1.5017],
         [-0.3182, -1.5017]],

        [[-0.3182, -1.5017],
         [-0.3182, -1.5017],
         [-0.3182, -1.5017]]], grad_fn=<ViewBackward0>)

In [12]:
sigmoid = nn.Sigmoid()
output = sigmoid(linear_output)
output

tensor([[[0.4211, 0.1822],
         [0.4211, 0.1822],
         [0.4211, 0.1822]],

        [[0.4211, 0.1822],
         [0.4211, 0.1822],
         [0.4211, 0.1822]]], grad_fn=<SigmoidBackward0>)

Putting the Layers Together

In [13]:
block = nn.Sequential(
    nn.Linear(4,2),
    nn.Sigmoid()
)

input = torch.ones(2,3,4)
output = block(input)
output

tensor([[[0.5146, 0.4353],
         [0.5146, 0.4353],
         [0.5146, 0.4353]],

        [[0.5146, 0.4353],
         [0.5146, 0.4353],
         [0.5146, 0.4353]]], grad_fn=<SigmoidBackward0>)

Custom Modules

In [27]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron expressed as a single ``nn.Sequential`` pipeline.

    The network applies, in order: a linear map from ``input_size`` to
    ``hidden_size`` features, a ReLU, a linear map back to ``input_size``
    features, and a sigmoid. The output therefore has the same last-dimension
    size as the input, with values between 0 and 1.

    Args:
        input_size: Size of the last dimension of the input (and of the output).
        hidden_size: Number of units in the hidden layer.
    """

    def __init__(self, input_size: int, hidden_size: int) -> None:
        # Zero-argument super() is bound to this class object itself. The
        # two-argument form looks the class up by its global name at call time,
        # which breaks once that name is rebound to another class (this
        # notebook defines a second class with the same name).
        super().__init__()

        # Bookkeeping: keep the constructor arguments on the instance.
        self.input_size = input_size
        self.hidden_size = hidden_size

        # The whole network lives in one container, so its parameters are
        # registered under the names model.0.* and model.2.*.
        self.model = nn.Sequential(
            nn.Linear(self.input_size, self.hidden_size),
            nn.ReLU(),
            nn.Linear(self.hidden_size, self.input_size),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the sequential pipeline and return the result."""
        return self.model(x)

In [28]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron with each layer stored as its own attribute.

    The forward pass applies ``linear`` (``input_size`` -> ``hidden_size``),
    ``relu``, ``linear2`` (``hidden_size`` -> ``input_size``) and ``sigmoid``
    in that order. The output has the same last-dimension size as the input,
    with values between 0 and 1.

    Args:
        input_size: Size of the last dimension of the input (and of the output).
        hidden_size: Number of units in the hidden layer.
    """

    def __init__(self, input_size: int, hidden_size: int) -> None:
        # Zero-argument super() is bound to this class object itself, so it
        # keeps working even if the global name MultilayerPerceptron is later
        # rebound (for example by re-running another definition cell).
        super().__init__()

        # Bookkeeping: keep the constructor arguments on the instance.
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Each layer is a separate attribute, so parameters are registered as
        # linear.weight, linear.bias, linear2.weight and linear2.bias.
        self.linear = nn.Linear(self.input_size, self.hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(self.hidden_size, self.input_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply linear -> ReLU -> linear2 -> sigmoid to ``x``."""
        hidden = self.relu(self.linear(x))
        return self.sigmoid(self.linear2(hidden))

In [29]:
input = torch.randn(2,5)
model = MultilayerPerceptron(5,3)
model(input)

tensor([[0.4809, 0.4138, 0.5427, 0.5882, 0.3883],
        [0.4693, 0.4129, 0.5534, 0.6052, 0.3752]], grad_fn=<SigmoidBackward0>)

In [30]:
list(model.named_parameters())

[('linear.weight',
  Parameter containing:
  tensor([[-0.4310, -0.4083, -0.1639, -0.0366, -0.2172],
          [ 0.0314, -0.1335,  0.1495, -0.2736, -0.0144],
          [ 0.3473, -0.3474, -0.2891,  0.3375,  0.2125]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([ 0.1087, -0.2151,  0.1045], requires_grad=True)),
 ('linear2.weight',
  Parameter containing:
  tensor([[ 0.3041,  0.2127, -0.1660],
          [ 0.1998,  0.2390, -0.0134],
          [-0.4420, -0.3497,  0.1536],
          [ 0.1077,  0.3634,  0.2538],
          [ 0.4338, -0.1611, -0.1994]], requires_grad=True)),
 ('linear2.bias',
  Parameter containing:
  tensor([ 0.0878, -0.3351,  0.0193,  0.1052, -0.2570], requires_grad=True))]

Optimization

In [31]:
import torch.optim as optim

In [32]:
y = torch.ones(10,5)

x = y + torch.randn_like(y)
x

tensor([[ 0.5544,  0.1459,  1.0044,  1.7458,  2.0460],
        [ 3.3121,  1.2519,  1.6468,  1.4870,  0.6172],
        [ 1.4931,  1.9620,  1.0585,  2.0545, -0.7695],
        [-0.0894,  1.9517,  3.1122,  0.5133,  0.8462],
        [ 1.2496, -0.2300, -0.7683,  0.2982,  1.1728],
        [ 1.7048,  1.6121,  1.0969, -1.4258,  1.5372],
        [-0.0469, -0.0060,  1.3264,  1.3942,  1.3191],
        [ 1.6710,  1.6651,  1.6418,  2.4056,  0.4815],
        [ 1.1138, -0.5905,  1.4405,  1.1449,  2.3507],
        [ 0.8765,  0.0885,  1.1176,  1.4609,  2.6048]])

In [33]:
model = MultilayerPerceptron(5,3)
adam = optim.Adam(model.parameters(), lr=1e-1)
loss_function = nn.BCELoss()
y_pred = model(x)
loss_function(y_pred, y).item()

  from .autonotebook import tqdm as notebook_tqdm


0.7802898287773132

In [34]:
# Number of optimisation steps; each step uses the whole tensor x at once.
n_epoch = 10

for epoch in range(n_epoch):
    # Clear gradients left over from the previous step, because backward()
    # accumulates into .grad rather than overwriting it.
    adam.zero_grad()
    # Forward pass: predictions for the noisy inputs x.
    y_pred = model(x)
    # BCELoss between the predictions and the all-ones target y.
    loss = loss_function(y_pred, y)
    print(f"Epoch {epoch}: training loss: {loss}")
    # Backward pass: compute gradients of the loss w.r.t. the model parameters.
    loss.backward()
    # Update the parameters that were handed to the Adam optimizer.
    adam.step()

Epoch 0: training loss: 0.7802898287773132
Epoch 1: training loss: 0.6248525977134705
Epoch 2: training loss: 0.5123491883277893
Epoch 3: training loss: 0.4047016203403473
Epoch 4: training loss: 0.31168022751808167
Epoch 5: training loss: 0.23499569296836853
Epoch 6: training loss: 0.17042464017868042
Epoch 7: training loss: 0.11568012088537216
Epoch 8: training loss: 0.07212550193071365
Epoch 9: training loss: 0.04149128869175911


In [36]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.4422, -0.1639, -0.2280, -0.4130, -0.3278],
         [-0.8402, -0.0077, -0.7542,  0.0268, -0.2037],
         [ 0.4339,  0.7685,  0.7886,  0.7640,  1.0076]], requires_grad=True),
 Parameter containing:
 tensor([-0.7665, -0.6243,  1.2091], requires_grad=True),
 Parameter containing:
 tensor([[ 0.8375,  0.3541,  0.9586],
         [ 0.2751, -0.0918,  1.4028],
         [ 0.1885,  0.4533,  1.0643],
         [-0.0490,  0.1128,  1.3288],
         [ 0.2006,  0.0415,  0.3433]], requires_grad=True),
 Parameter containing:
 tensor([1.1072, 0.4132, 1.1789, 1.0029, 0.4870], requires_grad=True)]