# Learning Pytorch

This is a notebook dedicated to studying the pytorch library.

In [55]:
# Importing libraries
import torch
import numpy as np

# Defining matrices

An uninitialized matrix is declared, but does not contain definite known values before it is used. When an uninitialized matrix is created, whatever values were in the allocated memory at the time will appear as the initial values.

* *Matrices are tensors in pytorch*

In [6]:
# Allocate a 3x3 tensor without initialising it: the contents are whatever
# happened to be in that memory, so the printed values are arbitrary.
A = torch.empty((3, 3))
print(A)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


Create a random matrix

In [8]:
# Sample every entry uniformly from [0, 1).
A = torch.rand((3, 3))
print(A)

tensor([[7.5744e-01, 4.5145e-01, 2.9504e-05],
        [1.6367e-01, 4.9357e-01, 9.4373e-02],
        [1.9575e-01, 1.5126e-01, 2.2690e-02]])


***
Create a zeros matrix of a certain type (e.g. long)

In [23]:
# Integer zeros: torch.int64 is the same dtype as torch.long.
A = torch.zeros((3, 3), dtype=torch.int64)
print(A)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


***
Create tensor from data

In [20]:
# Build the tensor from a nested Python list (one inner list per row).
rows = [
    [1, 2, 3],
    [1, 2, 3],
    [1, 2, 3],
]
A = torch.tensor(rows)
print(A)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])


***
Create a tensor based on an existing tensor. These methods will reuse properties of the input tensor, e.g. dtype, unless new values are provided by the user.

In [21]:
# Show A as it is before being replaced.
print('Before: ', A)

# new_ones builds a 3x3 tensor of ones from A; the dtype is overridden to float64.
A = A.new_ones((3, 3), dtype=torch.float64)
print('\nAfter:', A)

# randn_like keeps A's 3x3 shape; the dtype is overridden to float32.
A = torch.randn_like(A, dtype=torch.float32)
print('\nAfter:', A)

Before:  tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

After: tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

After: tensor([[-1.6302, -0.2963, -0.1537],
        [ 1.0368, -0.9728,  0.0580],
        [-0.4342, -0.2210,  0.4961]])


***
Getting tensor sizes

In [24]:
# Full shape of A, returned as a torch.Size.
A.size()

torch.Size([3, 3])

In [25]:
# Passing a dimension index returns just that dimension's length (here: dim 0).
A.size(0)

3

In [26]:
# Length of dimension 1.
A.size(1)

3

# Operations

In [32]:
# Two independent 3x4 uniform-random operands.
A, B = torch.rand(3, 4), torch.rand(3, 4)

# Print both, separated by one blank line.
print(A, B, sep='\n\n')

tensor([[0.7633, 0.3361, 0.5148, 0.8237],
        [0.9560, 0.9681, 0.8387, 0.2981],
        [0.7193, 0.4462, 0.8869, 0.3316]])

tensor([[0.3422, 0.5406, 0.8569, 0.0255],
        [0.8350, 0.0786, 0.3037, 0.2474],
        [0.6080, 0.3772, 0.1104, 0.2438]])


In [33]:
# Operator form and functional form of the same element-wise sum,
# printed with a blank line between them.
print(A + B, torch.add(A, B), sep='\n\n')


tensor([[1.1055, 0.8767, 1.3717, 0.8492],
        [1.7910, 1.0467, 1.1424, 0.5455],
        [1.3273, 0.8234, 0.9972, 0.5754]])

tensor([[1.1055, 0.8767, 1.3717, 0.8492],
        [1.7910, 1.0467, 1.1424, 0.5455],
        [1.3273, 0.8234, 0.9972, 0.5754]])


***
You can pass a pre-allocated tensor as the ```out``` argument to receive the result

In [35]:
# Pre-allocate the destination, then have torch.add write the sum into it.
res = torch.empty((3, 4))
torch.add(A, B, out=res)
print(res, end='\n\n')

# Plain assignment of the sum, for comparison.
res = A + B
print(res)

tensor([[1.1055, 0.8767, 1.3717, 0.8492],
        [1.7910, 1.0467, 1.1424, 0.5455],
        [1.3273, 0.8234, 0.9972, 0.5754]])

tensor([[1.1055, 0.8767, 1.3717, 0.8492],
        [1.7910, 1.0467, 1.1424, 0.5455],
        [1.3273, 0.8234, 0.9972, 0.5754]])


***
Additions in-place

* *Any operation with a postfix ```_``` will change the value of the variable in place*

In [36]:
# add_ mutates B in place and returns it, so the result can be printed directly.
print(B.add_(A))

tensor([[1.1055, 0.8767, 1.3717, 0.8492],
        [1.7910, 1.0467, 1.1424, 0.5455],
        [1.3273, 0.8234, 0.9972, 0.5754]])


***
Other operations

In [37]:
# Fresh 3x3 uniform-random operands for the remaining arithmetic examples.
A, B = torch.rand(3, 3), torch.rand(3, 3)

# Print both, separated by one blank line.
print(A, B, sep='\n\n')

tensor([[0.0867, 0.6522, 0.3563],
        [0.3901, 0.4485, 0.0518],
        [0.7424, 0.8825, 0.7586]])

tensor([[0.8049, 0.0878, 0.8091],
        [0.9244, 0.7567, 0.2840],
        [0.2652, 0.0349, 0.6687]])


In [38]:
# Element-wise product using the * operator (not a matrix product).
A * B

tensor([[0.0698, 0.0573, 0.2883],
        [0.3606, 0.3394, 0.0147],
        [0.1969, 0.0308, 0.5073]])

In [42]:
# Functional form of the element-wise product.
torch.mul(A, B)

tensor([[0.0698, 0.0573, 0.2883],
        [0.3606, 0.3394, 0.0147],
        [0.1969, 0.0308, 0.5073]])

In [43]:
# Element-wise division using the / operator.
A / B

tensor([[ 0.1078,  7.4243,  0.4403],
        [ 0.4220,  0.5928,  0.1822],
        [ 2.7995, 25.3155,  1.1346]])

In [44]:
# Functional form of the element-wise division.
torch.div(A, B)

tensor([[ 0.1078,  7.4243,  0.4403],
        [ 0.4220,  0.5928,  0.1822],
        [ 2.7995, 25.3155,  1.1346]])

In [47]:
# Allocate an uninitialised 3x3 tensor and fill it in place with A's values
# (copy_ returns the destination tensor, so the calls can be chained).
C = torch.empty((3, 3)).copy_(A)
C

tensor([[0.0867, 0.6522, 0.3563],
        [0.3901, 0.4485, 0.0518],
        [0.7424, 0.8825, 0.7586]])

In [58]:
u = torch.tensor([1, 2, 3])
v = u.clone()  # independent tensor holding the same values

# torch.dot accepts 1-D tensors only
torch.dot(u, v)

tensor(14)

In [60]:
# A 3x3 matrix and a 3x1 column, both with uniform-random entries.
A, b = torch.rand(3, 3), torch.rand(3, 1)

print(A, b, sep='\n')

tensor([[0.4575, 0.3944, 0.4304],
        [0.4790, 0.0502, 0.2371],
        [0.8485, 0.3766, 0.5591]])
tensor([[0.5433],
        [0.1352],
        [0.8103]])


In [62]:
# Matrix product (not element-wise): the 3x3 matrix A times the 3x1 column b
# gives a 3x1 result.
torch.matmul(A,b)

tensor([[0.6506],
        [0.4592],
        [0.9649]])

In [64]:
# Indexing along the first dimension: row 1 of the 3x1 column b,
# which is itself a one-element tensor.
b[1]

tensor([0.1352])

In [66]:
# First column of A, using the same slicing syntax as numpy
# (":" selects every row, 0 selects the first column).
A[:,0]

tensor([0.4575, 0.4790, 0.8485])

In [67]:
# First row of A: a single index applies to the first dimension.
A[0]

tensor([0.4575, 0.3944, 0.4304])

***
Resizing and reshaping tensors

In [68]:
A = torch.randn((4, 4))
# view re-interprets the 16 values of A as a flat vector.
B = A.view(16)

print(B, B.shape)

tensor([ 0.4216,  0.0295, -1.0160, -0.2457,  0.2064,  0.5868, -0.3122,  0.1365,
         0.1204, -1.1389,  0.0130, -1.6980, -0.5227,  0.3761, -0.0141,  0.9038]) torch.Size([16])


In [69]:
# -1 lets view infer that dimension from the other one (16 values / 8 = 2 rows).
C = A.view((-1, 8))

print(C, C.shape)

tensor([[ 0.4216,  0.0295, -1.0160, -0.2457,  0.2064,  0.5868, -0.3122,  0.1365],
        [ 0.1204, -1.1389,  0.0130, -1.6980, -0.5227,  0.3761, -0.0141,  0.9038]]) torch.Size([2, 8])


In [71]:
# One-element integer tensor.
x = torch.tensor((16,))
x

tensor([16])

In [75]:
# Converting x to a Python number with .item().
# The name x is rebound to the result, so on a second run of this cell x is
# already a plain int and a bare x.item() would raise AttributeError.
# torch.as_tensor passes an existing tensor through and wraps a plain number,
# which keeps the cell safe to re-run.
x = torch.as_tensor(x).item()
x

16

# Numpy Bridge

Converting a Torch Tensor to a NumPy array and vice versa is a breeze.

The Torch Tensor and NumPy array will share their underlying memory locations (if the Torch Tensor is on CPU), and changing one will change the other.

In [79]:
# Five ones, in the default floating-point dtype.
A = torch.ones((5,))
print(A)

tensor([1., 1., 1., 1., 1.])


In [80]:
# NumPy array obtained from the CPU tensor A. The two share memory, so a
# later in-place change to A is visible through B as well.
B = A.numpy()
print(B)

[1. 1. 1. 1. 1.]


In [81]:
# In-place increment of the tensor; B (the NumPy array obtained from A) is
# printed too, to show that it changed along with A.
A.add_(1)
print(A, B, sep='\n')

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


***
The same can be done from numpy to torch

In [86]:
A = np.ones(5)
# from_numpy wraps the array's buffer instead of copying it.
B = torch.from_numpy(A)

# In-place increment of the NumPy array ...
A += 1

# ... is visible through the tensor as well.
print(A, B, sep='\n')

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


# Moving tensors between devices

Tensors can be moved onto any device using the .to method.

In [87]:
A = torch.rand(3)
print(A)

# The GPU part only runs when CUDA is present; otherwise the cell just prints A.
if torch.cuda.is_available():
    device = torch.device('cuda')
    # Created directly on the GPU, with A's shape and dtype.
    B = torch.ones_like(A, device=device)
    # Moved from CPU to GPU.
    A = A.to(device)
    C = torch.add(A, B)
    print(C)
    # .to can change the device and the dtype in a single call.
    print(C.to('cpu', torch.float64))

tensor([0.5002, 0.4467, 0.1819])
tensor([1.5002, 1.4467, 1.1819], device='cuda:0')
tensor([1.5002, 1.4467, 1.1819], dtype=torch.float64)


# Autograd: Automatic Differentiation

The autograd package provides automatic differentiation for all operations on Tensors. It is a define-by-run framework, which means that your backprop is defined by how your code is run, and that every single iteration can be different.

```torch.Tensor``` is the central class of the package. If you set its attribute ```.requires_grad``` as ```True```, it starts to track all operations on it. When you finish your computation you can call ```.backward()``` and have all the gradients computed automatically. The gradient for this tensor will be accumulated into ```.grad``` attribute.

To stop a tensor from tracking history, you can call ```.detach()``` to detach it from the computation history, and to prevent future computation from being tracked.

To prevent tracking history (and using memory), you can also wrap the code block in ```with torch.no_grad():```. This can be particularly helpful when evaluating a model because the model may have trainable parameters with ```requires_grad=True```, but for which we don’t need the gradients.

```Tensor``` and ```Function``` are interconnected and build up an acyclic graph, that encodes a complete history of computation. Each tensor has a ```.grad_fn``` attribute that references a ```Function``` that has created the Tensor (except for Tensors created by the user - their ```grad_fn``` is ```None```).

In [106]:
# requires_grad=True makes autograd track the operations applied to A.
A = torch.ones((2, 2), requires_grad=True)
print(A)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [107]:
# B is the result of an operation on A, so it is printed with a grad_fn.
B = torch.add(A, 2)
print(B)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [108]:
# B was created by an operation (A + 2), so grad_fn references the Function
# object that produced it.
print(B.grad_fn)

<AddBackward0 object at 0x000001728DC2BCA0>


In [109]:
# C = 3 * B^2 element-wise, then reduced to a scalar with the mean.
C = B.mul(B).mul(3)
out = torch.mean(C)

print(C, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


# Gradients

In [110]:
# This has to run in order to obtain the derivative for A:
# back-propagating from the scalar `out` fills in A.grad.
out.backward()

In [111]:
# d(out)/dA. With out = mean(3 * (A + 2)^2) over 4 elements, each entry is
# 6 * (A + 2) / 4 = 4.5 at A = 1.
A.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [118]:
A = torch.randn(3, requires_grad=True)
print(A)

# Keep doubling B until its norm reaches 1000. The number of iterations
# depends on the random A, so the recorded graph can differ from run to run.
B = A * 2
# detach() replaces the legacy .data attribute: it gives a graph-free tensor
# for the norm check, so the loop condition is not tracked by autograd.
while B.detach().norm() < 1000:
    B = B * 2

print(B)

tensor([ 0.9804,  0.1342, -0.1519], requires_grad=True)
tensor([1003.8809,  137.3787, -155.5637], grad_fn=<MulBackward0>)


In [119]:
# B is not a scalar, so backward is given a vector with one weight per
# component of B.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
B.backward(gradient=v)

print(A.grad)

tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])
