# PyTorch Basics

In [1]:
import numpy as np
import torch

In [4]:
# Example 1 - Construct an empty (uninitialized) 5x3 matrix.
# torch.empty only allocates storage, so the printed values are arbitrary.
x = torch.empty((5, 3))
print(x)

tensor([[ 0.0000e+00, -0.0000e+00, -3.3469e-19],
        [-1.5849e+29,  5.6052e-45,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00, -0.0000e+00, -3.3467e-19]])


In [5]:
# Example 2 - Construct a 5x3 matrix filled with random values
# (torch.rand samples uniformly from [0, 1)).
x = torch.rand((5, 3))
print(x)

tensor([[ 0.2791,  0.2893,  0.5192],
        [ 0.6518,  0.7641,  0.8102],
        [ 0.8538,  0.7759,  0.0164],
        [ 0.4984,  0.3115,  0.4968],
        [ 0.3539,  0.9372,  0.7003]])


In [6]:
# Example 3 - Construct a 5x3 matrix of zeros with dtype long (64-bit integers)
x = torch.zeros((5, 3), dtype=torch.long)
print(x)

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])


In [7]:
# Example 4 - Construct a tensor directly from data
x = torch.tensor([5.5, 3.0])
print(x)

tensor([ 5.5000,  3.0000])


In [9]:
# Example 5 - Create a tensor based on an existing tensor. These methods reuse
# properties of the input tensor (e.g. dtype) unless told otherwise.
x = x.new_ones((5, 3), dtype=torch.float64)  # new_* methods take the target size
print(x)

# randn_like keeps the size of x; the dtype is overridden explicitly.
x = torch.randn_like(x, dtype=torch.float32)
print(x)

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)
tensor([[ 0.6884, -0.2034,  1.2214],
        [ 0.1229,  0.6018, -0.0918],
        [ 0.8889,  1.5302, -0.0369],
        [-1.1927,  1.2110, -0.6282],
        [ 2.2036, -0.2416, -0.9564]])


In [10]:
# Get the size of a tensor
print(x.shape)

torch.Size([5, 3])


## Operations

In [12]:
# Addition - using the + operator
# y is a fresh random 5x3 tensor; x + y builds a new tensor holding the sum.
y = torch.rand(5, 3)
print(x + y)

tensor([[ 1.1794,  0.3499,  2.0833],
        [ 0.2200,  0.6396,  0.4263],
        [ 1.3064,  2.4677,  0.8250],
        [-0.3334,  2.0770,  0.3313],
        [ 3.1314, -0.2363, -0.0870]])


In [13]:
# Addition - alternative: the functional form torch.add
print(torch.add(x, y))

tensor([[ 1.1794,  0.3499,  2.0833],
        [ 0.2200,  0.6396,  0.4263],
        [ 1.3064,  2.4677,  0.8250],
        [-0.3334,  2.0770,  0.3313],
        [ 3.1314, -0.2363, -0.0870]])


In [14]:
# Addition - providing an output tensor as argument
# `result` is allocated up front and torch.add stores the sum in it via `out=`.
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[ 1.1794,  0.3499,  2.0833],
        [ 0.2200,  0.6396,  0.4263],
        [ 1.3064,  2.4677,  0.8250],
        [-0.3334,  2.0770,  0.3313],
        [ 3.1314, -0.2363, -0.0870]])


In [15]:
# Addition - in-place
# add_ adds x to y and stores the result in y itself (note the trailing underscore).
y.add_(x)
print(y)

tensor([[ 1.1794,  0.3499,  2.0833],
        [ 0.2200,  0.6396,  0.4263],
        [ 1.3064,  2.4677,  0.8250],
        [-0.3334,  2.0770,  0.3313],
        [ 3.1314, -0.2363, -0.0870]])


Note: Any operation that mutates a tensor in-place has a trailing underscore (`_`) in its name. For example, `x.copy_(y)` and `x.t_()` will change `x`.

## Slicing like numpy array

In [17]:
# Select column 1 (the second column) across all rows
print(x[:, 1])

tensor([-0.2034,  0.6018,  1.5302,  1.2110, -0.2416])


In [18]:
# NOTE(review): the next cell begins with this same assignment, so this cell is redundant.
x = torch.randn(4, 4)

In [19]:
# Resizing/reshaping a tensor - using Tensor.view
x = torch.randn((4, 4))
y = x.view(16)      # flatten the 16 elements into one dimension
z = x.view(-1, 8)   # a size of -1 is inferred from the other dimensions

print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [21]:
# A one-element tensor can be converted to a plain Python number with .item()
x = torch.randn((1,))
print(x)
print(x.item())

tensor([-0.1320])
-0.13196571171283722


## NumPy Bridge
Converting a Torch Tensor to NumPy array and vice versa

In [24]:
# Create a sample torch tensor (five ones)
a = torch.ones((5,))
print(a)

tensor([ 1.,  1.,  1.,  1.,  1.])


In [25]:
# Converting a Torch tensor to a NumPy array
b = a.numpy()
print(b)

[ 1.  1.  1.  1.  1.]


In [26]:
# Another example - modify the tensor in-place and see how the NumPy array changes too
a.add_(1)
print(a)
print(b)

tensor([ 2.,  2.,  2.,  2.,  2.])
[ 2.  2.  2.  2.  2.]


`a.numpy()` does not copy the data: the returned NumPy array shares its underlying memory with the tensor `a`. That is why changing `a` in-place also changes `b`.

Convert NumPy Array to Torch Tensor

In [27]:
# Convert a NumPy array to a Torch tensor.
# torch.from_numpy does not copy: `b` shares memory with `a`, so the in-place
# np.add below is visible through the tensor as well.
# (numpy is imported in the imports cell at the top of the notebook.)
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[ 2.  2.  2.  2.  2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


Similarly, `torch.from_numpy` does not copy the data: the tensor `b` shares memory with the NumPy array `a`, so changing `a` in-place changes `b` as well.

# Autograd: The automatic differentiation

Central to all neural networks in PyTorch is the autograd package. Let’s first briefly visit this, and we will then go to training our first neural network.

The autograd package provides automatic differentiation for all operations on Tensors. It is a define-by-run framework, which means that your backprop is defined by how your code is run, and that every single iteration can be different.



## Tensor
torch.Tensor is the central class of the package. Once the attribute `.requires_grad` is set to `True`, it starts to track all operations on it. 

### The .backward()
When the computation is finished, then by calling `backward()`, the gradients will be calculated automatically.

The gradient for this tensor will be accumulated into its `.grad` attribute.

### Stop a tensor from tracking history - The .detach()
If we want to stop a tensor from tracking history, we can call `.detach()` to detach it from the computation history and to prevent future computation from being tracked.

To prevent history tracking (as it takes up memory), wrap the code block in `with torch.no_grad():`...

### The Function
`torch.Tensor` and `torch.autograd.Function` are interconnected and together build the computation graph, which encodes a complete history of the computation. Each tensor has a `.grad_fn` attribute that references the `Function` that created the `Tensor`.

In [31]:
# Create a 2x2 tensor of ones and ask autograd to track operations on it
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[ 1.,  1.],
        [ 1.,  1.]])


In [32]:
# Apply an add operation to the tracked tensor x
y = x + 2
print(y)

tensor([[ 3.,  3.],
        [ 3.,  3.]])


In [33]:
# y was created as the result of an add operation, so it has a grad_fn
print(y.grad_fn)

<AddBackward0 object at 0x11b196f28>


In [34]:
# Do more operations on y: z is 3 * y * y element-wise, and out is the mean of z
z = y * y * 3
out = z.mean()

print(z, out)

tensor([[ 27.,  27.],
        [ 27.,  27.]]) tensor(27.)


In [35]:
# z was produced by a multiplication, so its grad_fn is a Mul backward node
print(z.grad_fn)

<MulBackward0 object at 0x11b1a6630>


In [36]:
# `.requires_grad_(...)` changes an existing Tensor's `requires_grad` flag in-place.
# A tensor created without the flag does not track history, so this prints False.
a = torch.randn((2, 2))
a = (a * 3) / (a - 1)
print(a.requires_grad)

False


In [38]:
# Switch on gradient tracking for the existing tensor, in-place
a.requires_grad_(True)
print(a.requires_grad)

True


In [39]:
# b is computed from a tracked tensor, so it carries a grad_fn (here from the sum)
b = (a * a).sum()
print(b.grad_fn)

<SumBackward0 object at 0x11b190fd0>


## Gradients and Backprop()

In [41]:
# Backpropagate from the scalar `out`
out.backward()

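You should have got a matrix of 4.5. Let’s call the out Tensor “$o$”. We have that $o = \frac{1}{4}\sum_i z_i$, $z_i = 3(x_i+2)^2$ and $z_i\big|_{x_i=1} = 27$. Therefore, $\frac{\partial o}{\partial x_i} = \frac{3}{2}(x_i+2)$, hence $\frac{\partial o}{\partial x_i}\big|_{x_i=1} = \frac{9}{2} = 4.5$.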

In [42]:
# Gradient of `out` with respect to x, i.e. d(out)/dx
print(x.grad)

tensor([[ 4.5000,  4.5000],
        [ 4.5000,  4.5000]])


In [43]:
# Another example with autograd
x = torch.randn(3, requires_grad=True)
y = x * 2
# Keep doubling y until its norm reaches 1000. The norm is taken on a detached
# tensor so the loop condition itself is not recorded by autograd;
# .detach() is the recommended replacement for the legacy .data attribute.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([-1151.2181,   114.7034,   146.5987])


In [49]:
# Backprop with customized gradients: y is not a scalar, so backward() is given
# a tensor with one weight per component of y.
gradients = torch.tensor([0.1, 1.0, 1e-5], dtype=torch.float32)
y.backward(gradient=gradients)

print(x.grad)

tensor([ 1.0240e+02,  1.0240e+03,  1.0240e-02])


# Neural Networks packages