This is a short experimental notebook for learning PyTorch.

# Tensors

In [2]:
# imports
import torch

In [3]:
# Allocate a 5x3 tensor without initialising it: the printed values are
# arbitrary leftovers and will differ between runs.
x = torch.empty((5, 3))
print(x)

tensor([[-1.0082e+03,  4.5867e-41, -1.0082e+03],
        [ 4.5867e-41,  3.7386e-14,  6.6532e-33],
        [ 1.8337e+31,  1.3556e-19,  1.8370e+25],
        [ 2.0616e-19,  4.7429e+30,  7.4908e+11],
        [ 1.9438e-19,  1.2123e+25,  3.9666e-11]])


In [14]:
# Build a 5x3 tensor filled with random values (the output shows them
# falling between 0 and 1).
x = torch.rand((5, 3))
print(x)

tensor([[0.6430, 0.1098, 0.9129],
        [0.7758, 0.8375, 0.8661],
        [0.2373, 0.1825, 0.4706],
        [0.8746, 0.3732, 0.9155],
        [0.0307, 0.6626, 0.1139]])


In [15]:
# Build a 5x3 tensor of zeros whose elements are integers (dtype torch.long).
x = torch.zeros((5, 3), dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [16]:
# Build a tensor straight from Python data; the integer 3 ends up stored as
# a float next to 5.5, as the printed result shows.
x = torch.tensor((5.5, 3))
print(x)

tensor([5.5000, 3.0000])


Alternatively, create a tensor based on an existing tensor. These methods reuse the properties of the input tensor (e.g. dtype) unless new values are provided by the user.



In [21]:
# Tensor.new_* methods take the target size; the dtype is set explicitly to
# 64-bit float here instead of being inherited from the old x.
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)
print(x.shape)

# *_like factories take their size from the input tensor; the dtype is
# overridden once more, this time to 32-bit float.
x = torch.randn_like(x, dtype=torch.float32)
print(x)  # same size as the tensor it was modelled on
print(x.shape)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
torch.Size([5, 3])
tensor([[-0.6055,  0.3931, -0.3421],
        [-2.3581, -0.3688, -2.1091],
        [ 0.0252,  1.6905, -1.5767],
        [-0.6803,  0.0398,  1.6817],
        [ 1.0721, -1.0206, -0.6299]])
torch.Size([5, 3])


# Operations
There are multiple syntaxes for operations. In the following examples, we will take a look at the addition operation.

Addition: syntax 1

In [27]:
# Operator form of addition: x + y adds the two 5x3 tensors element-wise.
y = torch.rand((5, 3))
print("x:", x)
print("y:", y)
print("x+y:", x + y)

x: tensor([[-0.6055,  0.3931, -0.3421],
        [-2.3581, -0.3688, -2.1091],
        [ 0.0252,  1.6905, -1.5767],
        [-0.6803,  0.0398,  1.6817],
        [ 1.0721, -1.0206, -0.6299]])
y: tensor([[0.7458, 0.6246, 0.8671],
        [0.6695, 0.0545, 0.3273],
        [0.6190, 0.9127, 0.2201],
        [0.5893, 0.2776, 0.6201],
        [0.2419, 0.1144, 0.0803]])
x+y: tensor([[ 0.1403,  1.0176,  0.5249],
        [-1.6886, -0.3143, -1.7817],
        [ 0.6442,  2.6032, -1.3566],
        [-0.0910,  0.3174,  2.3018],
        [ 1.3140, -0.9062, -0.5496]])


Addition: syntax 2 (the same operation as above, written as a call to the function `torch.add`)

In [28]:
# Function form of the same addition; the printed values match x + y above.
print(torch.add(x, y))

tensor([[ 0.1403,  1.0176,  0.5249],
        [-1.6886, -0.3143, -1.7817],
        [ 0.6442,  2.6032, -1.3566],
        [-0.0910,  0.3174,  2.3018],
        [ 1.3140, -0.9062, -0.5496]])


Addition: providing an output tensor as argument

In [29]:
# Pre-allocate the destination tensor, then have torch.add write the sum
# into it through the out= argument.
result = torch.empty((5, 3))
torch.add(x, y, out=result)
print(result)

tensor([[ 0.1403,  1.0176,  0.5249],
        [-1.6886, -0.3143, -1.7817],
        [ 0.6442,  2.6032, -1.3566],
        [-0.0910,  0.3174,  2.3018],
        [ 1.3140, -0.9062, -0.5496]])


Addition: in place

In [30]:
# In-place add: y is overwritten with y + x. x itself is not modified
# (it still prints its original values in the next cell).
y.add_(x)
print(y)

tensor([[ 0.1403,  1.0176,  0.5249],
        [-1.6886, -0.3143, -1.7817],
        [ 0.6442,  2.6032, -1.3566],
        [-0.0910,  0.3174,  2.3018],
        [ 1.3140, -0.9062, -0.5496]])


Any operation that mutates a tensor in place is suffixed with an underscore (`_`). For example, `x.copy_(y)` and `x.t_()` will change `x`.

In [32]:
# x still holds its earlier values: the in-place add above only changed y.
print(x)
print(x[:, 1]) # NumPy-style indexing: all rows, column 1

tensor([[-0.6055,  0.3931, -0.3421],
        [-2.3581, -0.3688, -2.1091],
        [ 0.0252,  1.6905, -1.5767],
        [-0.6803,  0.0398,  1.6817],
        [ 1.0721, -1.0206, -0.6299]])
tensor([ 0.3931, -0.3688,  1.6905,  0.0398, -1.0206])


### Reshaping a tensor using `Tensor.view`

In [36]:
# view() presents the same 16 elements under a different shape.
x = torch.randn((4, 4))
y = x.view(16)       # flattened to one dimension
z = x.view(-1, 8)    # -1 is inferred from the remaining size: 16 / 8 = 2 rows
print("x:", x)
print("y:", y)
print("z:", z)
print(x.shape, y.shape, z.shape)

x: tensor([[-0.5155,  0.5098,  2.1862, -1.0543],
        [ 0.0699,  1.1978, -0.8849, -0.0753],
        [-0.2021, -1.4264,  0.0489,  0.6374],
        [ 0.9559,  1.9166, -0.8450, -0.9537]])
y: tensor([-0.5155,  0.5098,  2.1862, -1.0543,  0.0699,  1.1978, -0.8849, -0.0753,
        -0.2021, -1.4264,  0.0489,  0.6374,  0.9559,  1.9166, -0.8450, -0.9537])
z: tensor([[-0.5155,  0.5098,  2.1862, -1.0543,  0.0699,  1.1978, -0.8849, -0.0753],
        [-0.2021, -1.4264,  0.0489,  0.6374,  0.9559,  1.9166, -0.8450, -0.9537]])
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


Getting the value of a one-element tensor as a plain Python number using `.item()`

In [39]:
# A one-element tensor prints as a tensor; .item() unwraps it to a plain
# Python number.
x = torch.randn((1,))
print(x)
print(x.item())

tensor([-0.5314])
-0.5314245820045471


# Converting between torch tensors and NumPy arrays

In [40]:
# Start from a 1-D tensor holding five ones.
a = torch.ones((5,))
print(a)

tensor([1., 1., 1., 1., 1.])


In [41]:
# Tensor.numpy() hands back a NumPy array for the tensor a. The two stay
# linked: the in-place change made to a in the following cell also shows up
# in b.
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [61]:
# In-place add on the tensor a.
# NOTE(review): the recorded output shows 21 rather than 2, presumably
# because this cell was re-executed many times (execution count 61); every
# run adds another 1 to the same data. Restart the kernel and run all cells
# to get the expected value of 2.
a.add_(1)
print(a)
print(b) # b changes as well, because it refers to the same data as a

tensor([21., 21., 21., 21., 21.])
[21. 21. 21. 21. 21.]


### Converting a NumPy array to a torch tensor

In [70]:
import numpy as np

# torch.from_numpy wraps the array instead of copying it, so the in-place
# NumPy add below is visible through the tensor b as well.
a = np.ones((5,))
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [72]:
# Another in-place NumPy add; the tensor b follows because it was created
# from a with torch.from_numpy.
# NOTE(review): the recorded output jumps from 2 to 4, which suggests an
# extra in-place add ran in between (execution counts go 70 -> 72). A fresh
# run would print 3 here.
np.add(a, 1, out=a)
print(a)
print(b)

[4. 4. 4. 4. 4.]
tensor([4., 4., 4., 4., 4.], dtype=torch.float64)


# CUDA tensors

In [6]:
import torch

# Report whether CUDA can be used, then create the one-element tensor that
# the GPU example in the next cell works with.
print(torch.cuda.is_available())
x = torch.randn((1,))
print(x)
print(x.item())

True
tensor([2.8869])
2.8868794441223145


In [8]:
# The body is guarded, so this cell does nothing on a machine without CUDA.
# ``torch.device`` objects are used to move tensors on and off the GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")                  # handle for the CUDA device
    y = torch.ones_like(x, device=device)          # created directly on the GPU
    x = x.to(device)                               # the string form ``.to("cuda")`` works too
    z = x + y
    print(z)
    print(z.to(device="cpu", dtype=torch.double))  # one ``.to`` call changes device and dtype

tensor([3.8869], device='cuda:0')
tensor([3.8869], dtype=torch.float64)


# Autograd automatic differentiation

In [1]:
import torch

Create a tensor and set requires_grad=True to track computation with it

In [2]:
# A 2x2 tensor of ones with requires_grad=True, so that operations on it are
# tracked for differentiation.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [3]:
# y is the result of an operation on x, so its printed form carries a
# grad_fn (AddBackward0 in the recorded output).
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


y was created as a result of an operation, so it has a grad_fn.

In [4]:
# Show the object attached to y as its grad_fn (an AddBackward0 instance).
print(y.grad_fn)

<AddBackward0 object at 0x7f9ea8bb9080>


In [5]:
# z = 3 * y^2 element-wise; with y = 3 everywhere that is 27 per element.
z = y * y * 3
# out is the mean of z's four elements: a single scalar (27 here), which is
# what backward() is later called on.
out = z.mean()

print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


<span style="background-color: #80a573">.requires_grad_( ... )</span> changes an existing Tensor’s requires_grad flag in-place. The flag argument defaults to True if not given.

In [6]:
# A tensor built without requires_grad is not tracked ...
a = torch.randn((2, 2))
a = a * 3 / (a - 1)
print(a.requires_grad)

# ... until the flag is switched on in place.
a.requires_grad_(True)
print(a.requires_grad)

# Results computed from a after that point carry a grad_fn.
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f9ea8baaba8>


### Gradients
Let’s backprop now. Because out contains a single scalar, out.backward() is equivalent to out.backward(torch.tensor(1.)).

In [7]:
# Backpropagate from the scalar out; afterwards x.grad holds d(out)/dx
# (printed in the next cell).
out.backward()

In [8]:
# d(out)/dx. Since out = mean(3 * (x + 2)**2) over 4 elements, each entry is
# 6 * (x + 2) / 4 = 1.5 * (x + 2), i.e. 4.5 at x = 1.
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
