# Setup

In [None]:
import numpy as np
import torch

In [None]:
# Check whether PyTorch can use a CUDA device in this environment.
torch.cuda.is_available()

True

In [None]:
# Smoke test: sample a 5x3 tensor of uniform values in [0, 1) and show it.
x = torch.rand((5, 3))
print(x)

tensor([[0.4946, 0.1042, 0.8705],
        [0.1956, 0.4582, 0.7810],
        [0.1720, 0.7122, 0.0769],
        [0.1971, 0.4751, 0.0173],
        [0.6778, 0.8895, 0.8203]])


# Tensor

In [None]:
# torch.empty only allocates storage; the printed value is whatever was
# already in that memory, not a meaningful number.
x = torch.empty((1,))
print(x)

tensor([1.9216e-24])


In [None]:
# Uninitialised 1-D tensor with three elements.
x = torch.empty((3,))
print(x)

tensor([4.5852e-35, 0.0000e+00, 4.5852e-35])


In [None]:
# Uninitialised 3-D tensor: 2 blocks, each with 2 rows and 3 columns.
x = torch.empty((2, 2, 3))
print(x)

tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 4.5808e-35, 0.0000e+00]],

        [[4.9610e-36, 0.0000e+00, 8.9724e-05],
         [4.5779e-41, 0.0000e+00, 0.0000e+00]]])


In [None]:
# 2x3 tensor of uniform random values in [0, 1).
x = torch.rand((2, 3))
print(x)

tensor([[0.6032, 0.2879, 0.9893],
        [0.8788, 0.5228, 0.5254]])


In [None]:
# 4x2 tensor filled with zeros.
x = torch.zeros((4, 2))
print(x)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])


In [None]:
# 1-D tensor of two ones.
x = torch.ones((2,))
print(x)

tensor([1., 1.])


In [None]:
# Shape of the tensor currently bound to x (a torch.Size object).
print(x.shape)

torch.Size([2])


In [None]:
# Element type of x; the saved output reports torch.float32.
print(x.dtype)

torch.float32


In [None]:
# The dtype keyword overrides the default element type, here half precision.
x = torch.zeros((4, 2), dtype=torch.float16)
print(x)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float16)


In [None]:
# Build a tensor directly from Python data; integer input gives an integer tensor.
x = torch.tensor([2, 4])
print(x)

tensor([2, 4])


In [None]:
# requires_grad=True asks autograd to record operations performed on x.
x = torch.tensor([2.0, 3.0], requires_grad=True)

In [None]:
# Element-wise addition of two random length-2 tensors with the + operator.
x, y = torch.rand(2), torch.rand(2)
print(x + y)

tensor([0.9286, 0.5548])


In [None]:
# Functional form of the addition; the saved output matches the x + y
# result printed by the previous cell.
torch.add(x,y)

tensor([0.9286, 0.5548])

In [None]:
# NumPy-style slicing: every row, second column.
x = torch.rand((3, 2))
print(x[:, 1])

tensor([0.6393, 0.7299, 0.7906])


In [None]:
# .item() turns the single selected element into a plain Python number.
print(x[1,1].item())

0.7298689484596252


In [None]:
# view() reinterprets the same 16 values under a new shape, here flattened to 1-D.
x = torch.randn((4, 4))
y = x.view(16)
print(y)

tensor([ 1.3545, -0.5511, -0.3394, -0.7771, -1.8928, -1.0083,  0.6386, -0.8393,
         1.9115,  0.0044, -0.0707, -0.2707, -0.4677, -0.1457,  0.7103, -0.1364])


In [None]:
# -1 lets view() infer that dimension from the element count and the other size.
z = x.view(-1, 8)
print(z.shape)

torch.Size([2, 8])


In [None]:
# Starting tensor for the tensor -> NumPy conversion shown next.
a = torch.ones((5,))
print(a)

tensor([1., 1., 1., 1., 1.])


In [None]:
# Convert the tensor to a NumPy array.
# NOTE: later cells show that an in-place change to `a` also appears in `b`.
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [None]:
# Confirm that b is a NumPy ndarray rather than a tensor.
print(type(b))

<class 'numpy.ndarray'>


In [None]:
# The trailing underscore marks an in-place operation: 1 is added to every
# element of `a` itself instead of producing a new tensor.
a.add_(1)

tensor([2., 2., 2., 2., 2.])

In [None]:
# a now holds the incremented values.
print(a)

tensor([2., 2., 2., 2., 2.])


In [None]:
# b changed as well even though only `a` was modified (see the saved output).
print(b)

[2. 2. 2. 2. 2.]


In [None]:
# NumPy -> PyTorch: torch.from_numpy wraps the array's existing buffer rather
# than copying it, so `a` and `b` refer to the same memory. The tensor also
# keeps NumPy's float64 dtype, as the printed output shows.
# (numpy is imported as `np` in the notebook's import cell.)
a = np.ones(5)
b = torch.from_numpy(a)
print(a)
print(b)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [None]:
# In-place increment of the NumPy array. The tensor created from it reflects
# the change too, which the two prints make visible.
a += 1
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


# Autograd

In [None]:
import torch

# Autograd performs automatic differentiation for tensor operations.
# With requires_grad=True, every operation applied to x is recorded.
x = torch.randn(3, requires_grad=True)
y = x + 2

# x was created directly by the user, so its grad_fn is None.
# y is the result of an operation, so its grad_fn references the Function
# (here an addition) that produced it.
print(x)
print(y)
print(y.grad_fn)

# Extend the graph: square y, scale by 3, then reduce to a scalar.
z = y * y * 3
print(z)
z = torch.mean(z)
print(z)

# z is a scalar, so backward() needs no argument. It backpropagates through
# the recorded graph and accumulates the partial derivatives of z with
# respect to x into x.grad.
z.backward()
print(x.grad)  # dz/dx

tensor([-2.2747, -0.7034, -0.5632], requires_grad=True)
tensor([-0.2747,  1.2966,  1.4368], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f9d38269550>
tensor([0.2263, 5.0435, 6.1935], grad_fn=<MulBackward0>)
tensor(3.8211, grad_fn=<MeanBackward0>)
tensor([-0.5493,  2.5932,  2.8737])


In [None]:
# -------------
# Non-scalar output:
# backward() can only be called without arguments on a scalar. For a tensor
# with more than one element we must pass a `gradient` tensor of matching
# shape; autograd then computes the vector-Jacobian product with it.

x = torch.randn(3, requires_grad=True)

# One doubling followed by ten more: y = x * 2**11 = x * 2048.
y = x * 2
for _ in range(10):
    y = y * 2

print(y)
print(y.shape)

v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)  # equals v * 2048

tensor([-701.0704, 3874.8335, 1118.1719], grad_fn=<MulBackward0>)
torch.Size([3])
tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


In [None]:
# -------------
# Controlling whether a tensor tracks history.
# Weight updates inside a training loop, for example, must not become part
# of the gradient computation. Ways to stop tracking:
# - x.requires_grad_(False)
# - x.detach()
# - wrap the code in 'with torch.no_grad():'

# requires_grad_(...) flips the flag of an existing tensor in place.
a = torch.randn(2, 2)
print(a.requires_grad)  # a freshly created tensor does not track history
b = (a * 3) / (a - 1)
print(b.grad_fn)  # no history was recorded, so there is no grad_fn
a.requires_grad_(True)
print(a.requires_grad)
b = torch.sum(a * a)
print(b.grad_fn)  # tracking is on now, so the result has a grad_fn

False
None
True
<SumBackward0 object at 0x7f9d38269ee0>
