In [86]:
import torch
import numpy as np

In [87]:
# Allocate a 5x3 matrix WITHOUT initializing it. The entries are whatever
# happened to be in the allocated memory (frequently zeros); they are not
# random samples and must not be relied upon.
x = torch.empty((5, 3))
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [88]:
# Draw two 5x3 matrices whose entries are sampled uniformly from [0, 1).
x, y = torch.rand(5, 3), torch.rand(5, 3)

print(x)
print(y)

# Arithmetic operators act element-wise and return new tensors;
# x and y themselves are left untouched by them.
print('x+y:', x + y)    # element-wise addition
print('x-y:', x - y)    # element-wise subtraction
print('x*y:', x * y)    # element-wise product
print('x / y:', x / y)  # element-wise division

# In-place addition.
# Any operation that mutates a tensor in place is post-fixed with an underscore,
# e.g. x.copy_(y), x.t_(). After this call x holds x + y.
x.add_(y)
print(x)

# y.T is NOT in-place: it returns the transpose and leaves y as it was.
print(y.T, y)

# t_() transposes y in place, so y is 3x5 from here on.
y.t_()
print(y)

tensor([[0.5234, 0.8670, 0.5657],
        [0.9090, 0.0939, 0.4472],
        [0.7692, 0.3984, 0.4631],
        [0.6804, 0.6074, 0.5757],
        [0.8101, 0.7735, 0.9022]])
tensor([[0.2714, 0.2833, 0.3242],
        [0.6935, 0.8722, 0.7527],
        [0.6513, 0.7990, 0.6988],
        [0.5922, 0.9492, 0.6755],
        [0.9310, 0.7571, 0.0162]])
x+y: tensor([[0.7947, 1.1503, 0.8899],
        [1.6025, 0.9661, 1.2000],
        [1.4205, 1.1974, 1.1620],
        [1.2726, 1.5566, 1.2512],
        [1.7412, 1.5306, 0.9184]])
x-y: tensor([[ 0.2520,  0.5838,  0.2414],
        [ 0.2155, -0.7782, -0.3055],
        [ 0.1178, -0.4006, -0.2357],
        [ 0.0882, -0.3418, -0.0999],
        [-0.1209,  0.0165,  0.8860]])
x*y: tensor([[0.1420, 0.2456, 0.1834],
        [0.6304, 0.0819, 0.3366],
        [0.5010, 0.3183, 0.3237],
        [0.4030, 0.5766, 0.3889],
        [0.7543, 0.5856, 0.0146]])
x / y: tensor([[ 1.9287,  3.0607,  1.7445],
        [ 1.3108,  0.1077,  0.5941],
        [ 1.1809,  0.4987,  0.6627

In [89]:
# A tensor can be constructed directly from data:
# c = torch.tensor(data)

# To copy-construct from an existing tensor, detach it and clone it rather than
# calling torch.tensor(sourceTensor). The result has its own memory and is not
# attached to any autograd history (equivalent to sourceTensor.clone().detach()).
c = x.detach().clone()
print(c)

# Tensors support NumPy-style indexing:
# select the last column of every row.
print(c[:, -1])

tensor([[0.7947, 1.1503, 0.8899],
        [1.6025, 0.9661, 1.2000],
        [1.4205, 1.1974, 1.1620],
        [1.2726, 1.5566, 1.2512],
        [1.7412, 1.5306, 0.9184]])
tensor([0.8899, 1.2000, 1.1620, 1.2512, 0.9184])


In [90]:
x = torch.randn(4, 4)
print(x.size())

# To resize/reshape a tensor, use the Tensor.view() method; the element count
# of the requested shape must match the original (4 * 4 == 16).
y = x.view(16)
print(y.size())

# A size of -1 is inferred from the remaining dimensions: 16 / 8 == 2 rows.
z = x.view(-1, 8)
print(z.size())

torch.Size([4, 4])
torch.Size([16])
torch.Size([2, 8])


In [91]:
# A one-element tensor can be unwrapped into a plain Python number with .item().
x = torch.rand((1,))
print(x)
print(x.item())

tensor([0.5889])
0.5889042019844055


In [92]:
# Converting between a Torch tensor and a NumPy array is simple.
# The tensor and the array share their underlying memory,
# so changing one changes the other.

# Tensor -> NumPy array
a = torch.ones(5)
print(a)

b = a.numpy()
print(b, type(b))

# Mutating the tensor in place is visible through the array as well:
a.add_(10)
print(a)
print(b)



tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.] <class 'numpy.ndarray'>
tensor([11., 11., 11., 11., 11.])
[11. 11. 11. 11. 11.]


In [93]:
# NumPy array -> Torch tensor
a = np.ones(5)
b = torch.from_numpy(a)

# As with Tensor -> NumPy, the two objects share memory, so an in-place
# change to the array shows up in the tensor too.
# All tensors on the CPU except a CharTensor support converting to NumPy and back.
a += 1
print(a, b)

[2. 2. 2. 2. 2.] tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [94]:
# Tensors on CUDA: the whole block is skipped when no CUDA device is available.
# NOTE(review): x is whatever an earlier cell left bound to that name, and it is
# rebound to a GPU tensor here -- confirm later cells expect that.
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device) # create a tensor directly on the GPU
    x = x.to(device)                      # .to(device) (or .to("cuda")) moves a tensor to the GPU
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))      # .to() can also change the dtype in the same call

In [121]:
# The autograd package provides automatic differentiation for all operations on Tensors.
# It is define-by-run: backpropagation is defined by how your code is run, so every
# single iteration can be different.
#
# torch.Tensor is the central class.
# If you set its attribute '.requires_grad' to 'True', it starts to track all operations on it.
# When you finish your computation, you can call '.backward()' and have all the gradients
# computed automatically. The gradient for this tensor is accumulated into its '.grad' attribute.
#
# To stop a tensor from tracking history, call '.detach()' to detach it from the computation
# history and to prevent future computation from being tracked.
#
# To prevent tracking history (and the memory it uses), wrap the code block in
# 'with torch.no_grad():'. This is helpful when evaluating a model: the model may have
# trainable parameters with 'requires_grad=True' for which we don't need the gradients.
#
# 'Tensor' and 'Function' are interconnected and build up an acyclic graph (one without
# cycles) that encodes the complete history of computation.
# Each tensor has a '.grad_fn' attribute that references the 'Function' that created it.
#
# To calculate the derivatives, call '.backward()' on a 'Tensor'.
# If the 'Tensor' is a scalar, you don't need to pass any arguments to 'backward()'.
# If it has more elements, you need to pass a 'gradient' argument: a tensor of matching shape.

x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()

# Because out contains a single scalar, out.backward() is equivalent to
# out.backward(torch.tensor(1.)).
out.backward()
# If out were not a scalar, autograd would not compute the full Jacobian directly;
# to get a vector-Jacobian product, pass the vector to backward() as its argument.

# d(out)/dx = 3 * 2 * (x + 2) / 4 = 4.5 for every element of x.
print(x.grad)

# Keep a handle on the tracked leaf: x is rebound to its detached version just below,
# and the no_grad demonstration needs a tensor that still requires grad.
x_tracked = x

# .detach() stops a tensor from tracking history: the result does not require grad
# and carries no accumulated gradient.
x = x.detach()
print(x.requires_grad, x.grad)

# Autograd can also be switched off for a whole code block with 'with torch.no_grad():'.
# The input must itself require grad, otherwise the result is False regardless of
# no_grad and the demonstration shows nothing.
assert (x_tracked ** 2).requires_grad  # tracked outside the no_grad block
with torch.no_grad():
    print((x_tracked ** 2).requires_grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
False None
False


In [122]:
# A tensor built from plain data does not track gradients by default.
a = torch.rand(2, 2)
a = 3 * a / (a - 1)
print(a.requires_grad)

# .requires_grad_() changes an existing tensor's requires_grad flag in place.
a.requires_grad_(True)
print(a.requires_grad)

# Equivalent to \sum(a \odot a), whose gradient with respect to a is 2 * a.
b = torch.sum(a * a)
b.backward()
print(a.grad)

False
True
tensor([[-11.7981,  -1.8759],
        [ -1.1495, -39.3355]])
