In [1]:
import torch
import numpy as np
# show the installed PyTorch version so the recorded outputs can be tied to it
torch.__version__

'1.12.1+cu102'

# 1. Tensors
Their job is to represent data in a numerical way

## 1.1 Creating tensors

In [3]:
# a scalar is a 0-dimensional tensor holding a single value
scalar = torch.tensor(8)
scalar

tensor(8)

In [5]:
# number of dimensions (rank) of the tensor; a scalar has none
scalar.ndim

0

In [6]:
# unwrap the single-element tensor into a plain Python number
scalar.item()

8

In [8]:
# a vector is a 1-dimensional tensor
vector = torch.tensor([1, 2, 3])
vector.ndim, vector

(1, tensor([1, 2, 3]))

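`ndim` equals the number of opening square brackets `[` at the start of the printed tensor (or, equivalently, the number of closing brackets `]` at its end).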

In [9]:
# shape lists the length of every dimension: here a single dimension of 3 elements
vector.shape

torch.Size([3])

In [11]:
# a matrix is a 2-dimensional tensor: 3 rows of 3 values each
MATRIX = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
# rank first, then shape, one per line
print(MATRIX.ndim, MATRIX.shape, sep='\n')
MATRIX

2
torch.Size([3, 3])


tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [12]:
# four nested bracket levels -> a 4-dimensional tensor of shape (1, 1, 3, 3)
TENSOR = torch.tensor([[[
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]]])
# rank first, then shape, one per line
print(TENSOR.ndim, TENSOR.shape, sep='\n')
TENSOR

4
torch.Size([1, 1, 3, 3])


tensor([[[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]])

In [13]:
# 3 rows x 4 columns of random floats (no seed set, so values change on every run)
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.2783, 0.7273, 0.2696, 0.9542],
        [0.9088, 0.0256, 0.5039, 0.9883],
        [0.6169, 0.2908, 0.0283, 0.9927]])

In [14]:
# the shape matches the requested size: 3 rows, 4 columns
random_tensor.shape

torch.Size([3, 4])

In [15]:
# constant-filled float tensors: a 2x2 of zeros and a 3x3 of ones
zeros_tensor = torch.zeros(2, 2)
ones_tensor = torch.ones(3, 3)
zeros_tensor, ones_tensor

(tensor([[0., 0.],
         [0., 0.]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]))

In [17]:
# integers from start (inclusive) up to end (exclusive): 1..9
torch.arange(start=1, end=10, step=1)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
# zeros with the same shape and dtype as MATRIX (3x3, integer)
torch.zeros_like(MATRIX)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [19]:
# ones with the same shape and dtype as zeros_tensor (2x2, float)
torch.ones_like(zeros_tensor)

tensor([[1., 1.],
        [1., 1.]])

## 1.2 Tensor datatypes

https://pytorch.org/docs/stable/tensors.html#data-types

some datatypes are specific for CPU and some are better for GPU

Lower-precision datatypes are generally faster to compute with, but may cost some accuracy on evaluation metrics.

In [20]:
# dtype=None lets PyTorch infer the datatype from the data; Python floats become torch.float32
tensor_32float = torch.tensor([3.0, 5.0, 7.0],
                              dtype=None,  # inferred from the data: torch.float32 here
                              device=None # use the default device
                              )
tensor_32float.device, tensor_32float.dtype

(device(type='cpu'), torch.float32)

Common issues in PyTorch:
* tensor shapes don't match
* tensors are on different devices
* tensors have the wrong datatype for the operation

In [22]:
# an explicit dtype overrides inference: the integer literals are stored as half-precision floats
tensor_16float = torch.tensor([1, 2, 1],
                              dtype=torch.float16)
tensor_16float.dtype

torch.float16

## 1.3 Tensor information

In [25]:
# datatype, device and shape: the three attributes to inspect first when tensors don't work together
E = torch.rand(size=(1, 2, 3, 4, 5))
E.dtype, E.device, E.shape

(torch.float32, device(type='cpu'), torch.Size([1, 2, 3, 4, 5]))

## 1.4 Tensor manipulation
All data (images, audio, text, etc) gets represented as tensors

In [27]:
t = torch.tensor([[1, 2],
                  [4, 5]])
# multiplying by a scalar scales every element and produces a new tensor
print(t * 10)
# data in t has not changed
print(t)

tensor([[10, 20],
        [40, 50]])
tensor([[1, 2],
        [4, 5]])


In [28]:
# functional form of t * 15
torch.multiply(t, 15)

tensor([[15, 30],
        [60, 75]])

In [31]:
# pointwise (element-wise) multiplication of the two rows of t: [1*4, 2*5]
print(f'{t[0]} * {t[1]} = {torch.multiply(t[0], t[1])}')

tensor([1, 2]) * tensor([4, 5]) = tensor([ 4, 10])


In [35]:
# matrix multiplication, 1st method: torch.matmul
# t[0] and t[1] are 1-D, so matmul returns their dot product (1*4 + 2*5 = 14);
# a 1-D tensor has no row/column orientation, so no transpose is needed
print(f'{t[0]} @ {t[1]} = {torch.matmul(t[0], t[1])}')

tensor([1, 2]) @ tensor([4, 5]) = 14


In [36]:
# matrix multiplication, 2nd method: the @ operator
# both operands are 1-D here, so the result is their dot product and no transpose is needed
print(f'{t[0]} @ {t[1]} = {t[0] @ t[1]}')

tensor([1, 2]) @ tensor([4, 5]) = 14


In [45]:
# a 2x3 float matrix used to demonstrate transposition
tensor_A = torch.tensor(
    [[1, 2, 3],
     [4, 5, 6]],
    dtype=torch.float32,
)
# swap dimensions 0 and 1 -> shape (3, 2); tensor_A.T is the shorthand for a 2-D tensor
torch.transpose(tensor_A, 0, 1)

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])

In [51]:
# fix the seed so the layer's random initial parameters are repeatable
torch.manual_seed(19)

# fully connected layer mapping 3 input features to 1 output
linear = torch.nn.Linear(3, 1)
linear

Linear(in_features=3, out_features=1, bias=True)

In [53]:
# a feed-forward (linear) layer applies a matrix multiplication plus a bias
# (visible as grad_fn=<AddmmBackward0> in the output); each row of tensor_A yields one value
out = linear(tensor_A)
print('output shape: ', out.shape)
out

output shape:  torch.Size([2, 1])


tensor([[1.3514],
        [3.2511]], grad_fn=<AddmmBackward0>)

In [58]:
# aggregate functions
# without a dim argument each call reduces over ALL 10 elements to a single value
M = torch.tensor([[1, 2, 3, 4, 5],
                  [10, 20, 30, 40, 50]],
                 dtype=torch.float32)
print('max =', M.max())
print('min =', M.min())
print('mean =', M.mean())
# with an even number of elements median() returns the lower of the two middle values (5, not 7.5)
print('median =', M.median())
print('sum =', M.sum())

max = tensor(50.)
min = tensor(1.)
mean = tensor(16.5000)
median = tensor(5.)
sum = tensor(165.)


In [60]:
# dim=0 reduces across the rows (one result per column); dim=1 reduces across the columns (one result per row)
# max/min/median along a dim return a (values, indices) pair; mean and sum return only the values
print('max =', M.max(dim=0)) # max in each column
print('min =', M.min(dim=1))  # min in each row
print('mean =', M.mean(dim=1)) # mean for each row
print('median =', M.median(dim=1)) # median for each row
print('sum =', M.sum(dim=0)) # sum for each column

max = torch.return_types.max(
values=tensor([10., 20., 30., 40., 50.]),
indices=tensor([1, 1, 1, 1, 1]))
min = torch.return_types.min(
values=tensor([ 1., 10.]),
indices=tensor([0, 0]))
mean = tensor([ 3., 30.])
median = torch.return_types.median(
values=tensor([ 3., 30.]),
indices=tensor([2, 2]))
sum = tensor([11., 22., 33., 44., 55.])


In [66]:
# index of the max value in the FLATTENED tensor (no dim given): 50. is element 9 of the 10 elements
torch.argmax(M)

tensor(9)

In [67]:
# with dim=1 each index is relative to its own row, not to the flattened tensor
torch.argmin(M, dim=1) # index of min in each row

tensor([0, 0])

In [68]:
# change tensor datatype: start from integer literals, which give an int64 tensor
tensor = torch.tensor([1, 2, 3])
tensor.dtype

torch.int64

In [71]:
# cast to float32; the converted tensor is assigned back to the same name
tensor = tensor.type(torch.float32)
tensor.dtype

torch.float32

**A very important topic is how to reshape tensors**. PyTorch contains several methods for this task:

In [95]:
# 2 rows x 3 columns: the starting point for the reshaping examples
tensor = torch.tensor([[1, 2, 3],
                       [4, 5, 6]])
tensor.shape

torch.Size([2, 3])

In [96]:
# reshape to (3, 2): the new shape must hold the same number of elements (6);
# the elements keep their row-by-row order
tensor = tensor.reshape(shape=(3, 2))
tensor

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [97]:
# view() returns a tensor with another shape that shares its data with the original tensor
tmp = tensor.view((2, 3))
tmp

tensor([[1, 2, 3],
        [4, 5, 6]])

In [98]:
# modifying the view also modifies the original:
# tmp[1, 1] and tensor[2, 0] refer to the same element
tmp[1, 1] = 15
tensor

tensor([[ 1,  2],
        [ 3,  4],
        [15,  6]])

In [89]:
# stack() joins equally-shaped tensors along a NEW dimension;
# with dim=1 each input becomes one column of the (2, 3) result
t1, t2, t3 = (torch.tensor(pair) for pair in ([1, 2], [3, 4], [5, 6]))
torch.stack((t1, t2, t3), dim=1)

tensor([[1, 3, 5],
        [2, 4, 6]])

In [90]:
# add a dimension of size 1 at position `dim`: shape (2,) -> (2, 1)
# the result gets its own name so t1 stays unchanged and re-running the cell gives the same output
t1_column = torch.unsqueeze(t1, dim=1)
print(t1_column)
print('shape is', t1_column.shape)

# remove all the dimensions of size 1: shape (2, 1) -> (2,)
torch.squeeze(t1_column)

tensor([[1],
        [2]])
shape is torch.Size([2, 1])


tensor([1, 2])

In [94]:
# permute reorders the dimensions: new dim 0 is old dim 2, new dim 1 is old dim 0,
# new dim 2 is old dim 1, so (4, 5, 3) becomes (3, 4, 5)
tensor_A = torch.rand(4, 5, 3).permute(2, 0, 1)
tensor_A.shape

torch.Size([3, 4, 5])

# 2. PyTorch + NumPy

In [100]:
numpy_array = np.array([1, 2, 3, 4])

# from numpy to pytorch: the tensor keeps the NumPy array's datatype (int64 in this run)
torch_tensor = torch.from_numpy(numpy_array)
print('type is', torch_tensor.dtype)
torch_tensor

type is torch.int64


tensor([1, 2, 3, 4])

In [101]:
# back to numpy: convert the tensor into a NumPy array
torch_tensor.numpy()

array([1, 2, 3, 4])

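NumPy's default datatype for floating-point values is float64, but PyTorch often uses float32. A tensor created from a NumPy array keeps the array's datatype, so convert it explicitly when float32 is needed.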

In [102]:
# NumPy stores Python floats as float64
a = np.array([1.0, 2., 3.1])
print('type of elements in numpy array:', a.dtype)

# from_numpy keeps that datatype, so the tensor is float64 as well
t = torch.from_numpy(a)
print('type of elements in torch tensor:', t.dtype)

# cast explicitly when float32 is wanted
t = torch.from_numpy(a).type(torch.float32)
print('type of elements in torch tensor AFTER TRANSFORM:', t.dtype)

type of elements in numpy array: float64
type of elements in torch tensor: torch.float64
type of elements in torch tensor AFTER TRANSFORM: torch.float32


# 3. Reproducibility
It can be useful when you want to perform repeatable experiments

In [106]:
# two consecutive draws without re-seeding in between produce different values
rand_A = torch.rand(size=(5, 2))
rand_B = torch.rand(size=(5, 2))
print(rand_A)
print(rand_B)

tensor([[0.2023, 0.4515],
        [0.9240, 0.2862],
        [0.0071, 0.6098],
        [0.9307, 0.7573],
        [0.7274, 0.2586]])
tensor([[0.4472, 0.7946],
        [0.6463, 0.3526],
        [0.9688, 0.0623],
        [0.5991, 0.6110],
        [0.1700, 0.6772]])


In [107]:
# element-wise comparison: one boolean per position
rand_A==rand_B

tensor([[False, False],
        [False, False],
        [False, False],
        [False, False],
        [False, False]])

In [110]:
import random  # NOTE(review): not used in this cell
RANDOM_SEED = 19
# seed PyTorch's random number generator before the first draw
torch.manual_seed(seed=RANDOM_SEED)
rand_A = torch.rand(size=(5, 2))

# seed again with the same value right before the second draw,
# so that rand() produces the same values as the first draw
torch.manual_seed(seed=RANDOM_SEED)
rand_B = torch.rand(size=(5, 2))
# element-wise comparison: every position is now True
rand_A==rand_B

tensor([[True, True],
        [True, True],
        [True, True],
        [True, True],
        [True, True]])

# 4. GPU and CPU
Neural networks require a lot of numerical operations. By default these operations are done on a CPU. Another common piece of hardware is a GPU, which is often much faster than a CPU at performing specific types of operations (like matrix multiplications).

In [111]:
# check whether PyTorch can use a CUDA GPU (False on the machine that produced this output)
torch.cuda.is_available()

False

In [112]:
# pick the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

Best practice is to write **device-agnostic code** (code that can run on a CPU OR a GPU).

In [113]:
# number of GPUs available to PyTorch (0 in this run)
torch.cuda.device_count()

0

In [116]:
# put the tensor on the device selected above; the result of .to() is assigned back to the name
tensor = tensor.to(device)
tensor.device

device(type='cpu')

If a tensor is on the GPU, it can't be converted to NumPy directly; move it to the CPU first with `.cpu()`.

In [117]:
# .cpu() brings the tensor to CPU memory first, so the conversion works regardless of its device
tensor.cpu().numpy()

array([[ 1,  2],
       [ 3,  4],
       [15,  6]])

# 5. Practice

## 5.1
Documentation reading - A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness). See the documentation on torch.Tensor and for torch.cuda.

## 5.2
Create a random tensor with shape (7, 7).

In [3]:
# a 7x7 tensor of random floats
tens = torch.rand(7, 7)
print(tens.shape)
tens

torch.Size([7, 7])


tensor([[0.7633, 0.7557, 0.1536, 0.2635, 0.0415, 0.6492, 0.8437],
        [0.8314, 0.8822, 0.0669, 0.4342, 0.3151, 0.2040, 0.1443],
        [0.4630, 0.7861, 0.6378, 0.9377, 0.8173, 0.3743, 0.9132],
        [0.0056, 0.3141, 0.5586, 0.8003, 0.3115, 0.8216, 0.8663],
        [0.2617, 0.9900, 0.6988, 0.6661, 0.1711, 0.4917, 0.2130],
        [0.4823, 0.5206, 0.7125, 0.6074, 0.1137, 0.4844, 0.4674],
        [0.5858, 0.5613, 0.9119, 0.2263, 0.3823, 0.4847, 0.1190]])

## 5.3
Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).

In [4]:
# the (1, 7) row vector is transposed to (7, 1) so that its first dimension
# matches the second dimension of the (7, 7) tensor: (7, 7) x (7, 1) -> (7, 1)
another_tens = torch.rand(1, 7)
multiplication = torch.mm(tens, another_tens.T)
print(multiplication.shape)
multiplication

torch.Size([7, 1])


tensor([[2.4061],
        [1.9980],
        [2.4949],
        [1.8935],
        [1.9159],
        [1.8621],
        [1.6747]])

## 5.4
Set the random seed to 0 and do exercises 2 & 3 over again.

In [5]:
# seed first so both random tensors (and therefore their product) are repeatable
torch.manual_seed(0)
tens = torch.rand(7, 7)
another_tens = torch.rand(1, 7)
# (7, 7) x (7, 1) -> (7, 1)
multiplication = torch.mm(tens, another_tens.T)
multiplication

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

## 5.5
Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.

In [6]:
# GPU counterpart of torch.manual_seed(): seeds random number generation on the GPU
torch.cuda.manual_seed(seed=1234)

## 5.6
Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).

In [17]:
# use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# seed once; the two consecutive draws below differ from each other but are repeatable
torch.manual_seed(1234)

tens1 = torch.rand(2, 3, device=device)
tens2 = torch.rand(2, 3, device=device)
print(tens1, tens2, sep='\n')

tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]])
tensor([[0.0518, 0.4681, 0.6738],
        [0.3315, 0.7837, 0.5631]])


## 5.7
Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

In [9]:
# (2, 3) @ (3, 2) -> (2, 2): tens2 is transposed so the inner dimensions match
out = tens1 @ tens2.T
out

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]])

## 5.8
Find the maximum and minimum values of the output of 7.

In [12]:
# .item() turns each single-element result into a plain Python float for printing
print('max = {}, min = {}'.format(out.max().item(), out.min().item()))

max = 0.5617256760597229, min = 0.3647301197052002


## 5.9
Find the maximum and minimum index values of the output of 7.

In [13]:
# without a dim argument argmax/argmin index into the flattened tensor (positions 0..3 of the 2x2 output)
print('index of max = {}, index of min = {}'.format(out.argmax(), out.argmin()))

index of max = 3, index of min = 0


## 5.10
Make a random tensor with shape (1, 1, 1, 7) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (7). Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.

In [16]:
# seed first so the random values are repeatable
torch.manual_seed(7)
random_tensor = torch.rand(1, 1, 1, 7)
print('shape of original tensor is {} and the tensor:\n {}'.format(random_tensor.shape, random_tensor))

# squeeze() drops every dimension of size 1: (1, 1, 1, 7) -> (7,)
other_tensor = random_tensor.squeeze()
print('shape of new tensor is {} and the tensor:\n {}'.format(other_tensor.shape, other_tensor))

shape of original tensor is torch.Size([1, 1, 1, 7]) and the tensor:
 tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071]]]])
shape of new tensor is torch.Size([7]) and the tensor:
 tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071])
