# Pytorch Fundamentals

In [None]:
import torch
import pandas as pd
import numpy as np

In [None]:
# Show the installed PyTorch version string
torch.__version__

'2.3.0+cu121'

### Types- Scalar, Vector, Matrix, Tensor

In [None]:
# Scalar: a tensor holding a single number
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
# Number of dimensions (0 for a scalar)
scalar.ndim

0

In [None]:
# Pull the value out of the single-element tensor as a plain Python int
scalar.item()

7

In [None]:
# Vector: a one-dimensional tensor
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [None]:
# Number of dimensions (1 for a vector)
vector.ndim

1

In [None]:
# Shape [2]: a single dimension holding 2 elements
vector.shape

torch.Size([2])

In [None]:
# Matrix: a two-dimensional tensor, here 2 rows by 3 columns
matrix = torch.tensor([[7,3,6],
                      [9,10,2]])
matrix

tensor([[ 7,  3,  6],
        [ 9, 10,  2]])

In [None]:
# Number of dimensions (2 for a matrix)
matrix.ndim

2

In [None]:
# Shape [2, 3]: 2 rows, 3 columns
matrix.shape

torch.Size([2, 3])

In [None]:
# Indexing the first dimension selects a whole row: here the first one
matrix[0]

tensor([7, 3, 6])

In [None]:
# Second row
matrix[1]

tensor([ 9, 10,  2])

In [None]:
# Tensor: four levels of nested brackets give a 4-dimensional tensor
tensor = torch.tensor([[[[1,2,3],
                        [4,5,6],
                        [7,8,9]]]])
tensor

tensor([[[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]])

In [None]:
# Number of dimensions: one per level of nested brackets
tensor.ndim

4

In [None]:
# Shape [1, 1, 3, 3]: two size-1 outer dimensions wrapped around a 3x3 block
tensor.shape

torch.Size([1, 1, 3, 3])

In [None]:
# Step through both size-1 dimensions, then take the third row of the 3x3 block
tensor[0][0][2]

tensor([7, 8, 9])

### Random Tensors

In [None]:
# Tensor of shape [1, 2, 3, 4] filled with uniform random values from [0, 1)
random_tensor = torch.rand(size=(1, 2, 3, 4))
random_tensor

tensor([[[[0.2927, 0.4682, 0.6685, 0.7968],
          [0.8934, 0.0684, 0.8691, 0.7810],
          [0.5684, 0.7396, 0.0980, 0.6879]],

         [[0.2231, 0.5030, 0.8433, 0.9387],
          [0.1048, 0.7637, 0.1121, 0.4875],
          [0.5953, 0.2580, 0.3051, 0.9228]]]])

In [None]:
# One dimension per size argument passed to torch.rand
random_tensor.ndim

4

In [None]:
# Random tensor with an image-like size of 3 x 224 x 224
random_image_size_tensor = torch.rand(size=(3, 224, 224))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

### Zeros and Ones

In [None]:
# A 3x4 tensor in which every element is 0
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# Shape and number of dimensions of the zeros tensor
zeros.shape, zeros.ndim

(torch.Size([3, 4]), 2)

In [None]:
# A 3x4 tensor in which every element is 1
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
# Shape and number of dimensions of the ones tensor
ones.shape, ones.ndim

(torch.Size([3, 4]), 2)

### Range of Tensors and Tensors-like

In [None]:
# Range of tensors - torch.arange(start, end)
# Produces the integers from start up to, but not including, end
torch.arange(0,10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Every 100th integer from 0 up to (but excluding) 1000
range_tensors = torch.arange(0, 1000, 100)
range_tensors

tensor([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [None]:
# Tensors like - a tensor of zeros with the same shape as the input
# (the output shows the integer dtype is carried over as well)
torch.zeros_like(range_tensors)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Same idea, filled with ones
torch.ones_like(range_tensors)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### Tensor Datatypes

In [None]:
# dtype=None lets PyTorch infer the dtype from the data. The values here are
# integer literals, so despite the variable's name this tensor is int64,
# not a float type.
float_tensor = torch.tensor([3,6,9],
                               dtype=None,
                               device=None,
                               requires_grad=False)
float_tensor

tensor([3, 6, 9])

In [None]:
# Inspect the dtype that was inferred from the integer data
float_tensor.dtype

torch.int64

In [None]:
# Convert the int64 tensor to float16 (half precision)
float_16_tensor = float_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# Mixing dtypes is a common source of errors, but not in this case:
# adding the int64 tensor to the float16 tensor works and yields float16
float_tensor+float_16_tensor

tensor([ 6., 12., 18.], dtype=torch.float16)

### Manipulating Tensors (tensor operations)

In [None]:
# Create a 1-D tensor (this rebinds the name `tensor` used for the 4-D example above)
tensor = torch.tensor([1,2,3])
tensor

tensor([1, 2, 3])

In [None]:
# Addition: 10 is added to every element
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiplication: every element is multiplied by 10
tensor * 10

tensor([10, 20, 30])

In [None]:
# Subtraction: 10 is subtracted from every element
tensor - 10

tensor([-9, -8, -7])

In [None]:
# The same operations are also available as built-in PyTorch functions.
# A cell only displays the value of its last expression, so both results
# are returned as a tuple; otherwise the torch.mul result would be discarded.
torch.mul(tensor, 10), torch.add(tensor, 10)

tensor([11, 12, 13])

In [None]:
# Matrix multiplication; for two 1-D tensors this is the dot product
torch.matmul(tensor, tensor)

tensor(14)

In [None]:
# How did we get that? The dot product written out by hand: 1*1 + 2*2 + 3*3 = 14
tensor[0]*tensor[0] + tensor[1]*tensor[1] + tensor[2]*tensor[2]

tensor(14)

Time comparison between the manual calculation and the built-in function

In [None]:
%%time
# Manual dot product: add up the element-wise squares one at a time.
# The running total must be accumulated with +=; plain assignment would keep
# only the last term (3*3 = 9) instead of the full sum (14).
value = 0
for i in range(len(tensor)):
  value += tensor[i]*tensor[i]
print(value)

tensor(9)
CPU times: user 2.15 ms, sys: 0 ns, total: 2.15 ms
Wall time: 3.08 ms


In [None]:
%%time
# Built-in matrix multiplication, timed for comparison with the manual loop
torch.matmul(tensor, tensor)

CPU times: user 36 µs, sys: 5 µs, total: 41 µs
Wall time: 44.3 µs


tensor(14)

In [None]:
%%time
# The @ operator gives the same result as torch.matmul
tensor @ tensor

CPU times: user 48 µs, sys: 0 ns, total: 48 µs
Wall time: 50.5 µs


tensor(14)

Most common error - The shape error

Mitigation technique - transpose

In [None]:
# Two 3x2 matrices. Both have shape [3, 2], so the inner dimensions
# (2 and 3) do not match and they cannot be matrix-multiplied as they are.
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])
tensor_B = torch.tensor([[6,7],
                         [8,9],
                         [10,11]])

tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [None]:
# RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)
# torch.mm(tensor_A, tensor_B)

In [None]:
# Fixing it with a transpose: .T swaps the dimensions, so [3, 2] becomes [2, 3]
tensor_B.T.shape, tensor_B.shape

(torch.Size([2, 3]), torch.Size([3, 2]))

In [None]:
# (3x2) @ (2x3) -> 3x3 result
torch.mm(tensor_A, tensor_B.T)

tensor([[ 20,  26,  32],
        [ 46,  60,  74],
        [ 72,  94, 116]])

In [None]:
# Transposing tensor_A instead: (2x3) @ (3x2) -> 2x2 result
torch.mm(tensor_A.T, tensor_B)

tensor([[ 80,  89],
        [104, 116]])

### Tensor Aggregation

In [None]:
# Every 5th integer starting at 1 and stopping before 100
x = torch.arange(start=1, end=100, step=5)
x

tensor([ 1,  6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86,
        91, 96])

In [None]:
# Min - available both as a tensor method and as a torch function
x.min(), torch.min(x)

(tensor(1), tensor(1))

In [None]:
# Max - available both as a tensor method and as a torch function
x.max(), torch.max(x)

(tensor(96), tensor(96))

In [None]:
# Mean
# Calling mean() directly on this int64 tensor fails with:
#   RuntimeError: mean(): could not infer output dtype.
#   Input dtype must be either a floating point or complex dtype.
#   Got: Long (i.e., int64)
# x.mean(), torch.mean(x)

# Workaround: cast to a floating-point dtype before taking the mean
x.type(torch.float16).mean(), torch.mean(x.type(torch.float16))

(tensor(48.5000, dtype=torch.float16), tensor(48.5000, dtype=torch.float16))

In [None]:
# Sum - available both as a tensor method and as a torch function
x.sum(), torch.sum(x)

(tensor(970), tensor(970))

Positional min max

In [None]:
# Index of the min element (position 0, where the value 1 sits)
x.argmin(), torch.argmin(x)

(tensor(0), tensor(0))

In [None]:
# Index of the max element (position 19, the last of the 20 elements)
x.argmax(), torch.argmax(x)

(tensor(19), tensor(19))

### Reshaping, stacking, squeezing and unsqueezing tensors
Also, viewing and permuting

In [None]:
 # Create a float tensor holding 1.0 through 10.0 (the end value 11. is excluded)
 x = torch.arange(1., 11.)
 x, x.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [None]:
# Reshape to [1, 1, 10]: two leading size-1 dimensions around the same 10 elements
x_reshaped = x.reshape(1, 1, 10)
x_reshaped, x_reshaped.shape

(tensor([[[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]]),
 torch.Size([1, 1, 10]))

In [None]:
# Change the view: z presents x's data with shape [1, 10].
# The confusing part is that z shares its data with x, so writing to z
# also changes x.
z = x.view(1,10)
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# Changing z changes x as well, because z is a view of x's data
z[0][0] = 5
z, x

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 tensor([ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]))

In [None]:
# Stack tensors along a new dimension.
# dim=1 turns each input into a column (result shape [10, 4]);
# dim=0 would turn each input into a row (shape [4, 10]), which for these
# 1-D inputs matches torch.vstack. torch.hstack is NOT the dim=1 equivalent:
# for 1-D inputs it concatenates them into a single 1-D tensor.
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[ 5.,  5.,  5.,  5.],
        [ 2.,  2.,  2.,  2.],
        [ 3.,  3.,  3.,  3.],
        [ 4.,  4.,  4.,  4.],
        [ 5.,  5.,  5.,  5.],
        [ 6.,  6.,  6.,  6.],
        [ 7.,  7.,  7.,  7.],
        [ 8.,  8.,  8.,  8.],
        [ 9.,  9.,  9.,  9.],
        [10., 10., 10., 10.]])

In [None]:
# Squeezing removes the size-1 dimensions: [1, 1, 10] -> [10]
# (method form and function form give the same result)
x_reshaped.shape, x_reshaped.squeeze().shape, torch.squeeze(x_reshaped).shape

(torch.Size([1, 1, 10]), torch.Size([10]), torch.Size([10]))

In [None]:
# Unsqueezing adds a size-1 dimension at the given position: [10] -> [1, 10]
x_squeezed = torch.squeeze(x_reshaped)
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)

x_unsqueezed, x_unsqueezed.shape

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# Rearrange the axes of an image-shaped tensor so the color channel comes first
x_original = torch.rand(size=(224, 224, 3))  # [height, width, color channel]
x_permuted = torch.permute(x_original, (2, 0, 1))  # [color, height, width]

x_original.shape, x_permuted.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))

### Indexing

In [None]:
# The integers 1-9 arranged as a single 3x3 block: shape [1, 3, 3]
x = torch.arange(1, 10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [None]:
# Index the outer dimension: the whole 3x3 block
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
# First row of the block; chained and comma-separated indexing give the same result
x[0][0], x[0,0]

(tensor([1, 2, 3]), tensor([1, 2, 3]))

In [None]:
# First element of the first row
x[0][0][0], x[0,0,0]

(tensor(1), tensor(1))

In [None]:
# Exercise - how to get the element 9?
# It sits in the last row and last column of the block
x[0][2][2], x[0,2,2]

(tensor(9), tensor(9))

In [None]:
# Exercise - how to retrieve the middle column (not row)?
# Using ':' for the first dimension keeps it (shape [1, 3]); using 0 drops it (shape [3])
x[:,:,1], x[0,:,1]

(tensor([[2, 5, 8]]), tensor([2, 5, 8]))

In [None]:
# Exercise - retrieve the middle element of the tensor
# Using ':' for the first dimension keeps it (shape [1]); using 0 gives a 0-dim tensor
x[:,1,1], x[0,1,1]

(tensor([5]), tensor(5))

### Pytorch and NumPy

1. NumPy Array -> Pytorch Tensor

In [None]:
# Build a NumPy array holding 1-10 and convert it to a tensor
array = np.arange(1, 11)
# Alternative that also casts to float32 during the conversion:
# tensor = torch.from_numpy(array).type(torch.float32)
tensor = torch.from_numpy(array)

array, tensor

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
 tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))

In [None]:
# Remember: NumPy's default float dtype is float64, whereas PyTorch's is float32.
# Warning: torch.from_numpy keeps the NumPy array's dtype. Both sides are int64
# here, because the array holds integers rather than floats.
array.dtype, tensor.dtype

(dtype('int64'), torch.int64)

2. Pytorch Tensor -> NumPy Array

In [None]:
# Build a tensor of the even numbers 2-20 and convert it to a NumPy array
tensor = torch.arange(2, 21, 2)
array = tensor.numpy()

tensor, array

(tensor([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20]),
 array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20]))

In [None]:
# .numpy() keeps the tensor's dtype: this integer tensor is int64, and so is the array
tensor.dtype, array.dtype

(torch.int64, dtype('int64'))

### Reproducibility (Random Seed)

In [None]:
# Two independent draws without setting a seed first
tensor_rand_A = torch.rand(size=(3, 4))
tensor_rand_B = torch.rand(size=(3, 4))

# Show both tensors, followed by their element-wise comparison
print(tensor_rand_A, tensor_rand_B, tensor_rand_A == tensor_rand_B, sep="\n")

tensor([[0.0555, 0.5278, 0.5116, 0.7610],
        [0.8754, 0.0563, 0.7986, 0.1151],
        [0.9662, 0.9543, 0.5386, 0.8521]])
tensor([[0.3885, 0.6286, 0.7035, 0.1071],
        [0.1911, 0.5758, 0.6678, 0.3168],
        [0.4849, 0.2660, 0.8186, 0.3985]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
# Two random tensors that come out identical on every run
RANDOM_SEED = 42

# Re-seeding before each draw restarts the generator from the same state,
# so both calls to torch.rand produce the same values
torch.manual_seed(RANDOM_SEED)
tensor_rand_A = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
tensor_rand_B = torch.rand(size=(3, 4))

# Show both tensors, followed by their element-wise comparison
print(tensor_rand_A, tensor_rand_B, tensor_rand_A == tensor_rand_B, sep="\n")

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### Setting up GPU

In [None]:
# Shell command: report the NVIDIA driver version and the attached GPU(s)
!nvidia-smi

Sun Jun  2 12:35:55 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

Checking for GPU access with Pytorch

In [None]:
# True when PyTorch can access a CUDA GPU
torch.cuda.is_available()

True

Set up device-agnostic code

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
# is_available must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
# Count the number of gpu devices
torch.cuda.device_count()

1

### Putting tensors (and models) on the GPU

In [None]:
# Create a tensor (default on CPU) and print it together with the device it lives on
tensor = torch.tensor([1,2,3])
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [None]:
# Move tensor to the selected device (the GPU, if available);
# the printed tensor then carries a device tag such as device='cuda:0'
tensor_on_gpu = tensor.to(device)
print(tensor_on_gpu)

tensor([1, 2, 3], device='cuda:0')


### Moving tensors back to the CPU

In [None]:
# TypeError: can't convert cuda:0 device type tensor to numpy.
# Use Tensor.cpu() to copy the tensor to host memory first.
# tensor_on_gpu.numpy()

In [None]:
# NumPy cannot read GPU memory: copy the tensor back to the CPU first,
# then convert it to a NumPy array
narray_on_cpu = tensor_on_gpu.cpu().numpy()
narray_on_cpu

array([1, 2, 3])

# Exercises

### 1. Documentation reading

A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness):
  * The documentation on [`torch.Tensor`](https://pytorch.org/docs/stable/tensors.html#torch-tensor).
  * The documentation on [`torch.cuda`](https://pytorch.org/docs/master/notes/cuda.html#cuda-semantics).



In [None]:
# No code solution (reading)

### 2. Create a random tensor with shape `(7, 7)`.


In [None]:
# Import torch (already imported in the first cell; repeating it is harmless)
import torch

# Create a random tensor of shape (7, 7)
random_tensor_A = torch.rand(7,7)
random_tensor_A

tensor([[0.9998, 0.5944, 0.6541, 0.0337, 0.1716, 0.3336, 0.5782],
        [0.0600, 0.2846, 0.2007, 0.5014, 0.3139, 0.4654, 0.1612],
        [0.1568, 0.2083, 0.3289, 0.1054, 0.9192, 0.4008, 0.9302],
        [0.6558, 0.0766, 0.8460, 0.3624, 0.3083, 0.0850, 0.0029],
        [0.6431, 0.3908, 0.6947, 0.0897, 0.8712, 0.1330, 0.4137],
        [0.6044, 0.7581, 0.9037, 0.9555, 0.1035, 0.6258, 0.2849],
        [0.4452, 0.1258, 0.9554, 0.1330, 0.7672, 0.6757, 0.6625]])

### 3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape `(1, 7)` (hint: you may have to transpose the second tensor).

In [None]:
# Create another random tensor, of shape (1, 7)
random_tensor_B = torch.rand(1,7)

# Perform matrix multiplication: transposing the (1, 7) tensor to (7, 1)
# makes the inner dimensions match, giving a (7, 1) result
torch.mm(random_tensor_A, random_tensor_B.T)

tensor([[1.7076],
        [1.1082],
        [1.2293],
        [1.0241],
        [1.3169],
        [2.5243],
        [1.6868]])

### 4. Set the random seed to `0` and do 2 & 3 over again.

The output should be:
```
(tensor([[1.8542],
         [1.9611],
         [2.2884],
         [3.0481],
         [1.7067],
         [2.5290],
         [1.7989]]), torch.Size([7, 1]))
```

In [None]:
# Seed the generator so the draws below are reproducible
RANDOM_SEED = 0
torch.manual_seed(RANDOM_SEED)

# Draw the (7, 7) tensor first and the (1, 7) tensor second
randtenA = torch.rand(size=(7, 7))
randtenB = torch.rand(size=(1, 7))

# (7x7) @ (7x1): transpose the second tensor so the inner dimensions match
randtenA @ randtenB.T

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

### 5. Speaking of random seeds, we saw how to set it with `torch.manual_seed()` but is there a GPU equivalent? (hint: you'll need to look into the documentation for `torch.cuda` for this one)
  * If there is, set the GPU random seed to `1234`.

In [None]:
# Set the random seed on the GPU (the torch.cuda counterpart of torch.manual_seed)
torch.cuda.manual_seed(1234)


### 6. Create two random tensors of shape `(2, 3)` and send them both to the GPU (you'll need access to a GPU for this). Set `torch.manual_seed(1234)` when creating the tensors (this doesn't have to be the GPU random seed). The output should be something like:

```
Device: cuda
(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))
```

In [None]:
# Set random seed
RANDOM_SEED = 1234
torch.manual_seed(RANDOM_SEED)

# Check for access to GPU, falling back to the CPU when none is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Create two random tensors with torch.rand, then move each one to the chosen device
tensorA = torch.rand(2,3).to(device)
tensorB = torch.rand(2,3).to(device)

print("Device:", device)
tensorA, tensorB

Device: cuda


(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))


### 7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

The output should look like:
```
(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]], device='cuda:0'), torch.Size([2, 2]))
```

In [None]:
# Perform matmul on tensorA and tensorB from exercise 6
# Transposing tensorB gives (2x3) @ (3x2) -> a 2x2 result
output = torch.mm(tensorA, tensorB.T)
output

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='cuda:0')

### 8. Find the maximum and minimum values of the output of 7.

In [None]:
# Find max (largest value anywhere in the 2x2 output)
maxnum = torch.max(output)

# Find min (smallest value anywhere in the 2x2 output)
minnum = torch.min(output)

maxnum, minnum

(tensor(0.5617, device='cuda:0'), tensor(0.3647, device='cuda:0'))

### 9. Find the maximum and minimum index values of the output of 7.

In [None]:
# Find arg max
# With no dim argument the index refers to the flattened tensor:
# for the 2x2 output, 3 is the last element (row 1, column 1)
argmax = torch.argmax(output)

# Find arg min (flattened index 0 is the first element, row 0, column 0)
argmin = torch.argmin(output)

argmax, argmin

(tensor(3, device='cuda:0'), tensor(0, device='cuda:0'))


### 10. Make a random tensor with shape `(1, 1, 1, 10)` and then create a new tensor with all the `1` dimensions removed to be left with a tensor of shape `(10)`. Set the seed to `7` when you create it and print out the first tensor and its shape as well as the second tensor and its shape.

The output should look like:

```
tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) torch.Size([10])
```

In [None]:
# Set seed so the random values match the expected output
RANDOM_SEED = 7
torch.manual_seed(RANDOM_SEED)

# Create random tensor of shape [1, 1, 1, 10]
tensor = torch.rand(1,1,1,10)

# Remove every size-1 dimension: [1, 1, 1, 10] -> [10]
newTensor = torch.squeeze(tensor)

# Print out tensors and their shapes
print(tensor, tensor.shape)
print(newTensor, newTensor.shape)

tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) torch.Size([10])
