Questions: https://github.com/mrdbourke/pytorch-deep-learning/discussions
Resource notebook:

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.1.0+cu121


## Introduction to Tensors
### Creating Tensors

In [2]:
scalar = torch.tensor(7)
print(scalar)

tensor(7)


In [3]:
scalar.ndim

0

In [4]:
i=scalar.item()
print(i)

7


In [5]:
# Vector: a 1-dimensional tensor built from a flat list
vector = torch.tensor([7, 7])
print(vector.ndim)

1


In [6]:
vector.shape

torch.Size([2])

In [7]:
# Matrix
MATRIX = torch.tensor([[7,7,7],[4,4,4]])
MATRIX

tensor([[7, 7, 7],
        [4, 4, 4]])

In [8]:
print(MATRIX.shape)
print(MATRIX.ndim)

torch.Size([2, 3])
2


In [9]:
# Intentional error: MATRIX holds 6 elements, so .item() raises RuntimeError
# (see the output below). .item() only converts a single-element tensor,
# such as `scalar` above, into a plain Python number.
print(MATRIX.item())

RuntimeError: ignored

In [10]:
# Tensor
TENSOR = torch.tensor([
                        [[1,2,3],
                        [3,6,9],
                        [2,4,5]],


                        [[4,2,3],
                        [3,6,9],
                        [2,4,5]]

                        ])

TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]],

        [[4, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [11]:
TENSOR.ndim

3

In [12]:
TENSOR.shape

torch.Size([2, 3, 3])

In [13]:
TENSOR[1]

tensor([[4, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

### Random Tensor

In [14]:
random_tensors = torch.rand(1,2,2)

In [15]:
random_tensors

tensor([[[0.1682, 0.0803],
         [0.1826, 0.4550]]])

In [16]:
print(random_tensors.ndim)
print(random_tensors.shape)

3
torch.Size([1, 2, 2])


In [17]:
random_tensors = torch.rand(3,224,224)### channel, height, width
random_tensors

tensor([[[0.1105, 0.1223, 0.2312,  ..., 0.4229, 0.9196, 0.7823],
         [0.2940, 0.6078, 0.0691,  ..., 0.4252, 0.7415, 0.6430],
         [0.6386, 0.1429, 0.3640,  ..., 0.2326, 0.2654, 0.3019],
         ...,
         [0.6851, 0.7296, 0.1491,  ..., 0.9341, 0.8112, 0.2073],
         [0.7547, 0.3611, 0.7479,  ..., 0.1657, 0.8866, 0.0047],
         [0.6947, 0.6515, 0.1968,  ..., 0.2986, 0.4915, 0.5467]],

        [[0.9225, 0.7708, 0.2730,  ..., 0.9692, 0.6611, 0.5715],
         [0.1408, 0.8788, 0.1699,  ..., 0.4109, 0.5333, 0.0379],
         [0.6151, 0.2196, 0.3906,  ..., 0.5840, 0.8302, 0.3348],
         ...,
         [0.8244, 0.6308, 0.8103,  ..., 0.7403, 0.2730, 0.8033],
         [0.6825, 0.8445, 0.5012,  ..., 0.0218, 0.3235, 0.3246],
         [0.0845, 0.5986, 0.6507,  ..., 0.1680, 0.4923, 0.8761]],

        [[0.2451, 0.6083, 0.3799,  ..., 0.6898, 0.7653, 0.6495],
         [0.4556, 0.0227, 0.3143,  ..., 0.1390, 0.9919, 0.8971],
         [0.0471, 0.4002, 0.7969,  ..., 0.2715, 0.9374, 0.

In [18]:
random_tensors.shape, random_tensors.ndim

(torch.Size([3, 224, 224]), 3)

### Zeros and ones

In [19]:
zero = torch.zeros(3,4)
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [20]:
random_tensors = torch.rand(3,4)

In [21]:
zero*random_tensors ## not the dot product this is the element wise multiplication

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [22]:
ones = torch.ones(3,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
### Range

In [23]:
# NOTE: despite the name, this holds 0..9 — torch.arange excludes `end`
# (see the output below). The name is kept because later cells reference it.
one_to_ten = torch.arange(0,10)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
a = torch.arange(start = 0,end = 10, step= 2)
a

tensor([0, 2, 4, 6, 8])

In [25]:
## Creating tensor-like
ten_zeros =  torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor Datatypes


In [26]:
float_32_tensor  = torch.tensor([3.0,6.0,9.0],dtype = None)
float_32_tensor

tensor([3., 6., 9.])

In [27]:
float_32_tensor.dtype

torch.float32

In [28]:
float_32_tensor  = torch.tensor([3.0,6.0,9.0],dtype = torch.float32)
float_32_tensor.dtype

torch.float32

In [29]:
float_16_tensor  = torch.tensor([3.0,6.0,9.0],dtype = torch.float16)

In [30]:
mix = float_32_tensor*float_16_tensor
mix.dtype

torch.float32

In [31]:
mix

tensor([ 9., 36., 81.])

In [32]:
int_32 = torch.tensor([3,6,9],dtype=torch.int32)
int_32

tensor([3, 6, 9], dtype=torch.int32)

In [33]:
float_32_tensor*int_32

tensor([ 9., 36., 81.])

In [None]:
#### Getting info from Tensors
## Dtype
## shape
## device

In [34]:
some_random = torch.rand(3,4)
some_random

tensor([[0.5090, 0.0779, 0.8312, 0.5723],
        [0.2860, 0.2764, 0.1287, 0.8459],
        [0.3824, 0.6053, 0.3442, 0.5572]])

In [35]:
#dtype
print("Dtype :- ", some_random.dtype)
print("Shape :- ", some_random.shape)
print("Size :- ", some_random.size())
print("Device :- ", some_random.device)

Dtype :-  torch.float32
Shape :-  torch.Size([3, 4])
Size :-  torch.Size([3, 4])
Device :-  cpu


In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [37]:
### Manipulating Tensors
## Addition
## Subtraction
## Multiplication (ele-wise)
## Division
## Matrix Multiplication

In [38]:
tensor = torch.rand(1,3)

In [39]:
tensor

tensor([[0.5891, 0.9579, 0.5137]])

In [40]:
tensor1 = tensor+10
print(tensor1)

tensor([[10.5891, 10.9579, 10.5137]])


In [41]:
tensor10 = tensor*10
print(tensor10)

tensor([[5.8913, 9.5792, 5.1367]])


In [42]:
tensor_1 = torch.tensor([1,2,3])
tensor_2 = torch.tensor([1, 2, 3])
print(tensor_1*tensor_2)

tensor([1, 4, 9])


In [43]:
## Matrix Multiplication
# For two 1-D tensors torch.matmul returns their dot product:
# 1*1 + 2*2 + 3*3 = 14. Prefer the built-in over a hand-written Python loop.
print(torch.matmul(tensor_1,tensor_2))

tensor(14)


In [44]:
t = torch.tensor([[1, 2, 3],[4,5,6],[7,8,9]])
print(t)
print(torch.transpose(t,0,1))

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]])


In [45]:
### Shapes of Matrix
# Both operands are 3x2, so their inner dimensions do not line up as written;
# transposing the right-hand one to 2x3 gives (3x2) @ (2x3) -> 3x3.
tensor_a = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_b = torch.tensor([[7, 10], [8, 11], [9, 12]])

# torch.mm is the plain 2-D matrix product (unlike torch.matmul it does not broadcast).
torch.mm(tensor_a, tensor_b.transpose(0, 1))


tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [46]:
torch.mm(tensor_a,tensor_b.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [47]:
#### Finding the min, max, mean, sum
# Smallest and largest element of an integer tensor, via the tensor methods.
x = torch.tensor([1, 2, 3])
print(x.min())
print(x.max())

tensor(1)
tensor(3)


In [48]:
# Intentional error: x was built from Python ints in the previous cell, and
# torch.mean only accepts floating-point (or complex) dtypes, so this raises
# RuntimeError (see the output below). The next cell shows the fix.
print(torch.mean(x))

RuntimeError: ignored

In [49]:
### changing the type
# Option 1: create the tensor as float32 from the start.
x = torch.tensor([1, 2, 3], dtype=torch.float32)
print(x.mean())

# Option 2: keep the integer tensor and cast it only for the mean.
x = torch.tensor([1, 2, 3])
print(x.to(torch.float32).mean())
print(torch.mean(x.to(torch.float32)))

tensor(2.)
tensor(2.)
tensor(2.)


In [50]:
## argmax, argmin
# Positions of the largest / smallest value in a random 1x10 tensor.
x = torch.rand(1, 10)
print(x)
print(x.argmax())
print(x.argmin())

tensor([[0.2916, 0.8762, 0.9098, 0.8432, 0.2722, 0.4740, 0.3173, 0.2102, 0.4762,
         0.9235]])
tensor(9)
tensor(7)


In [51]:
x[0][0]

tensor(0.2916)

In [None]:
### Reshaping, viewing and stacking, squeezing, unsqueezing  tensors
# Reshape
# view
# stack
# squeeze
# unsqueeze
#Permute

In [52]:
x = torch.arange(1.,10.)
x,x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [53]:
x_reshaped = x.reshape(9,1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [54]:
# View - shares the same memory as the original tensor
# so x and z share the same memory
z_view = x.view(1,9)
z_view, z_view.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [55]:
x.shape,z_view.shape

(torch.Size([9]), torch.Size([1, 9]))

In [56]:
x,z_view

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]))

In [57]:
z_view[:,0] = 90.
z_view,x

(tensor([[90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]),
 tensor([90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]))

In [58]:
## stack tensor
x_stacked = torch.stack([x,x,x],dim=0)

x_stacked_1 = torch.stack([x,x,x],dim=1)
x_stacked,x,x_stacked_1

(tensor([[90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
         [90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
         [90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]),
 tensor([90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]),
 tensor([[90., 90., 90.],
         [ 2.,  2.,  2.],
         [ 3.,  3.,  3.],
         [ 4.,  4.,  4.],
         [ 5.,  5.,  5.],
         [ 6.,  6.,  6.],
         [ 7.,  7.,  7.],
         [ 8.,  8.,  8.],
         [ 9.,  9.,  9.]]))

In [59]:
x_reshaped,x_reshaped.shape

(tensor([[90.],
         [ 2.],
         [ 3.],
         [ 4.],
         [ 5.],
         [ 6.],
         [ 7.],
         [ 8.],
         [ 9.]]),
 torch.Size([9, 1]))

In [60]:
x_reshaped.squeeze(),x_reshaped.squeeze().shape #-> removes all single dimensions

(tensor([90.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]), torch.Size([9]))

In [61]:
x_reshaped.unsqueeze(dim=0),x_reshaped.unsqueeze(dim=0).shape

(tensor([[[90.],
          [ 2.],
          [ 3.],
          [ 4.],
          [ 5.],
          [ 6.],
          [ 7.],
          [ 8.],
          [ 9.]]]),
 torch.Size([1, 9, 1]))

In [62]:
# Image-like tensor laid out as [height, width, channels]
x_original = torch.rand(224, 225, 3)
# Reorder the axes to [channels, height, width]
x_permuted = torch.permute(x_original, (2, 0, 1))
x_permuted.shape

torch.Size([3, 224, 225])

In [63]:
 # Indexing

In [64]:
x  =torch.arange(1,10).reshape(1,3,3)
x,x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [65]:
#dim = 0
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [66]:
#dim=1
x[0][0]

tensor([1, 2, 3])

In [67]:
#dim=2
x[0][0][0]

tensor(1)

In [68]:
x[0][2][2]

tensor(9)

In [70]:
x[:,0:2,:]

tensor([[[1, 2, 3],
         [4, 5, 6]]])

In [71]:
x[:,:,0]

tensor([[1, 4, 7]])

In [74]:
x[:,1,1],x[0][1][1]

(tensor([5]), tensor(5))

tensor(5)

In [76]:
x[0][0][:],x[0,0,:]

(tensor([1, 2, 3]), tensor([1, 2, 3]))

In [81]:
# The two forms are NOT equivalent (see the output below):
#   x[0][:] is just x[0] again, so x[0][:][2] selects ROW 2    -> [7, 8, 9]
#   x[0,:,2] indexes every dimension at once and selects COLUMN 2 -> [3, 6, 9]
x[0][:][2],x[0,:,2]

(tensor([7, 8, 9]), tensor([3, 6, 9]))

In [82]:
#always use the x[0,:,:] approach

### Pytorch and Numpy

#### NumPy to tensor

In [84]:
import numpy as np

In [87]:
# NumPy -> tensor.
# np.arange over floats yields float64, and torch.from_numpy keeps the array's
# dtype, so tensor_64 is float64 rather than PyTorch's default float32.
array = np.arange(1.0, 8.0)
tensor_64 = torch.from_numpy(array)

# Cast explicitly when a float32 tensor is wanted.
tensor_32 = torch.from_numpy(array).type(dtype=torch.float32)

array, tensor_64, tensor_32

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 tensor([1., 2., 3., 4., 5., 6., 7.]))

In [90]:
# `array + 1` allocates a NEW array and rebinds the name `array`; tensor_64 still
# points at the original buffer, which is why it is unchanged in the output below.
# torch.from_numpy itself DOES share memory with its source array: an in-place
# update such as `array += 1` would have changed tensor_64 as well.
# NOTE: this cell is not idempotent — every re-run adds another 1 to `array`
# (the recorded output reflects three runs).
array = array + 1
array,tensor_64

(array([ 4.,  5.,  6.,  7.,  8.,  9., 10.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [102]:
tensor_random =  torch.ones(7)

In [103]:
# Tensor to numpy
numpy_tensor = tensor_random.numpy()
tensor_random, numpy_tensor

## the numpy_tensor takes the dtype of float32 which the default of torch tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [104]:
# In-place add on the tensor. numpy_tensor came from tensor_random.numpy() and
# is backed by the same memory, so it shows the new values too (see the output below).
tensor_random+=1
tensor_random,numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([2., 2., 2., 2., 2., 2., 2.], dtype=float32))

## Reproducibility (trying to take random out of random)

In [107]:

torch_not_random_A = torch.rand(3,3)
torch_not_random_B = torch.rand(3,3)
torch_not_random_A,torch_not_random_B

(tensor([[0.6993, 0.7504, 0.4892],
         [0.4369, 0.1653, 0.7441],
         [0.8943, 0.9651, 0.6108]]),
 tensor([[0.3713, 0.5961, 0.3273],
         [0.7553, 0.9432, 0.2846],
         [0.9662, 0.1398, 0.0897]]))

In [109]:
## Lets use a seed
RANDOM_SEED = 42

# Re-seeding before each draw makes both calls produce the same values.
torch.manual_seed(RANDOM_SEED)
torch_not_random_A = torch.rand(3, 3)

torch.manual_seed(RANDOM_SEED)
torch_not_random_B = torch.rand(3, 3)

print(torch.eq(torch_not_random_B, torch_not_random_A))

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])


### GPU access

In [2]:
!nvidia-smi

Wed Jan  3 03:22:02 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
torch.cuda.is_available()

True

In [4]:
# Device-agnostic setup: use the GPU when one is visible, otherwise the CPU.
# Built as a torch.device object, matching the earlier definition of `device`
# in this notebook, so both definitions have the same type.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
torch.cuda.device_count()

1

In [7]:
# No device is requested here, so the tensor is created on the CPU.
tensor_cpu = torch.tensor([1, 2, 3])
print(tensor_cpu, tensor_cpu.device)

tensor([1, 2, 3]) cpu


In [9]:
tensor_gpu = tensor_cpu.to(device)
tensor_gpu

tensor([1, 2, 3], device='cuda:0')

In [11]:
# Intentional error: tensor_gpu lives on 'cuda:0', and .numpy() cannot convert
# a tensor that is on the GPU, so this raises TypeError (see the output below).
# Copy it back with .cpu() first, as the next cell does.
tensor_gpu.numpy()

TypeError: ignored

In [12]:
tensor_gpu_cpu = tensor_gpu.cpu()
tensor_gpu_cpu.numpy()

array([1, 2, 3])

In [13]:
tensor_gpu

tensor([1, 2, 3], device='cuda:0')