In [1]:
import torch
torch.__version__

'2.5.0+cu121'

### Intro to tensors

In [2]:
#Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [3]:
scalar.ndim #to get the dimension of the tensor

0

In [4]:
scalar.item() #for one ele tensors only

7

In [5]:
#vectors
vector = torch.tensor([1,2])
vector

tensor([1, 2])

In [6]:
vector.ndim

1

In [7]:
vector.shape #has 2 elements so

torch.Size([2])

In [8]:
Matrix = torch.tensor([[1,2],
                       [3,4]])
Matrix

tensor([[1, 2],
        [3, 4]])

In [9]:
Matrix.ndim

2

for dimension we can calculate the number of open brackets

In [10]:
Matrix.shape #this goes outer to inner sees outer how many 2 then next inner each also has 2 so

torch.Size([2, 2])

In [12]:
tensor=torch.tensor([[[1,2,3],
                      [4,5,6],
                      [7,8,9]]])
tensor.ndim,tensor.shape #so 1st outside has 1 3 ka then each has 3 3

(3, torch.Size([1, 3, 3]))

In [13]:
random_tensor = torch.rand(size = (3,4))
random_tensor,random_tensor.dtype

(tensor([[0.3481, 0.9720, 0.1542, 0.0828],
         [0.6748, 0.5886, 0.2848, 0.7447],
         [0.1833, 0.1163, 0.4779, 0.6000]]),
 torch.float32)

In [14]:
# Create a random tensor of size (224, 224, 3) #so shape is what we pass as size
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [15]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [16]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [17]:
zero_to_ten = torch.arange(start =0,end=10,step = 2)
zero_to_ten#to get in range

tensor([0, 2, 4, 6, 8])

In [18]:
#copy as same of other tensor
ten_zeros = torch.zeros_like(zero_to_ten)
ten_zeros
#can use ones_like also for all 1

tensor([0, 0, 0, 0, 0])

### tensor dtypes

In [19]:
# Default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # defaults to None, which is torch.float32 or whatever datatype is passed
                               device=None, # defaults to None, which uses the default tensor type
                               requires_grad=False) # if True, operations performed on the tensor are recorded

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [20]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float16) # torch.half would also work
#pytorch want tensors to be of same and on same device when doing calculations
float_16_tensor.dtype

torch.float16

### getting info from tensors

In [21]:
# Create a tensor
some_tensor = torch.rand(3, 4)

# Find out details about it
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

tensor([[0.5684, 0.7568, 0.3058, 0.0906],
        [0.0347, 0.6842, 0.8186, 0.2700],
        [0.7353, 0.0257, 0.5322, 0.7740]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### when error comes mainly in this only device,datatype and shape
###  so do "what, what, where":
### "what shape are my tensors? what datatype are they and where are they stored? what shape, what datatype, where where where"

### Operations on Tensors

In [23]:
tensor = torch.tensor([1,2,3])
tensor+10

tensor([11, 12, 13])

In [24]:
tensor*10

tensor([10, 20, 30])

In [25]:
#even after this tensor wont change until reassigned
tensor

tensor([1, 2, 3])

In [26]:
# Subtract and reassign
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [27]:
# Add and reassign
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [28]:
# Can also use torch functions
torch.multiply(tensor, 10)
#still remains same
tensor

tensor([1, 2, 3])

In [29]:
# Element-wise multiplication (each element multiplies its equivalent, index 0->0, 1->1, 2->2)
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


### Matrix Multiplication is the main in dl

In [30]:
# The inner dimensions must match:
# (3, 2) @ (2, 3) will work and @is used for matrix multiplication
# he resulting matrix has the shape of the outer dimensions:
# (2, 3) @ (3, 2) -> (2, 2)
#use torch.matmul()

In [31]:
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

In [32]:
tensor * tensor
torch.matmul(tensor,tensor)
tensor @ tensor
#all give same

tensor(14)

In [33]:
#can do by hand
%%time
# Matrix multiplication by hand
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
for i in range(len(tensor)):#use of for loop
  value += tensor[i] * tensor[i]
value

CPU times: user 1.21 ms, sys: 13 µs, total: 1.22 ms
Wall time: 1.29 ms


tensor(14)

In [34]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 520 µs, sys: 0 ns, total: 520 µs
Wall time: 453 µs


tensor(14)

In [35]:
#common error is shape mismatch
# Shapes need to be in the right way
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

torch.matmul(tensor_A, tensor_B) # (this will error)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [36]:
#so we can do is transpose
tensor_B.T#one way

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

In [37]:
#or
torch.transpose(tensor_B,0,1)#dim 0 and 1 to be swapped

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

In [38]:
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [39]:
# torch.mm is a shortcut for matmul
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [40]:
#matrix mul like this is also called dot product between two matrices
#use of torch.nn.Linear() where basically intenrnal the normal W.T * x +b is done
x= tensor_A
linear = torch.nn.Linear(in_features=2,out_features=6) #since x is 3,2 we need here 2 and take this as the input layer 2 and output layer 6
output = linear(x) #so this will do the matrix mul inside
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[-1.3376,  1.4144,  1.1985, -0.9805, -0.0777,  0.3219],
        [-3.3926,  2.5973,  2.5854, -2.0223, -0.0588,  1.8675],
        [-5.4476,  3.7801,  3.9723, -3.0641, -0.0400,  3.4132]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


### max,min,agg etc

In [2]:
import torch
x= torch.arange(0,100,10)
x,x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [3]:
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [4]:
#we can do the abpve ones with torch methods also
torch.min(x),torch.max(x),torch.sum(x),torch.mean(x.type(torch.float32))

(tensor(0), tensor(90), tensor(450), tensor(45.))

In [5]:
# Create a tensor
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# Returns index of max and min values mainly used when in softmax activation function
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


### Changing Datatypes of tensors

In [7]:
tensor=  torch.tensor([1, 2, 3])
tensor.dtype

torch.int64

In [8]:
tensor_float64 = tensor.type(torch.float64)
tensor_float64

tensor([1., 2., 3.], dtype=torch.float64)

In [9]:
# Create a float16 tensor
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([1., 2., 3.], dtype=torch.float16)

In [10]:
# Create an int8 tensor
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([1, 2, 3], dtype=torch.int8)

In [11]:
#lower the number less precise and faster to compute but lesser accuracy

### Reshape,stack,squeeze,unsqueeze

In [12]:
# Create a tensor
import torch
x = torch.arange(1., 8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [13]:
# Add an extra dimension so what is passed as shape so always think from outside to inside
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [14]:
# Change view (keeps same data as original but changes view)
# See more: https://stackoverflow.com/a/54507446/7900723
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [15]:
'''
Remember though, changing the view of a tensor with torch.view() really only creates a new view of the same tensor.

So changing the view changes the original tensor too
'''
# Changing z changes x
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [16]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=0) # try changing dim to dim=1 and see what happens
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

In [17]:
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.]])

In [18]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")
# so squeeze removes all the dimensions with 1,remember this as squeezing the tensor to only have dimensions over 1

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [21]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

## Add an extra dimension with unsqueeze so reverse of sequeeze where we add 1 at index dim
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [22]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")
#if dim =1 then it will add a 1 at the end
## Add an extra dimension with unsqueeze so reverse of sequeeze where we add 1 at index dim
x_unsqueezed = x_squeezed.unsqueeze(dim=1)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.]])
New shape: torch.Size([7, 1])


In [23]:
# Create tensor with specific shape
x_original = torch.rand(size=(224, 224, 3))

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0
#so it will rearrange the axis order to what as we passed like 0th index to 1th in next
print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")
'''
Because permuting returns a view (shares the same data as the original),
the values in the permuted tensor will be the same as the original tensor
and if you change the values in the view, it will change the values of the
original
'''

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


'\nBecause permuting returns a view (shares the same data as the original), \nthe values in the permuted tensor will be the same as the original tensor \nand if you change the values in the view, it will change the values of the \noriginal\n'

### Indexing

In [24]:
# Create a tensor
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [25]:
# Indexing values goes outer dimension -> inner dimension (check out the square brackets).
# Let's index bracket by bracket
print(f"First square bracket:\n{x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [26]:
#can use like this also
x[:,0] #so 1st dim and 0th index

tensor([[1, 2, 3]])

In [27]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
x[:,:,1] #so 1st and 2nd dim and 1st index

tensor([[2, 5, 8]])

In [28]:
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]
#so always go by outer to inner and L to R in indexing

tensor([5])

In [29]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

### PyTorch and Numpy

In [33]:
#main ones are numpy -> tensor and reverse
# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
tensor,array


(tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 array([1., 2., 3., 4., 5., 6., 7.]))

In [34]:

'''
By default, NumPy arrays are created with the datatype float64 and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32)
'''

"\nBy default, NumPy arrays are created with the datatype float64 and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).\n\nHowever, many PyTorch calculations default to using float32.\n\nSo if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32)\n"

In [35]:
#so once done array change no change in tensor
# Change the array, keep the tensor
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [36]:
# Tensor to NumPy array
tensor = torch.ones(7) # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy() # will be dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [37]:
# Change the tensor, keep the array the same
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility

In [39]:
import torch

# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B #one way where we use and each ka check we do
# sometimes we want to get good random numbers(like similar where we can use)

Tensor A:
tensor([[0.2211, 0.2778, 0.0477, 0.8467],
        [0.3704, 0.6770, 0.6430, 0.3753],
        [0.2223, 0.5986, 0.9784, 0.0746]])

Tensor B:
tensor([[0.1657, 0.0764, 0.9565, 0.0707],
        [0.3400, 0.7942, 0.9154, 0.7104],
        [0.9782, 0.8565, 0.5887, 0.6970]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [40]:
'''
ust as you might've expected, the tensors come out with different values.

But what if you wanted to create two random tensors with the same values.

As in, the tensors would still contain random values but they would be of the same flavour.

That's where torch.manual_seed(seed) comes in, where seed is an integer (like 42 but it could be anything) that flavours the randomness.

This is useful for when you want to create a set of identical random numbers in a specific order.
'''
import torch
import random
#set the random seed
RANDOM_SEED = 43
torch.random.manual_seed(RANDOM_SEED)

random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called (IMP)
# Without this, tensor_D would be different to tensor_C
torch.random.manual_seed(seed=RANDOM_SEED) # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D


Tensor C:
tensor([[0.4540, 0.1965, 0.9210, 0.3462],
        [0.1481, 0.0858, 0.5909, 0.0659],
        [0.7476, 0.6253, 0.9392, 0.1338]])

Tensor D:
tensor([[0.4540, 0.1965, 0.9210, 0.3462],
        [0.1481, 0.0858, 0.5909, 0.0659],
        [0.7476, 0.6253, 0.9392, 0.1338]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

### Running tensors on GPU and faster computations

In [41]:
!nvidia-smi # ! means run this on command line

Mon Oct 28 16:27:15 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [42]:
#to see if pytorch has access to gpu
torch.cuda.is_available()

True

In [43]:
#so if want to setup the code o gpu whenever available
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [44]:
#it is always better to write device agnostic code. This means code that'll run on CPU (always available) or GPU (if available).

In [45]:
#count number of gpu
torch.cuda.device_count()

1

In [46]:
# Create tensor (default on CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor not on GPU
print(tensor, tensor.device)

# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

#so the point is here we have two tensors one on cpu and one on gpu
#better is like reassign like tensor = tensor.to(device)

tensor([1, 2, 3]) cpu


tensor([1, 2, 3], device='cuda:0')

In [47]:
'''
Notice the second tensor has device='cuda:0', this means it's stored on the 0th GPU available (GPUs are 0 indexed, if two GPUs were available, they'd be 'cuda:0' and 'cuda:1' respectively, up to 'cuda:n').
'''

"\nNotice the second tensor has device='cuda:0', this means it's stored on the 0th GPU available (GPUs are 0 indexed, if two GPUs were available, they'd be 'cuda:0' and 'cuda:1' respectively, up to 'cuda:n').\n"

In [None]:
# Moving back to CPU -> why? if we sometimes want work with numpy(it doesnt have GPU)
tensor.numpy() #error as tensor is now on gpu

In [48]:
# Instead, copy the tensor back to cpu
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()#so to cpy then to numpy
tensor_back_on_cpu

array([1, 2, 3])

In [49]:
tensor_on_gpu#as we dint reassign

tensor([1, 2, 3], device='cuda:0')

In [50]:
# Set random seed on the GPU
torch.cuda.manual_seed(1234)