## 00 PyTorch Fundamentals

[Documentation](https://www.learnpytorch.io/)
[Github](https://github.com/mrdbourke/pytorch-deep-learning/)

In [2]:
import torch

In [3]:
torch.__version__

'2.6.0+cu126'

## Scalar - zero dimension just single number

In [4]:

scalar = torch.tensor(7)
print(scalar)
print(scalar.item()) # only work with scalar
print(scalar.ndim) #no of dimensions

tensor(7)
7
0


# Vector

In [5]:
#Tensor takes only takes one argument so we need to pass multiple values as list and support only single data type just like array.
vector = torch.tensor([7,7])
print(vector)
print(vector.ndim)
print(vector.shape)

tensor([7, 7])
1
torch.Size([2])


In [6]:
MATRIX = torch.tensor([[7,8],
                       [9,10],
                       [11,12]])
print(MATRIX)
print(MATRIX.ndim)
print(MATRIX.shape)

tensor([[ 7,  8],
        [ 9, 10],
        [11, 12]])
2
torch.Size([3, 2])


In [7]:
MATRIX = torch.tensor([[1,2,3],
                       [4,5,6],
                       [7,8,9]])
TENSOR = torch.tensor([[[1,2,3],
                        [4,5,6],
                        [7,8,9]]])
print(TENSOR)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])


In [8]:
#Random Tensor
random_tensor = torch.rand(size=(3,4))
print(random_tensor)
print(random_tensor.dtype)

tensor([[0.6014, 0.8261, 0.1146, 0.7396],
        [0.6940, 0.0600, 0.3670, 0.4350],
        [0.9001, 0.9625, 0.0104, 0.8893]])
torch.float32


In [9]:
#Create a random tensor of size(224,224,3)
random_image_size_tensor = torch.rand(size=(3,224,224))
random_image_size_tensor.shape,random_image_size_tensor.ndim
print(random_image_size_tensor)

tensor([[[9.9344e-01, 9.2996e-01, 6.4399e-01,  ..., 4.2444e-01,
          4.1043e-01, 9.4939e-01],
         [4.2675e-01, 3.3688e-01, 5.0589e-03,  ..., 8.9129e-02,
          3.7795e-01, 9.0876e-01],
         [8.0909e-01, 6.2921e-01, 8.9517e-01,  ..., 9.2712e-01,
          8.9692e-01, 9.3149e-01],
         ...,
         [3.7555e-01, 3.6154e-01, 2.3698e-01,  ..., 7.1312e-01,
          2.4707e-01, 5.4632e-01],
         [1.9212e-01, 5.6399e-01, 2.8006e-01,  ..., 6.6688e-01,
          5.4347e-01, 2.7273e-02],
         [9.2931e-01, 7.5552e-01, 8.9210e-01,  ..., 2.0988e-01,
          8.8649e-01, 9.6090e-01]],

        [[6.9025e-01, 6.7428e-02, 9.4168e-01,  ..., 2.1554e-01,
          4.0498e-02, 7.0000e-01],
         [2.5154e-01, 4.1677e-01, 8.1003e-01,  ..., 8.3311e-01,
          4.8956e-01, 9.9231e-01],
         [6.7576e-01, 3.3368e-01, 1.0858e-01,  ..., 7.8002e-01,
          6.0651e-01, 1.0258e-01],
         ...,
         [7.5913e-01, 7.0019e-02, 8.9258e-04,  ..., 1.0186e-01,
          3.788

In [10]:
#zeros and ones
zeros = torch.zeros(size=(3,4))
zeros,zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [11]:
ones = torch.ones(size=(3,4))
ones,ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

## Torch range and Tensors like

In [12]:
# use torch.arange(), and torch.range() is depricated
zero_to_ten_deprecated = torch.range(0,10) #Note: this may return error in the future versions

#Create a range of value 0 to 10
zero_to_ten = torch.arange(start=0,end=10,step=1)
print(zero_to_ten)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


  zero_to_ten_deprecated = torch.range(0,10) #Note: this may return error in the future versions


In [13]:
#Some times you might want one tensor of a certain type with the same shape as another tensor.
#For example, a tensor of all zeros with the same shape as a previous tensor.

ten_zeros = torch.zeros_like(input=zero_to_ten)
print(ten_zeros)


tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


## Tensor datatypes


In [14]:
#dtype defaults to None, which is torch.float32 or whatever datatype is passed.
#device defaults to None, which uses the default tensor type.
#if requires_grad is True, operations performed on the tensor are recorded.
float_32_tensor = torch.tensor([3.0,6.0,9.0],dtype=None,device=None,requires_grad=False)
float_32_tensor.shape,float_32_tensor.dtype,float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [15]:
float_16_tensor = torch.tensor([3.0,6.0,9.0],dtype=torch.float16) # torch.half would also work
print(float_16_tensor.dtype)

torch.float16


In [16]:
float_32_tensor = torch.tensor([3.0,6.0,9.0],dtype=None,device="cuda",requires_grad=False)
float_16_tensor = torch.tensor([3.0,6.0,9.0],dtype=torch.float16,device="cuda",requires_grad=False)

## Getting information from tensors

In [17]:
print(float_32_tensor,float_16_tensor)
print(f"Shape of tensor: {float_32_tensor.shape}, {float_16_tensor.shape}")
print(f"Datatype of tensor: {float_32_tensor.dtype},{float_16_tensor.dtype}")
print(f"Device tensor is stored on: {float_32_tensor.device}, {float_16_tensor.device}")

tensor([3., 6., 9.], device='cuda:0') tensor([3., 6., 9.], device='cuda:0', dtype=torch.float16)
Shape of tensor: torch.Size([3]), torch.Size([3])
Datatype of tensor: torch.float32,torch.float16
Device tensor is stored on: cuda:0, cuda:0


In [18]:
#create a tensor
some_tensor = torch.rand(3,4)
#Find out details about it
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")

tensor([[0.7857, 0.9457, 0.8463, 0.3303],
        [0.2750, 0.0563, 0.3631, 0.6921],
        [0.1836, 0.6602, 0.4464, 0.0040]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Floating Point

![image.png](attachment:image.png)

In [19]:
f32  = torch.tensor([3,224,224]) #Defualt datatype
f16  = torch.tensor([3,224,224],dtype=torch.float16)
f64  = torch.tensor([3,224,224],dtype=torch.float64)
bf16 = torch.tensor([3,224,224],dtype=torch.bfloat16)

f32.dtype,f16.dtype,f64.dtype,bf16.dtype

(torch.int64, torch.float16, torch.float64, torch.bfloat16)

In [20]:
f32*f16

tensor([9.0000e+00, 5.0176e+04, 5.0176e+04], dtype=torch.float16)

#### Alternate

In [21]:
#CPU Tensor
f16  = torch.HalfTensor([3,224,224])
bf16 = torch.BFloat16Tensor([3,224,224])
f32  = torch.FloatTensor([3,224,224])
f64  = torch.DoubleTensor([3,224,224])

f16,bf16,f32,f64


(tensor([  3., 224., 224.], dtype=torch.float16),
 tensor([  3., 224., 224.], dtype=torch.bfloat16),
 tensor([  3., 224., 224.]),
 tensor([  3., 224., 224.], dtype=torch.float64))

In [22]:
#GPU Tensor
f16  = torch.cuda.HalfTensor([3,224,224])
bf16 = torch.cuda.BFloat16Tensor([3,224,224])
f32  = torch.cuda.FloatTensor([3,224,224])
f64  = torch.cuda.DoubleTensor([3,224,224])

# At first use it triggered warning to don't use this method when cuda is used and recommended the device="cuda" method.

f16,bf16,f32,f64

  f16  = torch.cuda.HalfTensor([3,224,224])


(tensor([  3., 224., 224.], device='cuda:0', dtype=torch.float16),
 tensor([  3., 224., 224.], device='cuda:0', dtype=torch.bfloat16),
 tensor([  3., 224., 224.], device='cuda:0'),
 tensor([  3., 224., 224.], device='cuda:0', dtype=torch.float64))

### Integer

In [23]:
#Signed Integer - The range is from negative to positive
i8  = torch.tensor([3,127,127],dtype=torch.int8)# its signed dtype so the range is -128 to 127
i16 = torch.tensor([3,224,224],dtype=torch.int16)
i32 = torch.tensor([3,224,224],dtype=torch.int32)
i64 = torch.tensor([3,224,224],dtype=torch.int64)

i8.dtype,i16.dtype,i32.dtype,i64.dtype

(torch.int8, torch.int16, torch.int32, torch.int64)

In [24]:
#Unsigned Integer - The range is from zero to positive or negative
ui8  = torch.tensor([3,255,255],dtype=torch.uint8)#its unsigned so the range is 0 to 255 or -255
ui16 = torch.tensor([3,224,224],dtype=torch.uint16)
ui32 = torch.tensor([3,224,224],dtype=torch.uint32)
ui64 = torch.tensor([3,224,224],dtype=torch.uint64)

ui8.dtype,ui16.dtype,ui32.dtype,ui64.dtype

(torch.uint8, torch.uint16, torch.uint32, torch.uint64)

#### Alternate

In [25]:
#CPU Tensor

#Signed
i8  = torch.CharTensor([3,127,127])
i16 = torch.ShortTensor([3,224,224])
i32 = torch.IntTensor([3,224,224])
i64 = torch.LongTensor([3,224,224])

#Unsigned
ui8 = torch.ByteTensor([255])

i8,i16,i32,i64,ui8

(tensor([  3, 127, 127], dtype=torch.int8),
 tensor([  3, 224, 224], dtype=torch.int16),
 tensor([  3, 224, 224], dtype=torch.int32),
 tensor([  3, 224, 224]),
 tensor([255], dtype=torch.uint8))

In [26]:
#GPU Tensor

#Signed
i8 = torch.cuda.CharTensor([3,127,127])
i16 = torch.cuda.ShortTensor([3,224,224])
i32 = torch.cuda.IntTensor([3,224,224])
i64 = torch.cuda.LongTensor([3,224,224])

#Unsinged
ui8 = torch.cuda.ByteTensor([3,255,255])

i8,i16,i32,i64,ui8

(tensor([  3, 127, 127], device='cuda:0', dtype=torch.int8),
 tensor([  3, 224, 224], device='cuda:0', dtype=torch.int16),
 tensor([  3, 224, 224], device='cuda:0', dtype=torch.int32),
 tensor([  3, 224, 224], device='cuda:0'),
 tensor([  3, 255, 255], device='cuda:0', dtype=torch.uint8))

### Tensor Manipulation

In [27]:
tensor = torch.tensor([1,2,3])
tensor = tensor+10
print(tensor)

tensor([11, 12, 13])


In [28]:
tensor = tensor - 10
print(tensor)

tensor([1, 2, 3])


In [29]:
tensor = tensor * 10
print(tensor)

tensor([10, 20, 30])


In [30]:
tensor = tensor ** 2
print(tensor)

tensor([100, 400, 900])


In [31]:
tensor = tensor // 2
print(tensor)

tensor([ 50, 200, 450])


In [32]:
tensor = tensor / 2
print(tensor)

tensor([ 25., 100., 225.])


In [33]:
tensor = torch.tensor([1,2,3])
tensor = torch.add(tensor,10) # unlike list methods we need to store that update in the variable
print(tensor)

tensor([11, 12, 13])


In [34]:
tensor = torch.sub(tensor,10)
print(tensor)

tensor([1, 2, 3])


In [35]:
tensor = torch.mul(tensor,20)
print(tensor)

tensor([20, 40, 60])


In [36]:
tensor = torch.pow(tensor,2)
print(tensor)

tensor([ 400, 1600, 3600])


In [37]:
tensor = torch.div(tensor,10)
print(tensor)

tensor([ 40., 160., 360.])


In [38]:
tensor = torch.floor_divide(tensor,10)
print(tensor)

tensor([ 4., 16., 36.])


### Matrix Multiplication

One of then most common operations in machine learning and deep learning algorithms(like neural networks) is matrix mulitiplication.
PyTorch implements matrix multiplication functionality in the torch.matmul() method.
The main two rules for matrix multiplication to remember are:

1. The inner dimensions must match:
- (3,2) @ (3,2) won't work
- (2,3) @ (3,2) will work
- (3,2) @ (2,3) will work

2. The resulting matrix has the shape of the outer dimensions:
- (2,3) @ (3,2) -> (2,2)
- (3,2) @ (2,3) -> (3,3)

**Note: "@" in Python is the symbol for matrix multiplication**

**Resource: You can see all of the rules for matrix multiplication using `torch.matmul()` in the Pytorch documentation.**

In [39]:
a = torch.tensor([1,2,3])
print(a.shape)

torch.Size([3])


| Operation                  | Calculation              | Code                   |
| ---------------------------| -------------------------|------------------------|
| Element-wise multiplication| `[1*1,2*2,3*3] = [1,4,9]`| `tensor * tensor`      |
| Matrix multiplication      | `[1*1+2*2+3*3] = [14]`   | `tensor.matmul(tensor)`|

In [40]:
#Element - wise matrix multiplication
print(a*a)

tensor([1, 4, 9])


In [41]:
#Matrix multiplication
torch.matmul(a,a)

tensor(14)

In [42]:
#Can also use the '@' symbol for matrix multiplication, though not recommended.
a@a

tensor(14)

You can do matrix multiplication by hand but its not recommended.

The in-built `torch.matmul()` method is faster.

In [43]:
%%time
a = [1,2,3]
b = [1,2,3]
value = 0
for i in range(len(a)):
    value += a[i]*b[i]

print(value)

a = torch.tensor([1,2,3])
b = torch.tensor([1,2,3])
# c = a @ b # Alternative Matrix Multiplication but not recommended.
c = torch.matmul(a,b)
print(c)

14
tensor(14)
CPU times: total: 0 ns
Wall time: 999 μs


In [44]:
%%time
a = torch.tensor([1,2,3])
b = torch.tensor([1,2,3])
torch.matmul(a,b)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

In [45]:
%%time
a = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[5,6],[7,8]])
torch.matmul(a,b)

CPU times: total: 0 ns
Wall time: 1e+03 μs


tensor([[19, 22],
        [43, 50]])

#### One of the most common errors is deep learning(shape errors)

Because much of deep learning is multiplying and performing operations on matrices and matrices have a strict rule about what shapes and sizes can be combined, one of the most common errors you'll run into in deep learning is shape mismatches

In [46]:
#Shapes need to be in the right way
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]], dtype=torch.float32)
tensor_B = torch.tensor([[7,10],
                         [8,11],
                         [9,12]], dtype=torch.float32)
torch.matmul(tensor_A,tensor_B)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

We can make matrix multiplication work between `tensor_A` and `tensor_B` by making their inner dimension match.

one of the ways to do this with a **transpose**(switching the dimensions of a given tensor).

You can perform transposes in PyTorch using either:

- `torch.transpose(input,dim0,dim1)` - where `input` is the desired tensor to transpose and `dim0` and `dim1` are the dimensions to be swapped.
- `tensor.T` - where `tensor` is the desired tensor to transpose.

Lets try the latter.

In [50]:
#view tensor_A and tensor_B
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [51]:
#view tensor_A and tensor_B.T
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [52]:
#The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}")
print(f"Multiplying: {tensor_A.shape}*{tensor_B.T.shape} <- innner dimensions match")
print(f"Output:\n")
output = torch.matmul(tensor_A,tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])
New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])
Multiplying: torch.Size([3, 2])*torch.Size([2, 3]) <- innner dimensions match
Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [53]:
#torch.mm is a shortcut for matmul
torch.mm(tensor_A,tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Without the transpose, the rules of matrix multiplication aren't fulfilled and we get an error like above.

You can create your own matrix multiplication visuals like this at http://matrixmultiplication.xyz/.

Note: A matrix multiplication like this is also referred to as the dot product of two matrices.

Neural networks are full of matrix multiplications and dot products.

### Finding the min, max, mean, sum, etc(tensor aggregation)

In [54]:
#Create a tensor
x = torch.arange(0,100,10)
x = x+1
print(x)

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])


In [55]:
#find the min
torch.min(x),x.min()

(tensor(1), tensor(1))

In [56]:
#find the max
torch.max(x),x.max()

(tensor(91), tensor(91))

In [57]:
#find the mean
torch.mean(x),x.mean() #typical dtype error. Some operations only possible in particular datatype.

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [58]:
#find the mean
torch.mean(x.type(torch.float32)),x.type(torch.float32).mean()

(tensor(46.), tensor(46.))

In [60]:
#find the sum
torch.sum(x),x.sum()

(tensor(460), tensor(460))

#### Find the posistional min and max

In [61]:
print(x)

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])


In [62]:
#find the index of max element
torch.argmax(x),x.argmax()

(tensor(9), tensor(9))

In [63]:
#find the index of min element
torch.argmin(x),x.argmin()

(tensor(0), tensor(0))

In [64]:
print(f"minmum element in x: {x[torch.argmin(x)].item()}")
print(f"maximum elememnt in x: {x[torch.argmax(x)].item()}")

minmum element in x: 1
maximum elememnt in x: 91


### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - Reshapes an input tensor to defined shape.
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor.
* Stacking - Combine multiple tensors on top of each other (vstack) or side by side (hstack).
* Squeeze - Removes all `1` dimensions from a tensor.
* Unsqueeze - Add a `1` dimension to a target tensor.
* Permute - Return  a view of the input with dimensions permuted (swapped) in a certain way.

These are all useful to fix tensor issues.

Why do any of these?
Because deep learning models (neural networks) are all about manipulation tensors in some way. And because of the rules of matrix multiplication, if you've got shape mismatches, you'll run into errors. These methods help you make sure the right elements of your tensors are mixing with the right elements of other tensors.

Let's try them out.

First, we'll create a tensor.

In [2]:
#Let's create a tensor
import torch
x = torch.arange(1.,10.)
x,x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [3]:
#Add an extra dimension
x_reshaped = x.reshape(2,7)# shape error
x_reshaped,x_reshaped.shape

RuntimeError: shape '[2, 7]' is invalid for input of size 9

In [4]:
x_reshaped = x.reshape(9,1) #think in factors way. Others wise get error.
x_reshaped,x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [5]:
x_reshaped = x.reshape(1,9)
x_reshaped,x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [6]:
x_reshaped = x.reshape(3,3)
x_reshaped,x_reshaped.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [7]:
#Change the view
z = x.view(1,9)
z,z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [8]:
#Changing z changes x (because a view of a tensor shares the same memory as the original input) aka mutable.
z[:,0]=5
z,x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [9]:
# Stack tensors on top of tensors
x_stacked = torch.stack([x,x,x,x],dim=1) #dim=1, dim=0
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

`torch.squeeze(input)` Squeezes `input` to remove all the dimensions with value 1. 

In [None]:
#torch.squeeze() - remove all single dimensions from a target tensor.
x_reshaped

tensor([[5., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
#torch.squeeze() - removes all single dimensions from a target tensor.
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

#Remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Previous shape: torch.Size([3, 3])

New tensor: tensor([[5., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
New shape: torch.Size([3, 3])


In [None]:
#torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim(dimension)
print(f"Previous target: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

#Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous target: tensor([[5., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Previous shape: torch.Size([3, 3])

New tensor: tensor([[[5., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]])
New shape: torch.Size([1, 3, 3])


In [None]:
x_reshaped.shape

torch.Size([3, 3])

In [None]:
x_reshaped.squeeze()

tensor([[5., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
x_reshaped.squeeze().shape

torch.Size([3, 3])

In [None]:
# torch.permute - rearranges the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224,224,3)) #[height,width,color_channels # the dimensions counted from 0
#we have 3 dimensions here 224,224,3 so it becomes 0,1,2

# Permute the original tensor to rearrange the axis (or dim) order.
x_permuted = x_original.permute(2,0,1) #shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")# [color_channels, height, width]

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [None]:
x_original[0,0,0] = 230943
print(x_original)

tensor([[[2.3094e+05, 5.0889e-01, 5.8530e-01],
         [1.7348e-01, 9.5342e-01, 7.4408e-01],
         [4.1048e-01, 2.4209e-01, 4.6233e-01],
         ...,
         [5.2864e-01, 3.7559e-01, 5.0635e-01],
         [2.6533e-02, 9.5046e-01, 8.1433e-01],
         [7.2038e-01, 3.0871e-01, 9.5890e-01]],

        [[9.1616e-01, 8.3540e-01, 6.9949e-01],
         [9.5810e-01, 7.3440e-01, 9.7432e-01],
         [3.3344e-01, 4.7178e-01, 3.9595e-01],
         ...,
         [9.8824e-01, 8.5818e-02, 7.1709e-01],
         [4.8343e-02, 5.5665e-01, 9.9558e-01],
         [6.2684e-01, 3.9626e-01, 2.6743e-01]],

        [[8.3327e-01, 6.3802e-01, 2.7520e-02],
         [5.9913e-01, 2.6958e-01, 8.4740e-01],
         [7.6398e-01, 6.7103e-01, 2.8693e-02],
         ...,
         [9.8376e-02, 3.8695e-01, 2.2484e-01],
         [9.6326e-01, 7.4155e-01, 5.6390e-01],
         [5.0211e-02, 3.1871e-01, 3.2404e-01]],

        ...,

        [[2.6600e-01, 9.9707e-01, 6.7196e-01],
         [7.4936e-01, 4.4237e-01, 1.6388e-01]

In [None]:
x_original[0,0,0],x_permuted[0,0,0] # permute use view(), and view() shares the same memory if we use view() or permute() it can affect the original data.

(tensor(230943.), tensor(230943.))

### Indexing (selecting data from tensors)
Indexing with PyTorch is similar to indexing with NumPy.

In [11]:
# Create a tensor
import torch
x = torch.arange(1,10).reshape(1,3,3)
x,x.shape


(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [12]:
#Let's index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
#Let's index on the middle bracket (dim=1)
x[0][0]


tensor([1, 2, 3])

In [None]:
#Let's index on the most inner bracket (last dimension)
x[0][0][0]

tensor(1)

In [None]:
x[0][2][2]

tensor(9)

In [None]:
print(x[0][0])
print(x[:,0])

tensor([1, 2, 3])
tensor([[1, 2, 3]])


In [None]:
#We can also use ":" to select "all" of a target dimension
x[:,0]

tensor([[1, 2, 3]])

In [None]:
#Get all values of 0th and 1st dimensions but only index 1 of 2nd dimension
x[:,:,1]

tensor([[2, 5, 8]])

In [None]:
#Get all values of the 0 dimension but only the 1 index value of 1st and 2nd dimension
x[:,1,1]

tensor([5])

In [13]:
#Get index 0 of 0th and 1st dimension and all values of 2nd dimensions
x[0,0,:]

tensor([1, 2, 3])

In [None]:
x[0,2,2],x[:,2,2]

(tensor(9), tensor([9]))

In [None]:
x[0,:,2],x[:,:,2]

(tensor([3, 6, 9]), tensor([[3, 6, 9]]))

### PyTorch tensor & NumPy
Numpy is a popular scientific Python numerical computing library and because of this, PyTorch has functionality to intract with it.
* Data in NumPy, want in PyTorch tensor -> torch.from_numpy(ndarray)
* PyTorch tensor -> NumPy -> torch.Tensro.numpy()

In [None]:
import torch
import numpy as np

array = np.arange(1.0,8.0)
#Warning: when converting from numpy -> pytorch, pytorch reflects numpy's default datatype of float64 unless specified otherwise.
tensor = torch.from_numpy(array)
array,tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
tensor = tensor.type(torch.float32)
array.dtype,tensor.dtype

(dtype('float64'), torch.float32)

In [None]:
array.dtype,tensor.dtype

(dtype('float64'), torch.float64)

In [14]:
import numpy as np
import math

x = np.linspace(-math.pi,math.pi,2000)
y = np.sin(x)

x,y

(array([-3.14159265, -3.13844949, -3.13530633, ...,  3.13530633,
         3.13844949,  3.14159265]),
 array([-1.22464680e-16, -3.14315906e-03, -6.28628707e-03, ...,
         6.28628707e-03,  3.14315906e-03,  1.22464680e-16]))

### Reproducibility(trying to take random out of random)
In short how a neural network learns:
`start with random numbers -> tensor operations -> update random numbers to try and make them of the data -> again -> again -> again->`
To reduce the randomness in the neural networks and PyTorch comes the concept of a random seed.
Essentially what the random seed does is "flavour" the randomness.

In [None]:
import torch
#Creat two random tensors
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A==random_tensor_B)

tensor([[0.3259, 0.9306, 0.8459, 0.6560],
        [0.1116, 0.9726, 0.5772, 0.4115],
        [0.0010, 0.4364, 0.2394, 0.4546]])
tensor([[0.0509, 0.6389, 0.2446, 0.3497],
        [0.4615, 0.3588, 0.4226, 0.4283],
        [0.8974, 0.5865, 0.3292, 0.9636]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


Just as we might've expected, the tensors come out with different values.

But what if we wanted to create two random tensors with the same values.

As in, the tensors would still contain random values but they would be of the same flavour.

That's where torch.manual_seed(seed) comes in, where seed is an integer (like 42 but it could be anything) that flavours the randomness.

Let's try it out by creating some more flavoured random tensors.

In [None]:
#lets make some random but reproducable tensors
#set the random seed.
Random_seed = 42
torch.manual_seed(Random_seed)
random_tensor_C = torch.rand(3,4,device="cpu")
torch.manual_seed(Random_seed)
random_tensor_D = torch.rand(3,4,device="cpu")
print(random_tensor_C)
print(random_tensor_D)
print(random_tensor_C==random_tensor_D)
#All the random numbers can reproduce by random seed use torch.manual_seed before generating random tensor.

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])



Nice!

It looks like setting the seed worked.

Resource: What we've just covered only scratches the surface of reproducibility in PyTorch. For more, on reproducibility in general and random seeds, I'd checkout:

[The PyTorch reproducibility documentation](https://pytorch.org/docs/stable/notes/randomness.html) (a good exercise would be to read through this for 10-minutes and even if you don't understand it now, being aware of it is important).
[The Wikipedia random seed page](https://en.wikipedia.org/wiki/Random_seed) (this'll give a good overview of random seeds and pseudorandomness in general).

In [None]:
!nvidia-smi

Fri Jun 27 16:04:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.80                 Driver Version: 576.80         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2050      WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   47C    P0              8W /   33W |       0MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
torch.cuda.device_count()

1

## Running tensors and PyTorch on the GPUs (and making faster computations)
GPUs = faster computation on numbers, thanks to CUDA(Nvidia hardware) + Pytorch working behind the scenes to make everything honky dory(good).

To work with GPU we have three options:

|Platform         |Iniial setup |Cost             |
|-----------------|-------------|-----------------|
|Google Colab     |Easy         |Freemium         |
|Own Dedicated GPU|Medium       |One time purchase|
|Cloud            |Medium       |Expensive        |

For PyTorch its capable of running on the GPU or CPU, its best practice to setup device agnostic code:
[Doumentaion for best practice.](https://docs.pytorch.org/docs/stable/notes/cuda.html#best-practices)

Example: Run on GPU if available, else default to CPU.

In [None]:
#Setup device for Agnostic Code(if GPU available runs on GPU else runs on CPU)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
tensor = torch.tensor([1,2,3])
tensor_on_gpu = tensor.to(device)

### Moving tensors back to the CPU

In [None]:
#If tensors on GPU, can't transform to NumPy.
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
tensors_back_on_cpu = tensor_on_gpu.cpu().numpy()
print(tensors_back_on_cpu)


[1 2 3]
