In [1]:
import torch

In [2]:
# Display the installed PyTorch build string so readers can match the environment
torch.__version__

'2.6.0+cu124'

In [3]:
# Random tensor with three axes of sizes 5, 6 and 3; show its rank and shape
rdn_tsr = torch.rand(5, 6, 3)
(rdn_tsr.dim(), rdn_tsr.size())

(3, torch.Size([5, 6, 3]))

In [4]:
# 3-row by 6-column matrix filled with zeros
zeros = torch.zeros(size=(3, 6))
zeros

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]])

In [5]:
# 3-row by 4-column matrix filled with ones
ones = torch.ones((3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [6]:
# Every second integer starting at 1 and stopping before 10
one_ten = torch.arange(1, 10, 2)
one_ten

tensor([1, 3, 5, 7, 9])

In [7]:
# Zeros matching one_ten's shape and dtype (note: five elements, despite the name)
ten_zeros = torch.zeros_like(input=one_ten)
ten_zeros

tensor([0, 0, 0, 0, 0])

In [8]:
# The three most commonly adjusted keyword arguments of torch.tensor:
#   dtype         - element type (e.g. torch.float16, torch.float32); None lets
#                   PyTorch infer it, which yields the default float type for float data
#   device        - where the tensor's memory lives
#   requires_grad - whether operations on this tensor are tracked for gradients
float_32_tensor = torch.tensor(
    data=[3.0, 6.0, 9.0],
    dtype=None,
    device=torch.device("cpu"),
    requires_grad=False,
)
float_32_tensor.dtype

torch.float32

In [9]:
# Down-cast a copy of the float32 tensor to half precision
float_16_tensor = float_32_tensor.half()
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [10]:
# Element-wise product of a float16 and a float32 tensor; PyTorch promotes the
# result to the wider type instead of raising a dtype error
torch.mul(float_16_tensor, float_32_tensor)

tensor([ 9., 36., 81.])

In [11]:
# Up-cast the half-precision tensor to double precision (torch.double == torch.float64)
float_64_tensor = float_16_tensor.double()
float_64_tensor.dtype

torch.float64

In [12]:
# Integer data defaults to int64, so request 32-bit integers explicitly
int_32_tensor = torch.tensor(data=[3, 6, 9], dtype=torch.int32)
int_32_tensor.dtype

torch.int32

### Getting information from tensors ( tensor attributes )

1. Tensors not the right datatype - to get the datatype from a tensor, can use tensor.dtype
2. Tensors not right shape - to get shape from a tensor, can use tensor.shape
3. Tensors not on the right device - to get device from a tensor, can use tensor.device




In [13]:
# Small random 3x4 tensor used to inspect tensor attributes
some_tensor = torch.rand(size=(3, 4))
some_tensor

tensor([[0.6966, 0.9909, 0.1876, 0.1038],
        [0.6667, 0.3044, 0.9702, 0.3239],
        [0.3035, 0.0207, 0.5106, 0.5599]])

In [14]:
# Report the three attributes that most often cause errors: dtype, shape, device
print(
    f"Datatype of tensor: {some_tensor.dtype}",
    f"Shape of tensor: {some_tensor.shape}",
    f"Device of a tensor: {some_tensor.device}",
    sep="\n",
)

Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device of a tensor: cpu


### Manipulating Tensors ( operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication ( element-wise )
* Division
* Matrix Multiplication

In [15]:
# Adding a scalar applies it to every element; `tensor` itself is not modified
tensor = torch.tensor(data=[1, 2, 3])
torch.add(tensor, 10)

tensor([11, 12, 13])

In [16]:
# Element-wise multiplication by a scalar
tensor.mul(10)

tensor([10, 20, 30])

In [17]:
# Element-wise subtraction of a scalar
tensor.sub(10)

tensor([-9, -8, -7])

1. This multiplication won't work because the inner dimensions don't match

` torch.matmul(torch.rand(3, 3), torch.rand(2,3)) `

2. This one, will work because the inner dimensions match. The result shape will be the shape of the other dimensions.

`torch.matmul(torch.rand(3, 2), torch.rand(2,3)) `
* Result shape 3x3

In [18]:
# (3x5) @ (5x4): the inner dimensions (5 and 5) match, the outer ones give the 3x4 result
mu = torch.rand(3, 5) @ torch.rand(5, 4)
print(
    mu,
    f"Tensor 3x5 multiplied to tensor 5x4. Resulting in shape 3x4. Shape: {mu.shape}",
    sep="\n",
)

tensor([[0.9579, 1.4612, 1.0498, 0.6433],
        [0.7465, 1.5197, 2.2692, 1.1531],
        [1.1000, 2.1067, 1.8262, 1.1932]])
Tensor 3x5 multiplied to tensor 5x4. Resulting in shape 3x4. Shape: torch.Size([3, 4])


In [19]:
# Two 3x2 matrices: their inner dimensions (2 and 3) do not match, so they
# cannot be matrix-multiplied as they are.
tensor_A = torch.rand(size=(3, 2))
tensor_B = torch.rand(size=(3, 2))
# torch.mm(tensor_A, tensor_B)  # left commented out: raises a shape-mismatch RuntimeError
# Note: torch.mm is the strict 2-D matrix product (no broadcasting); it behaves
# like torch.matmul for 2-D inputs but is not an alias of it.

To fix tensor shape issues, we are able to manipulate the shape of one of our tensors using a **transpose**.

A **transpose** switches the axes or dimensions of a given tensor.

In [20]:
# The matrix multiplication works once tensor_A is transposed, because the
# inner dimensions of tensor_B and tensor_A.T then line up.
tensor_A_T = tensor_A.T
# Compute the product once and reuse it for both the value and its shape
# (the original evaluated torch.mm twice).
mm_result = torch.mm(tensor_B, tensor_A_T)

print(f"Original shape: {tensor_A.shape}, Transposed shape: {tensor_A_T.shape} \n")
print(f"Multiplying: {tensor_B.shape}, {tensor_A_T.shape} \n")
print(f"{mm_result} \n")
print(f"Output shape: {mm_result.shape}")

Original shape: torch.Size([3, 2]), Transposed shape: torch.Size([2, 3]) 

Multiplying: torch.Size([3, 2]), torch.Size([2, 3]) 

tensor([[1.2979, 1.7067, 0.6205],
        [0.2843, 0.4727, 0.1154],
        [0.6056, 0.8333, 0.2819]]) 

Output shape: torch.Size([3, 3])


In [31]:
# Multiples of 10 from 0 up to (not including) 100; integer inputs give an int64 tensor
x = torch.arange(start=0, end=100, step=10)
(x, x.dtype)

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [32]:
# Smallest and largest values, each returned as a zero-dimensional tensor
(x.min(), x.max())

(tensor(0), tensor(90))

In [34]:
# torch.mean only accepts floating-point (or complex) input; calling it on this
# integer (Long) tensor raises an error, so cast to float32 first.
x.to(torch.float32).mean()

tensor(45.)

In [36]:
# argmin / argmax return the position (index) of the minimum / maximum value,
# not the value itself.
(x.argmin(), x.argmax())

(tensor(0), tensor(9))

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - remove all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [36]:
import torch
# Float vector 1.0 .. 10.0 (a float start value makes the result float32)
x = torch.arange(start=1., end=11)
(x, x.size())

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [47]:
# Reshape the 1-D vector into a 2-D tensor (this adds one dimension).
# The target shape must hold exactly as many elements as the input:
# 5 x 2 = 10 matches x, whereas e.g. 3 x 4 = 12 would raise an error.
X_reshaped = torch.reshape(x, (5, 2))
X_reshaped

tensor([[ 5.,  2.],
        [ 3.,  4.],
        [ 5.,  5.],
        [ 7.,  8.],
        [ 9., 10.]])

In [48]:
# A 2x5 view of x: same underlying memory, different shape
z = x.view((2, 5))
(z, z.size())

(tensor([[ 5.,  2.,  3.,  4.,  5.],
         [ 5.,  7.,  8.,  9., 10.]]),
 torch.Size([2, 5]))

In [49]:
# Changing z changes x, because a view shares the same memory as its source tensor.
# This writes 5 into column 0 of every row of z; the change is visible in x too.
# Note: the write is in place, so x keeps these values for all later cells.
z[:, 0] = 5
z, x

(tensor([[ 5.,  2.,  3.,  4.,  5.],
         [ 5.,  7.,  8.,  9., 10.]]),
 tensor([ 5.,  2.,  3.,  4.,  5.,  5.,  7.,  8.,  9., 10.]))

In [50]:
# Stack four copies of x along a NEW dimension inserted at index 1: each copy
# becomes a column, so the result has shape (len(x), 4).
# With dim=0 the copies would instead be piled on top of each other as rows.
x_stacked = torch.stack([x] * 4, dim=1)
x_stacked

tensor([[ 5.,  5.,  5.,  5.],
        [ 2.,  2.,  2.,  2.],
        [ 3.,  3.,  3.,  3.],
        [ 4.,  4.,  4.,  4.],
        [ 5.,  5.,  5.,  5.],
        [ 5.,  5.,  5.,  5.],
        [ 7.,  7.,  7.,  7.],
        [ 8.,  8.,  8.,  8.],
        [ 9.,  9.,  9.,  9.],
        [10., 10., 10., 10.]])

In [55]:
# Set-up for the squeeze demo: give X_reshaped a leading size-1 dimension,
# which torch.squeeze() (removes all size-1 dimensions) can then strip away.
X_reshaped = X_reshaped.reshape((1, 10))
X_reshaped

tensor([[ 5.,  2.,  3.,  4.,  5.,  5.,  7.,  8.,  9., 10.]])

In [56]:
# Shape before squeezing
X_reshaped.size()

torch.Size([1, 10])

In [59]:
# Shape after squeezing: every size-1 dimension is gone
torch.squeeze(X_reshaped).size()

torch.Size([10])

In [69]:
# 4-D random tensor used for the unsqueeze / permute examples
xh = torch.rand(size=(8, 3, 2, 1))
xh.size()

torch.Size([8, 3, 2, 1])

In [74]:
# unsqueeze inserts a new size-1 dimension at the given index (here index 1)
torch.unsqueeze(xh, dim=1).size()

torch.Size([8, 1, 3, 2, 1])

In [78]:
# permute reorders dimensions: position i of the result takes the input
# dimension whose index is listed at position i (dimension indices start at 0).
torch.permute(xh, (2, 3, 1, 0)).size()

torch.Size([2, 1, 3, 8])

In [83]:
# Simulated image tensor laid out as (height, width, colour channel);
# permute(2, 0, 1) moves the channel axis to the front.
x_img = torch.rand(224, 224, 3)
print(
    f"Previous shape: {x_img.shape}",
    f"New shape: {x_img.permute(2, 0, 1).shape}",
    sep="\n",
)

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [86]:
# Write one element through the original tensor, then read the same element via
# the permuted tensor: both report the new value, so the permuted result shares
# x_img's underlying data rather than copying it.
x_img[0, 0, 0] = .9559
x_img[0, 0, 0], x_img.permute(2, 0, 1)[0, 0, 0]

(tensor(0.9559), tensor(0.9559))

## Indexing ( selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [87]:
# Integers 1..9 arranged as a single 3x3 matrix inside an outer dimension of size 1
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
(x, x.size())

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [93]:
# Index the middle bracket: first matrix, second row
x[0, 1]

tensor([4, 5, 6])

In [94]:
# Index the innermost bracket (last dimension): yields the single value 1
x[0, 0, 0]

tensor(1)

In [95]:
# Centre element of the 3x3 matrix: the value 5
x[0][1][1]

tensor(5)

In [96]:
# ":" selects everything along a dimension: here, row 0 of every matrix
x[:, 0, :]

tensor([[1, 2, 3]])

In [103]:
# Column 2 across all leading dimensions
x[..., 2]

tensor([[3, 6, 9]])

## PyTorch tensor & NumPy

NumPy is a popular scientific Python numerical computing library. And because of this, PyTorch has functionality to interact with it.

* Data in NumPy -> in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [104]:
import torch
import numpy as np

# NumPy array -> tensor; from_numpy keeps the array's dtype (float64 here)
array = np.arange(1.0, 8.0, 1.0)
tensor = torch.from_numpy(array)
(array, tensor)

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [105]:
# dtype NumPy picked for the float array
array.dtype

dtype('float64')

In [106]:
# For comparison: the dtype PyTorch picks for the same float range
torch.arange(1., 8.).dtype

torch.float32

Default datatype for `NumPy` is float64, and for PyTorch is float32.
PyTorch will reflect that datatype when converting a NumPy array to tensor.

In [107]:
# Adding 1 here allocates a brand-new array and rebinds the name `array`, so the
# tensor (still backed by the original buffer) is left untouched.
# An in-place update such as `array += 1` WOULD show up in the tensor, because
# torch.from_numpy shares memory with its source array.
array = np.add(array, 1)
(array, tensor)

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [109]:
# Tensor -> NumPy array; the array keeps the tensor's dtype (float32)
tensor = torch.ones(size=(7,))
numpy_tensor = tensor.numpy()
(tensor, numpy_tensor)

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility ( trying to take random out of random )

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.

In [110]:
# Two consecutive draws with no seed reset: the element-wise comparison shows
# whether any values happen to coincide.
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep="\n")

tensor([[0.6580, 0.8752, 0.8867, 0.2554],
        [0.8845, 0.5625, 0.0817, 0.6164],
        [0.6999, 0.0909, 0.7256, 0.8685]])
tensor([[0.7535, 0.9798, 0.0939, 0.4385],
        [0.5096, 0.9962, 0.1849, 0.1350],
        [0.7950, 0.3936, 0.1208, 0.6470]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [112]:
# Random but reproducible tensors: resetting the seed immediately before each
# draw restarts the generator, so both draws yield identical values.
random_seed = 42

torch.manual_seed(random_seed)
random_tensor_C = torch.rand(size=(3, 2))

torch.manual_seed(random_seed)
random_tensor_D = torch.rand(size=(3, 2))

print(random_tensor_C, random_tensor_D, random_tensor_D == random_tensor_C, sep="\n")

tensor([[0.8823, 0.9150],
        [0.3829, 0.9593],
        [0.3904, 0.6009]])
tensor([[0.8823, 0.9150],
        [0.3829, 0.9593],
        [0.3904, 0.6009]])
tensor([[True, True],
        [True, True],
        [True, True]])


## Running tensor and PyTorch objects on the GPUs ( and making faster computations )

GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes to make everything run smoothly.

In [1]:
!nvidia-smi

Sun May 18 15:58:20 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   61C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
 # Check for GPU access with PyTorch
import torch
# Returns a bool: True when this runtime exposes a CUDA device PyTorch can use
torch.cuda.is_available()

True

In [12]:
# Device-agnostic set-up: prefer the GPU when one is available, otherwise use the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [13]:
# Count the CUDA devices visible to PyTorch (0 when no GPU is available)
torch.cuda.device_count()

1

## Putting tensor ( and models) on the GPU

* The reason we would want tensor/models on the GPU is because using a GPU results in **faster** computations.

In [17]:
# Create the tensor explicitly on the CPU and confirm where it lives
tensor = torch.tensor([1, 2, 3], device=torch.device('cpu'))

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [21]:
# Move the tensor to the selected device (stays on the CPU if no GPU was found)
tensor_on_gpu = tensor.to(device=device)
print(tensor_on_gpu, tensor_on_gpu.device)

tensor([1, 2, 3], device='cuda:0') cuda:0


In [23]:
# NumPy cannot read CUDA memory, so calling .numpy() directly on a GPU tensor
# raises a TypeError; copy the tensor back to host memory first.
tensor_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu = tensor_on_cpu.numpy()
# Read the device from the torch tensor rather than from the NumPy array:
# ndarray.device only exists on NumPy >= 2.0 and fails on older installs.
tensor_on_cpu.device.type

'cpu'

In [24]:
# .cpu() returned a copy, so the original tensor still lives on its device
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')