<a href="https://colab.research.google.com/github/JinXianWen95/PyTorch-bootcamp/blob/main/00_pytorch_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook-wide imports.
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show which PyTorch build this runtime provides.
print(torch.__version__)

2.9.0+cu126


## Introduction to tensors

### Creating tensors
https://docs.pytorch.org/docs/stable/tensors.html

In [None]:
# Scalar: a zero-dimensional tensor wrapping a single number.
scalar = torch.tensor(7)
# Show the tensor, its number of dimensions, and the plain Python value.
print(scalar, scalar.ndim, scalar.item(), sep="\n")

tensor(7)
0
7


In [None]:
# Vector: a one-dimensional tensor (a single pair of square brackets).
vector = torch.tensor([7, 7])
# Show the tensor, its number of dimensions, and its shape.
print(vector, vector.ndim, vector.shape, sep="\n")

tensor([7, 7])
1
torch.Size([2])


In [None]:
# Matrix: a two-dimensional tensor built from a list of rows.
MATRIX = torch.tensor([[7, 8], [9, 10]])
# Show the tensor, its number of dimensions, its second row, and its shape.
print(MATRIX, MATRIX.ndim, MATRIX[1], MATRIX.shape, sep="\n")

tensor([[ 7,  8],
        [ 9, 10]])
2
tensor([ 9, 10])
torch.Size([2, 2])


In [None]:
# Tensor: three levels of nesting give three dimensions -> shape (1, 3, 3).
TENSOR = torch.tensor([[[1, 2, 3], [3, 6, 9], [2, 4, 5]]])
# Show the tensor, its number of dimensions, its shape, and the single
# 3x3 matrix stored at index 0 of the outermost dimension.
print(TENSOR, TENSOR.ndim, TENSOR.shape, TENSOR[0], sep="\n")

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])
3
torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])


### Random tensors
Neural networks usually start with tensors full of random numbers and then adjust those random numbers to better represent the data.


In [None]:
# Random tensor of shape (3, 4), values drawn uniformly from [0, 1).
random_tensor = torch.rand(size=(3, 4))
print(random_tensor, random_tensor.ndim, sep="\n")

tensor([[0.4725, 0.8306, 0.4466, 0.2159],
        [0.3931, 0.2428, 0.9567, 0.5609],
        [0.2338, 0.2898, 0.1533, 0.7384]])
2


In [None]:
# Create a random tensor with a shape similar to an image tensor:
# (height, width, colour channels RGB).
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.ndim

3

In [None]:
# Tensors filled with a constant: all zeros and all ones, both of shape (3, 4).
zeros = torch.zeros(3, 4)
ones = torch.ones(3, 4)
# Show both tensors, then the dtype that torch.ones() produced.
print(zeros, ones, ones.dtype, sep="\n")

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
torch.float32


### Creating a range of values and tensors-like

In [None]:
# torch.arange(start, end, step): values from start up to (but excluding) end.
# NOTE: despite its name, `one_to_ten` holds 13 values (1, 78, ..., 925)
# because of the step of 77.
one_to_ten = torch.arange(1, 1000, 77)
print(one_to_ten)

tensor([  1,  78, 155, 232, 309, 386, 463, 540, 617, 694, 771, 848, 925])


In [None]:
# zeros_like: a tensor of zeros with the same shape and dtype as its input
# (here the 13-element integer tensor `one_to_ten`).
ten_zeroes = torch.zeros_like(one_to_ten)
print(ten_zeroes)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


## Tensor datatypes

**Note:** Tensor datatypes are one of the three big sources of errors you'll run into with PyTorch:


1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device



In [None]:
# Float 32 tensor: the three most commonly tuned arguments of torch.tensor().
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # None = infer from the data; Python floats give float32 here
                               device=None, # where the tensor lives (e.g. cpu or gpu); None = default device
                               requires_grad=False # whether operations performed on the tensor are recorded
                               )
print(float_32_tensor)

tensor([3., 6., 9.])


In [None]:
# Convert to half precision. type() returns a new tensor, so the
# original keeps its float32 dtype (shown by the last value printed).
float_16_tensor = float_32_tensor.type(dtype=torch.float16)
print(float_16_tensor, float_32_tensor.dtype, sep="\n")

tensor([3., 6., 9.], dtype=torch.float16)
torch.float32


In [None]:
# Element-wise multiply of a float32 and a float16 tensor; this works, and the
# displayed result carries no dtype suffix, i.e. it is the default float32.
float_32_tensor * float_16_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors

In [None]:
# Create the tensor on the GPU when one is available and fall back to the CPU
# otherwise, so this cell does not crash on a CPU-only runtime.
some_tensor = torch.rand(size=(3,4), dtype=torch.float16,
                         device="cuda" if torch.cuda.is_available() else "cpu")
print(some_tensor)
# The three attributes to inspect first when debugging tensor errors:
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device of tensor is on: {some_tensor.device}")

tensor([[0.8984, 0.7603, 0.6685, 0.7490],
        [0.6924, 0.7837, 0.9116, 0.1975],
        [0.9199, 0.4771, 0.2744, 0.5679]], device='cuda:0',
       dtype=torch.float16)
Datatype of tensor: torch.float16
Shape of tensor: torch.Size([3, 4])
Device of tensor is on: cuda:0


## Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication (dot product). This needs to satisfy two rules:

  * The **inner dimensions** must match. `(2, 3) @ (2, 3)` won't work
  * The resulting matrix has the shape of the **outer dimensions**.
  `(3, 2) @ (2, 3) -> (3, 3)`



In [None]:
# Element-wise arithmetic between a tensor and a Python scalar.
# None of these operations modifies `tensor` itself.
tensor = torch.tensor([1,2,3])
print(tensor + 10)
print(tensor - 10)
print(tensor * 10)
# Function form of `tensor * 10`. It must be printed explicitly: a bare
# expression in the middle of a cell is evaluated and silently discarded.
print(torch.mul(tensor, 10))
print(tensor / 10)  # true division: integer input, floating-point result

tensor([11, 12, 13])
tensor([-9, -8, -7])
tensor([10, 20, 30])
tensor([0.1000, 0.2000, 0.3000])


In [None]:
%%time
# Dot product of the 1-D `tensor` ([1, 2, 3] from the previous cell) with
# itself, written two equivalent ways: 1*1 + 2*2 + 3*3 = 14.
print(torch.matmul(tensor, tensor))
print(tensor @ tensor)

tensor(14)
tensor(14)
CPU times: user 1.49 ms, sys: 337 µs, total: 1.83 ms
Wall time: 10.5 ms


In [None]:
%%time
# The same dot product computed by hand with a Python loop, to compare its
# timing against torch.matmul / the @ operator.
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]

print(value)

tensor(14)
CPU times: user 1.03 ms, sys: 234 µs, total: 1.27 ms
Wall time: 2.03 ms


### Shape error

In [None]:
# Two (3, 2) matrices: their inner dimensions do not match, so they cannot be
# multiplied directly; one of them has to be transposed first.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]])
print(f"tensor_A shape: {tensor_A.shape} and tensor_B shaper: {tensor_B.shape}")

# torch.mm is the plain 2-D matrix-matrix product.
# (3, 2) @ (2, 3) -> (3, 3)
print(tensor_A.mm(tensor_B.T))
# (2, 3) @ (3, 2) -> (2, 2)
print(tensor_A.T.mm(tensor_B))

tensor_A shape: torch.Size([3, 2]) and tensor_B shaper: torch.Size([3, 2])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 76, 103],
        [100, 136]])


### Finding the min, max, mean, sum etc (tensor aggregation)

In [None]:
# Integer tensor 0, 10, ..., 90 used for the aggregation examples.
x = torch.arange(start=0, end=100, step=10)
print(x, x.dtype, sep="\n")

# Aggregations in method form. mean() is not defined for integer tensors,
# so x is converted to float32 first.
print(x.min())
print(x.max())
print(x.type(torch.float32).mean())
print(x.sum())

# Positions (indices) of the smallest and largest elements.
print(x.argmin())
print(x.argmax())

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
torch.int64
tensor(0)
tensor(90)
tensor(45.)
tensor(450)
tensor(0)
tensor(9)


### Reshaping, stacking, squeezing and unsqueezing tensors


*   Reshaping - reshapes an input tensor to a defined shape
*   View - returns a view of an input tensor with a certain shape that shares the same memory as the original
*   Stack - combines multiple tensors on top of each other (vstack) or side by side (hstack)
*   Squeezing - removes all `1` dimensions from a tensor
*   Unsqueezing - adds a `1` dimension to a tensor
*   Permute - returns a view of the input with its dimensions permuted (rearranged) in a certain way





In [None]:
# Ten floats 1.0 .. 10.0 in a 1-D tensor.
t = torch.arange(1., 11.)
print(f"tensor: {t} with shape {t.shape} and dim {t.ndim}")

# reshape: same elements arranged in a new shape
t_reshaped = t.reshape(10, 1) # add an extra dimension of size 1
print(f"tensor: {t_reshaped} with shape {t_reshaped.shape} and dim {t_reshaped.ndim}")
print(f"tensor: {t_reshaped.reshape(5,2)} with shape {t_reshaped.reshape(5,2).shape}")

# view: z shares memory with t, so writing through z also changes the
# original tensor even though their shapes are different.
# NOTE: t_reshaped is printed with the same modified values in the later
# squeeze cell, i.e. it shares this memory as well.
z = t.view(2, 5)
z[:, 0] = 5 # overwrite the first column of the view (elements 0 and 5 of t)
print(f"View of tensor: {z} with shape {z.shape} and dim {z.ndim}")
print(f"Original tensor: {t} with shape {t.shape} and dim {t.ndim}")

tensor: tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]) with shape torch.Size([10]) and dim 1
tensor: tensor([[ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.]]) with shape torch.Size([10, 1]) and dim 2
tensor: tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]]) with shape torch.Size([5, 2])
View of tensor: tensor([[ 5.,  2.,  3.,  4.,  5.],
        [ 5.,  7.,  8.,  9., 10.]]) with shape torch.Size([2, 5]) and dim 2
Original tensor: tensor([ 5.,  2.,  3.,  4.,  5.,  5.,  7.,  8.,  9., 10.]) with shape torch.Size([10]) and dim 1


In [None]:
# Two 1-D tensors of ten floats each, combined along a NEW dimension
# with torch.stack.
t = torch.arange(start=1., end=11.)
s = torch.arange(start=11., end=21.)
print(f"Original tensor: {t} with shape {t.shape} and dim {t.ndim}")
print(f"Original tensor: {s} with shape {s.shape} and dim {s.ndim}")

# dim=1: the new axis is the second one -> shape (10, 2), inputs become columns.
t_stacked = torch.stack((t, s), dim=1)
print(f"t_stacked with dim 1: {t_stacked} has shape {t_stacked.shape}")

# dim=0: the new axis is the first one -> shape (2, 10), inputs become rows.
t_stacked = torch.stack((t, s), dim=0)
print(f"t_stacked with dim 0: {t_stacked} has shape {t_stacked.shape}")

# dim=-1: the new axis is the last one, which for 1-D inputs equals dim=1.
t_stacked = torch.stack((t, s), dim=-1)
print(f"t_stacked with dim -1: {t_stacked} has shape {t_stacked.shape}")

Original tensor: tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]) with shape torch.Size([10]) and dim 1
Original tensor: tensor([11., 12., 13., 14., 15., 16., 17., 18., 19., 20.]) with shape torch.Size([10]) and dim 1
t_stacked with dim 1: tensor([[ 1., 11.],
        [ 2., 12.],
        [ 3., 13.],
        [ 4., 14.],
        [ 5., 15.],
        [ 6., 16.],
        [ 7., 17.],
        [ 8., 18.],
        [ 9., 19.],
        [10., 20.]]) has shape torch.Size([10, 2])
t_stacked with dim 0: tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [11., 12., 13., 14., 15., 16., 17., 18., 19., 20.]]) has shape torch.Size([2, 10])
t_stacked with dim -1: tensor([[ 1., 11.],
        [ 2., 12.],
        [ 3., 13.],
        [ 4., 14.],
        [ 5., 15.],
        [ 6., 16.],
        [ 7., 17.],
        [ 8., 18.],
        [ 9., 19.],
        [10., 20.]]) has shape torch.Size([10, 2])


In [None]:
# squeeze: drop every dimension of size 1 -> (10, 1) becomes (10,)
print(f"original tensor: {t_reshaped} with shape {t_reshaped.shape} and dim {t_reshaped.ndim}")
t_squeezed = torch.squeeze(t_reshaped)
print(f"After squeezing: {t_squeezed} with shape {t_squeezed.shape} and dim {t_squeezed.ndim}")

# unsqueeze: insert a dimension of size 1 at position 0 -> (10,) becomes (1, 10)
t_unsqueezed = torch.unsqueeze(t_squeezed, dim=0)
print(f"After unsqueezing: {t_unsqueezed} with shape {t_unsqueezed.shape} and dim {t_unsqueezed.ndim}")

original tensor: tensor([[ 5.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 5.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.]]) with shape torch.Size([10, 1]) and dim 2
After squeezing: tensor([ 5.,  2.,  3.,  4.,  5.,  5.,  7.,  8.,  9., 10.]) with shape torch.Size([10]) and dim 1
After unsqueezing: tensor([[ 5.,  2.,  3.,  4.,  5.,  5.,  7.,  8.,  9., 10.]]) with shape torch.Size([1, 10]) and dim 2


In [None]:
# Image-like tensor laid out as [height, width, color_channels].
x_original = torch.rand(224, 224, 3)
print(f"original tensor with shape {x_original.shape} and dim {x_original.ndim}")

# Reorder the axes to [color_channels, height, width].
x_permuted = torch.permute(x_original, (2, 0, 1))
print(f"after permute with shape {x_permuted.shape} and dim {x_permuted.ndim}")

# permute returns a view: writing to the original is visible through the
# permuted tensor at the correspondingly reordered index.
x_original[0, 1, 2] = 0.
print(x_permuted[2,0,1])

original tensor with shape torch.Size([224, 224, 3]) and dim 3
after permute with shape torch.Size([3, 224, 224]) and dim 3
tensor(0.)


### Indexing (select data from tensors)

In [None]:
import torch

# Values 1..9 arranged as one 3x3 matrix: shape (1, 3, 3).
x = torch.arange(1, 10).reshape(1, 3, 3)
print(f"tensor {x} has shaper {x.shape}")

print(x[0]) # index the 0th dimension -> the 3x3 matrix
print(x[0][0]) # then the first row of that matrix
print(x[0, 0, 0]) # a single element, one index per dimension
print(x[:,0]) # ":" to select "all" of a target dimension
print(x[:, :, 1]) # get all values of 0th and 1st dimensions but only index 1 of 2nd dimension
print(x[:, 1, 1]) # get all values of 0th dimension but only the 1 index value of 1st and 2nd dimension
print(x[0, 0, :]) # same as x[0][0]
print(x[0, 2, 2]) # return 9
print(x[0, :, 2]) # return [3,6,9]

tensor tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]]) has shaper torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)
tensor([[1, 2, 3]])
tensor([[2, 5, 8]])
tensor([5])
tensor([1, 2, 3])
tensor(9)
tensor([3, 6, 9])


## PyTorch tensors & NumPy
* numpy -> torch => `torch.from_numpy(ndarray)`
* torch -> numpy => `torch.Tensor.numpy()`

In [None]:
import torch
import numpy as np

# NumPy -> PyTorch. from_numpy keeps NumPy's dtype (float64 by default here).
array = np.arange(start=1.0, stop=8.0)
tensor = torch.from_numpy(array)
print(f"{array} has dtype {array.dtype}")
print(tensor.type(dtype=torch.float32))

# An in-place write to the array is visible through the tensor, because the
# two share memory ...
array[0] = 99
# ... whereas `1 + array` builds a brand-new array, so rebinding the name
# leaves the tensor untouched.
array = 1 + array
print(array, tensor, sep="\n")

[1. 2. 3. 4. 5. 6. 7.] has dtype float64
tensor([1., 2., 3., 4., 5., 6., 7.])
[100.   3.   4.   5.   6.   7.   8.]
tensor([99.,  2.,  3.,  4.,  5.,  6.,  7.], dtype=torch.float64)


In [None]:
# PyTorch -> NumPy. numpy() keeps the tensor's dtype (float32 here).
tensor = torch.ones(size=(7,))
array = tensor.numpy()
print(tensor.type(dtype=torch.float32))
print(f"{array} has dtype {array.dtype}")

# An in-place write to the tensor is visible through the array, because the
# two share memory ...
tensor[0] = 99
# ... whereas `1 + tensor` builds a brand-new tensor, so rebinding the name
# leaves the array untouched.
tensor = 1 + tensor
print(tensor, array, sep="\n")

tensor([1., 1., 1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1. 1. 1.] has dtype float32
tensor([100.,   2.,   2.,   2.,   2.,   2.,   2.])
[99.  1.  1.  1.  1.  1.  1.]


## Reproducibility (trying to take random out of random)
To reduce the randomness in neural networks and PyTorch, there is the concept of a **random seed**.

In [None]:
# Two consecutive draws from the global random number generator: each call
# advances the generator, so there is no reason for the values to match.
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))
print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep="\n")

# With a fixed seed
print("*** Fixed seed ***")
RANDOM_SEED = 42

# Seeding resets the generator, so it has to be done again before EVERY draw
# that is meant to reproduce the same numbers.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

print(random_tensor_C, random_tensor_D, random_tensor_C == random_tensor_D, sep="\n")


tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])
*** Fixed seed ***
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs

In [None]:
### Getting a GPU, in our case it is set up in the notebook runtime.
### `!` runs a shell command; nvidia-smi lists the GPUs visible to the driver.
!nvidia-smi

### Check for GPU access from PyTorch
import torch
print(torch.cuda.is_available())

### Setup device agnostic code: https://docs.pytorch.org/docs/main/notes/cuda.html#device-agnostic-code
### `device` is "cuda" when a GPU is usable and "cpu" otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
### Also report how many CUDA devices PyTorch can see.
print(f"device: {device} with {torch.cuda.device_count()}")

Thu Dec 11 17:01:54 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# Putting tensors on the GPU, which is faster for numerical computing
tensor = torch.tensor([1,2,3], device="cpu") # default is cpu
print(tensor, tensor.device)

tensor = tensor.to(device) # move to the target `device` ("cuda" if available, else "cpu"); to() returns the moved tensor, hence the reassignment
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0') cuda:0


In [None]:
# Moving the tensor back to the cpu, because NumPy only works on cpu memory.
# Calling tensor.numpy() directly on the GPU tensor would raise an error,
# so it is first copied to the cpu with .cpu().
tensor.cpu().numpy()

## Exercises

In [None]:
# 2. Create a random tensor with shape (7, 7).
tensor1 = torch.rand(size=(7, 7))
print(tensor1)

# 3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7)
tensor2 = torch.rand(size=(1, 7))
print(tensor2)

# (7, 7) @ (1, 7) has mismatched inner dimensions, so the second tensor is
# transposed first: (7, 7) @ (7, 1) -> (7, 1).
result = torch.mm(tensor1, tensor2.T)

tensor([[0.5015, 0.8677, 0.6224, 0.6116, 0.7772, 0.6933, 0.3630],
        [0.2118, 0.7986, 0.2312, 0.1616, 0.6006, 0.8309, 0.2397],
        [0.5177, 0.3435, 0.1824, 0.7813, 0.5449, 0.6595, 0.8203],
        [0.9728, 0.9636, 0.1753, 0.5959, 0.9586, 0.4017, 0.4638],
        [0.5589, 0.5144, 0.3873, 0.7905, 0.9937, 0.3394, 0.1829],
        [0.3758, 0.3335, 0.5029, 0.3559, 0.6268, 0.2317, 0.5278],
        [0.3381, 0.5320, 0.6124, 0.3395, 0.9829, 0.4897, 0.7895]])
tensor([[0.7627, 0.1948, 0.5891, 0.1250, 0.3926, 0.5625, 0.2425]])


In [None]:
# 4. Set the random seed to 0 and do exercises 2 & 3 over again.
import torch

# Device-agnostic target: use the GPU when present, otherwise the CPU, so the
# cell does not crash on a CPU-only runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(0)  # the exercise asks for seed 0
tensor1 = torch.rand(7, 7)
tensor2 = torch.rand(1, 7)
# (7, 7) @ (7, 1) -> (7, 1); kept in its own name because tensor1/tensor2
# are reused below.
seeded_result = tensor1.mm(tensor2.T)
print(seeded_result)

# 5.Speaking of random seeds, we saw how to set it with torch.manual_seed()
# but is there a GPU equivalent? (hint: you'll need to look into the documentation
# for torch.cuda for this one). If there is, set the GPU random seed to 1234.
torch.manual_seed(1234)  # seeds the generators on all devices (CPU and CUDA)
torch.cuda.manual_seed(1234)  # the explicit GPU equivalent; silently ignored when CUDA is unavailable

# 6. Create two random tensors of shape (2, 3) and send them both to the GPU
# (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the
# tensors (this doesn't have to be the GPU random seed).
tensor1 = torch.rand(size=(2, 3), device=device)
tensor2 = torch.rand(size=(2, 3), device=device)
print(f"{tensor1} and {tensor2}")

# 7. Perform a matrix multiplication on the tensors you created in 6
# (again, you may have to adjust the shapes of one of the tensors).
# (2, 3) @ (3, 2) -> (2, 2)
tensor_result = tensor1.mm(tensor2.T)
print(tensor_result)

# 8. Find the maximum and minimum values of the output of 7.
print(f"The minimum value is: {tensor_result.min()}.\n The maximum is: {tensor_result.max()}")

# 9. Find the maximum and minimum index values of the output of 7.
# argmin()/argmax() without a dim argument index into the flattened tensor.
print(f"The index of the minimum value is: {tensor_result.argmin()}.\n The index of the maximum value i: {tensor_result.argmax()}")

tensor([[1.3299],
        [1.8111],
        [1.3228],
        [0.9725],
        [1.8285],
        [1.4806],
        [2.0489]])
tensor([[0.1272, 0.8167, 0.5440],
        [0.6601, 0.2721, 0.9737]], device='cuda:0') and tensor([[0.6208, 0.0276, 0.3255],
        [0.1114, 0.6812, 0.3608]], device='cuda:0')
tensor([[0.2786, 0.7668],
        [0.7343, 0.6102]], device='cuda:0')
the minimum value is: 0.27863210439682007.
 The maximum is: 0.7667766809463501
the index of the minimum value is: 0.
 The index of the maximum value i: 1


In [None]:
# 10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10).
# Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.
# Seed first so the random values are reproducible.
torch.manual_seed(7)
tensor = torch.rand(size=(1, 1, 1, 10))
print(f"The original tensor: {tensor} with shape: {tensor.shape}")

# Drop all dimensions of size 1: (1, 1, 1, 10) -> (10,)
tensor_result = torch.squeeze(tensor)
print(f"The second tensor: {tensor_result} with shape: {tensor_result.shape}")


The original tensor: tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) with shape: torch.Size([1, 1, 1, 10])
The second tensor: tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) with shape: torch.Size([10])
