In [43]:
import torch
import matplotlib.pyplot as plt
import numpy as np

In [44]:
print(torch.__version__)

2.9.1+cu128


In [45]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [46]:
scalar.ndim

0

In [47]:
scalar.item()
# tensor back as python int

7

In [48]:
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [49]:
vector.ndim

1

In [50]:
vector.shape

torch.Size([2])

In [51]:
# matrix
MATRIX = torch.tensor([[7,7],
                      [8,8]])
MATRIX

tensor([[7, 7],
        [8, 8]])

In [52]:
MATRIX.ndim

2

In [53]:
MATRIX[1]

tensor([8, 8])

In [54]:
MATRIX.shape

torch.Size([2, 2])

In [55]:
tensor = torch.tensor([[[1,1,1],
                        [2,2,2],
                        [3,3,3]]])
tensor

tensor([[[1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]]])

In [56]:
tensor.ndim

3

In [57]:
tensor.shape

torch.Size([1, 3, 3])

In [58]:
tensor[0]

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])

In [59]:
tensor[0][1]

tensor([2, 2, 2])

In [60]:
tensor[0][0][0]

tensor(1)

### random tensor

random tensors are important because neural networks start with tensors full of random numbers and then adjust those random numbers to better represent the full data

In [61]:
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.4163, 0.0602, 0.8839, 0.3916],
        [0.1110, 0.5034, 0.5621, 0.0090],
        [0.5582, 0.8812, 0.6250, 0.6653]])

In [62]:
rand = torch.rand(1,3,3)
rand

tensor([[[0.9482, 0.7561, 0.2149],
         [0.1672, 0.5665, 0.1653],
         [0.3032, 0.9105, 0.8635]]])

In [63]:
rand.ndim

3

In [64]:
random_image = torch.rand(size=(224,224,3)) # height, weight, RGB
random_image.shape, random_image.ndim

(torch.Size([224, 224, 3]), 3)

In [65]:
# zeros and ones random tensor
zero = torch.zeros(size=(3,4))
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [66]:
one = torch.ones(size=(2,3))
one, one.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

#### creating a range of tensors and tensors-like

In [67]:
# use torch.arange()
torch.arange(0,10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [68]:
a = torch.arange(start=0, end=100, step=3)
a

tensor([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51,
        54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99])

In [69]:
# tensors-like
zeros = torch.zeros_like(input=a)
zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [70]:
tensor = torch.tensor([3,4,5,6], 
                      dtype=None,           # what datatype is the tensor
                      device="cuda",        # what device is the tensor on 
                      requires_grad=False)  # whether to track gradients with this tensor
tensor

tensor([3, 4, 5, 6], device='cuda:0')

In [71]:
tensor_16 = tensor.type(torch.half)
tensor_16, tensor_16.device

(tensor([3., 4., 5., 6.], device='cuda:0', dtype=torch.float16),
 device(type='cuda', index=0))

In [72]:
tensor * tensor_16

tensor([ 9., 16., 25., 36.], device='cuda:0', dtype=torch.float16)

### Tensor manipulation (operations)

In [73]:
tensor1 = torch.rand(size=(2,1))
tensor, tensor + 10

(tensor([3, 4, 5, 6], device='cuda:0'),
 tensor([13, 14, 15, 16], device='cuda:0'))

In [74]:
tensor*10

tensor([30, 40, 50, 60], device='cuda:0')

In [75]:
torch.mul(tensor, 10)

tensor([30, 40, 50, 60], device='cuda:0')

In [76]:
# matrix multiplication
tensor = torch.tensor([1,2,3])
torch.matmul(tensor, tensor)

tensor(14)

In [77]:
%%time
tensor@tensor

CPU times: user 102 μs, sys: 64 μs, total: 166 μs
Wall time: 120 μs


tensor(14)

### Common errors in deep learning

In [78]:
tensor_a = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])
tensor_b = torch.tensor([[7,10],
                         [8,11],
                         [9,12]])

In [80]:
torch.mm(tensor_a, tensor_b)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [81]:
tensor_b.T, tensor_b.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [82]:
torch.mm(tensor_a, tensor_b.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Tensor aggregation

In [83]:
x = torch.arange(0,100,10)
x.dtype

torch.int64

In [84]:
# min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [85]:
torch.max(x)

tensor(90)

In [86]:
torch.mean(x.type(torch.float32))

tensor(45.)

In [87]:
x.argmin()
# find the position in tensor that has the min value 

tensor(0)

In [88]:
x.argmax()

tensor(9)

### Reshaping, stacking, squeezing and unsqueezing
* rehaping - reshapes the input tensor to a defined shape
* view - return a view of an input tensor of certain shape but keep the same memory
* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* squeeze - remove all `1` dimensions from a tensor
* unsqueeze - add a `1` dimension to a target tensor
* permute - return a view of the input with dimensions permuted(swapped) in a certain way

In [89]:
import torch
x = torch.arange(1.,10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [90]:
x_r = x.reshape(1, 9)
x_r

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [91]:
z = x.view(1, 9)
z

# changing z changes x because a view of tensor shares the same memory as the original

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [92]:
z[:, 0] = 9
x

tensor([9., 2., 3., 4., 5., 6., 7., 8., 9.])

In [93]:
#stack tensors on top of each other
x_stacked = torch.stack([x,x,x], dim=1)
x_stacked

tensor([[9., 9., 9.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [9., 9., 9.]])

In [96]:
# torch.squeeze 
x_r, x_r.shape

(tensor([[9., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [100]:
x_s = x_r.squeeze(), x_r.squeeze().shape
x_s

(tensor([9., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [102]:
x_s.unsqueeze(dim=0)

AttributeError: 'tuple' object has no attribute 'unsqueeze'

In [104]:
# torch.permute - rearrange the dimensions of a target tensor in a specified order
x = torch.rand(2,4,5)
x, x.size()

(tensor([[[0.8082, 0.3641, 0.3671, 0.0360, 0.8877],
          [0.0237, 0.3369, 0.5066, 0.6322, 0.8740],
          [0.6262, 0.8804, 0.2707, 0.1325, 0.7130],
          [0.9939, 0.1759, 0.8439, 0.8183, 0.2804]],
 
         [[0.0300, 0.4549, 0.0435, 0.9794, 0.4115],
          [0.2143, 0.0332, 0.8236, 0.5866, 0.8415],
          [0.9791, 0.7359, 0.3296, 0.5543, 0.0891],
          [0.7135, 0.9607, 0.5920, 0.0498, 0.0673]]]),
 torch.Size([2, 4, 5]))

In [111]:
x[0, 0, 0] = 10

In [112]:
x_p = x.permute(2, 0, 1) # shifts axis
x_p.shape, x.shape

(torch.Size([5, 2, 4]), torch.Size([2, 4, 5]))

In [113]:
x_p

tensor([[[10.0000,  0.0237,  0.6262,  0.9939],
         [ 0.0300,  0.2143,  0.9791,  0.7135]],

        [[ 0.3641,  0.3369,  0.8804,  0.1759],
         [ 0.4549,  0.0332,  0.7359,  0.9607]],

        [[ 0.3671,  0.5066,  0.2707,  0.8439],
         [ 0.0435,  0.8236,  0.3296,  0.5920]],

        [[ 0.0360,  0.6322,  0.1325,  0.8183],
         [ 0.9794,  0.5866,  0.5543,  0.0498]],

        [[ 0.8877,  0.8740,  0.7130,  0.2804],
         [ 0.4115,  0.8415,  0.0891,  0.0673]]])

### Indexing (selecting data from tensors)

In [116]:
import torch 
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [117]:
x[0], x[0,0], x[0,0,0]

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor(1))

In [119]:
x[0][2][2]

tensor(9)

### Pytorch and Numpy

In [120]:
import torch 
import numpy as np

array = np.arange(1,8)
tensor = torch.from_numpy(array)
array, tensor

(array([1, 2, 3, 4, 5, 6, 7]), tensor([1, 2, 3, 4, 5, 6, 7]))

In [121]:
array.dtype, tensor.dtype

(dtype('int64'), torch.int64)

### Reproductability (trying to take random out of random)

random seed - flavours the randomness, to reduce the randomness in NN and pytorch

In [131]:
import torch

a = torch.rand(3,4)
b = torch.rand(3,4)

a == b

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [139]:
# random but reproducable tensors
torch.manual_seed(42)
c = torch.rand(3,4)
torch.manual_seed(42)
d = torch.rand(3,4)

c == d

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

### Tensors on GPUs

In [140]:
import torch 
torch.cuda.is_available()

True

In [142]:
device="cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [143]:
torch.cuda.device_count()

1

In [144]:
tensor = torch.tensor([1,2,3])
tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [146]:
tensor_gpu = tensor.to(device)
tensor_gpu, tensor_gpu.device


(tensor([1, 2, 3], device='cuda:0'), device(type='cuda', index=0))

In [149]:
tensor_back = tensor_gpu.cpu().numpy()
tensor_back
# numpy doesnt work on gpu

array([1, 2, 3])

In [151]:
!nvidia-smi

Wed Jan  7 18:32:23 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.119.02             Driver Version: 580.119.02     CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   43C    P8              1W /   60W |     117MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------