In [2]:
import torch
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Print the installed PyTorch version so readers know which build produced the outputs below.
print(torch.__version__)

2.9.1+cu128


In [4]:
# scalar: a tensor that wraps a single number
scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
# number of dimensions: a scalar has none
scalar.ndim

0

In [6]:
# .item() pulls the single value out of the tensor as a plain Python int
scalar.item()

7

In [7]:
# vector: a one-dimensional tensor built from a flat list
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [8]:
# one level of brackets in the literal -> one dimension
vector.ndim

1

In [9]:
# shape lists the size of each dimension: a single dimension holding two elements
vector.shape

torch.Size([2])

In [10]:
# matrix: a two-dimensional tensor built from a list of rows
MATRIX = torch.tensor([[7, 7], [8, 8]])
MATRIX

tensor([[7, 7],
        [8, 8]])

In [11]:
# two levels of brackets in the literal -> two dimensions
MATRIX.ndim

2

In [12]:
# indexing the first dimension selects a row; index 1 is the second row
MATRIX[1]

tensor([8, 8])

In [13]:
# two rows by two columns
MATRIX.shape

torch.Size([2, 2])

In [14]:
# 3-D tensor: a single outer block that contains a 3x3 grid
tensor = torch.tensor([
    [
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
    ],
])
tensor

tensor([[[1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]]])

In [15]:
# three levels of brackets in the literal -> three dimensions
tensor.ndim

3

In [16]:
# one block of three rows by three columns
tensor.shape

torch.Size([1, 3, 3])

In [17]:
# index the outermost dimension: returns the whole 3x3 grid
tensor[0]

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])

In [18]:
# chained indexing: first block, then its second row
tensor[0][1]

tensor([2, 2, 2])

In [19]:
# indexing every dimension yields a zero-dimensional tensor holding one element
tensor[0][0][0]

tensor(1)

### Random tensors

Random tensors are important because many neural networks start with tensors full of random numbers and then adjust those numbers to better represent the data.

In [20]:
# Seed the global random number generator so this cell (and the random cells after it)
# produce the same values on every Restart & Run All.
torch.manual_seed(42)

# 3x4 tensor of values drawn uniformly from [0, 1)
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.0160, 0.2793, 0.0808, 0.4258],
        [0.5188, 0.5247, 0.2160, 0.8931],
        [0.0141, 0.4722, 0.3689, 0.4757]])

In [21]:
# random tensor with the same shape as the 3-D example earlier: one block of 3x3
rand = torch.rand(size=(1, 3, 3))
rand

tensor([[[0.2077, 0.8568, 0.4538],
         [0.8050, 0.5532, 0.1474],
         [0.3972, 0.3314, 0.6380]]])

In [22]:
# three sizes were passed to torch.rand, so the tensor has three dimensions
rand.ndim

3

In [23]:
# image-shaped random tensor laid out as (height, width, colour channels)
random_image = torch.rand(size=(224,224,3)) # height, width, colour channels (RGB)
random_image.shape, random_image.ndim

(torch.Size([224, 224, 3]), 3)

In [24]:
# tensor filled entirely with zeros (not random); torch.ones below is the all-ones counterpart
zero = torch.zeros(size=(3,4))
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [25]:
# tensor filled entirely with ones; dtype is shown alongside it (float32 here)
one = torch.ones(size=(2,3))
one, one.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

#### Creating a range of tensors and tensors-like

In [26]:
# use torch.arange(start, end): the end value is excluded, so this yields 0 through 9
torch.arange(0,10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
# every third integer starting at 0 and stopping before 100
a = torch.arange(start=0, end=100, step=3)
a

tensor([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51,
        54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99])

In [28]:
# tensors-like: build a tensor of zeros with the same shape as an existing tensor
zeros = torch.zeros_like(input=a)
zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [29]:
# Create a tensor while spelling out the three most common construction options.
# The device falls back to the CPU when no CUDA GPU is available, so the cell
# runs on any machine instead of raising on CPU-only setups.
tensor = torch.tensor([3, 4, 5, 6],
                      dtype=None,           # what datatype is the tensor (None lets PyTorch infer it from the data)
                      device="cuda" if torch.cuda.is_available() else "cpu",  # what device is the tensor on
                      requires_grad=False)  # whether to track gradients with this tensor
tensor

tensor([3, 4, 5, 6], device='cuda:0')

In [30]:
# convert to half precision (float16); the device is displayed too, and matches the source tensor's
tensor_16 = tensor.type(torch.half)
tensor_16, tensor_16.device

(tensor([3., 4., 5., 6.], device='cuda:0', dtype=torch.float16),
 device(type='cuda', index=0))

In [31]:
# multiplying the integer tensor by the float16 tensor works; the result is float16
tensor * tensor_16

tensor([ 9., 16., 25., 36.], device='cuda:0', dtype=torch.float16)

### Tensor manipulation (operations)

In [32]:
# NOTE(review): tensor1 is not used in any cell visible here — confirm whether a later cell needs it
tensor1 = torch.rand(size=(2,1))
# show the tensor next to the result of adding 10 to every element
tensor, tensor + 10

(tensor([3, 4, 5, 6], device='cuda:0'),
 tensor([13, 14, 15, 16], device='cuda:0'))

In [33]:
# element-wise multiplication by a scalar using the * operator
tensor*10

tensor([30, 40, 50, 60], device='cuda:0')

In [34]:
# function form of the same element-wise multiplication; gives the same result as tensor*10
torch.mul(tensor, 10)

tensor([30, 40, 50, 60], device='cuda:0')

In [35]:
# matrix multiplication
# NOTE: this rebinds `tensor`, replacing the tensor created in the datatypes section
# for two 1-D tensors matmul is the dot product: 1*1 + 2*2 + 3*3 = 14
tensor = torch.tensor([1,2,3])
torch.matmul(tensor, tensor)

tensor(14)

In [36]:
%%time
# `@` is the operator form of matrix multiplication; %%time reports how long the cell takes
tensor@tensor

CPU times: user 123 μs, sys: 81 μs, total: 204 μs
Wall time: 186 μs


tensor(14)

### Common errors in deep learning

In [38]:
# Two 3x2 matrices used to demonstrate the shape-mismatch error in matrix multiplication.
tensor_a = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
tensor_b = torch.tensor([
    [7, 10],
    [8, 11],
    [9, 12],
])

In [40]:
# Deliberately trigger the shape-mismatch error: both operands are 3x2, but matrix
# multiplication needs the inner dimensions to match (columns of the first == rows of the second).
# The error is caught and printed so the demonstration is visible without halting Run All.
try:
    torch.mm(tensor_a, tensor_b)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [45]:
# .T transposes the matrix: the 3x2 tensor_b becomes 2x3
tensor_b.T, tensor_b.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [44]:
# with tensor_b transposed the inner dimensions match: (3x2) @ (2x3) -> 3x3
torch.mm(tensor_a, tensor_b.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Tensor aggregation

In [55]:
# multiples of 10 from 0 up to (but not including) 100; the dtype is displayed as well
x = torch.arange(start=0, end=100, step=10)
x.dtype

torch.int64

In [52]:
# min: the function form and the method form return the same value
torch.min(x), x.min()

(tensor(0), tensor(0))

In [53]:
# largest value in the tensor
torch.max(x)

tensor(90)

In [56]:
# x is int64 (see x.dtype above); torch.mean needs a floating-point input, so cast to float32 first
torch.mean(x.type(torch.float32))

tensor(45.)

In [57]:
# find the position (index) in the tensor that holds the minimum value
x.argmin()

tensor(0)

In [58]:
# find the position (index) in the tensor that holds the maximum value
x.argmax()

tensor(9)

### Reshaping, stacking, squeezing and unsqueezing
* reshaping - reshapes the input tensor to a defined shape
* view - return a view of an input tensor of certain shape but keep the same memory
* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* squeeze - remove all `1` dimensions from a tensor
* unsqueeze - add a `1` dimension to a target tensor
* permute - return a view of the input with dimensions permuted(swapped) in a certain way

In [60]:
import torch
# float values 1.0 through 9.0 (the end value is excluded); note this rebinds `x`
x = torch.arange(start=1., end=10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [64]:
# add an outer dimension: shape (9,) -> (1, 9); the total number of elements stays the same
x_r = x.reshape(1, 9)
x_r

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
# changing z changes x because a view of a tensor shares the same memory as the original
z = x.view(1, 9)
z

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [67]:
# write through the view: set the first column of z to 9
z[:, 0] = 9
# x shows the new value in its first element because z shares x's memory
x

tensor([9., 2., 3., 4., 5., 6., 7., 8., 9.])

In [72]:
# stack three copies of x along a new dimension at dim=1: each copy becomes a column,
# giving a 9x3 result (use dim=0 to stack the copies on top of each other as rows)
x_stacked = torch.stack([x,x,x], dim=1)
x_stacked

tensor([[9., 9., 9.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [9., 9., 9.]])