In [1]:
import torch

In [2]:
# Report the installed torch build and whether a CUDA runtime is usable.
print(
    f"Torch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    f"CUDA version: {torch.version.cuda}",
    sep="\n",
)

Torch version: 2.5.0+cu124
CUDA available: True
CUDA version: 12.4


# Tensors

A tensor is a generalization of vectors and matrices to any number of dimensions; it is most easily understood as a multidimensional array.

### Scalar (0D Tensor)

In [3]:
# A 0-D tensor wraps a single number; a Python float is stored as float32 by default.
scalar = torch.tensor(3.14159)

scalar

tensor(3.1416)

In [4]:
# ndim is the number of dimensions; a scalar has none.
scalar.ndim

0

In [5]:
# item() returns the value as a plain Python number; the extra digits are float32 rounding.
scalar.item()

3.141590118408203

### Vector (1D Tensor)

In [6]:
# A 1-D tensor built from a flat list of Python ints (dtype is inferred as int64).
vector = torch.tensor([1, 2, 3, 4, 5])

vector

tensor([1, 2, 3, 4, 5])

In [7]:
# One level of list nesting -> one dimension.
vector.ndim

1

In [8]:
# shape lists the size of every dimension: a single axis holding 5 elements.
vector.shape

torch.Size([5])

### Matrix (2D Tensor)

In [9]:
# Each inner list is one row: 3 rows by 2 columns.
matrix = torch.tensor([[1, 2], [4, 5], [7, 8]])

matrix

tensor([[1, 2],
        [4, 5],
        [7, 8]])

In [10]:
# Two levels of list nesting -> two dimensions.
matrix.ndim

2

In [11]:
# (rows, columns)
matrix.shape

torch.Size([3, 2])

### Tensor (3D or higher)

In [12]:
# Two 2x2 matrices nested in an outer list form a 3-D tensor.
tensor = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

tensor

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [13]:
# Three levels of list nesting -> three dimensions.
tensor.ndim

3

In [14]:
# (number of matrices, rows per matrix, columns per matrix)
tensor.shape

torch.Size([2, 2, 2])

#### Random Tensors

In [15]:
# torch.rand samples uniformly from [0, 1). No seed is set in this notebook, so the
# values change on every run; call torch.manual_seed(<int>) first for repeatable output.
random_tensor = torch.rand(3, 4)

random_tensor

tensor([[0.6128, 0.5262, 0.9460, 0.3918],
        [0.4268, 0.3392, 0.7706, 0.8523],
        [0.7983, 0.6527, 0.4114, 0.8266]])

In [16]:
# The positional arguments of torch.rand become the shape.
random_tensor.shape

torch.Size([3, 4])

In [17]:
# Rebinds random_tensor to a new 3-D sample; the earlier 3x4 tensor is no longer
# reachable through this name.
random_tensor = torch.rand(2, 2, 2)

random_tensor

tensor([[[0.3893, 0.4623],
         [0.6839, 0.5665]],

        [[0.1410, 0.6728],
         [0.4812, 0.6518]]])

In [18]:
# Three sizes were passed, so the tensor has three dimensions.
random_tensor.shape

torch.Size([2, 2, 2])

In [19]:
# Channels-first layout: the channel axis comes before the two spatial axes.
# The shape may also be passed as a tuple through the size= keyword.
random_image_size_tensor = torch.rand(size=(3, 224, 224)) # 3 channels, 224x224 image

random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

#### Zeros and Ones Tensors

In [20]:
# A 3x4 tensor filled with 0.0; size= takes the shape as a tuple.
zeros_tensor = torch.zeros(size=(3, 4))

zeros_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [21]:
# With no dtype given, factory functions use torch's default dtype (float32 here).
zeros_tensor.dtype

torch.float32

In [22]:
# A 3x4 tensor filled with 1.0.
ones_tensor = torch.ones(size=(3, 4))

ones_tensor

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [23]:
# Same default dtype as torch.zeros.
ones_tensor.dtype

torch.float32

#### Ranges and `*_like` Tensors

In [24]:
# arange excludes the end value, so this yields 0, 2, 4, 6, 8.
five_numbers = torch.arange(start=0, end=10, step=2)

five_numbers

tensor([0, 2, 4, 6, 8])

In [25]:
# zeros_like copies the shape and dtype of its argument, hence integer zeros here.
five_zeros = torch.zeros_like(five_numbers)

five_zeros

tensor([0, 0, 0, 0, 0])

### Tensor Data Types

In [26]:
# dtype=None lets torch infer the dtype from the data: Python ints become int64.
int_64_tensor = torch.tensor([1, 2, 3], dtype=None)

int_64_tensor.dtype

torch.int64

In [27]:
# An explicit dtype overrides the inference.
int_8_tensor = torch.tensor([1, 2, 3], dtype=torch.int8)

int_8_tensor.dtype

torch.int8

In [28]:
# With dtype=None, Python floats become the default floating-point dtype (float32).
float_32_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=None)

float_32_tensor.dtype

torch.float32

In [29]:
# float16 (half precision) uses 16 bits per element instead of 32, trading precision for memory.
float_16_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float16)

float_16_tensor.dtype

torch.float16

In [30]:
# The three most commonly tuned keyword arguments of torch.tensor, shown with
# their default values. None does not name a dtype or device; it means
# "infer it" / "use the current default".
float_32_tensor = torch.tensor(
    [1.0, 2.0, 3.0],
    dtype=None,         # None -> inferred from the data (float32 for Python floats)
    device=None,        # None -> current default device (CPU unless changed)
    requires_grad=False # no gradient tracking
)

float_32_tensor.dtype

torch.float32

In [31]:
# .to(dtype=...) returns a converted tensor; float_32_tensor itself stays float32.
# Note that this rebinds int_64_tensor, which was defined earlier.
int_64_tensor = float_32_tensor.to(dtype=torch.int64)

int_64_tensor

tensor([1, 2, 3])

In [32]:
# Mixed dtypes are promoted automatically: float32 + int64 gives float32.
result = float_32_tensor + int_64_tensor

result, result.dtype

(tensor([2., 4., 6.]), torch.float32)

### Getting Information from Tensors

In [33]:
# A fresh random 3x4 tensor whose attributes are inspected in the following cells.
some_tensor = torch.rand(3, 4)

some_tensor

tensor([[0.8309, 0.3000, 0.2769, 0.2634],
        [0.9806, 0.6923, 0.1045, 0.3810],
        [0.5922, 0.6483, 0.1228, 0.9988]])

In [34]:
# Element type of the tensor.
some_tensor.dtype

torch.float32

In [35]:
# Size of each dimension.
some_tensor.shape

torch.Size([3, 4])

In [36]:
# Where the data lives; tensors are created on the CPU unless a device is given
# or the default device has been changed.
some_tensor.device

device(type='cpu')

### Tensor Operations

In [37]:
# Rebinds tensor (previously the 3-D example) to a small 1-D integer tensor.
tensor = torch.tensor([1, 2, 3])

tensor

tensor([1, 2, 3])

In [38]:
# The scalar is applied to every element; the operator and torch.add give the same result.
tensor + 10, torch.add(tensor, 10)

(tensor([11, 12, 13]), tensor([11, 12, 13]))

In [39]:
# Element-wise multiplication by a scalar; "*" and torch.mul are equivalent.
tensor * 10, torch.mul(tensor, 10)

(tensor([10, 20, 30]), tensor([10, 20, 30]))

In [40]:
# Element-wise subtraction of a scalar; "-" and torch.sub are equivalent.
tensor - 10, torch.sub(tensor, 10)

(tensor([-9, -8, -7]), tensor([-9, -8, -7]))

In [41]:
# "/" is true division, so the integer tensor yields a floating-point result.
tensor / 10, torch.div(tensor, 10)

(tensor([0.1000, 0.2000, 0.3000]), tensor([0.1000, 0.2000, 0.3000]))

In [42]:
# Two equal-length integer vectors; each operator below works element by element.
tensor1, tensor2 = torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])

print(
    f"Addition: {tensor1 + tensor2}",
    f"Subtraction: {tensor1 - tensor2}",
    f"Multiplication: {tensor1 * tensor2}",
    f"Division: {tensor1 / tensor2}",
    sep="\n",
)

Addition: tensor([5, 7, 9])
Subtraction: tensor([-3, -3, -3])
Multiplication: tensor([ 4, 10, 18])
Division: tensor([0.2500, 0.4000, 0.5000])


In [43]:
# For two 1-D tensors, matmul is the dot product: 1*4 + 2*5 + 3*6 = 32.
torch.matmul(tensor1, tensor2)

tensor(32)

In [44]:
# The same values as tensor2, arranged as a 3x1 column.
tensor3 = torch.tensor([[4], [5], [6]])

tensor3

tensor([[4],
        [5],
        [6]])

In [45]:
# A 1-D tensor times a (3, 1) matrix is treated as a row vector, so the result has shape (1,).
torch.matmul(tensor1, tensor3)

tensor([32])

In [46]:
%%time

# Hand-written dot product in a Python loop, timed for comparison with torch.dot.
# value starts as a Python int and becomes a 0-D tensor after the first addition.
value = 0

for i in range(len(tensor1)):
    value += tensor1[i] * tensor2[i]

value

CPU times: user 1.32 ms, sys: 918 μs, total: 2.24 ms
Wall time: 1.62 ms


tensor(32)

In [47]:
%%time

# torch.dot computes the dot product of two 1-D tensors in a single call.
# NOTE(review): one %%time run on three elements is a noisy measurement;
# %%timeit averages many runs and gives a fairer comparison.
torch.dot(tensor1, tensor2)

CPU times: user 247 μs, sys: 171 μs, total: 418 μs
Wall time: 324 μs


tensor(32)

In [48]:
# (2x2) @ (2x2): each output entry is a row of matrix1 dotted with a column of
# matrix2, e.g. 1*5 + 2*7 = 19.
matrix1 = torch.tensor([[1, 2], [3, 4]])
matrix2 = torch.tensor([[5, 6], [7, 8]])

torch.matmul(matrix1, matrix2)

tensor([[19, 22],
        [43, 50]])

In [49]:
# torch.mm is the strictly 2-D matrix product (it does not broadcast).
torch.mm(matrix1, matrix2)

tensor([[19, 22],
        [43, 50]])

In [50]:
# The @ operator is shorthand for torch.matmul.
matrix1 @ matrix2

tensor([[19, 22],
        [43, 50]])

In [51]:
# "*" multiplies element by element; it is not the matrix product.
matrix1 * matrix2

tensor([[ 5, 12],
        [21, 32]])

In [52]:
# Rebinds matrix1 and matrix2 to two 3x2 matrices.
matrix1 = torch.tensor([[1, 2], [3, 4], [5, 6]])
matrix2 = torch.tensor([[7, 8], [9, 10], [11, 12]])

# torch.mm(matrix1, matrix2) -> Error because of shape mismatch (3x2 * 3x2)
# The inner dimensions must match, so transpose matrix2: (3x2) @ (2x3) -> (3x3).
torch.mm(matrix1, matrix2.T)

tensor([[ 23,  29,  35],
        [ 53,  67,  81],
        [ 83, 105, 127]])

### Tensor Statistics

In [53]:
# Rebinds tensor to a random 2x3x4 sample used by the reduction cells that follow.
tensor = torch.rand(2, 3, 4)

tensor

tensor([[[0.5987, 0.8626, 0.4945, 0.1179],
         [0.1738, 0.8444, 0.8929, 0.2670],
         [0.8834, 0.9407, 0.6459, 0.2209]],

        [[0.7268, 0.5808, 0.5197, 0.0457],
         [0.0327, 0.1077, 0.7308, 0.9787],
         [0.7949, 0.3290, 0.7138, 0.0975]]])

In [54]:
# Reduce along dim 0 (across the two 3x4 slices). With a dim argument, min
# returns a (values, indices) named tuple; each field has shape (3, 4).
# `dim` is PyTorch's documented keyword; `axis` is only a NumPy-style alias.
tensor.min(dim=0)

torch.return_types.min(
values=tensor([[0.5987, 0.5808, 0.4945, 0.0457],
        [0.0327, 0.1077, 0.7308, 0.2670],
        [0.7949, 0.3290, 0.6459, 0.0975]]),
indices=tensor([[0, 1, 0, 1],
        [1, 1, 1, 0],
        [1, 1, 0, 1]]))

In [55]:
# Reduce along dim 1 (the three rows inside each slice); values and indices
# each have shape (2, 4). `dim` is PyTorch's documented keyword for `axis`.
tensor.min(dim=1)

torch.return_types.min(
values=tensor([[0.1738, 0.8444, 0.4945, 0.1179],
        [0.0327, 0.1077, 0.5197, 0.0457]]),
indices=tensor([[1, 1, 0, 0],
        [1, 1, 0, 0]]))

In [56]:
# Reduce along dim 2 (the last axis); values and indices each have shape (2, 3).
# `dim` is PyTorch's documented keyword for `axis`.
tensor.min(dim=2)

torch.return_types.min(
values=tensor([[0.1179, 0.1738, 0.2209],
        [0.0457, 0.0327, 0.0975]]),
indices=tensor([[3, 0, 3],
        [3, 0, 3]]))

In [57]:
# With no dim, min reduces over every element and returns a 0-D tensor.
tensor.min()

tensor(0.0327)

In [58]:
# Without dim, argmin returns the position in the flattened tensor:
# 16 = 1*12 + 1*4 + 0, i.e. tensor[1, 1, 0].
tensor.argmin()

tensor(16)

In [59]:
# Index of the minimum along dim 0 for every (row, column) position; this is
# the `indices` field that tensor.min(dim=0) returns.
# `dim` is PyTorch's documented keyword; `axis` is only a NumPy-style alias.
tensor.argmin(dim=0)

tensor([[0, 1, 0, 1],
        [1, 1, 1, 0],
        [1, 1, 0, 1]])

In [60]:
# Largest element over the whole tensor.
tensor.max()

tensor(0.9787)

In [61]:
# mean requires a floating-point (or complex) dtype; torch.rand already returns float32.
tensor.mean()

tensor(0.5250)

In [62]:
# With an even number of elements (24 here) torch returns the lower of the two
# middle values rather than their average.
tensor.median()

tensor(0.5808)

In [63]:
# Sample standard deviation: Bessel's correction (divide by N - 1) is applied by default.
tensor.std()

tensor(0.3223)

### Reshaping, Viewing and Stacking Tensors

In [64]:
# A new random 2x3x4 tensor (24 elements) reused by the reshape and view cells.
tensor = torch.rand(2, 3, 4)

tensor

tensor([[[0.0491, 0.9688, 0.4255, 0.1101],
         [0.2782, 0.2756, 0.8689, 0.9466],
         [0.2183, 0.5321, 0.4887, 0.6533]],

        [[0.0380, 0.9084, 0.1159, 0.1065],
         [0.6938, 0.3122, 0.1099, 0.8285],
         [0.4520, 0.7392, 0.8782, 0.4526]]])

#### Reshaping

In [65]:
# Keep the leading dimension and merge the last two: (2, 3, 4) -> (2, 12).
# Every size is read from tensor.shape, so no dimension is hard-coded.
tensor.reshape(tensor.shape[0], tensor.shape[1] * tensor.shape[2])

tensor([[0.0491, 0.9688, 0.4255, 0.1101, 0.2782, 0.2756, 0.8689, 0.9466, 0.2183,
         0.5321, 0.4887, 0.6533],
        [0.0380, 0.9084, 0.1159, 0.1065, 0.6938, 0.3122, 0.1099, 0.8285, 0.4520,
         0.7392, 0.8782, 0.4526]])

In [66]:
tensor.reshape(2, -1) # -1 means "infer this size from the element count" (24 / 2 = 12)

tensor([[0.0491, 0.9688, 0.4255, 0.1101, 0.2782, 0.2756, 0.8689, 0.9466, 0.2183,
         0.5321, 0.4887, 0.6533],
        [0.0380, 0.9084, 0.1159, 0.1065, 0.6938, 0.3122, 0.1099, 0.8285, 0.4520,
         0.7392, 0.8782, 0.4526]])

In [67]:
# A lone -1 flattens to 1-D, shape (24,). Note that tensor.reshape(1, -1) is
# NOT equivalent: it keeps a leading axis and gives shape (1, 24).
tensor.reshape(-1) # same as tensor.reshape(24)

tensor([0.0491, 0.9688, 0.4255, 0.1101, 0.2782, 0.2756, 0.8689, 0.9466, 0.2183,
        0.5321, 0.4887, 0.6533, 0.0380, 0.9084, 0.1159, 0.1065, 0.6938, 0.3122,
        0.1099, 0.8285, 0.4520, 0.7392, 0.8782, 0.4526])

In [68]:
# flatten() with default arguments collapses all dimensions into one, like reshape(-1).
tensor.flatten()

tensor([0.0491, 0.9688, 0.4255, 0.1101, 0.2782, 0.2756, 0.8689, 0.9466, 0.2183,
        0.5321, 0.4887, 0.6533, 0.0380, 0.9084, 0.1159, 0.1065, 0.6938, 0.3122,
        0.1099, 0.8285, 0.4520, 0.7392, 0.8782, 0.4526])

#### Viewing (a view shares memory with the original tensor)

In [69]:
# view never copies data; it only reinterprets the existing memory, so the
# requested shape must be compatible with the tensor's layout.
tensor.view(2, 3, 4)

tensor([[[0.0491, 0.9688, 0.4255, 0.1101],
         [0.2782, 0.2756, 0.8689, 0.9466],
         [0.2183, 0.5321, 0.4887, 0.6533]],

        [[0.0380, 0.9084, 0.1159, 0.1065],
         [0.6938, 0.3122, 0.1099, 0.8285],
         [0.4520, 0.7392, 0.8782, 0.4526]]])

In [70]:
# The same 24 elements in the same order, regrouped as 2 x 6 x 2.
tensor.view(2, 6, 2)

tensor([[[0.0491, 0.9688],
         [0.4255, 0.1101],
         [0.2782, 0.2756],
         [0.8689, 0.9466],
         [0.2183, 0.5321],
         [0.4887, 0.6533]],

        [[0.0380, 0.9084],
         [0.1159, 0.1065],
         [0.6938, 0.3122],
         [0.1099, 0.8285],
         [0.4520, 0.7392],
         [0.8782, 0.4526]]])

In [71]:
# z is a flat view: it reuses tensor's storage instead of copying it.
z = tensor.view(-1)

z

tensor([0.0491, 0.9688, 0.4255, 0.1101, 0.2782, 0.2756, 0.8689, 0.9466, 0.2183,
        0.5321, 0.4887, 0.6533, 0.0380, 0.9084, 0.1159, 0.1065, 0.6938, 0.3122,
        0.1099, 0.8285, 0.4520, 0.7392, 0.8782, 0.4526])

In [72]:
# Writing through the view changes the original tensor too:
# flat index 3 corresponds to tensor[0, 0, 3].
z[3] = 20

tensor

tensor([[[ 0.0491,  0.9688,  0.4255, 20.0000],
         [ 0.2782,  0.2756,  0.8689,  0.9466],
         [ 0.2183,  0.5321,  0.4887,  0.6533]],

        [[ 0.0380,  0.9084,  0.1159,  0.1065],
         [ 0.6938,  0.3122,  0.1099,  0.8285],
         [ 0.4520,  0.7392,  0.8782,  0.4526]]])

#### Stacking

In [73]:
# Rebinds z to an independent random tensor; it is no longer a view of tensor.
z = torch.rand(2, 3, 4)

z

tensor([[[0.8591, 0.0498, 0.5200, 0.3645],
         [0.3387, 0.7145, 0.6657, 0.9681],
         [0.4212, 0.4578, 0.6961, 0.2305]],

        [[0.3803, 0.3300, 0.7514, 0.1075],
         [0.2402, 0.3687, 0.9201, 0.2535],
         [0.3069, 0.6148, 0.5931, 0.1955]]])

In [74]:
# stack inserts a NEW dimension at position dim: three (2, 3, 4) tensors -> (3, 2, 3, 4).
torch.stack([z, z, z], dim=0)

tensor([[[[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]],

         [[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]]],


        [[[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]],

         [[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]]],


        [[[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]],

         [[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]]]])

In [75]:
# New dimension at position 1: result shape (2, 3, 3, 4), with the three copies
# of each 3x4 slice placed next to each other.
torch.stack([z, z, z], dim=1)

tensor([[[[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]],

         [[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]],

         [[0.8591, 0.0498, 0.5200, 0.3645],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.4212, 0.4578, 0.6961, 0.2305]]],


        [[[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]],

         [[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]],

         [[0.3803, 0.3300, 0.7514, 0.1075],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.3069, 0.6148, 0.5931, 0.1955]]]])

In [76]:
# New dimension at position 2: the shape is again (2, 3, 3, 4), but now each
# individual row is repeated three times.
torch.stack([z, z, z], dim=2)

tensor([[[[0.8591, 0.0498, 0.5200, 0.3645],
          [0.8591, 0.0498, 0.5200, 0.3645],
          [0.8591, 0.0498, 0.5200, 0.3645]],

         [[0.3387, 0.7145, 0.6657, 0.9681],
          [0.3387, 0.7145, 0.6657, 0.9681],
          [0.3387, 0.7145, 0.6657, 0.9681]],

         [[0.4212, 0.4578, 0.6961, 0.2305],
          [0.4212, 0.4578, 0.6961, 0.2305],
          [0.4212, 0.4578, 0.6961, 0.2305]]],


        [[[0.3803, 0.3300, 0.7514, 0.1075],
          [0.3803, 0.3300, 0.7514, 0.1075],
          [0.3803, 0.3300, 0.7514, 0.1075]],

         [[0.2402, 0.3687, 0.9201, 0.2535],
          [0.2402, 0.3687, 0.9201, 0.2535],
          [0.2402, 0.3687, 0.9201, 0.2535]],

         [[0.3069, 0.6148, 0.5931, 0.1955],
          [0.3069, 0.6148, 0.5931, 0.1955],
          [0.3069, 0.6148, 0.5931, 0.1955]]]])

### Squeezing, Unsqueezing and Permuting Tensors

In [77]:
# A tensor with two size-1 dimensions (positions 0 and 2) to demonstrate squeeze.
tensor = torch.rand(1, 2, 1, 4)

tensor, tensor.size()

(tensor([[[[0.0163, 0.7965, 0.9034, 0.8381]],
 
          [[0.5239, 0.6741, 0.7455, 0.2578]]]]),
 torch.Size([1, 2, 1, 4]))

In [78]:
# squeeze() with no argument drops every size-1 dimension: (1, 2, 1, 4) -> (2, 4).
tensor_squeezed = tensor.squeeze()

tensor_squeezed, tensor_squeezed.size()

(tensor([[0.0163, 0.7965, 0.9034, 0.8381],
         [0.5239, 0.6741, 0.7455, 0.2578]]),
 torch.Size([2, 4]))

In [79]:
# unsqueeze inserts a size-1 dimension at the given position: (2, 4) -> (1, 2, 4).
tensor_unsqueezed = tensor_squeezed.unsqueeze(dim=0)

tensor_unsqueezed, tensor_unsqueezed.size()

(tensor([[[0.0163, 0.7965, 0.9034, 0.8381],
          [0.5239, 0.6741, 0.7455, 0.2578]]]),
 torch.Size([1, 2, 4]))

In [80]:
# permute reorders the dimensions (here it reverses them):
# (1, 2, 1, 4) -> (4, 1, 2, 1). The result is a view of the same data.
tensor_permuted = tensor.permute(3, 2, 1, 0)

tensor_permuted, tensor_permuted.size()

(tensor([[[[0.0163],
           [0.5239]]],
 
 
         [[[0.7965],
           [0.6741]]],
 
 
         [[[0.9034],
           [0.7455]]],
 
 
         [[[0.8381],
           [0.2578]]]]),
 torch.Size([4, 1, 2, 1]))

# Accessing GPU

In [83]:
# Device-agnostic setup: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

In [84]:
# Number of CUDA devices available to this process.
torch.cuda.device_count()

1

### Tensors on GPU

In [85]:
# NOTE(review): duplicates the device selection already made in the
# "Accessing GPU" section; re-running it is harmless but redundant.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [88]:
# Rebinds tensor1 (previously a 1-D integer tensor); new tensors start on the CPU.
tensor1 = torch.rand(2, 3, 4)

tensor1, tensor1.device

(tensor([[[0.1494, 0.7690, 0.1172, 0.7635],
          [0.2955, 0.4097, 0.8834, 0.3206],
          [0.7493, 0.5578, 0.0284, 0.8193]],
 
         [[0.4184, 0.4067, 0.8328, 0.9479],
          [0.9101, 0.8949, 0.8810, 0.0487],
          [0.3367, 0.2850, 0.2839, 0.1624]]]),
 device(type='cpu'))

In [89]:
# .to(device) returns a tensor on the target device (a copy when the device
# differs); tensor1 itself stays on the CPU.
tensor1_on_gpu = tensor1.to(device)

tensor1_on_gpu, tensor1_on_gpu.device

(tensor([[[0.1494, 0.7690, 0.1172, 0.7635],
          [0.2955, 0.4097, 0.8834, 0.3206],
          [0.7493, 0.5578, 0.0284, 0.8193]],
 
         [[0.4184, 0.4067, 0.8328, 0.9479],
          [0.9101, 0.8949, 0.8810, 0.0487],
          [0.3367, 0.2850, 0.2839, 0.1624]]], device='cuda:0'),
 device(type='cuda', index=0))

In [90]:
# Rebinds tensor2 to a second random 2x3x4 tensor, also created on the CPU.
tensor2 = torch.rand(2, 3, 4)

tensor2, tensor2.device

(tensor([[[0.9557, 0.0832, 0.5995, 0.4378],
          [0.1710, 0.5705, 0.6380, 0.7600],
          [0.2245, 0.5000, 0.1620, 0.0832]],
 
         [[0.6957, 0.3643, 0.3662, 0.7514],
          [0.6955, 0.7723, 0.9337, 0.9145],
          [0.1370, 0.2875, 0.2137, 0.6269]]]),
 device(type='cpu'))

In [92]:
# tensor1_on_gpu + tensor2 -> Error because of different devices
tensor2_on_gpu = tensor2.to(device)

tensor1_on_gpu + tensor2_on_gpu

tensor([[[1.1051, 0.8522, 0.7168, 1.2012],
         [0.4664, 0.9802, 1.5214, 1.0806],
         [0.9738, 1.0578, 0.1903, 0.9025]],

        [[1.1141, 0.7711, 1.1990, 1.6993],
         [1.6056, 1.6672, 1.8146, 0.9632],
         [0.4737, 0.5725, 0.4976, 0.7893]]], device='cuda:0')