In [1]:
# NOTE(review): `float32` is not referenced anywhere else in the visible notebook (the dtype
# cells below use `torch.float32`); this looks like an accidental IDE auto-import that adds a
# sympy dependency for nothing — confirm and remove.
from sympy.codegen.ast import float32
# Shell escape: print the NVIDIA driver / CUDA version and the current GPU status.
!nvidia-smi

Sun Feb  2 17:36:00 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 561.19                 Driver Version: 561.19         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   47C    P0             15W /  140W |       0MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import IPython

print(torch.__version__)
print(torch.cuda.is_available())
print(IPython.version_info)

2.4.1
True
(8, 12, 2, '')


## Introduction to Tensors

### Creating Tensors

In [3]:
# scalar

scalar = torch.tensor(7)

scalar

tensor(7)

In [4]:
scalar.ndim


0

In [5]:
#Get tensor back as python integer

scalar.item()

7

In [6]:
#Vector

vector = torch.tensor([7, 7])

vector

tensor([7, 7])

In [7]:
vector.ndim  #(rank 1 tensor, think of it like the number of pairs of square brackets)

1

In [8]:
vector.shape

torch.Size([2])

In [9]:
# Matrix: a rank-2 tensor, written as a list of rows.
MATRIX = torch.tensor([[7, 8], [9, 10]])

MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [10]:
MATRIX.ndim

2

In [11]:
MATRIX.shape

torch.Size([2, 2])

In [12]:
rectangle_matrix = torch.tensor([
    [7, 8],
    [9, 10],
    [11, 12]
])
rectangle_matrix


tensor([[ 7,  8],
        [ 9, 10],
        [11, 12]])

In [13]:
print(rectangle_matrix.ndim)

# Shape = (Rows, Cols)
print(rectangle_matrix.shape)

2
torch.Size([3, 2])


In [14]:
MATRIX[0]

tensor([7, 8])

In [15]:
#Tensor

TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12]
    ]
])

TENSOR

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9],
         [10, 11, 12]]])

In [16]:
print(TENSOR.ndim)

# Shape = (Number of matrices ,Rows, Cols)
print(TENSOR.shape)

3
torch.Size([1, 4, 3])


In [17]:
TENSOR[0]

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])

In [18]:
#Doesnt work because we have only 1 matrix which is at index 0
# TENSOR[1]

In [19]:
TENSOR_2 = torch.tensor([  #Contains array of matrices
    [  # Contains array of vectors (arrays)
        [1, 2, 3],  # Array of numbers
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12]
    ],

    [  # Contains array of vectors (arrays)
        [13, 14, 15],
        [16, 17, 18],
        [19, 20, 21],
        [22, 23, 24]
    ]
])

# (2,4,3)
# Basically saying we have 2 matrices with 4 rows and 3 cols
print(TENSOR_2.shape)

torch.Size([2, 4, 3])


In [20]:
TENSOR_3 = torch.tensor(
    [  # Contains array of tensors
        [  #Contains array of matrices
            [  # Contains array of vectors (arrays)
                [1, 2, 3],  # Array of integers
                [4, 5, 6],
                [7, 8, 9],
                [10, 11, 12]
            ],

            [  # Contains array of vectors (arrays)
                [13, 14, 15],
                [16, 17, 18],
                [19, 20, 21],
                [22, 23, 24]
            ]
        ]
    ])

print(TENSOR_3.shape)
print(TENSOR_3[0])
print(TENSOR_3[0][0])
print(TENSOR_3[0][0][0])

torch.Size([1, 2, 4, 3])
tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9],
         [10, 11, 12]],

        [[13, 14, 15],
         [16, 17, 18],
         [19, 20, 21],
         [22, 23, 24]]])
tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])
tensor([1, 2, 3])


In [21]:
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [22]:
MATRIX.is_cuda

False

In [23]:
MATRIX.transpose(1, 0)
#or 
# MATRIX.T

tensor([[ 7,  9],
        [ 8, 10]])

In [24]:
TENSOR

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9],
         [10, 11, 12]]])

In [25]:
TENSOR.transpose(0, 2)

tensor([[[ 1],
         [ 4],
         [ 7],
         [10]],

        [[ 2],
         [ 5],
         [ 8],
         [11]],

        [[ 3],
         [ 6],
         [ 9],
         [12]]])

In [26]:
TENSOR.transpose(0, 1)

tensor([[[ 1,  2,  3]],

        [[ 4,  5,  6]],

        [[ 7,  8,  9]],

        [[10, 11, 12]]])

## Random Tensors

We require random numbers because neural networks start off their learning with random weights and biases, which are then adjusted to represent the patterns/features they learn.


Random tensors - https://pytorch.org/docs/stable/generated/torch.rand.html


In [27]:
#Create a random tensor of shape (3,4)

random_tensor = torch.rand(3, 4)

random_tensor

tensor([[0.1262, 0.5594, 0.4882, 0.9940],
        [0.9478, 0.6218, 0.2526, 0.0807],
        [0.3592, 0.6128, 0.1146, 0.6191]])

In [28]:
random_3d_tensor = torch.rand(2, 4, 5)

print(random_3d_tensor.shape)

random_3d_tensor

torch.Size([2, 4, 5])


tensor([[[0.7481, 0.0687, 0.5886, 0.0547, 0.2691],
         [0.0610, 0.7809, 0.1180, 0.1236, 0.4849],
         [0.2155, 0.2696, 0.4058, 0.3932, 0.3133],
         [0.2729, 0.2692, 0.0378, 0.7711, 0.4437]],

        [[0.5092, 0.2621, 0.6240, 0.1628, 0.8675],
         [0.3432, 0.8376, 0.0212, 0.9844, 0.4796],
         [0.2406, 0.2972, 0.3143, 0.7412, 0.2213],
         [0.7249, 0.1560, 0.9083, 0.5832, 0.3325]]])

In [29]:
#Create a random tensor with similar shape to an image tensor

random_image_size_tensor = torch.rand(size=(3, 224, 224))  #colour channels (R, G, B), height, width 

print(random_image_size_tensor.shape)
random_image_size_tensor

torch.Size([3, 224, 224])


tensor([[[0.4868, 0.7786, 0.5586,  ..., 0.8996, 0.4391, 0.0138],
         [0.9988, 0.2626, 0.7431,  ..., 0.4687, 0.0617, 0.8753],
         [0.8695, 0.1486, 0.9172,  ..., 0.5138, 0.8192, 0.0521],
         ...,
         [0.9036, 0.6805, 0.4172,  ..., 0.7063, 0.9123, 0.4232],
         [0.5166, 0.0974, 0.4748,  ..., 0.6025, 0.0593, 0.2498],
         [0.5835, 0.3168, 0.5312,  ..., 0.5280, 0.8616, 0.7776]],

        [[0.3088, 0.0172, 0.3750,  ..., 0.4113, 0.1544, 0.4760],
         [0.0516, 0.0646, 0.8560,  ..., 0.2738, 0.5245, 0.2149],
         [0.9148, 0.2588, 0.6541,  ..., 0.8069, 0.0994, 0.5665],
         ...,
         [0.7793, 0.9583, 0.2068,  ..., 0.0687, 0.6789, 0.2507],
         [0.0694, 0.0988, 0.3078,  ..., 0.0676, 0.8451, 0.5112],
         [0.3632, 0.1498, 0.1969,  ..., 0.7715, 0.1987, 0.8714]],

        [[0.6492, 0.6505, 0.0771,  ..., 0.5595, 0.8986, 0.3139],
         [0.2402, 0.4460, 0.7592,  ..., 0.7826, 0.4368, 0.5020],
         [0.5263, 0.3718, 0.3704,  ..., 0.2278, 0.6022, 0.

## Zeros and ones

In [30]:
zeros_3d = torch.zeros(size=(3, 4, 4))
zeros_3d

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [31]:
zeros = torch.zeros(size=(3, 4))

zeros * random_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [32]:
ones = torch.ones(size=(3, 4))
ones


tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [33]:
#default data type for tensors
ones.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [34]:
# Use torch.arange

two_to_20 = torch.arange(2, 21)

two_to_20

tensor([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20])

In [35]:
torch.arange(start=0, end=100, step=2)

tensor([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34,
        36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70,
        72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

In [36]:
torch.arange(start=0, end=102, step=2)

tensor([  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,  26,
         28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,  52,  54,
         56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,  82,
         84,  86,  88,  90,  92,  94,  96,  98, 100])

### Creating tensors like

In [38]:
# Creating a tensor like the shape of another tensor

tensor_like_twenty = torch.zeros_like(input=two_to_20)

print(tensor_like_twenty, two_to_20)
print(tensor_like_twenty.shape, two_to_20.shape)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) tensor([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20])
torch.Size([19]) torch.Size([19])


In [39]:
tensor_like_3d_random = torch.rand_like(input=random_tensor)

tensor_like_3d_random

tensor([[0.9088, 0.0551, 0.8231, 0.1849],
        [0.9503, 0.3352, 0.9698, 0.8274],
        [0.7263, 0.8415, 0.6680, 0.6830]])

In [40]:
#doesnt work because rand requires float not int
# torch.rand_like(input=TENSOR_2)

torch.ones_like(input=TENSOR_2)


tensor([[[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]]])

## Tensor Datatypes

**Note:** A wrong dtype is one of the 3 most common errors when running PyTorch for DL:
1. Tensor is not right dtype
2. Tensor not right shape
3. Tensor not on right device 

In [41]:
float_32_tensor = torch.tensor([3.0, 6.0], dtype=None)

float_32_tensor

tensor([3., 6.])

In [42]:

#Still shows dtype as float because its the default dtype in torch
float_32_tensor.dtype

torch.float32

In [43]:
# The three keyword arguments most often passed to torch.tensor, spelled out explicitly.
all_param_tensor = torch.tensor(
    [[6, 7], [8, 9]],
    dtype=None,           # datatype of the data in the tensor
    device=None,          # default is cpu, could be changed to cuda
    requires_grad=False,  # set to True if the tensor should track gradients when its values are updated
)

In [44]:
float_16_tensor = float_32_tensor.type(torch.half)
# or
# float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6.], dtype=torch.float16)

In [45]:
#type casted to higher capacity dtype
(float_32_tensor * float_16_tensor).dtype

torch.float32

In [46]:
int_32_tensor = torch.tensor([3, 6], dtype=torch.int32)
int_32_tensor

tensor([3, 6], dtype=torch.int32)

In [47]:
float_32_tensor * int_32_tensor

tensor([ 9., 36.])

In [48]:
long_tensor = torch.tensor(data=([3, 6]), dtype=torch.long)
long_tensor

tensor([3, 6])

In [50]:
float_32_tensor * long_tensor

tensor([ 9., 36.])

### Getting info from tensor

1. Tensor is not right dtype - to get dtype from a tensor - use `tensor.dtype` 
2. Tensor not right shape - to get shape, use `tensor.shape`
3. Tensor not on right device - to get which device tensor is on, use `tensor.device` 

In [51]:
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.2374, 0.1248, 0.5041, 0.4262],
        [0.2105, 0.0701, 0.5027, 0.1591],
        [0.7522, 0.6307, 0.6122, 0.3476]])

In [52]:
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device of tensor: {some_tensor.device}")

tensor([[0.2374, 0.1248, 0.5041, 0.4262],
        [0.2105, 0.0701, 0.5027, 0.1591],
        [0.7522, 0.6307, 0.6122, 0.3476]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device of tensor: cpu


In [53]:
#Note tensor shape and size are the same
#Size is just a func, while shape is an attribute

some_tensor.shape, some_tensor.size()

(torch.Size([3, 4]), torch.Size([3, 4]))

In [54]:
# Create the tensor directly on the GPU when one is present; fall back to the CPU otherwise so
# this cell does not raise on machines without CUDA and the notebook still runs top to bottom.
gpu_tensor = torch.rand(3, 4, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))



## Manipulating Tensors (Tensor Operations)

Ops include:
* Addition
* Subtraction
* Element-wise multiplication
* Matrix multiplication
* Division

In [55]:
test_tensor = torch.tensor(data=([1, 2, 3]))
test_tensor + 10

tensor([11, 12, 13])

In [56]:
test_tensor - 10

tensor([-9, -8, -7])

In [57]:
test_tensor * 10

tensor([10, 20, 30])

In [58]:
test_tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [59]:
test_tensor // 10

tensor([0, 0, 0])

In [60]:
torch.multiply(test_tensor, 10)

tensor([10, 20, 30])

### Matrix multiplication

Two main ways of multiplication:
1. Element-wise multiplication (e.g. matrix * scalar)
2. Matrix multiplication (dot product => a . b)

Matrix multiplication rules
1. The **inner dimensions** must match: 
* `(3,2) @ (3,2)` won't work
* `(3,2) @ (2,3)` will work
* `(2,3) @ (3,2)` will work
2. The resulting matrix will have the shape of the **outer dimensions**:
* `(3,2) @ (2,3)` -> `(3,3)`
* `(2,3) @ (3,2)` -> `(2,2)`

Generalizing:
**(m × n) @ (n × p) → (m × p)**

* The number of columns of the first matrix must equal the number of rows of the second matrix.
* The result has the rows of the first matrix and the columns of the second.

In [61]:
# Two operands of shape (2, 3) and (3, 2): the inner dimensions match, so they can be matmul'd.
mul_tensor = torch.tensor([[1, 2, 3],
                           [4, 5, 6]])

mul_tensor_2 = torch.tensor([[7, 8],
                             [10, 11],
                             [13, 14]])

In [62]:
#Element-wise multiplication
print(mul_tensor, "*", mul_tensor)
print(f"Equals: {mul_tensor * mul_tensor}")

# Cant multiply element-wise because shape mismatch
# print(f"Equals: {mul_tensor * mul_tensor_2}")

tensor([[1, 2, 3],
        [4, 5, 6]]) * tensor([[1, 2, 3],
        [4, 5, 6]])
Equals: tensor([[ 1,  4,  9],
        [16, 25, 36]])


In [63]:
#Matrix multiplication
torch.matmul(input=mul_tensor, other=mul_tensor_2)

tensor([[ 66,  72],
        [156, 171]])

In [64]:
torch.matmul(input=mul_tensor_2, other=mul_tensor)

tensor([[ 39,  54,  69],
        [ 54,  75,  96],
        [ 69,  96, 123]])

In [65]:
tensor_sca_1 = torch.tensor(data=([1, 2, 3]))



In [68]:
%%time
#Doesnt show it here but this is way faster for big tensor multiplications than a for loop
torch.matmul(tensor_sca_1, tensor_sca_1)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

In [69]:
%%time 
#Has to be first thing in cell
# The above cell is basically only doing a dot product between 2 vectors
sum = 0
for i in tensor_sca_1:
    sum += i * i
sum

# 1*1 + 2*2 + 3*3 

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

In [70]:
# Another way of saying mat mul
tensor_sca_1 @ tensor_sca_1

tensor(14)

In [71]:
#This is working because it is being treated as a vector instead of a matrix
# If it considered it as a matrix of 1,3 then it would give an error
tensor_sca_1.shape

torch.Size([3])

In [72]:
shape_2_vector = torch.tensor(data=[[1, 2, 3]])
shape_2_vector.shape

torch.Size([1, 3])

In [70]:
# RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3 and 1x3)
# torch.matmul(shape_2_vector, shape_2_vector)

In [73]:
# This works because it transposed the second vector making it 3x1
torch.matmul(shape_2_vector, shape_2_vector.T)

tensor([[14]])

#### One of the most common errors in DL : Shape errors

In [74]:
# Shapes for matrix multiplication: both operands have 3 rows and 2 columns.
Tensor_A = torch.tensor([[1, 2],
                         [4, 5],
                         [7, 8]])

Tensor_B = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

In [75]:
Tensor_A.shape, Tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

The inner dimensions are different (`(3,2) @ (3,2)`: 2 ≠ 3), so these two matrices can't be multiplied as they are.

To fix our issue, we can **transpose** one of our matrices

A **transpose** switches the dimensions of our matrix

In [76]:
Tensor_B.T

tensor([[1, 3, 5],
        [2, 4, 6]])

In [77]:
Tensor_B

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [78]:
Tensor_B.shape, Tensor_B.T.shape

(torch.Size([3, 2]), torch.Size([2, 3]))

In [79]:
torch.matmul(Tensor_A, Tensor_B.T)

tensor([[ 5, 11, 17],
        [14, 32, 50],
        [23, 53, 83]])

In [80]:
torch.matmul(Tensor_A, Tensor_B.T).shape

torch.Size([3, 3])

## Tensor Aggregation

Finding min, max, sum, etc.



In [81]:
# Build a new 2-D tensor by stacking two 1-D ranges as its rows.
tensor_agg = torch.stack(
    (torch.arange(start=1, end=6, step=2), torch.arange(start=6, end=11, step=2)),
    dim=0,
)

tensor_agg

tensor([[ 1,  3,  5],
        [ 6,  8, 10]])

In [82]:
tensor_agg.shape

torch.Size([2, 3])

In [83]:
tensor_agg.min()

tensor(1)

In [84]:
tensor_agg.max()

tensor(10)

In [85]:
#Doesnt work because mean requires float dtype
# RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long
# tensor_agg.mean()

In [86]:
# Casting to float
torch.mean(tensor_agg.type(torch.float32))

tensor(5.5000)

In [87]:
#The above created a copy of the tensor in float, the original tensor still exists in int64 type (long)
tensor_agg.dtype

torch.int64

In [88]:
tensor_agg.sum()

tensor(33)

### Finding the index of max or min (Positional max or min)

In [89]:
#Index of min is 0
tensor_agg.argmin()

tensor(0)

In [90]:
# tensor_agg.argmax() gives 5: the maximum value sits at index 5 (0-based, i.e. the sixth and
# last element) of a flattened version of the tensor.
tensor_agg.argmax()

tensor(5)

In [91]:
# torch.unravel_index(flat_index, shape) converts this flat index into a 2D coordinate (row, column)
max_idx_flat = tensor_agg.argmax()
torch.unravel_index(max_idx_flat, tensor_agg.shape)

(tensor(1), tensor(2))

## Reshaping, Stacking, Squeezing and unsqueezing tensors

* Reshaping - Reshaping an input tensor into a defined shape
* View - Return a view of a tensor in a certain shape without changing the shape of the original tensor. Keep in mind that a view shares the same memory as the original, so any change to the values of the view also changes the original tensor (and vice versa)
* Stacking - Combine multiple tensors on top of each other (vstack) or side by side (hstack) (or stack by itself with input dim)
* Squeeze - Removes all dimensions of size `1` from the tensor
* Unsqueeze - Adds a dimension of size `1` to a tensor at a given position
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way 

In [92]:
x = torch.arange(1., 10.)  #float

x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [93]:
#This wont work because we have 9 elements and we are trying to shape it into 1 row with 7 columns which wont fit
# RuntimeError: shape '[1, 7]' is invalid for input of size 9
# x_reshape = x.reshape(1, 7)
# x_reshape, x_reshape.shape

In [94]:
#Notice we added a new dimension (extra []), earlier shape was only 9
x_reshape = x.reshape(1, 9)
x_reshape, x_reshape.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [95]:
#Would require 18 elements to fill this
# x_reshape = x.reshape(2, 9)
# x_reshape, x_reshape.shape

In [96]:
#9 rows and 1 column
x_reshape = x.reshape(9, 1)
x_reshape, x_reshape.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

Basically think this way, the product of dims is the number of elements that will fit

so if i have shape (4,3) I have 12 elements. These can be reshaped to any dimensions as long as their product is 12. e.g. (2,6) 

In [97]:
y = torch.tensor(data=([
    [1, 2, 3],
    [4, 5, 6],
    [6, 7, 8],
    [9, 10, 11]
]))

y, y.shape

(tensor([[ 1,  2,  3],
         [ 4,  5,  6],
         [ 6,  7,  8],
         [ 9, 10, 11]]),
 torch.Size([4, 3]))

In [98]:
y.reshape(2, 6), y.reshape(2, 6).shape

(tensor([[ 1,  2,  3,  4,  5,  6],
         [ 6,  7,  8,  9, 10, 11]]),
 torch.Size([2, 6]))

In [99]:
y.reshape(2, 1, 6), y.reshape(2, 1, 6).shape

(tensor([[[ 1,  2,  3,  4,  5,  6]],
 
         [[ 6,  7,  8,  9, 10, 11]]]),
 torch.Size([2, 1, 6]))

In [102]:
x

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [100]:
#Change the view
z = x.view(9, 1)
z, z.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [101]:
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [103]:
#Changed the first 2 rows of z to 5 for all columns
z[0:2, :] = 5

In [104]:
#Changing z changes x as well
x, z

(tensor([5., 5., 3., 4., 5., 6., 7., 8., 9.]),
 tensor([[5.],
         [5.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]))

In [105]:
#Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=0)
#Stacked vertically, as new rows
# x
# x
# x
# x
x_stacked

tensor([[5., 5., 3., 4., 5., 6., 7., 8., 9.],
        [5., 5., 3., 4., 5., 6., 7., 8., 9.],
        [5., 5., 3., 4., 5., 6., 7., 8., 9.],
        [5., 5., 3., 4., 5., 6., 7., 8., 9.]])

In [106]:
x_stacked = torch.stack([x, x, x, x], dim=1)
#Stacked horizontally, as new columns
# x x x x
x_stacked

tensor([[5., 5., 5., 5.],
        [5., 5., 5., 5.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [107]:
# x_reshape was produced by x.reshape(9, 1) earlier; its first two entries now read 5, so it
# reflects the in-place write made through z, i.e. it shares data with x rather than being a copy.
x_reshape, x_reshape.shape

(tensor([[5.],
         [5.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [108]:
p = x_reshape.squeeze()
p, p.shape

(tensor([5., 5., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [109]:
#unsqueeze adds a single dimension to a target tensor at a specific dim
p.unsqueeze(0), p.unsqueeze(0).shape

(tensor([[5., 5., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [110]:
#unsqueeze adds a single dimension to a target tensor at a specific dim
p.unsqueeze(1), p.unsqueeze(1).shape

(tensor([[5.],
         [5.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [111]:
p.unsqueeze(0).unsqueeze(0), p.unsqueeze(0).unsqueeze(0).shape

(tensor([[[5., 5., 3., 4., 5., 6., 7., 8., 9.]]]), torch.Size([1, 1, 9]))

In [112]:
p.unsqueeze(1).unsqueeze(1), p.unsqueeze(1).unsqueeze(1).shape

(tensor([[[5.]],
 
         [[5.]],
 
         [[3.]],
 
         [[4.]],
 
         [[5.]],
 
         [[6.]],
 
         [[7.]],
 
         [[8.]],
 
         [[9.]]]),
 torch.Size([9, 1, 1]))

In [113]:
#permute - Rearranges dims of a target tensor in a specified order
r = torch.rand(2, 3, 5)

r, r.shape

(tensor([[[0.2921, 0.3050, 0.7808, 0.4285, 0.8915],
          [0.7997, 0.9382, 0.0353, 0.5236, 0.4428],
          [0.1606, 0.7584, 0.8411, 0.7951, 0.5940]],
 
         [[0.1697, 0.9809, 0.9486, 0.9412, 0.9448],
          [0.9546, 0.3136, 0.5852, 0.6117, 0.5614],
          [0.8168, 0.4850, 0.8992, 0.8340, 0.4454]]]),
 torch.Size([2, 3, 5]))

In [114]:
# dim of index 2 (5) is at 0
# dim of index 0 (2) is at 1
# dim of index 1 (3) is at 2
torch.permute(r, (2, 0, 1)).shape

torch.Size([5, 2, 3])

### Indexing (selecting data from tensors)

Indexing with PyTorch is similar to NumPy

In [115]:
i = torch.arange(1, 10).reshape(1, 3, 3)

i, i.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [116]:
i[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [117]:
#Both are the same thing
i[0][0], i[0, 0]

(tensor([1, 2, 3]), tensor([1, 2, 3]))

In [118]:
i[0][0][0], i[0, 0, 0]

(tensor(1), tensor(1))

In [119]:
# You can use ":" to select  "all" of a target dimension
#from all the matrices, from all  the rows, select the 0th column
i[:, :, 0]

tensor([[1, 4, 7]])

In [120]:
#from all the matrices, from the rows 0 to 1, select the 0th column
i[:, 0:2, 0]

tensor([[1, 4]])

In [121]:
#Get all values of 0 dim, but only the 1 index value of 1st and 2nd dim
i[:, 1, 1]

tensor([5])

In [122]:
#Get index 0 of 0th and 1st dimension and all values of 2nd dimension
i[0, 0, :]

tensor([1, 2, 3])

### PyTorch tensors and NumPy

* Data in NumPy array, but want it in Pytorch tensor -> `torch.from_numpy(ndarray)`
* Tensor to Numpy -> `torch.Tensor.numpy()`

In [123]:
npArray = np.arange(1., 8.)
npArray

array([1., 2., 3., 4., 5., 6., 7.])

In [124]:
torchTensor = torch.from_numpy(npArray)

torchTensor

tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)

In [125]:
#Note that torch's default float is float32, but np is 64 so to prevent lossy conversion it keeps it 64. We can change it explicitly though

torchTensor32 = torch.from_numpy(npArray).type(torch.float32)
torchTensor32, torchTensor32.dtype

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.float32)

In [126]:
# The tensor is unchanged here, but NOT because from_numpy made a deep copy: torch.from_numpy
# shares memory with the source ndarray. `npArray + 1` builds a brand-new array and rebinds the
# name npArray to it, so the original buffer (still backing torchTensor) is never modified.
# An in-place update such as `npArray += 1` would have changed torchTensor as well.
npArray = npArray+1
npArray,torchTensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [127]:
# Tensor -> NumPy: call .numpy() on the tensor itself (same as torch.Tensor.numpy(tensor)).
ttensor = torch.ones(7)
npTensor = ttensor.numpy()

ttensor, npTensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [128]:
# Same caveat in this direction: Tensor.numpy() shares memory with the tensor, it does not copy.
# npTensor still shows ones only because `ttensor + 1` creates a new tensor and rebinds the name
# ttensor to it; an in-place update such as `ttensor += 1` would have changed npTensor as well.
ttensor = ttensor+1
npTensor,ttensor

(array([1., 1., 1., 1., 1., 1., 1.], dtype=float32),
 tensor([2., 2., 2., 2., 2., 2., 2.]))

## Reproducibility (trying to take the random out of random)

In short NN learn by:

`start with random numbers -> perform tensor ops -> Update random numbers to represent certain features or patterns ->again ->again...`

To reduce the randomness, we can use the **random seed**

Essentially what it does is it gives a starting point or deterministic parameter for the random number generating algorithm ensuring that it always spits out the same numbers in the same sequence

Extra resources:
* https://pytorch.org/docs/stable/notes/randomness.html
* https://en.wikipedia.org/wiki/Random_seed


In [129]:
#Will give different random numbers everytime we run
torch.rand(3,3)

tensor([[0.5899, 0.3246, 0.0792],
        [0.1633, 0.6366, 0.0396],
        [0.8985, 0.2918, 0.0361]])

In [130]:
r1 = torch.rand(3,4)
r2 = torch.rand(3,4)

print(r1)
print(r2)
print(r1==r2)

tensor([[0.8082, 0.1802, 0.1524, 0.3983],
        [0.1224, 0.0216, 0.2620, 0.7261],
        [0.6738, 0.6583, 0.6564, 0.4666]])
tensor([[0.2146, 0.4153, 0.5353, 0.7437],
        [0.0439, 0.7868, 0.6788, 0.8725],
        [0.2907, 0.9328, 0.5436, 0.5492]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [131]:
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
r1 = torch.rand(3,4)
r1

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

In [132]:
#Think of it this way once we set the random seed it generates a list of numbers its going to output like 0.8,0.9,0.6..... So once we set it we will get those numbers anytime we want something random. Setting the seed again to the same value resets us to the beginning of the list, so we will generate the number at index 0 again
torch.manual_seed(RANDOM_SEED)

r2 = torch.rand(3,4)
r2

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

In [133]:
print(r1==r2)

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### Running tensors or general PyTorch objects in GPUs



In [204]:
print(torch.__version__)
print(torch.cuda.is_available())

2.4.1
True


In [205]:
#Setup Device agnostic code to run on gpu if available or cpu
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [211]:
# Count the number of visible CUDA devices. The device name is only queried when CUDA is
# available, because torch.cuda.get_device_name() raises on a machine without a usable GPU.
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())


1
NVIDIA GeForce RTX 4070 Laptop GPU


### Putting tensors and models on the GPU




In [212]:
#Default tensor is on cpu
cpu_tensor = torch.rand(3, 4)
print(cpu_tensor.device)

cpu


In [214]:
gpu_tensor = torch.rand(3, 4,device=device)
print(gpu_tensor.device)

cuda:0


In [215]:
#We can also move a tensor to the gpu
tensor_on_gpu = cpu_tensor.to(device)
print(tensor_on_gpu,tensor_on_gpu.device)

tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]], device='cuda:0') cuda:0


In [216]:
#This still exists though, it created a copy of it on the gpu 
cpu_tensor,cpu_tensor.device

(tensor([[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]),
 device(type='cpu'))

In [218]:
#NumPy does not work with data on the gpu
# tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [220]:
#Moving back to cpu
# or tensor_on_gpu.to("cpu")
prodigal_son = tensor_on_gpu.cpu()
prodigal_son,prodigal_son.device

(tensor([[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]),
 device(type='cpu'))

In [221]:
prodigal_son.numpy()

array([[0.86940444, 0.5677153 , 0.74109405, 0.4294045 ],
       [0.8854429 , 0.57390445, 0.26658005, 0.62744915],
       [0.26963168, 0.44136357, 0.29692084, 0.8316855 ]], dtype=float32)