# PyTorch Fundamentals

pytorch documentation : https://pytorch.org/docs/stable/tensors.html

In [1]:
# Libraries needed to work with pytorch

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.4.1+cu121


In [2]:
# If running on an NVIDIA GPU runtime, nvidia-smi shows which GPU the machine has

!nvidia-smi

Sat Sep 21 03:08:32 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   62C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# 1.Introduction to Tensors

**Tensor** - a multidimensional array that stores data of a single type. A tensor can have any number of dimensions.

Scalars and vectors are conventionally written with lowercase letters, while matrices and tensors are generally written with uppercase letters.



In [3]:
# A single scalar using torch tensor
scaler = torch.tensor(10)
scaler

tensor(10)

In [4]:
# A scaler can be added to a Tensor
scaler + 1

tensor(11)

In [5]:
# Another way to accomplish above operation
scaler + torch.tensor(1)

tensor(11)

In [6]:
# scaler has no dimension. ndim = 0
scaler.ndim, scaler.size()

(0, torch.Size([]))

In [7]:
# Vector in scientific terms has a direction and magnitude
vector = torch.tensor([45,45])
vector,vector.shape, vector.ndim, vector.dtype, vector.size()

(tensor([45, 45]), torch.Size([2]), 1, torch.int64, torch.Size([2]))

In [8]:
# Check shape, dim and dtype of a tensor
v = torch.tensor([[45,45],[34,0]])
v, v.shape, v.ndim, v.dtype, v.size()

(tensor([[45, 45],
         [34,  0]]),
 torch.Size([2, 2]),
 2,
 torch.int64,
 torch.Size([2, 2]))

In [9]:
# Indexing the tensor is similar to array indexing
v[0], v[1]

(tensor([45, 45]), tensor([34,  0]))

In [10]:
# 3-dimensional tensor: 2 blocks, each holding 3 rows of 2 values
TENSOR = torch.tensor([
    [[45, 45], [34, 111], [4, 5]],
    [[451, 425], [3544, 6], [44, 565]],
])
TENSOR, TENSOR.shape, TENSOR.ndim, TENSOR.dtype, TENSOR.size()

(tensor([[[  45,   45],
          [  34,  111],
          [   4,    5]],
 
         [[ 451,  425],
          [3544,    6],
          [  44,  565]]]),
 torch.Size([2, 3, 2]),
 3,
 torch.int64,
 torch.Size([2, 3, 2]))

In [11]:
# Indexing a multidimensional tensor
TENSOR[0][2][1]

tensor(5)

In [12]:
# 2-D tensor with 4 rows and 2 columns
T = torch.tensor([
    [0, 0],
    [45, 45],
    [34, 0],
    [4, 5],
])
T.ndim

2

In [13]:
T[3][1]

tensor(5)

# 2.Generate Random Tensor

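`Random` numbers are important for neural networks (NN): a model typically starts from random values that are then adjusted during training.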

Read more on random number generation - https://pytorch.org/docs/stable/generated/torch.rand.html

In [14]:
# Let's generate a tensor of random values with shape [3, 4]
random_nums = torch.rand(3,4)
random_nums, random_nums.dtype, random_nums.size(), random_nums.ndim

(tensor([[0.5273, 0.5966, 0.3705, 0.3289],
         [0.4582, 0.1435, 0.3704, 0.2810],
         [0.3568, 0.6724, 0.3091, 0.0493]]),
 torch.float32,
 torch.Size([3, 4]),
 2)

In [15]:
rand_img_size_tensor = torch.rand(size=(3, 224,224))
rand_img_size_tensor.dtype, rand_img_size_tensor.size(), rand_img_size_tensor.ndim

(torch.float32, torch.Size([3, 224, 224]), 3)

# 3.Zeros, ones and arange

 ## Zeros and ones

 Tensor with all zeros and all ones can be generated easily with torch

In [16]:
# Tensors filled with 0s and with 1s, plus a fresh random tensor, all of shape (3, 4)
tensor_zero = torch.zeros(3, 4)
tensor_ones = torch.ones(3, 4)
random_nums = torch.rand(size=(3, 4))

tensor_zero, tensor_ones, random_nums

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[0.4433, 0.3688, 0.8664, 0.4945],
         [0.4080, 0.0699, 0.8729, 0.8264],
         [0.2102, 0.9396, 0.2889, 0.5795]]))

In [17]:
tensor_ones.dtype

torch.float32

## Arange

Gives a tensor with range of values (from low to high) with specified interval

In [18]:
range_test = torch.arange(0,5,2)
range_test

tensor([0, 2, 4])

In [19]:
range_test = torch.arange(0,5)
for i in range_test:
  print(i)

tensor(0)
tensor(1)
tensor(2)
tensor(3)
tensor(4)


## zeros_like and ones_like

`zeros_like` returns a tensor filled with the scalar value 0, with the same size as input

`ones_like` returns a tensor filled with the scalar value 1, with the same size as input

In [20]:
range_test = torch.arange(0,5)
range_test

tensor([0, 1, 2, 3, 4])

In [21]:
torch.zeros_like(input=range_test)

tensor([0, 0, 0, 0, 0])

In [22]:
torch.ones_like(input=range_test)

tensor([1, 1, 1, 1, 1])

# 4.Tensor dtypes and device

Very important as dtype is one of the 3 problems we are going to encounter.

Common problems faced during model building
1. Right dtype -- is it float32 or float16 or int64 etc
2. Right shape -- if it is 3x4 or 4x3 etc
3. Right device -- "cpu", "cuda" (GPU vs CPU) etc

In [23]:
# The three tensor attributes that most often cause errors, passed explicitly:
#   dtype=None          -> use the default dtype (float32 in the output below)
#   device=None         -> use the default device (CPU in the output below)
#   requires_grad=False -> do not track gradients for this tensor
new_sample_tensor = torch.rand(size=(3,4),dtype=None,
                               device = None,
                               requires_grad= False)
print(new_sample_tensor)
new_sample_tensor.dtype, new_sample_tensor.device

tensor([[0.4291, 0.0479, 0.5578, 0.9583],
        [0.7125, 0.8071, 0.3699, 0.4137],
        [0.0860, 0.7324, 0.9983, 0.1503]])


(torch.float32, device(type='cpu'))

In [24]:
# convert float32 to float16
latest_tensor = new_sample_tensor.type(torch.float16)
latest_tensor.dtype

torch.float16

In [25]:
print(latest_tensor.shape, new_sample_tensor.shape)
latest_tensor,new_sample_tensor

torch.Size([3, 4]) torch.Size([3, 4])


(tensor([[0.4290, 0.0479, 0.5576, 0.9585],
         [0.7124, 0.8071, 0.3699, 0.4136],
         [0.0861, 0.7324, 0.9985, 0.1503]], dtype=torch.float16),
 tensor([[0.4291, 0.0479, 0.5578, 0.9583],
         [0.7125, 0.8071, 0.3699, 0.4137],
         [0.0860, 0.7324, 0.9983, 0.1503]]))

In [26]:
# Mixing dtypes: float16 * float32 is promoted to the wider type, so the result is float32
tensors_multiplied = latest_tensor * new_sample_tensor
tensors_multiplied.dtype

torch.float32

In [27]:
# We can also specify what datatype the tensor should be
int_32_tensor = torch.tensor([3,4],dtype=torch.int32)
float_32_tensor = torch.tensor([5,4],dtype=torch.float32)
print(int_32_tensor, float_32_tensor)
print(int_32_tensor * float_32_tensor)

tensor([3, 4], dtype=torch.int32) tensor([5., 4.])
tensor([15., 16.])


In [28]:
# When operations between two tensors are done, it is important to have both the tensors in the same device, else we may get an error
int_32_tensor.device

device(type='cpu')

# 5.Data Manipulations with Tensor

1. Addition
2. Subtraction
3. Multiplication
4. Division
5. Matrix Multiplication  -- Inner dims should match

All operations except matrix multiplication are performed element-wise.

In [29]:
# Two 1-D integer tensors used by the element-wise arithmetic examples
ten_1, ten_2 = torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])

In [30]:
ten_1 + ten_2

tensor([5, 7, 9])

In [31]:
ten_1 - ten_2

tensor([-3, -3, -3])

In [32]:
ten_1 * ten_2

tensor([ 4, 10, 18])

In [33]:
ten_1 / ten_2

tensor([0.2500, 0.4000, 0.5000])

In [34]:
# Power
ten_1 ** ten_2

tensor([  1,  32, 729])

In [35]:
ten_1 +1

tensor([2, 3, 4])

## Matrix Multiplication

In [36]:
ten_1 = torch.rand([3,4])
ten_2 = torch.rand([4,3])
ten_1,ten_2

(tensor([[0.4510, 0.9011, 0.7941, 0.7845],
         [0.2099, 0.0381, 0.9122, 0.1063],
         [0.5535, 0.1552, 0.9015, 0.3277]]),
 tensor([[0.9070, 0.8466, 0.7899],
         [0.2701, 0.8909, 0.1779],
         [0.0903, 0.3411, 0.9049],
         [0.7792, 0.2365, 0.0589]]))

In [37]:
# torch.matmul is used to get matmul results on tensor
torch.matmul(ten_1,ten_2)

tensor([[1.3355, 1.6410, 1.2814],
        [0.3659, 0.5479, 1.0043],
        [0.8808, 0.9919, 1.2999]])

In [38]:
# matmul of two 1-D tensors is their dot product, so the result is a 0-dim (scalar) tensor
t1 = torch.rand(5)
print(t1,t1.ndim, t1.shape)
torch.matmul(t1,t1)

tensor([9.6259e-01, 7.5202e-01, 1.9819e-04, 4.9651e-01, 9.5978e-01]) 1 torch.Size([5])


tensor(2.6598)

### Check how fast matmul is vs looping

In [39]:
t1 = torch.rand(100000)

In [40]:
# matmul
%%time
torch.matmul(t1,t1)

CPU times: user 66 µs, sys: 12 µs, total: 78 µs
Wall time: 82.3 µs


tensor(33293.1719)

In [41]:
%%time
value = 0
for i in t1:
  value += (i*i)
value  # will have a slightly different result because of precision round off

CPU times: user 887 ms, sys: 48.9 ms, total: 936 ms
Wall time: 961 ms


tensor(33292.9648)

In [42]:
# Same as mnatmul
%%time
value = t1 @ t1
value

CPU times: user 144 µs, sys: 21 µs, total: 165 µs
Wall time: 171 µs


tensor(33293.1719)

### Transpose can be used to fix shape errors sometimes

In [43]:
t1 = torch.rand([3,4])
t2 = torch.rand([3,4])

In [44]:
print(t1.shape, t2.shape)
t1, t2

torch.Size([3, 4]) torch.Size([3, 4])


(tensor([[2.0010e-01, 8.3914e-01, 4.9475e-01, 2.8877e-01],
         [3.3393e-01, 2.1599e-01, 2.1980e-01, 5.6596e-01],
         [1.8853e-01, 2.3544e-01, 7.9554e-04, 7.1439e-01]]),
 tensor([[0.2840, 0.7116, 0.9128, 0.4094],
         [0.3848, 0.8858, 0.5656, 0.5172],
         [0.1306, 0.0230, 0.7985, 0.3610]]))

In [45]:
# t1 and t2 are both (3, 4), so torch.matmul(t2, t1) would raise an error: the inner dims (4 and 3) differ.
# Transposing t1 gives (3, 4) @ (4, 3) -> (3, 3).
torch.matmul(t2,t1.T)

tensor([[1.2238, 0.6809, 0.5143],
        [1.2495, 0.7369, 0.6510],
        [0.5447, 0.4284, 0.2885]])

# 6.Tensor Aggregation -- min,max, sum, mean, count

In [46]:
# torch generate random integers of shape 3,4
t1 = torch.randint(low=0,high=10,size=(3,4))
t1

tensor([[8, 0, 1, 6],
        [9, 4, 2, 6],
        [8, 3, 9, 4]])

In [47]:
t1.dtype

torch.int64

In [48]:
# t1 is int64, so a floating-point dtype is passed for the mean computation.
# dim=0 reduces over the rows, giving one mean per column.
torch.mean(t1, dtype=torch.float, dim = 0)

tensor([8.3333, 2.3333, 4.0000, 5.3333])

In [49]:
# Mean of an int64 tensor, computed as float32.
# dim=1 reduces over the columns, giving one mean per row.
torch.mean(t1,dtype=torch.float32, dim=1)

tensor([3.7500, 5.2500, 6.0000])

In [50]:
torch.min(t1), torch.max(t1), torch.sum(t1), torch.mean(t1,dtype=torch.float32)

(tensor(0), tensor(9), tensor(60), tensor(5.))

In [51]:
# shape gives the tensor's dimensions; count_nonzero counts only the non-zero entries
t1.shape,torch.count_nonzero(t1)

(torch.Size([3, 4]), tensor(11))

In [52]:
# Without a dim argument, argmax/argmin return the position of the max/min in the flattened tensor
torch.argmax(t1), torch.argmin(t1)

(tensor(4), tensor(1))

# 7.Reshape, stacking, squeezing and unsqueezing tensors

* Reshape - changes shape of a tensor
* View - returns a view of the original tensor that shares the same memory as the original
* Stacking - combines multiple tensors together -- vstack and hstack
* Squeeze - removes all dimensions of size 1 from the tensor
* Unsqueeze - Add a dimension
* permute -- rearranges the shape of a target tensor in a specified order

In [53]:
t1 = torch.randint(low=0,high=10,size=(3,4))
t1

tensor([[9, 3, 3, 1],
        [1, 3, 1, 7],
        [2, 4, 0, 4]])

In [54]:
# Reshape - changes shape of a tensor
new_t = t1.reshape(2,6)
t1, new_t

(tensor([[9, 3, 3, 1],
         [1, 3, 1, 7],
         [2, 4, 0, 4]]),
 tensor([[9, 3, 3, 1, 1, 3],
         [1, 7, 2, 4, 0, 4]]))

In [55]:
# View - return view of an original tensor but keeps the memory as original
t2 = t1.view(6,2)
t2,t1

(tensor([[9, 3],
         [3, 1],
         [1, 3],
         [1, 7],
         [2, 4],
         [0, 4]]),
 tensor([[9, 3, 3, 1],
         [1, 3, 1, 7],
         [2, 4, 0, 4]]))

Changing the view changes original tensor as they share the same memory

In [56]:
t2[0,0]=60
t2,t1

(tensor([[60,  3],
         [ 3,  1],
         [ 1,  3],
         [ 1,  7],
         [ 2,  4],
         [ 0,  4]]),
 tensor([[60,  3,  3,  1],
         [ 1,  3,  1,  7],
         [ 2,  4,  0,  4]]))

Stacking - combines multiple tensors together -- vstack and hstack

In [57]:
# vstack joins the tensors vertically (along dim 0, adding rows): two (3, 4) tensors -> (6, 4)
t3 = torch.vstack([t1,t1])
t3, t3.shape,

(tensor([[60,  3,  3,  1],
         [ 1,  3,  1,  7],
         [ 2,  4,  0,  4],
         [60,  3,  3,  1],
         [ 1,  3,  1,  7],
         [ 2,  4,  0,  4]]),
 torch.Size([6, 4]))

In [58]:
# hstack joins the tensors horizontally (along dim 1, adding columns): two (3, 4) tensors -> (3, 8)
t3 = torch.hstack([t1,t1])
t3, t3.shape

(tensor([[60,  3,  3,  1, 60,  3,  3,  1],
         [ 1,  3,  1,  7,  1,  3,  1,  7],
         [ 2,  4,  0,  4,  2,  4,  0,  4]]),
 torch.Size([3, 8]))

In [59]:
# stack joins the tensors along a new dimension (dim=0 here): two (3, 4) tensors -> (2, 3, 4)
t4 = torch.stack([t1,t1], dim = 0)
t4, t4.shape

(tensor([[[60,  3,  3,  1],
          [ 1,  3,  1,  7],
          [ 2,  4,  0,  4]],
 
         [[60,  3,  3,  1],
          [ 1,  3,  1,  7],
          [ 2,  4,  0,  4]]]),
 torch.Size([2, 3, 4]))

In [60]:
torch.rand([1,4]).shape

torch.Size([1, 4])

In [61]:
tensor_1_4 = torch.rand([1,4])
tensor_1_4_v2 = torch.rand([1,4])

In [62]:
t_stacked = torch.stack([tensor_1_4, tensor_1_4_v2])
t_stacked, t_stacked.shape

(tensor([[[0.1686, 0.4593, 0.6839, 0.8879]],
 
         [[0.4598, 0.5133, 0.8600, 0.2921]]]),
 torch.Size([2, 1, 4]))

In [63]:
torch.squeeze(t_stacked), torch.squeeze(t_stacked).shape

(tensor([[0.1686, 0.4593, 0.6839, 0.8879],
         [0.4598, 0.5133, 0.8600, 0.2921]]),
 torch.Size([2, 4]))

Squeeze removes all dimensions of size 1 from a tensor

Unsqueeze adds a dim

In [64]:
x = torch.zeros(2, 1, 2, 1, 2)
print(x.size())


torch.Size([2, 1, 2, 1, 2])


In [65]:
y = torch.squeeze(x)
print(y.size())

torch.Size([2, 2, 2])


In [66]:
y = torch.squeeze(x, 0)
print(y.size())

torch.Size([2, 1, 2, 1, 2])


In [67]:
y = torch.squeeze(x, 1)
print(y.size())

torch.Size([2, 2, 1, 2])


In [68]:
y = torch.squeeze(x, (1, 2, 3))
y

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [69]:
x = torch.zeros(2, 1, 2, 1, 2)
x,x.ndim

(tensor([[[[[0., 0.]],
 
           [[0., 0.]]]],
 
 
 
         [[[[0., 0.]],
 
           [[0., 0.]]]]]),
 5)

In [70]:
x2 = x.squeeze()
x2,x2.ndim

(tensor([[[0., 0.],
          [0., 0.]],
 
         [[0., 0.],
          [0., 0.]]]),
 3)

Another example of squeeze -- removes all dimensions of size 1

In [71]:
x = torch.arange(1,11)
x2 = x.reshape([1,10])
x2, x2.shape, x2.ndim

(tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]]), torch.Size([1, 10]), 2)

In [72]:
x3 = x2.squeeze()
x3, x3.shape, x3.ndim

(tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]), torch.Size([10]), 1)

In [73]:
x = torch.randint(low = 0, high = 10, size = (3,1,1,4))
x, x.shape, x.ndim

(tensor([[[[0, 1, 7, 9]]],
 
 
         [[[5, 6, 9, 8]]],
 
 
         [[[5, 5, 1, 0]]]]),
 torch.Size([3, 1, 1, 4]),
 4)

In [74]:
x2 = x.squeeze()
x2

tensor([[0, 1, 7, 9],
        [5, 6, 9, 8],
        [5, 5, 1, 0]])

In [75]:
torch.squeeze(x,2)

tensor([[[0, 1, 7, 9]],

        [[5, 6, 9, 8]],

        [[5, 5, 1, 0]]])

Unsqueeze -- Adds a dimension

In [76]:
x2 = torch.randint(low = 0, high = 10, size = (3,4))
x2

tensor([[7, 3, 2, 3],
        [9, 9, 7, 5],
        [8, 9, 2, 1]])

In [77]:
torch.unsqueeze(x2,2)

tensor([[[7],
         [3],
         [2],
         [3]],

        [[9],
         [9],
         [7],
         [5]],

        [[8],
         [9],
         [2],
         [1]]])

In [78]:
torch.unsqueeze(x2,1)

tensor([[[7, 3, 2, 3]],

        [[9, 9, 7, 5]],

        [[8, 9, 2, 1]]])

In [79]:
torch.unsqueeze(x2,0)

tensor([[[7, 3, 2, 3],
         [9, 9, 7, 5],
         [8, 9, 2, 1]]])

In [80]:
torch.unsqueeze(x2,2).shape,torch.unsqueeze(x2,1).shape, torch.unsqueeze(x2,0).shape

(torch.Size([3, 4, 1]), torch.Size([3, 1, 4]), torch.Size([1, 3, 4]))

permute -- rearranges the shape of a target tensor in a specified order

In [81]:
# Permute means reorder dims
x = torch.randn([1,5,3])
x, x.shape, x.ndim

(tensor([[[ 2.0435,  0.3715,  0.2460],
          [ 0.4535, -0.7348, -0.8552],
          [ 1.0905,  0.2767,  0.2745],
          [-0.3210,  0.8202, -0.7710],
          [ 1.0732, -0.0748,  2.2483]]]),
 torch.Size([1, 5, 3]),
 3)

In [82]:
x2 = x.permute(2,0,1)
x2, x2.shape, x2.ndim

(tensor([[[ 2.0435,  0.4535,  1.0905, -0.3210,  1.0732]],
 
         [[ 0.3715, -0.7348,  0.2767,  0.8202, -0.0748]],
 
         [[ 0.2460, -0.8552,  0.2745, -0.7710,  2.2483]]]),
 torch.Size([3, 1, 5]),
 3)

# 8.Indexing (select data from Tensors)

In [83]:
x = torch.randint(low = 0, high = 20, size = (3,4))
x, x.shape, x.ndim

(tensor([[10, 13,  7,  2],
         [16, 13,  8,  0],
         [19, 10,  8, 15]]),
 torch.Size([3, 4]),
 2)

In [84]:
x[0]

tensor([10, 13,  7,  2])

In [85]:
x2 = x.reshape(1,3,4)
x2, x2.shape, x2.ndim

(tensor([[[10, 13,  7,  2],
          [16, 13,  8,  0],
          [19, 10,  8, 15]]]),
 torch.Size([1, 3, 4]),
 3)

In [86]:
x2[0],x2[0][0], x2[0][0][0]

(tensor([[10, 13,  7,  2],
         [16, 13,  8,  0],
         [19, 10,  8, 15]]),
 tensor([10, 13,  7,  2]),
 tensor(10))

In [87]:
x2[0][2][3]

tensor(15)

In [88]:
x2[:,1,1]

tensor([13])

# 9.Pytorch and Numpy

- Numpy is used for scientific computing
- Numpy is crucial for torch
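- When converting np -> tensor, the tensor keeps NumPy's dtype (e.g. float64) instead of torch's default float32
- `torch.from_numpy` shares memory with the array: reassigning the name (`np_array = np_array + 5`) creates a new array and leaves the tensor unchanged, but an in-place change (`np_array += 5`) would also change the tensor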
   

In [89]:
import numpy as np

In [90]:
np_array = np.arange(0.0,10.0)
np_array

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [91]:
#convert to tensor from Numpy
t1 =  torch.from_numpy(np_array)
t1, t1.dtype

(tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64),
 torch.float64)

In [92]:
# `np_array + 5` builds a new array and rebinds the name, so t1 is untouched here.
# NOTE: torch.from_numpy shares memory with the source array, so an in-place update
# such as `np_array += 5` would be reflected in t1.
np_array = np_array + 5
np_array, t1

(array([ 5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.]),
 tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64))

In [93]:
# Tensor to NumPy
t2 = torch.randint(low = 0, high = 10, size = (1,10))
t2, t2.dtype

(tensor([[6, 9, 1, 7, 8, 4, 1, 4, 3, 7]]), torch.int64)

In [94]:
np_arr_new = t2.numpy()
np_arr_new

array([[6, 9, 1, 7, 8, 4, 1, 4, 3, 7]])

In [95]:
# `np_arr_new + 10` builds a new array and rebinds the name, so t2 is untouched here.
# NOTE: Tensor.numpy() on a CPU tensor shares memory with the tensor, so an in-place
# update such as `np_arr_new += 10` would be reflected in t2.
np_arr_new = np_arr_new + 10
np_arr_new,t2

(array([[16, 19, 11, 17, 18, 14, 11, 14, 13, 17]]),
 tensor([[6, 9, 1, 7, 8, 4, 1, 4, 3, 7]]))

# 10.Reproducibility (setting the random seed)

- Extra resources -- https://pytorch.org/docs/stable/notes/randomness.html

In [96]:
# Testing

random_t_A = torch.rand(3,4)
random_t_B = torch.rand(3,4)
print(random_t_A)
print(random_t_B)
print(random_t_A == random_t_B)

tensor([[0.8574, 0.9812, 0.6676, 0.0623],
        [0.2447, 0.9483, 0.3240, 0.8353],
        [0.5625, 0.7079, 0.6159, 0.4485]])
tensor([[0.8894, 0.3693, 0.4354, 0.3016],
        [0.4859, 0.8547, 0.9936, 0.3492],
        [0.1407, 0.6175, 0.2487, 0.6643]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [97]:
# Re-seed before each draw so both tensors come from the same generator state
torch.manual_seed(0)
random_t_A = torch.rand(3, 4)

torch.manual_seed(0)
random_t_B = torch.rand(3, 4)

print(random_t_A, random_t_B, random_t_A == random_t_B, sep="\n")

tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


# Running on GPUs --> CUDA + NVIDIA hardware



In [98]:
!nvidia-smi

Sat Sep 21 03:08:34 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   62C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [99]:
# Check if GPU/TPU is available
import torch
torch.cuda.is_available()

True

In [100]:
### Device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [101]:
torch.cuda.device_count()

1

## Putting tensors and models on GPU -- for faster operation

In [102]:
# Tensor not on GPU by default
t1 = torch.tensor([1,2,3])
t1, t1.device

(tensor([1, 2, 3]), device(type='cpu'))

In [103]:
# Move the tensor to the selected device. `device` is "cuda" when a GPU is
# available and "cpu" otherwise, so no extra availability check is needed here.
t1 = t1.to(device)
t1, t1.device

(tensor([1, 2, 3], device='cuda:0'), device(type='cuda', index=0))

In [104]:
device

'cuda'

In [105]:
# moving tensor back to CPU
t1 = t1.to("cpu")
t1, t1.device, t1.numpy()

(tensor([1, 2, 3]), device(type='cpu'), array([1, 2, 3]))

# Exercises and extra curriculum

1. Create a random tensor with shape (7, 7).

In [106]:
t1 = torch.rand(7,7)
t1.shape, t1.dtype

(torch.Size([7, 7]), torch.float32)

2. Perform a matrix multiplication on the tensor from 1 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).

In [107]:
t2 = torch.rand([1,7])
t2.shape, t2.dtype

(torch.Size([1, 7]), torch.float32)

In [108]:
torch.matmul(t1,t2.T)

tensor([[1.4622],
        [2.1803],
        [1.8105],
        [1.6405],
        [1.5135],
        [2.7092],
        [1.7404]])

3. Set the random seed to 0 and do exercises 1 & 2 over again.

In [109]:
# Re-seed to 0 before creating each random tensor, then multiply (7, 7) @ (7, 1)
torch.manual_seed(0)
tn = torch.rand(7, 7)

torch.manual_seed(0)
tn2 = torch.rand(1, 7)

tn @ tn2.T

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])

4. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.

In [110]:
# GPU counterpart of torch.manual_seed: seeds the random number generator of the current CUDA device
torch.cuda.manual_seed(1234)


5. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.

In [111]:
# Seed with 7, then draw a random tensor of shape (1, 1, 1, 10)
torch.manual_seed(7)
t1 = torch.rand(1, 1, 1, 10)
t1, t1.shape, t1.ndim, t1.dtype

(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
            0.3653, 0.8513]]]]),
 torch.Size([1, 1, 1, 10]),
 4,
 torch.float32)

In [112]:
t2 = t1.squeeze()
t2, t2.shape, t2.ndim, t2.dtype

(tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
         0.8513]),
 torch.Size([10]),
 1,
 torch.float32)