# Importing PyTorch

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

torch.__version__

'2.7.0+cu128'

# Introduction to tensors 

## Creating tensors 

np.array([1, 2, 3]) → vector (1D tensor)

np.array([[1, 2], [3, 4]]) → matrix (2D tensor)

np.random.rand(3, 4, 5) → 3D tensor

![image.png](attachment:image.png)

### Scalar

In [2]:
# A scalar is a tensor with zero dimensions: one value and no axes.
scalar = torch.tensor(7)
print(scalar, scalar.ndim, sep="\n")
# .item() pulls the single value out as a plain Python number
# (only valid when the tensor holds exactly one element).
scalar.item()

tensor(7)
0


7

### Vector

In [3]:
# A vector is a one-dimensional tensor.
vector = torch.tensor([7, 8])
# ndim  -> number of axes (matches how deeply the square brackets are nested)
# shape -> number of elements along each axis
print(vector, vector.ndim, vector.shape, sep="\n")

tensor([7, 8])
1
torch.Size([2])


### MATRIX

In [4]:
# A matrix is a two-dimensional tensor (rows x columns).
Matrix = torch.tensor([[7, 8], [9, 10]])
# Show the matrix, the element at row 1 / column 1, the number of axes and the shape.
print(Matrix, Matrix[1, 1], Matrix.ndim, Matrix.shape, sep="\n")

tensor([[ 7,  8],
        [ 9, 10]])
tensor(10)
2
torch.Size([2, 2])


### TENSOR

In [5]:
# Three levels of bracket nesting -> a three-dimensional tensor holding one 3x3 matrix.
TENSOR = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
# Show the tensor, its number of axes, its shape, and the single matrix at index 0.
print(TENSOR, TENSOR.ndim, TENSOR.shape, TENSOR[0], sep="\n")

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
3
torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


![image.png](attachment:image.png)

TENSOR.shape → torch.Size([1, 3, 3])
→ Meaning:

1 → one matrix

3 → three rows

3 → three columns

In [6]:
# Four 4x4 matrices stacked into a single 3-D tensor -> shape (4, 4, 4).
TENSOR = torch.tensor([
    # Matrix 0
    [[11, 22, 33, 44], [44, 55, 66, 44], [77, 88, 99, 44], [10, 11, 12, 4]],
    # Matrix 1
    [[13, 14, 15, 4], [16, 17, 18, 4], [19, 20, 21, 4], [22, 23, 24, 4]],
    # Matrix 2
    [[25, 26, 27, 4], [28, 29, 30, 4], [31, 32, 33, 4], [34, 35, 36, 4]],
    # Matrix 3 (same values as Matrix 2)
    [[25, 26, 27, 4], [28, 29, 30, 4], [31, 32, 33, 4], [34, 35, 36, 4]],
])

# The shape (torch.Size([4, 4, 4])) is shown both before and after the full tensor.
print(TENSOR.shape, TENSOR, TENSOR.ndim, TENSOR.shape, sep="\n")

torch.Size([4, 4, 4])
tensor([[[11, 22, 33, 44],
         [44, 55, 66, 44],
         [77, 88, 99, 44],
         [10, 11, 12,  4]],

        [[13, 14, 15,  4],
         [16, 17, 18,  4],
         [19, 20, 21,  4],
         [22, 23, 24,  4]],

        [[25, 26, 27,  4],
         [28, 29, 30,  4],
         [31, 32, 33,  4],
         [34, 35, 36,  4]],

        [[25, 26, 27,  4],
         [28, 29, 30,  4],
         [31, 32, 33,  4],
         [34, 35, 36,  4]]])
3
torch.Size([4, 4, 4])


## Random Tensors

In [7]:
# torch.rand fills a tensor of the requested shape with random values.
random_tensor = torch.rand(3, 3, 5)
# Show the values, then the shape and the number of axes.
print(random_tensor, random_tensor.shape, random_tensor.ndim, sep="\n")

tensor([[[0.6864, 0.0163, 0.4237, 0.9702, 0.6854],
         [0.8210, 0.2350, 0.1947, 0.3839, 0.3607],
         [0.6936, 0.9427, 0.1559, 0.0161, 0.6665]],

        [[0.3240, 0.5751, 0.1309, 0.6258, 0.8149],
         [0.2955, 0.5301, 0.1506, 0.0362, 0.7814],
         [0.5716, 0.5502, 0.3512, 0.8723, 0.6575]],

        [[0.7490, 0.0868, 0.3034, 0.1225, 0.9443],
         [0.7979, 0.7238, 0.1122, 0.8892, 0.7205],
         [0.4498, 0.2391, 0.6009, 0.5612, 0.6200]]])
torch.Size([3, 3, 5])
3


### Create Random Tensor Similar to Image

In [8]:
# Random values in an image-like shape: 3 x 224 x 224.
random_image_size_tensor = torch.rand(3, 224, 224)
(random_image_size_tensor.shape, random_image_size_tensor.ndim)

(torch.Size([3, 224, 224]), 3)

### Tensor of Zeros 

In [9]:
# A 3 x 4 tensor in which every element is 0.
zeros = torch.zeros(3, 4)
print(zeros)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


### Tensor of Ones

In [10]:
# A 3 x 4 tensor in which every element is 1; the dtype is printed as well.
ones = torch.ones(3, 4)
print(ones, ones.dtype, sep="\n")

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
torch.float32


### Creating a range of tensors and tensors-like

In [11]:
# a: 0, 10, ..., 90 (the end value 100 is excluded)
a = torch.arange(0, 100, 10)
# b: 0.0, 1.0, ..., 99.0 stored as float32 (step defaults to 1)
b = torch.arange(start=0, end=100, dtype=torch.float32)
print(a, b, sep="\n")

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27.,
        28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55.,
        56., 57., 58., 59., 60., 61., 62., 63., 64., 65., 66., 67., 68., 69.,
        70., 71., 72., 73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83.,
        84., 85., 86., 87., 88., 89., 90., 91., 92., 93., 94., 95., 96., 97.,
        98., 99.])


In [12]:
# zeros_like builds a zero-filled tensor with the same shape and dtype as `b`.
# NOTE: `b` holds 100 values, so this is 100 zeros despite the variable name.
ten_zeros = torch.zeros_like(input=b)
print(ten_zeros, ten_zeros.dtype, sep="\n")

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])
torch.float32


## Tensor Types

In [13]:
# Create the tensor on the GPU when one is available and fall back to the CPU
# otherwise, so this cell also runs on machines without CUDA.
tensor_float_32 = torch.tensor(
    [1.0, 2.0, 3.0],
    dtype=torch.float32,  # datatype of the elements
    device="cuda" if torch.cuda.is_available() else "cpu",  # where the tensor is stored
    requires_grad=False,  # do not track operations on this tensor for gradients
)
print(tensor_float_32.dtype)

torch.float32


In [14]:
# Convert the float32 tensor to half precision (float16).
float_16_tensor = tensor_float_32.to(dtype=torch.float16)
print(float_16_tensor.dtype)

torch.float16


##  Manipulating tensors

In [15]:
# Multiply a random 3 x 4 tensor by 10 in two equivalent ways.
random_gen_tensor = torch.rand(3, 4)
mult_tensor = torch.mul(random_gen_tensor, 10)  # function form
# The * operator (second printout) gives the same values as torch.mul.
print(mult_tensor, random_gen_tensor * 10, sep="\n")

tensor([[3.6239, 7.1001, 5.5739, 8.1279],
        [0.1795, 8.5311, 6.8249, 6.7952],
        [9.7925, 4.3198, 7.6923, 3.6478]])
tensor([[3.6239, 7.1001, 5.5739, 8.1279],
        [0.1795, 8.5311, 6.8249, 6.7952],
        [9.7925, 4.3198, 7.6923, 3.6478]])


In [16]:
# Add 10 to a random 3 x 4 tensor with torch.add and with the + operator.
random_gen_tensor = torch.rand(3, 4)
add_tensor = torch.add(random_gen_tensor, 10)
print(add_tensor, random_gen_tensor + 10, sep="\n")

# Both forms must agree on every element.
elementwise_match = add_tensor == (random_gen_tensor + 10)
assert elementwise_match.all(), "Condition not Met"
print("Condition Met")

tensor([[10.4745, 10.4371, 10.7555, 10.2594],
        [10.7154, 10.1046, 10.5143, 10.8065],
        [10.4758, 10.5369, 10.4457, 10.3498]])
tensor([[10.4745, 10.4371, 10.7555, 10.2594],
        [10.7154, 10.1046, 10.5143, 10.8065],
        [10.4758, 10.5369, 10.4457, 10.3498]])
Condition Met


In [17]:
# Subtract 10 from a random 3 x 4 tensor with torch.subtract and with the - operator.
random_gen_tensor = torch.rand(3, 4)
sub_tensor = torch.subtract(random_gen_tensor, 10)
print(sub_tensor)
print(random_gen_tensor - 10)

# torch.equal returns a plain Python bool: True when both tensors have the same
# shape and values, False otherwise (instead of silently displaying nothing).
torch.equal(sub_tensor, random_gen_tensor - 10)

tensor([[-9.1954, -9.2780, -9.5680, -9.7549],
        [-9.3846, -9.2512, -9.0350, -9.7526],
        [-9.2254, -9.5305, -9.7255, -9.8411]])
tensor([[-9.1954, -9.2780, -9.5680, -9.7549],
        [-9.3846, -9.2512, -9.0350, -9.7526],
        [-9.2254, -9.5305, -9.7255, -9.8411]])


True

In [18]:
# Divide a random 3 x 4 tensor by 10 with torch.divide and with the / operator.
random_gen_tensor = torch.rand(3, 4)
dev_tensor = torch.divide(random_gen_tensor, 10)
print(dev_tensor, random_gen_tensor / 10, sep="\n")
# Both forms must agree on every element.
elementwise_match = dev_tensor == (random_gen_tensor / 10)
assert elementwise_match.all(), "Condition not met"
print(True)

tensor([[0.0558, 0.0853, 0.0984, 0.0002],
        [0.0407, 0.0723, 0.0911, 0.0976],
        [0.0457, 0.0800, 0.0094, 0.0692]])
tensor([[0.0558, 0.0853, 0.0984, 0.0002],
        [0.0407, 0.0723, 0.0911, 0.0976],
        [0.0457, 0.0800, 0.0094, 0.0692]])
True


# Matrix multiplication (is all we need)

One of the most common operations in machine learning and deep learning algorithms (like neural networks) is [matrix multiplication](https://www.mathsisfun.com/algebra/matrix-multiplying.html).

PyTorch implements matrix multiplication functionality in the [`torch.matmul()`](https://pytorch.org/docs/stable/generated/torch.matmul.html) method.

The main two rules for matrix multiplication to remember are:

1. The **inner dimensions** must match:
  * `(3, 2) @ (3, 2)` won't work
  * `(2, 3) @ (3, 2)` will work
  * `(3, 2) @ (2, 3)` will work
2. The resulting matrix has the shape of the **outer dimensions**:
 * `(2, 3) @ (3, 2)` -> `(2, 2)`
 * `(3, 2) @ (2, 3)` -> `(3, 3)`

> **Note:** "`@`" in Python is the symbol for matrix multiplication.

> **Resource:** we can see all of the rules for matrix multiplication using `torch.matmul()` [in the PyTorch documentation](https://pytorch.org/docs/stable/generated/torch.matmul.html).



Without a transpose, shapes such as `(3, 2) @ (3, 2)` break the inner-dimension rule and `torch.matmul()` raises a shape-mismatch error.

How about a visual? 

![visual demo of matrix multiplication](https://github.com/mrdbourke/pytorch-deep-learning/raw/main/images/00-matrix-multiply-crop.gif)

You can create your own matrix multiplication visuals like this at http://matrixmultiplication.xyz/.

> **Note:** A matrix multiplication like this is also referred to as the [**dot product**](https://www.mathsisfun.com/algebra/vectors-dot-product.html) of two matrices.

In [19]:
# Both tensors have shape (3, 2), so their inner dimensions (2 and 3) do not match.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float)
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]], dtype=torch.float)

# torch.matmul(tensor_A, tensor_B)  # (this will error)

We can make matrix multiplication work between `tensor_A` and `tensor_B` by making their inner dimensions match.

One of the ways to do this is with a **transpose** (switch the dimensions of a given tensor).

You can perform transposes in PyTorch using either:
* `torch.transpose(input, dim0, dim1)` - where `input` is the desired tensor to transpose and `dim0` and `dim1` are the dimensions to be swapped.
* `tensor.T` - where `tensor` is the desired tensor to transpose.

In [20]:
# Show both operands as they currently are
print(tensor_A, tensor_B, sep="\n")

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [21]:
# Show tensor_A next to the transpose of tensor_B
print(tensor_A, tensor_B.T, sep="\n")

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [22]:
# Transposing tensor_B swaps its two dimensions, so the inner dimensions of
# tensor_A and tensor_B.T agree and the matrix product is defined.
shape_A, shape_B, shape_B_T = tensor_A.shape, tensor_B.shape, tensor_B.T.shape
print(f"Original shapes: tensor_A = {shape_A}, tensor_B = {shape_B}\n")
print(f"New shapes: tensor_A = {shape_A} (same as above), tensor_B.T = {shape_B_T}\n")
print(f"Multiplying: {shape_A} * {shape_B_T} <- inner dimensions match\n")
print("Output:\n")
# The @ operator performs the same matrix multiplication as torch.matmul
output = tensor_A @ tensor_B.T
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


You can also use [`torch.mm()`](https://pytorch.org/docs/stable/generated/torch.mm.html), which performs the same matrix product as `torch.matmul()` for 2-D tensors (unlike `torch.matmul()`, it does not broadcast).

In [23]:
# torch.mm multiplies two 2-D matrices; for these inputs it gives the same
# result as torch.matmul (NOTE: unlike matmul it does not broadcast - see the torch.mm docs)
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

### Reshaping, stacking, squeezing and unsqueezing

Often times you'll want to reshape or change the dimensions of your tensors without actually changing the values inside them.

To do so, some popular methods are:

| Method | One-line description |
| ----- | ----- |
| [`torch.reshape(input, shape)`](https://pytorch.org/docs/stable/generated/torch.reshape.html#torch.reshape) | Reshapes `input` to `shape` (if compatible), can also use `torch.Tensor.reshape()`. |
| [`Tensor.view(shape)`](https://pytorch.org/docs/stable/generated/torch.Tensor.view.html) | Returns a view of the original tensor in a different `shape` but shares the same data as the original tensor. |
| [`torch.stack(tensors, dim=0)`](https://pytorch.org/docs/1.9.1/generated/torch.stack.html) | Concatenates a sequence of `tensors` along a new dimension (`dim`), all `tensors` must be same size. |
| [`torch.squeeze(input)`](https://pytorch.org/docs/stable/generated/torch.squeeze.html) | Squeezes `input` to remove all the dimensions of size `1`. |
| [`torch.unsqueeze(input, dim)`](https://pytorch.org/docs/1.9.1/generated/torch.unsqueeze.html) | Returns `input` with a dimension value of `1` added at `dim`. | 
| [`torch.permute(input, dims)`](https://pytorch.org/docs/stable/generated/torch.permute.html) | Returns a *view* of the original `input` with its dimensions permuted (rearranged) to `dims`. | 

Why do any of these?

Because deep learning models (neural networks) are all about manipulating tensors in some way. And because of the rules of matrix multiplication, if you've got shape mismatches, you'll run into errors. These methods help you make sure the right elements of your tensors are mixing with the right elements of other tensors. 

In [24]:
# Float values 1.0 through 9.0 (the end value 10.0 is excluded)
x = torch.arange(1.0, 10.0)
(x, x.shape)

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [25]:
# Reshape the 9 elements into 1 row x 9 columns
x_reshaped = torch.reshape(x, (1, 9))
(x_reshaped, x_reshaped.shape)

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [26]:
# Change view: z has a different shape but shares the same underlying data as x

z = x.view(1, 9)
# Writing through the view changes x as well (compare the two printouts below)
z[:, 0] = 5
print(z, z.shape)
print(x, x.shape)

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]) torch.Size([1, 9])
tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]) torch.Size([9])


In [27]:
# Stack three copies of x along a new leading dimension (each copy becomes a row)
x_stacked = torch.stack([x] * 3, dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [28]:
# Check the shape of x_reshaped before squeezing out its size-1 dimension
x_reshaped.shape

torch.Size([1, 9])

How about removing all single dimensions from a tensor?

To do so we can use `torch.squeeze()` (I remember this as *squeezing* the tensor to only have dimensions over 1).

In [29]:
print(f"Previous tensor: {x_reshaped}", f"Previous shape: {x_reshaped.shape}", sep="\n")

# Drop every size-1 dimension from x_reshaped
x_squeezed = torch.squeeze(x_reshaped)
print(f"\nNew tensor: {x_squeezed}", f"New shape: {x_squeezed.shape}", sep="\n")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


And to do the reverse of `torch.squeeze()` we can use `torch.unsqueeze()` to add a dimension value of 1 at a specific index.

In [30]:
print(f"Previous tensor: {x_squeezed}", f"Previous shape: {x_squeezed.shape}", sep="\n")

# Insert a new size-1 dimension at index 0
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
print(f"\nNew tensor: {x_unsqueezed}", f"New shape: {x_unsqueezed.shape}", sep="\n")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [31]:
# Stand-in for an image stored as (height, width, color channels), i.e. HWC.
x_original = torch.rand(244, 244, 3)
print(x_original.shape)  # torch.Size([244, 244, 3])

# Convert HWC -> CHW (color channels first). permute(2, 0, 1) lists, for each
# NEW axis, which OLD axis it is taken from:
#   new axis 0 <- old axis 2 (color channels)
#   new axis 1 <- old axis 0 (height)
#   new axis 2 <- old axis 1 (width)
x_original_permuted = x_original.permute(2, 0, 1)
print(x_original_permuted.shape)  # torch.Size([3, 244, 244])

torch.Size([244, 244, 3])
torch.Size([3, 244, 244])


# Indexing

In [32]:
# Integers 1 through 9 (the end value 10 is excluded)
x = torch.arange(1, 10)
print(x.shape)
# Arrange the 9 values as a single 3 x 3 matrix inside a 3-D tensor
x_reshaped = torch.reshape(x, (1, 3, 3))
print(x_reshaped.shape, x_reshaped, sep="\n")

torch.Size([9])
torch.Size([1, 3, 3])
tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])


In [33]:
# Index 0 along dim 0 -> the 3 x 3 matrix stored there
x_reshaped[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [34]:
# Index 0 on dim 0, index 2 on dim 1, index 2 on dim 2 -> last row, last column
x_reshaped[0, 2, 2]

tensor(9)

In [35]:
# Keep every index along dims 0 and 1, but take only index 1 along dim 2 (the middle column)
x_reshaped[:, :, 1]

tensor([[2, 5, 8]])

In [36]:
# Keep every index along dim 0, but take index 1 along both dim 1 and dim 2 (the centre element)
x_reshaped[:, 1, 1]

tensor([5])

In [37]:
# Take index 0 along dims 0 and 1, but keep every value along dim 2 (the first row)
x_reshaped[0, 0, :]

tensor([1, 2, 3])

In [38]:
# Index into x_reshaped to return the single value 9 (last row, last column)
x_reshaped[0, 2, 2]
# x_reshaped[0, 2:3, 2:3]  # This gives a (1,1) subarray containing the element

tensor(9)

In [39]:
# Index into x_reshaped to return 3, 6, 9 (the last column: index 2 along dim 2)
x_reshaped[:, :, 2]

tensor([[3, 6, 9]])

## PyTorch tensors & NumPy

Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.  

The two main methods we'll want to use for NumPy to PyTorch (and back again) are: 
* [`torch.from_numpy(ndarray)`](https://pytorch.org/docs/stable/generated/torch.from_numpy.html) - NumPy array -> PyTorch tensor. 
* [`torch.Tensor.numpy()`](https://pytorch.org/docs/stable/generated/torch.Tensor.numpy.html) - PyTorch tensor -> NumPy array.

In [40]:
# NumPy array -> PyTorch tensor; the tensor keeps the array's float64 dtype
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)  # append .type(torch.float32) to get float32 instead
(array, tensor)

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

> **Note:** By default, NumPy arrays are created with the datatype `float64` and if we convert it to a PyTorch tensor, it'll keep the same datatype (as above). 
>
> However, many PyTorch calculations default to using `float32`. 
> 
> So if we want to convert our NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), we can use `tensor = torch.from_numpy(array).type(torch.float32)`.

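`torch.from_numpy()` shares memory with the source array, so an in-place change (such as `array += 1`) would also show up in the tensor. Below, however, `array = array + 1` creates a brand-new array and rebinds the name `array` to it, so the tensor (which still refers to the original data) stays the same.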

In [41]:
# Change the array, keep the tensor
# `array + 1` produces a new array and `array` is rebound to it, so the
# tensor created earlier from the old array is left untouched.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [42]:
# PyTorch tensor -> NumPy array
tensor = torch.ones(7)  # seven ones, dtype=float32
numpy_tensor = tensor.numpy()  # the array keeps float32 unless it is converted
(tensor, numpy_tensor)

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

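The same reasoning applies here: `tensor.numpy()` shares memory with the tensor, but `tensor = tensor + 1` below creates a new tensor and rebinds the name `tensor`, so `numpy_tensor` (which still refers to the original data) stays the same.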

In [43]:
# Change the tensor, keep the array the same
# `tensor + 1` produces a new tensor and `tensor` is rebound to it, so the
# NumPy array created earlier from the old tensor is left untouched.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

As you learn more about neural networks and machine learning, you'll start to discover how much randomness plays a part.

Well, pseudorandomness that is. After all, computers are designed to be fundamentally deterministic (each step is predictable), so the randomness they create is simulated randomness (though there is debate on this too, but since I'm not a computer scientist, I'll let you find out more yourself).

How does this relate to neural networks and deep learning then?

We've discussed neural networks start with random numbers to describe patterns in data (these numbers are poor descriptions) and try to improve those random numbers using tensor operations (and a few other things we haven't discussed yet) to better describe patterns in data.

In short: 

``start with random numbers -> tensor operations -> try to make better (again and again and again)``

Although randomness is nice and powerful, sometimes you'd like there to be a little less randomness.

Why?

So you can perform repeatable experiments.

For example, you create an algorithm capable of achieving X performance.

And then your friend tries it out to verify you're not crazy.

How could they do such a thing?

That's where **reproducibility** comes in.

In other words, can you get the same (or very similar) results on your computer running the same code as I get on mine?

Let's see a brief example of reproducibility in PyTorch.

We'll start by creating two random tensors, since they're random, you'd expect them to be different right? 

In [44]:
# Create two random tensors without re-seeding between the calls,
# so each draw produces different values
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print("Does Tensor A equal Tensor B? (anywhere)")
# Element-wise comparison: one boolean per position
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.0494, 0.8902, 0.0441, 0.6192],
        [0.9306, 0.7600, 0.0558, 0.6852],
        [0.9543, 0.9112, 0.7811, 0.5537]])

Tensor B:
tensor([[0.4744, 0.4846, 0.6677, 0.1783],
        [0.3568, 0.4980, 0.3781, 0.8965],
        [0.3476, 0.0854, 0.3689, 0.3018]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

Just as we might've expected, the tensors come out with different values.

But what if we wanted to create two random tensors with the *same* values.

As in, the tensors would still contain random values but they would be of the same flavour.

That's where [`torch.manual_seed(seed)`](https://pytorch.org/docs/stable/generated/torch.manual_seed.html) comes in, where `seed` is an integer (like `42` but it could be anything) that flavours the randomness.

In [45]:
# Seed that fixes the pseudorandom sequence
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below

# Seed the generator, then draw the first tensor
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# Seed again before the second draw so it replays the same sequence;
# without this, tensor_D would be different to tensor_C
torch.manual_seed(RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print("Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

Nice!

It looks like setting the seed worked. 

> **Resource:** What we've just covered only scratches the surface of reproducibility in PyTorch. For more, on reproducibility in general and random seeds, I'd checkout:
> * [The PyTorch reproducibility documentation](https://pytorch.org/docs/stable/notes/randomness.html) (a good exercise would be to read through this for 10-minutes and even if you don't understand it now, being aware of it is important).
> * [The Wikipedia random seed page](https://en.wikipedia.org/wiki/Random_seed) (this'll give a good overview of random seeds and pseudorandomness in general).

## Running tensors on GPUs (and making faster computations)

Deep learning algorithms require a lot of numerical operations.

And by default these operations are often done on a CPU (central processing unit).

However, there's another common piece of hardware called a GPU (graphics processing unit), which is often much faster at performing the specific types of operations neural networks need (matrix multiplications) than CPUs.

Your computer might have one.

If so, you should look to use it whenever you can to train neural networks because chances are it'll speed up the training time dramatically.

There are a few ways to first get access to a GPU and secondly get PyTorch to use the GPU.

> **Note:** When I reference "GPU" throughout this course, I'm referencing a [Nvidia GPU with CUDA](https://developer.nvidia.com/cuda-gpus) enabled (CUDA is a computing platform and API that helps allow GPUs be used for general purpose computing & not just graphics) unless otherwise specified.




### 1. Getting a GPU

You may already know what's going on when I say GPU. But if not, there are a few ways to get access to one.

| **Method** | **Difficulty to setup** | **Pros** | **Cons** | **How to setup** |
| ----- | ----- | ----- | ----- | ----- |
| Google Colab | Easy | Free to use, almost zero setup required, can share work with others as easy as a link | Doesn't save your data outputs, limited compute, subject to timeouts | [Follow the Google Colab Guide](https://colab.research.google.com/notebooks/gpu.ipynb) |
| Use your own | Medium | Run everything locally on your own machine | GPUs aren't free, require upfront cost | Follow the [PyTorch installation guidelines](https://pytorch.org/get-started/locally/) |
| Cloud computing (AWS, GCP, Azure) | Medium-Hard | Small upfront cost, access to almost infinite compute | Can get expensive if running continually, takes some time to setup right | Follow the [PyTorch installation guidelines](https://pytorch.org/get-started/cloud-partners/) |

There are more options for using GPUs but the above three will suffice for now.

Personally, I use a combination of Google Colab and my own personal computer for small scale experiments (and creating this course) and go to cloud resources when I need more compute power.

> **Resource:** If you're looking to purchase a GPU of your own but not sure what to get, [Tim Dettmers has an excellent guide](https://timdettmers.com/2020/09/07/which-gpu-for-deep-learning/).

To check if you've got access to a Nvidia GPU, you can run `!nvidia-smi` where the `!` (also called bang) means "run this on the command line".



In [46]:
!nvidia-smi

Thu Apr 24 01:44:55 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 575.51.02              Driver Version: 576.02         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Quadro RTX 5000 with Max...    On  |   00000000:01:00.0 Off |                  N/A |
| N/A   60C    P0             36W /   90W |     270MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

If you don't have a Nvidia GPU accessible, the above will output something like:

```
NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.
```

In that case, go back up and follow the install steps.

If you do have a GPU, the line above will output something like:

```
Thu Apr 24 01:39:55 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 575.51.02              Driver Version: 576.02         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  Quadro RTX 5000 with Max...    On  |   00000000:01:00.0 Off |                  N/A |
| N/A   59C    P0             36W /   90W |     270MiB /  16384MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                                                         
+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI              PID   Type   Process name                        GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|    0   N/A  N/A           17369      C   /python3.11                           N/A      |
|    0   N/A  N/A           20986      C   /python3.11                           N/A      |
+-----------------------------------------------------------------------------------------+
```



### 2. Getting PyTorch to run on the GPU

Once you've got a GPU ready to access, the next step is getting PyTorch to use it for storing data (tensors) and computing on data (performing operations on tensors).

To do so, you can use the [`torch.cuda`](https://pytorch.org/docs/stable/cuda.html) package.

Rather than talk about it, let's try it out.

You can test if PyTorch has access to a GPU using [`torch.cuda.is_available()`](https://pytorch.org/docs/stable/generated/torch.cuda.is_available.html#torch.cuda.is_available).


In [47]:
# True when PyTorch can see and use a GPU, False otherwise
torch.cuda.is_available()

True

If the above outputs `True`, PyTorch can see and use the GPU, if it outputs `False`, it can't see the GPU and in that case, you'll have to go back through the installation steps.

Now, let's say you wanted to setup your code so it ran on CPU *or* the GPU if it was available.

That way, if you or someone decides to run your code, it'll work regardless of the computing device they're using. 

Let's create a `device` variable to store what kind of device is available.

In [48]:
# Pick the device: the GPU when PyTorch can use one, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

If the above output `"cuda"` it means we can set all of our PyTorch code to use the available CUDA device (a GPU) and if it output `"cpu"`, our PyTorch code will stick with the CPU.

> **Note:** In PyTorch, it's best practice to write [**device agnostic code**](https://pytorch.org/docs/master/notes/cuda.html#device-agnostic-code). This means code that'll run on CPU (always available) or GPU (if available).

If you want to do faster computing you can use a GPU but if you want to do *much* faster computing, you can use multiple GPUs.

You can count the number of GPUs PyTorch has access to using [`torch.cuda.device_count()`](https://pytorch.org/docs/stable/generated/torch.cuda.device_count.html#torch.cuda.device_count).

In [49]:
# Count the number of GPUs PyTorch has access to
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on the GPU

You can put tensors (and models, we'll see this later) on a specific device by calling [`to(device)`](https://pytorch.org/docs/stable/generated/torch.Tensor.to.html) on them. Where `device` is the target device you'd like the tensor (or model) to go to.

Why do this?

GPUs offer far faster numerical computing than CPUs do and if a GPU isn't available, because of our **device agnostic code** (see above), it'll run on the CPU.

> **Note:** Putting a tensor on GPU using `to(device)` (e.g. `some_tensor.to(device)`) returns a copy of that tensor, i.e. the same values will then exist on both CPU and GPU. To overwrite tensors, reassign them:
>
> `some_tensor = some_tensor.to(device)`

Let's try creating a tensor and putting it on the GPU (if it's available).

In [50]:
# New tensors are created on the CPU by default
tensor = torch.tensor([1, 2, 3])

# Show the tensor together with the device it lives on
print(f"{tensor} {tensor.device}")

# Send the tensor to the selected device (the GPU if one is available)
tensor_on_gpu = tensor.to(device=device)
tensor_on_gpu

tensor([1, 2, 3]) cpu


tensor([1, 2, 3], device='cuda:0')

If you have a GPU available, the above code will output something like:

```
tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0')
```

Notice the second tensor has `device='cuda:0'`, this means it's stored on the 0th GPU available (GPUs are 0 indexed, if two GPUs were available, they'd be `'cuda:0'` and `'cuda:1'` respectively, up to `'cuda:n'`).



### 4. Moving tensors back to the CPU

What if we wanted to move the tensor back to CPU?

For example, you'll want to do this if you want to interact with your tensors with NumPy (NumPy does not leverage the GPU).

Let's try using the [`torch.Tensor.numpy()`](https://pytorch.org/docs/stable/generated/torch.Tensor.numpy.html) method on our `tensor_on_gpu`.

In [51]:
# If tensor is on GPU, can't transform it to NumPy (this will error)
# tensor_on_gpu.numpy()

Instead, to get a tensor back to CPU and usable with NumPy we can use [`Tensor.cpu()`](https://pytorch.org/docs/stable/generated/torch.Tensor.cpu.html).

This copies the tensor to CPU memory so it's usable with CPUs.

In [52]:
# Instead, copy the tensor back to CPU memory with .cpu(), then convert that
# CPU copy to a NumPy array with .numpy(); tensor_on_gpu itself is not changed
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

The above returns a copy of the GPU tensor in CPU memory so the original tensor is still on GPU.

In [53]:
# The original tensor still reports its GPU device (.cpu() returned a copy)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')