<a href="https://colab.research.google.com/github/Mateusz-best-creator/Learning_PyTorch/blob/main/PyTorch_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [111]:
import torch
# Show the installed PyTorch version so results can be reproduced later
print(f"PyTorch version: {torch.__version__}")
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

PyTroch version: 2.3.1+cu121


In [112]:
!nvidia-smi

Fri Jul 26 07:27:31 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0              34W /  70W |    151MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Introduction To Tensors

### Creating tensors

In [113]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [114]:
scalar.ndim

0

In [115]:
scalar.item()

7

In [116]:
# vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [117]:
vector.ndim

1

In [118]:
# matrix
matrix = torch.tensor([[7, 8],
                       [9, 10]])
matrix

tensor([[ 7,  8],
        [ 9, 10]])

In [119]:
matrix.ndim

2

In [120]:
matrix.shape

torch.Size([2, 2])

In [121]:
# tensor
tensor = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]]])
tensor

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [122]:
tensor.ndim

3

In [123]:
tensor.shape

torch.Size([1, 3, 3])

### Random Tensors

In [124]:
random_tensor = torch.rand(size=[3,4,5])
random_tensor

tensor([[[0.6977, 0.8000, 0.1610, 0.2823, 0.6816],
         [0.9152, 0.3971, 0.8742, 0.4194, 0.5529],
         [0.9527, 0.0362, 0.1852, 0.3734, 0.3051],
         [0.9320, 0.1759, 0.2698, 0.1507, 0.0317]],

        [[0.2081, 0.9298, 0.7231, 0.7423, 0.5263],
         [0.2437, 0.5846, 0.0332, 0.1387, 0.2422],
         [0.8155, 0.7932, 0.2783, 0.4820, 0.8198],
         [0.9971, 0.6984, 0.5675, 0.8352, 0.2056]],

        [[0.5932, 0.1123, 0.1535, 0.2417, 0.7262],
         [0.7011, 0.2038, 0.6511, 0.7745, 0.4369],
         [0.5191, 0.6159, 0.8102, 0.9801, 0.1147],
         [0.3168, 0.6965, 0.9143, 0.9351, 0.9412]]])

In [125]:
random_tensor.ndim, random_tensor.shape

(3, torch.Size([3, 4, 5]))

In [126]:
random_tensor2 = torch.rand(size=[224, 224, 3]) # height, width, color channels
random_tensor2.shape, random_tensor2.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and Ones

In [127]:
zero = torch.zeros(size=(3, 4))
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [128]:
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [129]:
# default datatype
zero.dtype, ones.dtype

(torch.float32, torch.float32)

### Create range of tensors

In [130]:
one_to_ten = torch.arange(start=1, end=11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [131]:
# Creating tensors like
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor Datatypes

The 3 most common issues when working with tensors:

1. Tensors are not the right datatype.
2. Tensors are not the right shape.
3. Tensors are not on the right device.

In [132]:
# float32
float32_tensor = torch.tensor([3., 6., 9.])
float32_tensor.dtype

torch.float32

In [133]:
int64_tensor = torch.tensor([3, 6, 9])
int64_tensor.dtype

torch.int64

In [134]:
float_16_tensor = float32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [135]:
mixed_tensor = float32_tensor * float_16_tensor
mixed_tensor.dtype, mixed_tensor

(torch.float32, tensor([ 9., 36., 81.]))

In [136]:
int32_tensor = torch.tensor(data=[3,6,9], dtype=torch.int32)
mixed_tensor2 = float32_tensor * int32_tensor
mixed_tensor2, mixed_tensor2.dtype

(tensor([ 9., 36., 81.]), torch.float32)

### Getting information from tensors

In [137]:
some_tensor = torch.rand(size=(2,3))
some_tensor

tensor([[0.5943, 0.4075, 0.5645],
        [0.6504, 0.1609, 0.6616]])

In [138]:
# Get basic information: dtype, shape and the device the tensor lives on
some_tensor.dtype, some_tensor.shape, some_tensor.device

(torch.float32, torch.Size([2, 3]), device(type='cpu'))

In [139]:
# If we want (and have access to) change to gpu:
# some_tensor.to("cuda")
# some_tensor.device

### Manipulating tensors

In [140]:
tensor = torch.tensor(data=[1,2,3])
tensor += 5
tensor

tensor([6, 7, 8])

In [141]:
# multiply tensor
tensor *= 10
tensor

tensor([60, 70, 80])

In [142]:
# Subtraction
tensor -= 50
tensor

tensor([10, 20, 30])

In [143]:
# Using PyTorch built-in functions (these return a new tensor; `tensor` itself is unchanged)
torch.mul(tensor, 2)

tensor([20, 40, 60])

In [144]:
torch.add(tensor,10)

tensor([20, 30, 40])

### Matrix multiplication

The inner dimensions must match! The output matrix has the shape of the outer dimensions, e.g. `(3, 2) @ (2, 4)` gives `(3, 4)`.

In [145]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(1400)

In [146]:
tensor

tensor([10, 20, 30])

In [147]:
# Matrix multiplication by hand
%%time
tensor = torch.Tensor(data=[1,2,3])
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]

print(f"Value = {value}")

Value = 14.0
CPU times: user 616 µs, sys: 0 ns, total: 616 µs
Wall time: 660 µs


In [148]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 43 µs, sys: 2 µs, total: 45 µs
Wall time: 49.1 µs


tensor(14.)

### Dealing with shape errors

In [149]:
tensor_A = torch.tensor(data=[[1,2],
                              [3,4],
                              [5, 6]])
tensor_B = torch.tensor(data=[[7, 8],
                             [9, 10],
                             [11, 12]])
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [150]:
# Shape error
# tensor_A @ tensor_B

In [151]:
# Use transpose to match both shapes
tensor_A.T @ tensor_B

tensor([[ 89,  98],
        [116, 128]])

### Tensor aggregation

In [152]:
x = torch.arange(start=1, end=100, step=10, dtype=torch.float32)
x

tensor([ 1., 11., 21., 31., 41., 51., 61., 71., 81., 91.])

In [153]:
x.min(), x.max()

(tensor(1.), tensor(91.))

In [154]:
# For mean we need floats
torch.mean(x.type(torch.float32))

tensor(46.)

In [155]:
# Standard deviation
torch.std(x)

tensor(30.2765)

In [156]:
# Find the sum
torch.sum(x)

tensor(460.)

In [157]:
# Find max and min indexes
torch.argmax(x), torch.argmin(x)

(tensor(9), tensor(0))

## Reshaping, Stacking, Squeezing

In [158]:
x = torch.arange(0, 10)

In [159]:
x.shape, x.size()

(torch.Size([10]), torch.Size([10]))

In [160]:
# Reshaping
torch.reshape(input=x, shape=(2, 5))

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])

In [161]:
# Add an extra dimension
torch.reshape(input=x, shape=(2, 5, 1))

tensor([[[0],
         [1],
         [2],
         [3],
         [4]],

        [[5],
         [6],
         [7],
         [8],
         [9]]])

In [162]:
# Change the view, they share the same memory address
z = x.view(1, 10)
z

tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [163]:
z[0][0] = 10
z, x

(tensor([[10,  1,  2,  3,  4,  5,  6,  7,  8,  9]]),
 tensor([10,  1,  2,  3,  4,  5,  6,  7,  8,  9]))

In [164]:
# Stack tensors on top of each other
stacked_x = torch.stack((x, x), dim=0)
stacked_x.shape, stacked_x

(torch.Size([2, 10]),
 tensor([[10,  1,  2,  3,  4,  5,  6,  7,  8,  9],
         [10,  1,  2,  3,  4,  5,  6,  7,  8,  9]]))

In [165]:
# Squeezing
x = torch.reshape(x, shape=[1,10,1])
print(f"Before squeezing: {x.shape}")
squeezed_x = torch.squeeze(x)
print(f"After squeezing: {squeezed_x.shape}")

Before squeezing: torch.Size([1, 10, 1])
After squeezing: torch.Size([10])


In [166]:
# Permuting
x = torch.randn(2, 3, 5)
print(x.size())
torch.permute(x, (2, 0, 1)).size()

torch.Size([2, 3, 5])


torch.Size([5, 2, 3])

In [167]:
x_image = torch.rand(size=(224, 224, 3))
x_image_permuted = torch.permute(x_image, dims=(2, 0, 1))
x_image.shape, x_image_permuted.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))

In [168]:
x_image_permuted[:, 0, 0] # All three color channels of first pixel

tensor([0.0133, 0.0870, 0.6491])

## Numpy & PyTorch

* From a NumPy array to a PyTorch tensor -> **torch.from_numpy(ndarray)**
* From a PyTorch tensor to a NumPy array -> **torch.Tensor.numpy()**

In [169]:
array = np.arange(10, dtype=np.float64)
array

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [170]:
tensor = torch.from_numpy(array)
tensor

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64)

In [171]:
type(array), type(tensor)

(numpy.ndarray, torch.Tensor)

In [172]:
array.dtype, tensor.dtype

(dtype('float64'), torch.float64)

In [173]:
# Tensor to numpy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [174]:
tensor += 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([2., 2., 2., 2., 2., 2., 2.], dtype=float32))

## Reproducibility

In [175]:
# Lets make some random tensors
tensor_A = torch.rand(size=(3,4))
tensor_B = torch.rand(size=(3,4))

tensor_A == tensor_B

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [176]:
# Let's make some random tensors, but with random seed
torch.manual_seed(42)
tensor_C = torch.rand(size=(3,4))
torch.manual_seed(42)
tensor_D = torch.rand(size=(3,4))

tensor_C == tensor_D

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## Accessing GPU (graphics processing unit)

### Getting GPU

1. Use Google Colab.
2. Use your local machine.
3. Use a cloud computing platform (e.g. AWS).

In [177]:
# Check if we have access to GPU
!nvidia-smi

Fri Jul 26 07:27:33 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0              34W /  70W |    151MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [178]:
# Ask PyTorch whether a CUDA-capable GPU can be used and report the answer
print("GPU is available" if torch.cuda.is_available() else "GPU is not available")

GPU is available


In [179]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device we are going to use: {device}")

Device we are going to use: cuda


In [180]:
# Count number of devices
torch.cuda.device_count()

1

## Putting tensors and models on the GPU

In [181]:
# Create a tensor explicitly on the CPU (this is also the default device)
tensor = torch.tensor(data=[1,2,3], device="cpu")

tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [182]:
# Move tensor to GPU
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

In [183]:
# `tensor` is still on the CPU (`.to(device)` above produced a separate tensor),
# so it can be converted to a NumPy array directly
tensor.numpy()

array([1, 2, 3])

In [184]:
# Numpy runs only on CPU
tensor_on_gpu.to("cpu").numpy()

array([1, 2, 3])

# Exercises

In [185]:
# Set random seed for reproducibility
torch.manual_seed(0)
torch.cuda.manual_seed(1234)

random_tensor = torch.rand(size=[7,7])
random_tensor2 = torch.rand(size=[1, 7])
random_tensor, random_tensor2

(tensor([[0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901],
         [0.8964, 0.4556, 0.6323, 0.3489, 0.4017, 0.0223, 0.1689],
         [0.2939, 0.5185, 0.6977, 0.8000, 0.1610, 0.2823, 0.6816],
         [0.9152, 0.3971, 0.8742, 0.4194, 0.5529, 0.9527, 0.0362],
         [0.1852, 0.3734, 0.3051, 0.9320, 0.1759, 0.2698, 0.1507],
         [0.0317, 0.2081, 0.9298, 0.7231, 0.7423, 0.5263, 0.2437],
         [0.5846, 0.0332, 0.1387, 0.2422, 0.8155, 0.7932, 0.2783]]),
 tensor([[0.4820, 0.8198, 0.9971, 0.6984, 0.5675, 0.8352, 0.2056]]))

In [186]:
random_tensor.device

device(type='cpu')

In [187]:
# 7x7 @ 7x1 = 7x1
mul_tensor = random_tensor @ random_tensor2.T
mul_tensor

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [188]:
torch.cuda.manual_seed(1234)

gpu_tensor = torch.rand(size=(2, 3), device=device)
gpu_tensor2 = torch.rand(size=(2, 3), device=device)

# 2x3 @ 3x2 = 2x2
mul_gpu_tensor = gpu_tensor @ gpu_tensor2.T
mul_gpu_tensor

tensor([[0.2786, 0.7668],
        [0.7343, 0.6102]], device='cuda:0')

In [189]:
mul_gpu_tensor.device

device(type='cuda', index=0)

In [190]:
mul_gpu_tensor.min(), mul_gpu_tensor.max()

(tensor(0.2786, device='cuda:0'), tensor(0.7668, device='cuda:0'))

In [191]:
mul_gpu_tensor.argmin(), mul_gpu_tensor.argmax()

(tensor(0, device='cuda:0'), tensor(1, device='cuda:0'))

In [192]:
torch.argmin(mul_gpu_tensor), torch.argmax(mul_gpu_tensor)

(tensor(0, device='cuda:0'), tensor(1, device='cuda:0'))

In [193]:
torch.manual_seed(0)
r_tensor = torch.rand(size=[1,1,1,10])
squeezed_tensor = torch.squeeze(r_tensor)
r_tensor.shape, squeezed_tensor.shape

(torch.Size([1, 1, 1, 10]), torch.Size([10]))

In [194]:
r_tensor, squeezed_tensor

(tensor([[[[0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901, 0.8964,
            0.4556, 0.6323]]]]),
 tensor([0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901, 0.8964, 0.4556,
         0.6323]))

## PyTorch quickstart guide

Tutorial is from: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html

In [195]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [196]:
# Download training data
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [197]:
BATCH_SIZE = 64

train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE)

for X, y in test_dataloader:
  print(f"Shape of X, [batch_size, color_channels, height, width] = {X.shape}")
  print(f"Shape of y, [batch_size] = {y.shape}")
  break

Shape of X, [batch_size, color_channels, height, width] = torch.Size([64, 1, 28, 28])
Shape of y, [batch_size] = torch.Size([64])


In [209]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {DEVICE} device.")

class NeuralNetwork(nn.Module):
  """Fully connected classifier: 28x28 input -> 512 -> 256 -> 10 logits.

  The input is flattened from dimension 1 onwards, so any batch whose
  per-sample element count is 784 is accepted.
  """

  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten()
    # Layers are created in the same order as they are applied, so the
    # Sequential indices (0, 2, 4 for the Linear layers) stay stable.
    layers = [
        nn.Linear(28 * 28, 512),
        nn.ReLU(),
        nn.Linear(in_features=512, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=10),
    ]
    self.linear_relu_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Return the raw class scores (logits) for a batch `x`."""
    return self.linear_relu_stack(self.flatten(x))

# Use the DEVICE constant defined at the top of this cell rather than the
# `device` variable left over from the earlier GPU section.
model = NeuralNetwork().to(DEVICE)
print(model)

Using cuda device.
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [210]:
# Specify loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [211]:
def train(dataloader, model, loss_fn, optimizer):
    """Train `model` for one pass over `dataloader`.

    Args:
        dataloader: iterable of `(X, y)` batches; must expose `.dataset`
            so the total sample count can be reported.
        model: the `nn.Module` to optimise (updated in place).
        loss_fn: callable mapping `(predictions, targets)` to a scalar loss.
        optimizer: optimizer built over `model.parameters()`.

    Prints the loss of every 100th batch. Returns nothing.
    """
    size = len(dataloader.dataset)

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level `device` variable defined in some other cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            # Keep `loss` as a tensor; use a separate name for the float
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [212]:
# Run one training pass followed by one evaluation pass per epoch
epochs = 5
for t in range(epochs):
    # `t` is 0-based, so show a 1-based epoch number to the reader
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.300149  [   64/60000]
loss: 2.284201  [ 6464/60000]
loss: 2.261929  [12864/60000]
loss: 2.257622  [19264/60000]
loss: 2.243247  [25664/60000]
loss: 2.206360  [32064/60000]
loss: 2.232623  [38464/60000]
loss: 2.195404  [44864/60000]
loss: 2.187284  [51264/60000]
loss: 2.161118  [57664/60000]
Test Error: 
 Accuracy: 32.5%, Avg loss: 2.154014 

Epoch 2
-------------------------------
loss: 2.167362  [   64/60000]
loss: 2.150221  [ 6464/60000]
loss: 2.094291  [12864/60000]
loss: 2.116154  [19264/60000]
loss: 2.060853  [25664/60000]
loss: 1.989439  [32064/60000]
loss: 2.044222  [38464/60000]
loss: 1.956017  [44864/60000]
loss: 1.958373  [51264/60000]
loss: 1.905964  [57664/60000]
Test Error: 
 Accuracy: 48.6%, Avg loss: 1.896025 

Epoch 3
-------------------------------
loss: 1.922960  [   64/60000]
loss: 1.887266  [ 6464/60000]
loss: 1.782759  [12864/60000]
loss: 1.840354  [19264/60000]
loss: 1.721840  [25664/60000]
loss: 1.660652  [32064/600

In [213]:
# Saving our model
torch.save(model.state_dict(), "model.pth")

In [215]:
# Load our model: build a fresh network on DEVICE, then restore the saved weights.
model = NeuralNetwork().to(DEVICE)
# map_location places the stored tensors directly on the target device (so a
# checkpoint saved on a GPU also loads on a CPU-only machine);
# weights_only=True restricts unpickling to tensors and plain containers
# instead of executing arbitrary pickled objects.
model.load_state_dict(torch.load("model.pth", map_location=DEVICE, weights_only=True))

<All keys matched successfully>

In [220]:
# Making custom predictions
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]


model.eval()
# Index the dataset once; each item is an (image, label) pair
X, y = test_data[0]
with torch.no_grad():
  # Add an explicit batch dimension instead of relying on the single colour
  # channel being interpreted as the batch, and send the image to whichever
  # device the model's parameters are on.
  X = X.unsqueeze(0).to(next(model.parameters()).device)
  # argmax over the class dimension; .item() yields a plain int list index
  pred = model(X).argmax(dim=1).item()
  predicted = classes[pred]
  actual = classes[y]
  print(f"Predicted = {predicted}\nActual = {actual}")

Predicted = Ankle boot
Actual = Ankle boot
