# Deep learning intro

In [1]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor

from torchvision import transforms
# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# Preprocessing applied to every image: convert it to a tensor, then normalize it
# with mean 0.5 and std 0.5. This is input normalization, not regularization.
norm_tr = transforms.Compose([
    ToTensor(),
    # Normalize expects one mean and one std per channel, given as sequences;
    # FashionMNIST images have a single channel, hence the one-element tuples.
    transforms.Normalize((0.5,), (0.5,)),
])

# download=True is passed so the files can be fetched into ./data (see the download log below).
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=norm_tr,
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=norm_tr,
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [3]:
from torch import nn

class NeuralNetwork(nn.Module):
  """Single-layer linear classifier: flattens each sample and maps it to class scores.

  Args:
    in_features: number of values in one flattened sample (default 28*28).
    num_classes: number of scores produced per sample (default 10).
  """

  def __init__(self, in_features=28*28, num_classes=10):
    super().__init__()
    # nn.Flatten defaults to start_dim=1, so dimension 0 (the batch) is left untouched.
    self.flatten = nn.Flatten()
    # The second argument of nn.Linear is the output size, i.e. the number of classes.
    self.linear = nn.Linear(in_features, num_classes)

  def forward(self, x):
    """Return the raw class scores, one row of `num_classes` values per sample."""
    x = self.flatten(x)
    x = self.linear(x)
    return x

model = NeuralNetwork()
# Sanity-check idea: pass a random image (or a fixed matrix) through the model.

In [4]:
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt

batch_size = 64  # powers of two are the conventional choice for batch sizes

# Shuffle the training set so that every epoch visits the samples in a new order
# (DataLoader does not shuffle unless asked to). Evaluation order does not matter.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Pull a single batch for inspection: X is laid out as [N, C, H, W]
# (N = number of images, C = channels) and y holds the labels.
# To display an image with matplotlib, move the channel axis to the end first, e.g.
# plt.imshow(X[0].permute(1, 2, 0), cmap="gray"); training_data.class_to_idx maps names to labels.
X, y = next(iter(train_dataloader))

In [6]:
# Show the dataset's class names together with the number of classes.
training_data.classes, len(training_data.classes)

(['T-shirt/top',
  'Trouser',
  'Pullover',
  'Dress',
  'Coat',
  'Sandal',
  'Shirt',
  'Sneaker',
  'Bag',
  'Ankle boot'],
 10)

In [None]:
# model.forward(torch.rand([64, 1, 28, 28]))

In [5]:
def train(dataloader, model, loss_fn, optimizer):
  """Run one training epoch over `dataloader`, updating `model` in place.

  Args:
    dataloader: iterable of (X, y) batches that exposes `.dataset` (used for the progress counter).
    model: the nn.Module being optimized.
    loss_fn: callable mapping (predictions, targets) to a scalar loss tensor.
    optimizer: optimizer constructed over the model's parameters.

  Prints the loss every 100 batches and returns None.
  """
  size = len(dataloader.dataset)
  # Use the device the model already lives on, so the same function serves CPU and GPU models.
  first_param = next(model.parameters(), None)
  model_device = first_param.device if first_param is not None else None
  model.train()
  for batch, (X, y) in enumerate(dataloader):  # one iteration per batch
    if model_device is not None:
      X, y = X.to(model_device), y.to(model_device)
    pred = model(X)
    loss = loss_fn(pred, y)

    optimizer.zero_grad()  # gradients accumulate across backward() calls, so reset them every step
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      # loss is a one-element tensor; .item() turns it into a plain Python number
      loss, current = loss.item(), (batch+1) * len(X)
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn): # model może miec w 
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss = 0
  model.eval()
  correct = 0
  
  with torch.no_grad(): # nie zachowywuj obliczen gradientow, nie wykonywuj ich nawet
    for X, y in dataloader:
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f'Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')

## Tryby modelu
Model może działać w dwóch trybach: `train` (`model.train()`) i `eval` (`model.eval()`).

Np. w trakcie treningu można losowo wyłączać neurony (dropout) – to przykład regularyzacji.


Dlatego w ewaluacji już nie „strzelamy” do własnych neuronów i używamy wszystkiego, co mamy.


## Co dostajemy na wyjściu?

Na wyjściu dostajemy dla każdej klasy miarę pewności modelu, że to właśnie ten przedmiot jest na zdjęciu.
Dlatego jako przewidywaną klasę bierzemy `argmax`.



In [None]:
# The softmax is already built into CrossEntropyLoss, so the model must output raw scores;
# adding an extra softmax on top badly hurts training.
loss_fn = nn.CrossEntropyLoss()

# The usual choice is between SGD and Adam (torch.optim.Adam).
# The learning rate is a hyperparameter; 1e-3 is simply a common starting value.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

epochs = 5

# Each epoch: one full pass over the training data, then an evaluation on the test data.
for t in range(epochs):
  print(f"Epoch {t+1}", end="\n\n")
  train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer)
  test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)

## Po co używamy `.item()`?

`.item()` zamienia jednoelementowy tensor na zwykłą liczbę Pythona.

In [None]:
# .item() extracts the single element of a one-element tensor as a plain Python number
torch.Tensor([1]).item()

1.0

In [None]:
# without .item() the value stays wrapped in a tensor
torch.Tensor([1])

tensor([1.])

# 2-D Convolution layer

[slajdy deep learning intro](https://c.d2l.ai/gtc2020/slides/CNN.pdf)

Model będzie się teraz uczył kerneli (filtrów), a nie samego obrazu. Dzięki temu uczymy się zależności między obiektami na zdjęciu.

---
### Intuicja

Nie nakładamy płaskiego filtra – nakładamy kernel w przestrzeni 3D.
Tworzymy bloki, a potem łączymy je ze sobą. Liczba kanałów to ~głębokość.

Cechy niosą informacje -> połączenie danych tabelarycznych z cechami.

![przykladowy gif](https://miro.medium.com/v2/resize:fit:1400/1*q95f1mqXAVsj_VMHaOm6Sw.gif)

### Padding

Jeśli coś ważnego jest w rogu, używa się paddingu (uzupełnienia zerami).

Da się też wykorzystywać piksele dalsze niż 1.

### Pooling

- Max pooling – wybieranie maksymalnej wartości pikseli w oknie; zmniejsza rozmiar zdjęcia
- Avg pooling – analogicznie, ale ze średnią wartością


# Teraz coś poważniejszego - użycie konwolucji
- Trening laguje (jest wolny), więc zmieniamy środowisko wykonawcze: Runtime -> Change runtime type -> GPU
- odpalamy wszystko na `cuda` (na MacBookach: `mps`)
- zamiast pisać ręcznie kolejne `conv{i}`, robimy blok

In [None]:
from torch import nn

# Same device selection as in the first cell: "cuda" when a GPU is available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

class NeuralNetwork(nn.Module):
  """Small convolutional classifier: two conv blocks followed by one linear layer.

  Each block is Conv(3x3) -> ReLU -> Conv(3x3) -> ReLU -> AvgPool(2). The convolutions
  use no padding, so for 28x28 inputs the spatial size shrinks
  28 -> 26 -> 24 -> 12 (block 1) -> 10 -> 8 -> 4 (block 2), and the flattened
  output of the second block has 256 * 4 * 4 = 4096 values.

  Args:
    in_channels: number of channels of the input images (default 1).
    num_classes: number of scores produced per image (default 10).
    flat_features: size of the flattened conv output fed to the linear layer
      (default 256*4*4, which matches 28x28 inputs; change it for other input sizes).
  """

  def __init__(self, in_channels=1, num_classes=10, flat_features=256*4*4):
    super().__init__()
    self.flatten = nn.Flatten()
    # nn.Conv2d arguments: input channels, output channels (number of filters), kernel size
    # (a single int gives a square kernel).
    self.conv_block = nn.Sequential(
        nn.Conv2d(in_channels, 16, 3),
        nn.ReLU(),
        nn.Conv2d(16, 64, 3),
        nn.ReLU(),
        nn.AvgPool2d(2)
    )

    self.conv_block2 = nn.Sequential(
        nn.Conv2d(64, 128, 3),
        nn.ReLU(),
        nn.Conv2d(128, 256, 3),
        nn.ReLU(),
        nn.AvgPool2d(2)
    )

    self.linear = nn.Linear(flat_features, num_classes)

  def forward(self, x):
    """Return the raw class scores, one row of `num_classes` values per image."""
    x = self.conv_block(x)
    x = self.conv_block2(x)
    x = self.flatten(x)
    x = self.linear(x)
    return x

# Build the convolutional model and move its parameters to the selected device.
model = NeuralNetwork().to(device)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
  """Train `model` for one epoch on `dataloader`, moving every batch to the global `device`.

  Prints the loss every 100 batches and returns None.
  """
  size = len(dataloader.dataset)
  model.train()
  for step, (inputs, targets) in enumerate(dataloader):
    inputs, targets = inputs.to(device), targets.to(device)

    # forward pass
    batch_loss = loss_fn(model(inputs), targets)

    # backward pass: clear old gradients, backpropagate, update the weights
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    if step % 100 != 0:
      continue
    loss = batch_loss.item()
    current = (step + 1) * len(inputs)
    print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
  """Evaluate `model` on `dataloader` (batches are moved to the global `device`).

  Prints the accuracy over the whole dataset and the loss averaged over batches.
  """
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  loss_sum = 0
  model.eval()
  hits = 0

  # Evaluation needs no gradients, so they are neither tracked nor computed.
  with torch.no_grad():
    for inputs, targets in dataloader:
      inputs, targets = inputs.to(device), targets.to(device)
      scores = model(inputs)
      loss_sum += loss_fn(scores, targets).item()
      matches = scores.argmax(1) == targets
      hits += matches.type(torch.float).sum().item()

    test_loss = loss_sum / num_batches
    correct = hits / size
    print(f'Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')

In [None]:
# Loss, optimizer and epoch count for the convolutional model.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)
epochs = 15

# Each epoch: one full pass over the training data, then an evaluation on the test data.
for t in range(epochs):
  print(f"Epoch {t+1}", end="\n\n")
  train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer)
  test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)

Epoch 1

loss: 0.347418 [   64/60000]
loss: 0.349168 [ 6464/60000]
loss: 0.235297 [12864/60000]
loss: 0.440244 [19264/60000]
loss: 0.448519 [25664/60000]
loss: 0.491282 [32064/60000]
loss: 0.347596 [38464/60000]
loss: 0.550899 [44864/60000]
loss: 0.551320 [51264/60000]
loss: 0.402817 [57664/60000]
Test Error: 
 Accuracy: 84.9%, Avg loss: 0.425732 

Epoch 2

loss: 0.335587 [   64/60000]
loss: 0.345342 [ 6464/60000]
loss: 0.228222 [12864/60000]
loss: 0.425594 [19264/60000]
loss: 0.443138 [25664/60000]
loss: 0.485313 [32064/60000]
loss: 0.340365 [38464/60000]
loss: 0.546068 [44864/60000]
loss: 0.544098 [51264/60000]
loss: 0.404381 [57664/60000]
Test Error: 
 Accuracy: 85.4%, Avg loss: 0.414483 

Epoch 3

loss: 0.327142 [   64/60000]
loss: 0.343597 [ 6464/60000]
loss: 0.220586 [12864/60000]
loss: 0.413309 [19264/60000]
loss: 0.432277 [25664/60000]
loss: 0.481383 [32064/60000]
loss: 0.333699 [38464/60000]
loss: 0.539440 [44864/60000]
loss: 0.540023 [51264/60000]
loss: 0.403522 [57664/60000]

# Korzystanie z gotowego modelu

Trzeba nasycić model, żeby nie strzelać z armaty do mrówki.

Model jest wstępnie wytrenowany (pretrained): część wag będzie zamrożona, a odmrażanie zaczynamy od końca, żeby nie zachwiać modelu.

In [7]:
!pip install timm # do odpalenia komendy linuxowej na collabie

import timm

resnet = timm.create_model("resnet18", pretrained=True)
# print(resnet)
resnet.conv1 = nn.Conv2d(1, 64, 7, stride=2, padding=3) # added pretrained

for param in resnet.parameters():
    param.requires_grad = False

resnet.fc = nn.Linear(512, 10) # podmiana ostatniego punktu
# print(resnet.fc.requires)
print(resnet)
resnet.to(device)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.13.3 timm-0.6.12


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1,

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1,

In [None]:
# Check whether the head's parameters are frozen: each printed tensor ends with its requires_grad flag.
fc_module = resnet.fc
for params in fc_module.parameters():
  print(params)

Parameter containing:
tensor([[ 0.0266,  0.0343,  0.0234,  ..., -0.0172,  0.0073, -0.0165],
        [ 0.0314, -0.0039,  0.0069,  ...,  0.0298, -0.0023,  0.0305],
        [ 0.0388,  0.0010, -0.0120,  ..., -0.0138, -0.0438, -0.0272],
        ...,
        [-0.0326, -0.0263,  0.0227,  ...,  0.0341,  0.0362, -0.0130],
        [ 0.0273, -0.0006, -0.0120,  ...,  0.0356,  0.0348, -0.0417],
        [ 0.0418,  0.0345,  0.0270,  ..., -0.0060, -0.0298, -0.0013]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([ 0.0057, -0.0182,  0.0191, -0.0220, -0.0077,  0.0231, -0.0056, -0.0139,
        -0.0372,  0.0323], device='cuda:0', requires_grad=True)


In [8]:
def train(dataloader, model, loss_fn, optimizer):
  """One training epoch: forward pass, loss, backward pass and optimizer step per batch.

  Batches are moved to the global `device`. The loss is printed every 100 batches.
  """
  size = len(dataloader.dataset)
  model.train()
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    # Forward pass and loss for this batch.
    loss = loss_fn(model(X), y)

    # Backward pass: reset stale gradients, backpropagate, update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    is_report_step = batch % 100 == 0
    if is_report_step:
      current = (batch + 1) * len(X)
      loss = loss.item()
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
  """Score `model` on `dataloader` (batches are moved to the global `device`).

  Prints the fraction of correct predictions as a percentage and the mean per-batch loss.
  """
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  batch_losses = []
  model.eval()
  batch_hits = []

  # Gradients are not needed for evaluation, so they are not tracked.
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      batch_losses.append(loss_fn(pred, y).item())
      batch_hits.append(pred.argmax(1).eq(y).float().sum().item())

    test_loss = sum(batch_losses) / num_batches
    correct = sum(batch_hits) / size
    print(f'Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')

In [9]:
# Loss, optimizer and epoch count for the ResNet; note that the optimizer
# is built over resnet's parameters, not the earlier model's.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=resnet.parameters(), lr=1e-3)
epochs = 10

# Each epoch: one full pass over the training data, then an evaluation on the test data.
for t in range(epochs):
  print(f"Epoch {t+1}", end="\n\n")
  train(dataloader=train_dataloader, model=resnet, loss_fn=loss_fn, optimizer=optimizer)
  test(dataloader=test_dataloader, model=resnet, loss_fn=loss_fn)

Epoch 1

loss: 2.735301 [   64/60000]
loss: 2.442510 [ 6464/60000]
loss: 2.034686 [12864/60000]
loss: 1.881823 [19264/60000]
loss: 1.883213 [25664/60000]
loss: 1.620081 [32064/60000]
loss: 1.682190 [38464/60000]
loss: 1.598865 [44864/60000]
loss: 1.576184 [51264/60000]
loss: 1.504682 [57664/60000]
Test Error: 
 Accuracy: 51.7%, Avg loss: 1.506809 

Epoch 2

loss: 1.516931 [   64/60000]
loss: 1.592655 [ 6464/60000]
loss: 1.304179 [12864/60000]
loss: 1.568592 [19264/60000]
loss: 1.508674 [25664/60000]
loss: 1.240472 [32064/60000]
loss: 1.442739 [38464/60000]
loss: 1.378656 [44864/60000]
loss: 1.378535 [51264/60000]
loss: 1.274484 [57664/60000]
Test Error: 
 Accuracy: 57.6%, Avg loss: 1.323753 

Epoch 3

loss: 1.302450 [   64/60000]
loss: 1.398527 [ 6464/60000]
loss: 1.127051 [12864/60000]
loss: 1.476590 [19264/60000]
loss: 1.393832 [25664/60000]
loss: 1.130542 [32064/60000]
loss: 1.366148 [38464/60000]
loss: 1.298005 [44864/60000]
loss: 1.296295 [51264/60000]
loss: 1.165917 [57664/60000]