<a href="https://colab.research.google.com/github/RichardDominik/neural-networks-CV/blob/master/PNSPV_hw1_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

import numpy as np
from matplotlib import pyplot as plt

from torch.nn import Sequential, Linear, ReLU, Softmax, Conv2d, Dropout2d, Dropout, MaxPool2d, BatchNorm2d, BatchNorm1d, Flatten, CrossEntropyLoss, Sigmoid, Tanh, ELU, LeakyReLU, PReLU
from torch.optim import Adam, SGD, RMSprop, AdamW
from torch.utils.data import DataLoader

Načítanie datasetu CIFAR-10

In [None]:
# Human-readable class names; the integer label of a sample indexes this tuple.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Only convert images to tensors here; no augmentation is applied.
transform = transforms.ToTensor()
# Seeded generator for the train/validation split.
# NOTE(review): later cells pass this same generator object to random_split
# again, which advances its state, so those later splits differ from this one.
generator = torch.Generator().manual_seed(42)

dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Hold out 5000 of the 50000 training images for validation.
trainset, valset = torch.utils.data.random_split(dataset, [45000, 5000], generator=generator)

Zobrazenie obrázkov z datasetu

In [None]:
# Display a couple of randomly drawn test images followed by their class name.
previewloader = DataLoader(testset, batch_size=1, shuffle=True)

for i, sample in enumerate(previewloader):
  if i >= 2:
    # Two previews are enough.
    break
  x, y = sample
  # Drop the batch axis and move the channel axis last for imshow.
  img = x[0].permute(1, 2, 0).numpy()
  plt.imshow(img)
  plt.show()
  print(classes[int(y)])

# Základný model

In [None]:
from torch.nn import Sequential, Linear, ReLU, Softmax, Conv2d, MaxPool2d, Flatten, CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader

# Hyper-parameters read (as globals, at call time) by the model builders and
# the training cells below.
KERNEL_SIZE_CONV = 3  # kernel size of every convolution
STRIDE = 2  # passed as the stride of the max-pooling layers
PADDING = 1  # padding of the convolutions
MAX_POOL_KERNEL = 2  # max-pooling window size
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
NUMBER_OF_EPOCHS = 10

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def build_model():
  """Build the baseline CNN classifier.

  Three conv -> max-pool -> ReLU stages (16, 32, 64 channels) are followed by
  a flatten and four linear layers ending in 10 outputs. The first linear
  layer expects 1024 flattened features.

  Returns:
    (model, model_inference): `model` outputs raw logits (what
    CrossEntropyLoss expects); `model_inference` wraps the same `model`
    object and appends a softmax over the class dimension.
  """
  def conv_block(in_channels, out_channels):
    return [Conv2d(in_channels, out_channels, KERNEL_SIZE_CONV, 1, PADDING),
            MaxPool2d(MAX_POOL_KERNEL, STRIDE),
            ReLU()]

  model = Sequential(*conv_block(3, 16),
                     *conv_block(16, 32),
                     *conv_block(32, 64),
                     Flatten(),
                     Linear(1024, 512), ReLU(),
                     Linear(512, 256), ReLU(),
                     Linear(256, 128), ReLU(),
                     Linear(128, 10))

  # The logits are (batch, classes), so normalise explicitly over dim 1.
  # A bare Softmax() relies on the deprecated implicit-dim behaviour and
  # emits a warning on every forward pass.
  model_inference = Sequential(model, Softmax(dim=1))
  return model, model_inference


# Instantiate the baseline network (and its softmax wrapper) on the target device.
model, model_inference = (net.to(device) for net in build_model())

# One loader per split; both are reshuffled every epoch.
dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True)
    for split in (trainset, valset)
)

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history that is plotted after training.
epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

def one_epoch(model, loss, optimizer, dataloader_train, dataloader_val, device, verbose=True):
  """Train `model` for one pass over the training data, then validate it.

  Args:
    model: network that maps an input batch to per-class scores.
    loss: callable `loss(outputs, targets)` returning a scalar tensor. This
      criterion is the one actually used; previously the argument was
      ignored in favour of a global `ce_loss`.
    optimizer: optimizer bound to `model`'s parameters.
    dataloader_train: iterable of batches whose items 0 and 1 are the
      inputs and the targets.
    dataloader_val: same batch layout, used for validation.
    device: device the batches are moved to before the forward pass.
    verbose: when true, print the training loss every 100 steps.

  Returns:
    (mean training batch loss, mean validation batch loss, validation
    accuracy). The accuracy is NaN when the validation loader is empty.
  """
  criterion = loss
  train_losses = []
  val_losses = []

  model.train()

  for step, batch in enumerate(dataloader_train):
    inputs, targets = batch[0].to(device), batch[1].to(device)
    optimizer.zero_grad()

    batch_loss = criterion(model(inputs), targets)
    batch_loss.backward()
    optimizer.step()

    train_losses.append(batch_loss.item())
    if verbose and step % 100 == 0:
      print("Training loss at step {}: {}".format(step, batch_loss.item()))

  model.eval()

  correct = 0
  total = 0
  with torch.no_grad():
    for batch in dataloader_val:
      inputs, targets = batch[0].to(device), batch[1].to(device)

      outputs = model(inputs)
      val_losses.append(criterion(outputs, targets).item())
      correct += torch.sum(torch.argmax(outputs, dim=-1) == targets).item()
      total += len(batch[1])

  # Guard against an empty validation loader instead of dividing by zero.
  val_acc = correct / total if total else float("nan")

  return np.mean(train_losses), np.mean(val_losses), val_acc

# Train the baseline model and record the per-epoch metrics.
for e in range(NUMBER_OF_EPOCHS):
  train_loss, val_loss, val_acc = one_epoch(
      model, ce_loss, optimizer, dataloader_train, dataloader_val, device)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss per epoch.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

# Validation accuracy per epoch.
plt.plot(epoch_val_accs, color='r')
plt.show()

# Aktivácie

Funkcie na zostavenie modelov. Vyskúšané aktivačné funkcie: Sigmoid, Tanh, ELU, LeakyReLU a PReLU.

In [None]:
from torch.nn import Sigmoid, Tanh, ELU, LeakyReLU, PReLU

def _build_activation_model(make_activation):
  """Build the baseline CNN with a configurable activation function.

  The architecture matches the baseline (three conv -> max-pool -> activation
  stages, flatten, four linear layers, 10 outputs); only the activation
  differs.

  Args:
    make_activation: zero-argument callable returning a new activation
      module. It is called once per activation slot so that no module
      instance is shared between layers.

  Returns:
    (model, model_inference): the logits model and a wrapper around the same
    model that appends a softmax over the class dimension.
  """
  def conv_block(in_channels, out_channels):
    return [Conv2d(in_channels, out_channels, KERNEL_SIZE_CONV, 1, PADDING),
            MaxPool2d(MAX_POOL_KERNEL, STRIDE),
            make_activation()]

  def dense_block(in_features, out_features):
    return [Linear(in_features, out_features), make_activation()]

  model = Sequential(*conv_block(3, 16),
                     *conv_block(16, 32),
                     *conv_block(32, 64),
                     Flatten(),
                     *dense_block(1024, 512),
                     *dense_block(512, 256),
                     *dense_block(256, 128),
                     Linear(128, 10))

  # Explicit dim: a bare Softmax() uses the deprecated implicit-dim behaviour
  # and warns on every call. The logits are (batch, classes).
  model_inference = Sequential(model, Softmax(dim=1))
  return model, model_inference


# Sigmoid
def build_model_using_sigmoid_activation_function():
  """Return (model, model_inference) for the baseline CNN using Sigmoid."""
  return _build_activation_model(Sigmoid)


# Tanh
def build_model_using_tanh_activation_function():
  """Return (model, model_inference) for the baseline CNN using Tanh."""
  return _build_activation_model(Tanh)


# ELU
def build_model_using_elu_activation_function():
  """Return (model, model_inference) for the baseline CNN using ELU."""
  return _build_activation_model(ELU)


# LeakyReLU
ALPHA = 0.1  # negative slope passed to every LeakyReLU


def build_model_using_leaky_relu_activation_function():
  """Return (model, model_inference) for the baseline CNN using LeakyReLU(ALPHA)."""
  return _build_activation_model(lambda: LeakyReLU(ALPHA))


# PReLU
def build_model_using_prelu_activation_function():
  """Return (model, model_inference) for the baseline CNN using PReLU."""
  return _build_activation_model(PReLU)

Inicializácia modelov podľa aktivačných funkcií

In [None]:
# Build one network per activation function and move it, together with its
# softmax wrapper, to the target device.

# Sigmoid model
sigmoid_model, sigmoid_model_inference = (
    net.to(device) for net in build_model_using_sigmoid_activation_function())

# Tanh model
tanh_model, tanh_model_inference = (
    net.to(device) for net in build_model_using_tanh_activation_function())

# ELU model
elu_model, elu_model_inference = (
    net.to(device) for net in build_model_using_elu_activation_function())

# LeakyReLU model
leaky_relu_model, leaky_relu_model_inference = (
    net.to(device) for net in build_model_using_leaky_relu_activation_function())

# PReLU model
prelu_model, prelu_model_inference = (
    net.to(device) for net in build_model_using_prelu_activation_function())

# (display name, model) pairs consumed by the training cell
models = [
    ('sigmoid', sigmoid_model),
    ('tanh', tanh_model),
    ('elu', elu_model),
    ('leaky relu', leaky_relu_model),
    ('prelu', prelu_model),
]

Trénovanie modelov

In [None]:
# Train every activation variant with its own Adam optimizer and plot its curves.
# The loop variable is deliberately NOT called `model`: that would overwrite
# the global baseline `model`, which other cells still refer to.
for activation_function_name, activation_model in models:
  optimizer = Adam(activation_model.parameters(), lr=LEARNING_RATE)
  epoch_train_losses = []
  epoch_val_losses = []
  epoch_val_accs = []

  print(activation_function_name)

  for e in range(NUMBER_OF_EPOCHS):
    train_loss, val_loss, val_acc = one_epoch(activation_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

    print("Val loss at epoch {}: {}".format(e, val_loss))
    print("Val acc at epoch {}: {}".format(e, val_acc))

    epoch_train_losses.append(train_loss)
    epoch_val_losses.append(val_loss)
    epoch_val_accs.append(val_acc)

  print('')

  # Titled and labelled plots so the curves can be told apart.
  plt.title('Loss: ' + activation_function_name)
  plt.plot(epoch_train_losses, c='r', label='train loss')
  plt.plot(epoch_val_losses, c='b', label='val loss')
  plt.xlabel('epoch')
  plt.legend()
  plt.show()

  plt.title('Acc: ' + activation_function_name)
  plt.plot(epoch_val_accs, c='r', label='val accuracy')
  plt.xlabel('epoch')
  plt.legend()
  plt.show()

# Optimalizácia

In [None]:
from torch.optim import Adam, SGD, RMSprop, AdamW

NUMBER_OF_OPT_EPOCHS = 15

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataloader_train = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
dataloader_val = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=True)

# (description, factory) pairs. Each factory receives the parameters of a
# freshly built model, so no optimizer starts from weights that a previous
# optimizer has already trained.
optimizer_configs = [
    ('Adam: lr= 1e-5', lambda params: Adam(params, lr=1e-5)),
    ('SGD: lr= 1e-3', lambda params: SGD(params, lr=1e-3)),
    ('RMSprop: lr= 1e-2, eps=1.1', lambda params: RMSprop(params, lr=1e-2, eps=1.1)),
    ('AdamW: lr= 1e-3', lambda params: AdamW(params, lr=1e-3)),
]

# Kept as a list of (description, optimizer) pairs, filled while iterating.
optimizers = []

for optDetails, make_optimizer in optimizer_configs:
  # Fresh, untrained baseline model for every optimizer so the runs are comparable.
  baseline_model, baseline_model_inference = build_model()
  baseline_model, baseline_model_inference = baseline_model.to(device), baseline_model_inference.to(device)
  optimizerTest = make_optimizer(baseline_model.parameters())
  optimizers.append((optDetails, optimizerTest))

  ce_loss = CrossEntropyLoss().to(device)
  epoch_train_losses = []
  epoch_val_losses = []
  epoch_val_accs = []

  print(optDetails)
  print(optimizerTest)

  for e in range(NUMBER_OF_OPT_EPOCHS):
    train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizerTest, dataloader_train, dataloader_val, device, False)

    print("Val loss at epoch {}: {}".format(e, val_loss))
    print("Val acc at epoch {}: {}".format(e, val_acc))

    epoch_train_losses.append(train_loss)
    epoch_val_losses.append(val_loss)
    epoch_val_accs.append(val_acc)

  print('')

  plt.title('Loss: ' + optDetails)
  plt.plot(epoch_train_losses, c='r')
  plt.plot(epoch_val_losses, c='b')
  plt.show()

  plt.title('Acc: ' + optDetails)
  plt.plot(epoch_val_accs, c='r')
  plt.show()


Batch sizes

In [None]:
from torch.optim import Adam, SGD, RMSprop, AdamW
# Train a fresh baseline model with AdamW for each batch size and plot its curves.
batch_sizes = [8, 16, 64]
NUMBER_OF_OPT_EPOCHS = 15

for bs in batch_sizes:
  baseline_model, baseline_model_inference = (net.to(device) for net in build_model())
  dataloader_train, dataloader_val = (
      DataLoader(split, batch_size=bs, shuffle=True) for split in (trainset, valset))
  optimizer = AdamW(baseline_model.parameters(), lr=LEARNING_RATE)

  ce_loss = CrossEntropyLoss().to(device)
  epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []
  print('Batch size: ' + str(bs))

  for e in range(NUMBER_OF_OPT_EPOCHS):
    train_loss, val_loss, val_acc = one_epoch(
        baseline_model, ce_loss, optimizer,
        dataloader_train, dataloader_val, device, verbose=False)

    epoch_train_losses.append(train_loss)
    epoch_val_losses.append(val_loss)
    epoch_val_accs.append(val_acc)

    print("Val loss at epoch {}: {}".format(e, val_loss))
    print("Val acc at epoch {}: {}".format(e, val_acc))

  print('')

  # Training (red) vs. validation (blue) loss for this batch size.
  plt.title('Loss: ' + str(bs))
  plt.plot(epoch_train_losses, color='r')
  plt.plot(epoch_val_losses, color='b')
  plt.show()

  plt.title('Acc: ' + str(bs))
  plt.plot(epoch_val_accs, color='r')
  plt.show()

# Dropout a Augmentácia

In [None]:
def build_dropout_model_fully_connected(dropout_p=0.5):
  """Build the baseline CNN with dropout after each hidden linear layer.

  Args:
    dropout_p: dropout probability used after each of the three hidden
      linear layers. A falsy value (0 or None) omits the dropout layers.

  Returns:
    (model, model_inference): the logits model and a wrapper around the same
    model that appends a softmax over the class dimension.
  """
  modules = []
  for in_channels, out_channels in ((3, 16), (16, 32), (32, 64)):
    modules += [Conv2d(in_channels, out_channels, KERNEL_SIZE_CONV, 1, PADDING),
                MaxPool2d(MAX_POOL_KERNEL, STRIDE),
                ReLU()]

  modules.append(Flatten())

  for in_features, out_features in ((1024, 512), (512, 256), (256, 128)):
    modules += [Linear(in_features, out_features), ReLU()]
    if dropout_p:
      modules.append(Dropout(dropout_p))

  modules.append(Linear(128, 10))

  model = Sequential(*modules)
  # Explicit dim: a bare Softmax() uses the deprecated implicit-dim behaviour
  # and warns on every call. The logits are (batch, classes).
  model_inference = Sequential(model, Softmax(dim=1))

  return model, model_inference


In [None]:
# Hyper-parameters re-declared at the top of this cell.
KERNEL_SIZE_CONV, STRIDE, PADDING, MAX_POOL_KERNEL = 3, 2, 1, 2
BATCH_SIZE, LEARNING_RATE = 32, 1e-3

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# New 45000/5000 split drawn from the shared generator.
trainset, valset = torch.utils.data.random_split(dataset, [45000, 5000], generator=generator)

# Baseline CNN with dropout after the hidden linear layers.
model_dropout_fully_connected, model_inference_dropout_fully_connected = (
    net.to(device) for net in build_dropout_model_fully_connected())

dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in (trainset, valset))

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_fully_connected.parameters(), lr=LEARNING_RATE)

epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

for e in range(30):
  train_loss, val_loss, val_acc = one_epoch(
      model_dropout_fully_connected, ce_loss, optimizer,
      dataloader_train, dataloader_val, device, verbose=False)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

plt.plot(epoch_val_accs, color='r')
plt.show()


In [None]:
# Same experiment on a small training set: 1000 training and 5000 validation
# images; the remaining 44000 are discarded.
trainset, valset, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=generator)

# Start from a freshly initialised dropout model.
model_dropout_fully_connected, model_inference_dropout_fully_connected = (
    net.to(device) for net in build_dropout_model_fully_connected())

dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in (trainset, valset))

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_fully_connected.parameters(), lr=LEARNING_RATE)

epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(
      model_dropout_fully_connected, ce_loss, optimizer,
      dataloader_train, dataloader_val, device, verbose=False)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

plt.plot(epoch_val_accs, color='r')
plt.show()



In [None]:
def build_dropout_model_conv_layers(dropout_p=0.5, droupout_conv = 0.2):
  """Build the baseline CNN with dropout in the conv and the dense part.

  Each conv stage is conv -> max-pool -> ReLU -> Dropout2d, and each hidden
  linear layer is followed by ReLU -> Dropout.

  Args:
    dropout_p: dropout probability after the hidden linear layers. A falsy
      value omits those layers.
    droupout_conv: Dropout2d probability after each conv stage. A falsy
      value omits those layers. (The parameter name keeps its original
      spelling so existing keyword callers continue to work.)

  Returns:
    (model, model_inference): the logits model and a wrapper around the same
    model that appends a softmax over the class dimension.
  """
  modules = []
  for in_channels, out_channels in ((3, 16), (16, 32), (32, 64)):
    modules += [Conv2d(in_channels, out_channels, KERNEL_SIZE_CONV, 1, PADDING),
                MaxPool2d(MAX_POOL_KERNEL, STRIDE),
                ReLU()]
    if droupout_conv:
      modules.append(Dropout2d(droupout_conv))

  modules.append(Flatten())

  for in_features, out_features in ((1024, 512), (512, 256), (256, 128)):
    modules += [Linear(in_features, out_features), ReLU()]
    if dropout_p:
      modules.append(Dropout(dropout_p))

  modules.append(Linear(128, 10))

  model = Sequential(*modules)
  # Explicit dim: a bare Softmax() uses the deprecated implicit-dim behaviour
  # and warns on every call. The logits are (batch, classes).
  model_inference = Sequential(model, Softmax(dim=1))

  return model, model_inference

In [None]:
# New 45000/5000 split drawn from the shared generator.
trainset, valset = torch.utils.data.random_split(dataset, [45000, 5000], generator=generator)

# CNN with dropout after the conv stages and after the hidden linear layers.
model_dropout_conv_layers, model_inference_dropout_conv_layers = (
    net.to(device) for net in build_dropout_model_conv_layers())

dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in (trainset, valset))

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_conv_layers.parameters(), lr=LEARNING_RATE)

epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

for e in range(40):
  train_loss, val_loss, val_acc = one_epoch(
      model_dropout_conv_layers, ce_loss, optimizer,
      dataloader_train, dataloader_val, device, verbose=False)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

plt.plot(epoch_val_accs, color='r')
plt.show()


In [None]:
# Small-data run: 1000 training and 5000 validation images; the rest is discarded.
trainset, valset, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=generator)

# Start from a freshly initialised model, as the other 1000-sample experiments
# do. Reusing the network already trained on the 45000-image split would
# continue training it and validate it on a new split that may contain images
# it has already been trained on.
model_dropout_conv_layers, model_inference_dropout_conv_layers = build_dropout_model_conv_layers()
model_dropout_conv_layers, model_inference_dropout_conv_layers = model_dropout_conv_layers.to(device), model_inference_dropout_conv_layers.to(device)

dataloader_train = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
dataloader_val = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=True)

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_conv_layers.parameters(), lr=LEARNING_RATE)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(model_dropout_conv_layers, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

In [None]:
def build_dropout_model_v3(dropout_p=0.5, droupout_conv = 0.2):
  """Build the baseline CNN with Dropout2d placed directly after each conv.

  Each conv stage is conv -> Dropout2d -> max-pool -> ReLU (dropout before
  pooling), and each hidden linear layer is followed by ReLU -> Dropout.

  Args:
    dropout_p: dropout probability after the hidden linear layers. A falsy
      value omits those layers.
    droupout_conv: Dropout2d probability after each convolution. A falsy
      value omits those layers. (The parameter name keeps its original
      spelling so existing keyword callers continue to work.)

  Returns:
    (model, model_inference): the logits model and a wrapper around the same
    model that appends a softmax over the class dimension.
  """
  modules = []
  for in_channels, out_channels in ((3, 16), (16, 32), (32, 64)):
    modules.append(Conv2d(in_channels, out_channels, KERNEL_SIZE_CONV, 1, PADDING))
    if droupout_conv:
      modules.append(Dropout2d(droupout_conv))
    modules += [MaxPool2d(MAX_POOL_KERNEL, STRIDE), ReLU()]

  modules.append(Flatten())

  for in_features, out_features in ((1024, 512), (512, 256), (256, 128)):
    modules += [Linear(in_features, out_features), ReLU()]
    if dropout_p:
      modules.append(Dropout(dropout_p))

  modules.append(Linear(128, 10))

  model = Sequential(*modules)
  # Explicit dim: a bare Softmax() uses the deprecated implicit-dim behaviour
  # and warns on every call. The logits are (batch, classes).
  model_inference = Sequential(model, Softmax(dim=1))

  return model, model_inference

In [None]:
# New 45000/5000 split drawn from the shared generator.
trainset, valset = torch.utils.data.random_split(dataset, [45000, 5000], generator=generator)

# CNN variant with Dropout2d applied directly after each convolution.
model_dropout_v3, model_inference_dropout_v3 = (
    net.to(device) for net in build_dropout_model_v3())

dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in (trainset, valset))

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_v3.parameters(), lr=LEARNING_RATE)

epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

for e in range(40):
  train_loss, val_loss, val_acc = one_epoch(
      model_dropout_v3, ce_loss, optimizer,
      dataloader_train, dataloader_val, device, verbose=False)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

plt.plot(epoch_val_accs, color='r')
plt.show()


In [None]:
# Small-data run: 1000 training and 5000 validation images; the rest is discarded.
trainset, valset, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=generator)

# Start from a freshly initialised v3 dropout model.
model_dropout_v3, model_inference_dropout_v3 = (
    net.to(device) for net in build_dropout_model_v3())

dataloader_train, dataloader_val = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in (trainset, valset))

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model_dropout_v3.parameters(), lr=LEARNING_RATE)

epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(
      model_dropout_v3, ce_loss, optimizer,
      dataloader_train, dataloader_val, device, verbose=False)

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

plt.plot(epoch_val_accs, color='r')
plt.show()


In [None]:
# First augmentation pipeline: random rotation, horizontal/vertical flips and
# a random resized crop back to 32x32.
aug_transforms_first = transforms.Compose([transforms.RandomRotation(degrees=(0, 180)),
                                     transforms.RandomHorizontalFlip(p=0.5),
                                     transforms.RandomVerticalFlip(p=0.5),
                                     transforms.RandomResizedCrop(size=(32, 32)),
                                     transforms.ToTensor()])

dataset_aug_first = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_first)
trainset_aug_first, _ = torch.utils.data.random_split(dataset_aug_first, [45000, 5000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_first = torch.utils.data.DataLoader(trainset_aug_first, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and would leak training images.
_, valset_eval = torch.utils.data.random_split(dataset, [45000, 5000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_first, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(30):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

In [None]:
# First augmentation pipeline, trained on only 1000 images.
aug_transforms_first = transforms.Compose([transforms.RandomRotation(degrees=(0, 180)),
                                     transforms.RandomHorizontalFlip(p=0.5),
                                     transforms.RandomVerticalFlip(p=0.5),
                                     transforms.RandomResizedCrop(size=(32, 32)),
                                     transforms.ToTensor()])

dataset_aug_first = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_first)
trainset_aug_first, _, _ = torch.utils.data.random_split(dataset_aug_first, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_first = torch.utils.data.DataLoader(trainset_aug_first, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and could leak training images.
_, valset_eval, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_first, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()



In [None]:
# Second augmentation pipeline: Gaussian blur, horizontal flip and a random
# resized crop back to 32x32.
aug_transforms_second = transforms.Compose([transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
                                     transforms.RandomHorizontalFlip(p=0.5),
                                     transforms.RandomResizedCrop(size=(32, 32)),
                                     transforms.ToTensor()])

dataset_aug_second = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_second)
trainset_aug_second, _ = torch.utils.data.random_split(dataset_aug_second, [45000, 5000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_second = torch.utils.data.DataLoader(trainset_aug_second, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and would leak training images.
_, valset_eval = torch.utils.data.random_split(dataset, [45000, 5000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_second, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(30):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

In [None]:
# Second augmentation pipeline, trained on only 1000 images.
aug_transforms_second = transforms.Compose([transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
                                     transforms.RandomHorizontalFlip(p=0.5),
                                     transforms.RandomResizedCrop(size=(32, 32)),
                                     transforms.ToTensor()])

dataset_aug_second = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_second)
trainset_aug_second, _, _ = torch.utils.data.random_split(dataset_aug_second, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_second = torch.utils.data.DataLoader(trainset_aug_second, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and could leak training images.
_, valset_eval, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_second, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

In [None]:
# Third augmentation pipeline: colour jitter followed by a random affine
# transform (rotation, translation, down-scaling).
aug_transforms_third = transforms.Compose([transforms.ColorJitter(brightness=.5, hue=.3),
                                     transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
                                     transforms.ToTensor()])

dataset_aug_third = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_third)
trainset_aug_third, _ = torch.utils.data.random_split(dataset_aug_third, [45000, 5000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_third = torch.utils.data.DataLoader(trainset_aug_third, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and would leak training images.
_, valset_eval = torch.utils.data.random_split(dataset, [45000, 5000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_third, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(30):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

In [None]:
# Third augmentation pipeline, trained on only 1000 images.
aug_transforms_third = transforms.Compose([transforms.ColorJitter(brightness=.5, hue=.3),
                                     transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
                                     transforms.ToTensor()])

dataset_aug_third = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=aug_transforms_third)
trainset_aug_third, _, _ = torch.utils.data.random_split(dataset_aug_third, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))
dataloader_preview_aug_third = torch.utils.data.DataLoader(trainset_aug_third, batch_size=1, shuffle=False)

# Validation images come from the un-augmented dataset, split with the SAME
# seed and lengths as the augmented training subset. Both splits therefore
# use the same index permutation and the two subsets are disjoint. The global
# `valset` comes from a different split and could leak training images.
_, valset_eval, _ = torch.utils.data.random_split(dataset, [1000, 5000, 44000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = torch.utils.data.DataLoader(trainset_aug_third, batch_size=batch_size, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(valset_eval, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)

ce_loss = torch.nn.CrossEntropyLoss().to(device)

baseline_model, baseline_model_inference = build_model()

baseline_model.to(device)
baseline_model_inference.to(device)

optimizer = Adam(baseline_model.parameters(), lr=1e-3)

epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

for e in range(100):
  train_loss, val_loss, val_acc = one_epoch(baseline_model, ce_loss, optimizer, dataloader_train, dataloader_val, device, False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  # Checkpoint the model being trained here, not the unrelated global `model`.
  torch.save(baseline_model.state_dict(), "{:03d}.pth".format(e))
  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Training (red) vs. validation (blue) loss, then validation accuracy.
plt.plot(epoch_train_losses, c='r')
plt.plot(epoch_val_losses, c='b')
plt.show()

plt.plot(epoch_val_accs, c='r')
plt.show()

# Hlboká sieť

In [None]:
# Hyper-parameters for the deep network.
# NOTE(review): these rebind the module-level globals. The model builders
# defined in earlier cells read them at call time, so calling one of those
# builders after this cell would use PADDING = 2.
KERNEL_SIZE_CONV = 3
STRIDE = 2
PADDING = 2  # differs from the value of 1 used in the earlier cells
MAX_POOL_KERNEL = 2
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def build_model():
  model = Sequential(Conv2d(3, 32, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(32, 64, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(64, 128, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(128, 256, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(256, 512, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(512, 10, 2),
                      Flatten())
  
  model_inference = Sequential(model, Softmax())
  return model, model_inference

# Instantiate the network and move the logits model and its softmax wrapper
# to the selected device.
model, model_inference = build_model()
model, model_inference = model.to(device), model_inference.to(device)

dataloader_train = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
# Validation is read in a fixed order: shuffling adds nothing to evaluation
# and changes which samples land in the final, shorter batch, which makes the
# averaged per-batch validation loss vary from run to run.
dataloader_val = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history filled by the training loop.
epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

def one_epoch(model, loss, optimizer, dataloader_train, dataloader_val, device, verbose=True):
  """Train `model` for one pass over the training data, then validate it.

  Args:
    model: network to optimise; put in train mode for the training pass and
      in eval mode for validation.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
    optimizer: optimizer that updates `model`'s parameters.
    dataloader_train: iterable of `(inputs, targets)` training batches.
    dataloader_val: iterable of `(inputs, targets)` validation batches.
    device: device every batch is moved to before the forward pass.
    verbose: when true, print the training loss every 100 steps.

  Returns:
    Tuple `(mean_train_loss, mean_val_loss, val_accuracy)`. Both losses are
    unweighted means of the per-batch values; the accuracy is the fraction of
    correctly classified validation samples (NaN if there were none).
  """
  # The parameter keeps its public name `loss`; it is aliased so that the
  # per-batch loss tensors below never shadow the criterion itself.
  criterion = loss
  train_losses = []
  val_losses = []

  model.train()

  for i, batch in enumerate(dataloader_train):
    x, y = batch[0].to(device), batch[1].to(device)
    optimizer.zero_grad()
    out = model(x)
    batch_loss = criterion(out, y)
    batch_loss.backward()
    loss_value = batch_loss.item()
    train_losses.append(loss_value)
    optimizer.step()
    if i % 100 == 0 and verbose:
      print("Training loss at step {}: {}".format(i, loss_value))

  model.eval()

  correct = 0
  total = 0
  with torch.no_grad():
    for batch in dataloader_val:
      x, y = batch[0].to(device), batch[1].to(device)

      out = model(x)
      val_losses.append(criterion(out, y).item())
      # Predicted class = index of the largest logit along the last axis.
      correct += torch.sum(torch.argmax(out, dim=-1) == y).item()
      total += len(batch[1])

  # Guard against an empty validation loader instead of dividing by zero.
  val_acc = correct / total if total else float("nan")

  return np.mean(train_losses), np.mean(val_losses), val_acc

# Run 20 epochs, printing validation metrics and recording the history.
for e in range(20):
  train_loss, val_loss, val_acc = one_epoch(
    model, ce_loss, optimizer, dataloader_train, dataloader_val, device, verbose=False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Loss curves: training in red, validation in blue.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

# Validation accuracy per epoch.
plt.plot(epoch_val_accs, color='r')
plt.show()

In [None]:
# Hyper-parameters for the deep network with a fully connected head.
KERNEL_SIZE_CONV = 3
STRIDE = 2  # not referenced by build_model below
PADDING = 2
MAX_POOL_KERNEL = 2
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def build_model():
  """Build the conv + MLP classifier and a softmax-wrapped inference model.

  Five Conv2d -> MaxPool2d -> ReLU stages (32 to 512 channels) are followed by
  Flatten and a five-layer fully connected head (2048 -> 1024 -> 512 -> 256 ->
  128 -> 10). The first Linear layer expects 2048 features, i.e. the
  512 x 2 x 2 feature map that 32x32 inputs produce.

  Returns:
    Tuple `(model, model_inference)`: `model` outputs raw logits and
    `model_inference` wraps the same module with a softmax over the class
    dimension.
  """
  model = Sequential(Conv2d(3, 32, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(32, 64, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(64, 128, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(128, 256, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(256, 512, KERNEL_SIZE_CONV, 1, PADDING), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Flatten(),
                      Linear(2048, 1024), ReLU(),
                      Linear(1024, 512), ReLU(),
                      Linear(512, 256), ReLU(),
                      Linear(256, 128), ReLU(),
                      Linear(128, 10))

  # Name the class dimension explicitly: the implicit-dim form of Softmax is
  # deprecated and warns on every forward pass. dim=1 is the axis the implicit
  # rule picked for the (batch, classes) output, so probabilities are unchanged.
  model_inference = Sequential(model, Softmax(dim=1))
  return model, model_inference


# Instantiate the network and move the logits model and its softmax wrapper
# to the selected device.
model, model_inference = build_model()
model, model_inference = model.to(device), model_inference.to(device)

dataloader_train = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
# Validation is read in a fixed order: shuffling adds nothing to evaluation
# and changes which samples land in the final, shorter batch, which makes the
# averaged per-batch validation loss vary from run to run.
dataloader_val = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history filled by the training loop.
epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

def one_epoch(model, loss, optimizer, dataloader_train, dataloader_val, device, verbose=True):
  """Train `model` for one pass over the training data, then validate it.

  Args:
    model: network to optimise; put in train mode for the training pass and
      in eval mode for validation.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
    optimizer: optimizer that updates `model`'s parameters.
    dataloader_train: iterable of `(inputs, targets)` training batches.
    dataloader_val: iterable of `(inputs, targets)` validation batches.
    device: device every batch is moved to before the forward pass.
    verbose: when true, print the training loss every 100 steps.

  Returns:
    Tuple `(mean_train_loss, mean_val_loss, val_accuracy)`. Both losses are
    unweighted means of the per-batch values; the accuracy is the fraction of
    correctly classified validation samples (NaN if there were none).
  """
  # The parameter keeps its public name `loss`; it is aliased so that the
  # per-batch loss tensors below never shadow the criterion itself.
  criterion = loss
  train_losses = []
  val_losses = []

  model.train()

  for i, batch in enumerate(dataloader_train):
    # i is the index of the current batch
    # x holds the images
    # y holds the class each image belongs to
    x, y = batch[0].to(device), batch[1].to(device)
    optimizer.zero_grad()
    out = model(x)

    batch_loss = criterion(out, y)
    batch_loss.backward()
    loss_value = batch_loss.item()
    train_losses.append(loss_value)
    optimizer.step()
    if i % 100 == 0 and verbose:
      print("Training loss at step {}: {}".format(i, loss_value))

  model.eval()

  correct = 0
  total = 0
  with torch.no_grad():
    for batch in dataloader_val:
      x, y = batch[0].to(device), batch[1].to(device)

      out = model(x)
      val_losses.append(criterion(out, y).item())
      # Predicted class = index of the largest logit along the last axis.
      correct += torch.sum(torch.argmax(out, dim=-1) == y).item()
      total += len(batch[1])

  # Guard against an empty validation loader instead of dividing by zero.
  val_acc = correct / total if total else float("nan")

  return np.mean(train_losses), np.mean(val_losses), val_acc

# Run 20 epochs, printing validation metrics and recording the history.
for e in range(20):
  train_loss, val_loss, val_acc = one_epoch(
    model, ce_loss, optimizer, dataloader_train, dataloader_val, device, verbose=False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Loss curves: training in red, validation in blue.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

# Validation accuracy per epoch.
plt.plot(epoch_val_accs, color='r')
plt.show()

In [None]:
# batch norm

# Hyper-parameters for the batch-normalised deep network.
KERNEL_SIZE_CONV = 3
STRIDE = 2  # not referenced by build_model below
PADDING = 2
MAX_POOL_KERNEL = 2
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def build_model():
  """Build the batch-normalised conv + MLP classifier and its inference model.

  Each of the five conv stages is Conv2d -> BatchNorm2d -> MaxPool2d -> ReLU
  (32 to 512 channels). After Flatten, four Linear -> BatchNorm1d -> ReLU
  layers (1024, 512, 256, 128 units) lead to a final Linear(128, 10). The
  first Linear layer expects 2048 features, i.e. the 512 x 2 x 2 feature map
  that 32x32 inputs produce.

  Returns:
    Tuple `(model, model_inference)`: `model` outputs raw logits and
    `model_inference` wraps the same module with a softmax over the class
    dimension.
  """
  model = Sequential(Conv2d(3, 32, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(32), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(32, 64, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(64), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(64, 128, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(128), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(128, 256, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(256), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Conv2d(256, 512, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(512), MaxPool2d(MAX_POOL_KERNEL), ReLU(),
                      Flatten(),
                      Linear(2048, 1024), BatchNorm1d(1024), ReLU(),
                      Linear(1024, 512), BatchNorm1d(512), ReLU(),
                      Linear(512, 256), BatchNorm1d(256), ReLU(),
                      Linear(256, 128), BatchNorm1d(128), ReLU(),
                      Linear(128, 10))

  # Name the class dimension explicitly: the implicit-dim form of Softmax is
  # deprecated and warns on every forward pass. dim=1 is the axis the implicit
  # rule picked for the (batch, classes) output, so probabilities are unchanged.
  model_inference = Sequential(model, Softmax(dim=1))
  return model, model_inference


# Instantiate the network and move the logits model and its softmax wrapper
# to the selected device.
model, model_inference = build_model()
model, model_inference = model.to(device), model_inference.to(device)

dataloader_train = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
# Validation is read in a fixed order: shuffling adds nothing to evaluation
# and changes which samples land in the final, shorter batch, which makes the
# averaged per-batch validation loss vary from run to run.
dataloader_val = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

ce_loss = CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history filled by the training loop.
epoch_train_losses = []
epoch_val_losses = []
epoch_val_accs = []

def one_epoch(model, loss, optimizer, dataloader_train, dataloader_val, device, verbose=True):
  """Train `model` for one pass over the training data, then validate it.

  Args:
    model: network to optimise; put in train mode for the training pass and
      in eval mode for validation.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
    optimizer: optimizer that updates `model`'s parameters.
    dataloader_train: iterable of `(inputs, targets)` training batches.
    dataloader_val: iterable of `(inputs, targets)` validation batches.
    device: device every batch is moved to before the forward pass.
    verbose: when true, print the training loss every 100 steps.

  Returns:
    Tuple `(mean_train_loss, mean_val_loss, val_accuracy)`. Both losses are
    unweighted means of the per-batch values; the accuracy is the fraction of
    correctly classified validation samples (NaN if there were none).
  """
  # The parameter keeps its public name `loss`; it is aliased so that the
  # per-batch loss tensors below never shadow the criterion itself.
  criterion = loss
  train_losses = []
  val_losses = []

  model.train()

  for i, batch in enumerate(dataloader_train):
    x, y = batch[0].to(device), batch[1].to(device)
    optimizer.zero_grad()
    out = model(x)

    batch_loss = criterion(out, y)
    batch_loss.backward()
    loss_value = batch_loss.item()
    train_losses.append(loss_value)
    optimizer.step()
    if i % 100 == 0 and verbose:
      print("Training loss at step {}: {}".format(i, loss_value))

  model.eval()

  correct = 0
  total = 0
  with torch.no_grad():
    for batch in dataloader_val:
      x, y = batch[0].to(device), batch[1].to(device)

      out = model(x)
      val_losses.append(criterion(out, y).item())
      # Predicted class = index of the largest logit along the last axis.
      correct += torch.sum(torch.argmax(out, dim=-1) == y).item()
      total += len(batch[1])

  # Guard against an empty validation loader instead of dividing by zero.
  val_acc = correct / total if total else float("nan")

  return np.mean(train_losses), np.mean(val_losses), val_acc

# Run 20 epochs, printing validation metrics and recording the history.
for e in range(20):
  train_loss, val_loss, val_acc = one_epoch(
    model, ce_loss, optimizer, dataloader_train, dataloader_val, device, verbose=False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Loss curves: training in red, validation in blue.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

# Validation accuracy per epoch.
plt.plot(epoch_val_accs, color='r')
plt.show()

# Najlepší model 

In [None]:
def adjust_lr(optimizer, ep):
    """Set the learning rate of every parameter group for epoch `ep`.

    Epochs below 10 use a linear warm-up, `1e-4 * (ep + 1) / 2`. After that
    the rate is piecewise constant: 1e-3 up to epoch 39, 1e-4 up to 69,
    1e-5 up to 99, 1e-6 up to 129, 1e-4 again up to 159 and 1e-5 from
    epoch 160 onwards.

    Args:
        optimizer: object exposing `param_groups`, a sequence of dicts whose
            `'lr'` entry is overwritten in place.
        ep: zero-based epoch index.
    """
    if ep < 10:
        new_lr = 1e-4 * (ep + 1) / 2
    else:
        # (exclusive upper epoch bound, learning rate) in ascending order;
        # the value assigned first applies once every bound is exceeded.
        new_lr = 1e-5
        milestones = ((40, 1e-3), (70, 1e-4), (100, 1e-5), (130, 1e-6), (160, 1e-4))
        for upper_bound, rate in milestones:
            if ep < upper_bound:
                new_lr = rate
                break

    for group in optimizer.param_groups:
        group['lr'] = new_lr

In [None]:
# Hyper-parameters for the best-performing network.
KERNEL_SIZE_CONV = 3
PADDING = 2
MAX_POOL_KERNEL = 2
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def build_model(dropout_p=0.5, droupout_conv = 0.2):
  """Build the regularised conv + MLP classifier and its inference model.

  Each of the five conv stages is Conv2d -> BatchNorm2d -> MaxPool2d ->
  PReLU -> Dropout2d (32 to 512 channels). After Flatten, four
  Linear -> BatchNorm1d -> PReLU -> Dropout layers (1024, 512, 256, 128
  units) lead to a final Linear(128, 10). The first Linear layer expects
  2048 features, i.e. the 512 x 2 x 2 feature map that 32x32 inputs produce.

  Args:
    dropout_p: drop probability of the Dropout layers in the dense head.
    droupout_conv: drop probability of the Dropout2d layers after each conv
      stage (the misspelled name is kept so keyword callers keep working).

  Returns:
    Tuple `(model, model_inference)`: `model` outputs raw logits and
    `model_inference` wraps the same module with a softmax over the class
    dimension.
  """
  model = Sequential(Conv2d(3, 32, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(32), MaxPool2d(MAX_POOL_KERNEL), PReLU(), Dropout2d(droupout_conv),
                    Conv2d(32, 64, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(64), MaxPool2d(MAX_POOL_KERNEL), PReLU(), Dropout2d(droupout_conv),
                    Conv2d(64, 128, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(128), MaxPool2d(MAX_POOL_KERNEL), PReLU(), Dropout2d(droupout_conv),
                    Conv2d(128, 256, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(256), MaxPool2d(MAX_POOL_KERNEL), PReLU(), Dropout2d(droupout_conv),
                    Conv2d(256, 512, KERNEL_SIZE_CONV, 1, PADDING), BatchNorm2d(512), MaxPool2d(MAX_POOL_KERNEL), PReLU(), Dropout2d(droupout_conv),
                    Flatten(),
                    Linear(2048, 1024), BatchNorm1d(1024), PReLU(), Dropout(dropout_p),
                    Linear(1024, 512), BatchNorm1d(512), PReLU(), Dropout(dropout_p),
                    Linear(512, 256), BatchNorm1d(256), PReLU(), Dropout(dropout_p),
                    Linear(256, 128), BatchNorm1d(128), PReLU(), Dropout(dropout_p),
                    Linear(128, 10))

  # Name the class dimension explicitly: the implicit-dim form of Softmax is
  # deprecated and warns on every forward pass. dim=1 is the axis the implicit
  # rule picked for the (batch, classes) output, so probabilities are unchanged.
  model_inference = Sequential(model, Softmax(dim=1))
  return model, model_inference


# Build the network, then place the logits model and its softmax wrapper on
# the selected device.
model, model_inference = build_model()
model = model.to(device)
model_inference = model_inference.to(device)

# Training-time augmentation: AutoAugment with the CIFAR10 policy, then
# conversion to a tensor.
aug_transforms = transforms.Compose([
  transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
  transforms.ToTensor(),
])

dataset_aug = torchvision.datasets.CIFAR10(
  root='./data', train=True, download=True, transform=aug_transforms)
# Only the 45000-sample part of the seeded split is kept for training.
trainset_aug, _ = torch.utils.data.random_split(
  dataset_aug, [45000, 5000], generator=torch.Generator().manual_seed(42))

batch_size = 32

dataloader_train = DataLoader(trainset_aug, batch_size=batch_size, shuffle=True)
dataloader_val = DataLoader(valset, batch_size=batch_size, shuffle=False)

ce_loss = CrossEntropyLoss().to(device)
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history filled by the training loop.
epoch_train_losses, epoch_val_losses, epoch_val_accs = [], [], []

def one_epoch(model, loss, optimizer, dataloader_train, dataloader_val, device, verbose=True):
  """Train `model` for one pass over the training data, then validate it.

  Args:
    model: network to optimise; put in train mode for the training pass and
      in eval mode for validation.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
    optimizer: optimizer that updates `model`'s parameters.
    dataloader_train: iterable of `(inputs, targets)` training batches.
    dataloader_val: iterable of `(inputs, targets)` validation batches.
    device: device every batch is moved to before the forward pass.
    verbose: when true, print the training loss every 100 steps.

  Returns:
    Tuple `(mean_train_loss, mean_val_loss, val_accuracy)`. Both losses are
    unweighted means of the per-batch values; the accuracy is the fraction of
    correctly classified validation samples (NaN if there were none).
  """
  # The parameter keeps its public name `loss`; it is aliased so that the
  # per-batch loss tensors below never shadow the criterion itself.
  criterion = loss
  train_losses = []
  val_losses = []

  model.train()

  for i, batch in enumerate(dataloader_train):
    x, y = batch[0].to(device), batch[1].to(device)
    optimizer.zero_grad()
    out = model(x)

    batch_loss = criterion(out, y)
    batch_loss.backward()
    loss_value = batch_loss.item()
    train_losses.append(loss_value)
    optimizer.step()
    if i % 100 == 0 and verbose:
      print("Training loss at step {}: {}".format(i, loss_value))

  model.eval()

  correct = 0
  total = 0
  with torch.no_grad():
    for batch in dataloader_val:
      x, y = batch[0].to(device), batch[1].to(device)

      out = model(x)
      val_losses.append(criterion(out, y).item())
      # Predicted class = index of the largest logit along the last axis.
      correct += torch.sum(torch.argmax(out, dim=-1) == y).item()
      total += len(batch[1])

  # Guard against an empty validation loader instead of dividing by zero.
  val_acc = correct / total if total else float("nan")

  return np.mean(train_losses), np.mean(val_losses), val_acc

# Run 50 epochs; the learning rate is reset from the schedule before each one.
for e in range(50):
  adjust_lr(optimizer, e)
  train_loss, val_loss, val_acc = one_epoch(
    model, ce_loss, optimizer, dataloader_train, dataloader_val, device, verbose=False)

  print("Val loss at epoch {}: {}".format(e, val_loss))
  print("Val acc at epoch {}: {}".format(e, val_acc))

  epoch_train_losses.append(train_loss)
  epoch_val_losses.append(val_loss)
  epoch_val_accs.append(val_acc)

# Loss curves: training in red, validation in blue.
plt.plot(epoch_train_losses, color='r')
plt.plot(epoch_val_losses, color='b')
plt.show()

# Validation accuracy per epoch.
plt.plot(epoch_val_accs, color='r')
plt.show()


In [None]:
# Save the current weights of `model` to "050.pth" (the file name is the
# number 50 zero-padded to three digits).
torch.save(model.state_dict(), "{:03d}.pth".format(50))