# Load the dataset
Don't change this code


In [1]:
import random
import numpy as np
import torch
from tqdm import tqdm


# Seed everything
def set_random_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


set_random_seed(323212)

In [2]:
# Load and preprocess the data. Don't change this code
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, random_split
import pickle

# CIFAR10 z-normalization const https://github.com/facebookarchive/fb.resnet.torch/issues/180
cifar10_mean = (0.491, 0.482, 0.447)
cifar10_std = (0.247, 0.244, 0.262)

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),  # PIL Image to Pytorch tensor
    transforms.Normalize(cifar10_mean, cifar10_std)
    # https://pytorch.org/docs/stable/torchvision/transforms.html?highlight=transforms%20normalize#torchvision.transforms.Normalize
])

dataset = datasets.CIFAR10("/datasets", train=True, transform=transform, download=True)

# Load class names
with open("/datasets/cifar-10-batches-py/batches.meta", 'rb') as infile:
    cifar_meta = pickle.load(infile)
labels = cifar_meta['label_names']

# Split dataset into train and val
train_ds, val_ds, _ = random_split(dataset, [10000, 2000, 38000])
batch_size = 256

# Create dataloaders
train_loader = DataLoader(train_ds, batch_size=batch_size, num_workers=4)
val_loader = DataLoader(val_ds, batch_size=batch_size, num_workers=4)

Files already downloaded and verified


# Function for accuracy checking

Don't change this code

In [3]:
def validate(model, testloader, device="cpu"):
    correct = 0
    total = 0
    # Добавим подсчет лосса
    loss = 0
    with torch.no_grad():
        for images, labels in tqdm(testloader):
            labels = labels.to(device)
            outputs = model(images.to(device))
            loss += criterion(outputs, labels).item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.to(device)).sum().item()

    return correct / total, loss / len(testloader)

# Implement CNN class for CIFAR10

**In constructor**

Define 2 - 3 convolutional layers 

 https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

with corresponding in/out dimensions W_out = 1 + ((W_in - F + 2*P) / S)


Also define max pooling : https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html

and fully connected layers: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear


**In forward**

Write code for forward pass.
Remember that first dimension is the batch dimension

In [4]:
import torch
import torch.nn as nn


class TwoLayerCNN(nn.Module):
    def __init__(self, class_nums=10, act=nn.ReLU()):
        super().__init__()

        # Что-то вроде vgg блока
        self.vgg1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), padding=1),
            act,
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1),
            act,
            # nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=1),
            act,
            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        )

        self.flatten = nn.Flatten()

        # Линейный классификатор
        self.classifier = nn.Sequential(
            nn.Linear(4 * 4 * 64, 1024),
            act,
            nn.Linear(1024, class_nums)
        )

    def forward(self, x):
        x = self.vgg1(x)
        x = self.flatten(x)
        x = self.classifier(x)

        return x

In [5]:
TwoLayerCNN()

TwoLayerCNN(
  (vgg1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (classifier): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)

# Train the model




## I will use wandb instead of TensorBoard

In [6]:
import wandb

wandb.login()
wandb.init(project="cv-convs", entity="dmitysh")

[34m[1mwandb[0m: Currently logged in as: [33mdmitysh[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Implement training loop

- Create optimizer,
- Save loss and accuracy values into tensorboard log
- Use GPU to speedup training process.


In [7]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim

set_random_seed(9696)

"""
  Send model to GPU
"""

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = TwoLayerCNN(10)
model = model.to(device)
# wandb.watch(model)

"""
  Adding criterion and setting train mode on
"""
criterion = nn.CrossEntropyLoss()

"""
  Setup optimizer for your model
"""
optimizer = optim.Adam(model.parameters(), lr=7e-4)
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=1)

for epoch in range(20):
    model.train()
    train_loss = 0
    for img_batch, labels_batch in tqdm(train_loader):
        img_batch = img_batch.to(device)
        labels_batch = labels_batch.to(device)

        output = model(img_batch)
        loss = criterion(output, labels_batch)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    model.eval()

    accuracy, mean_val_loss = validate(model, val_loader, device)
    mean_train_loss = train_loss / len(train_loader)
    # scheduler.step(mean_val_loss)

    """
      Write data to wandb
    """
    wandb.log({"epoch": epoch,
               "train loss": mean_train_loss,
               "val loss": mean_val_loss,
               "val accuracy": accuracy})

    print("Epoch {} Loss {:.2f} Accuracy {:.2f}".format(epoch, mean_train_loss, accuracy))

100%|██████████| 40/40 [00:13<00:00,  2.87it/s]
100%|██████████| 8/8 [00:08<00:00,  1.06s/it]


Epoch 0 Loss 1.99 Accuracy 0.37


100%|██████████| 40/40 [00:09<00:00,  4.32it/s]
100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


Epoch 1 Loss 1.64 Accuracy 0.44


100%|██████████| 40/40 [00:09<00:00,  4.33it/s]
100%|██████████| 8/8 [00:08<00:00,  1.10s/it]


Epoch 2 Loss 1.48 Accuracy 0.48


100%|██████████| 40/40 [00:09<00:00,  4.08it/s]
100%|██████████| 8/8 [00:08<00:00,  1.11s/it]


Epoch 3 Loss 1.38 Accuracy 0.51


100%|██████████| 40/40 [00:09<00:00,  4.35it/s]
100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


Epoch 4 Loss 1.31 Accuracy 0.53


100%|██████████| 40/40 [00:08<00:00,  4.54it/s]
100%|██████████| 8/8 [00:08<00:00,  1.07s/it]


Epoch 5 Loss 1.25 Accuracy 0.54


100%|██████████| 40/40 [00:09<00:00,  4.35it/s]
100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


Epoch 6 Loss 1.19 Accuracy 0.55


100%|██████████| 40/40 [00:09<00:00,  4.09it/s]
100%|██████████| 8/8 [00:09<00:00,  1.24s/it]


Epoch 7 Loss 1.14 Accuracy 0.56


100%|██████████| 40/40 [00:08<00:00,  4.45it/s]
100%|██████████| 8/8 [00:08<00:00,  1.10s/it]


Epoch 8 Loss 1.09 Accuracy 0.56


100%|██████████| 40/40 [00:09<00:00,  4.17it/s]
100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


Epoch 9 Loss 1.05 Accuracy 0.59


100%|██████████| 40/40 [00:09<00:00,  4.42it/s]
100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


Epoch 10 Loss 1.00 Accuracy 0.60


100%|██████████| 40/40 [00:09<00:00,  4.36it/s]
100%|██████████| 8/8 [00:08<00:00,  1.06s/it]


Epoch 11 Loss 0.95 Accuracy 0.61


100%|██████████| 40/40 [00:08<00:00,  4.45it/s]
100%|██████████| 8/8 [00:08<00:00,  1.05s/it]


Epoch 12 Loss 0.90 Accuracy 0.61


100%|██████████| 40/40 [00:08<00:00,  4.48it/s]
100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


Epoch 13 Loss 0.86 Accuracy 0.62


100%|██████████| 40/40 [00:09<00:00,  4.41it/s]
100%|██████████| 8/8 [00:08<00:00,  1.04s/it]


Epoch 14 Loss 0.82 Accuracy 0.60


100%|██████████| 40/40 [00:09<00:00,  4.36it/s]
100%|██████████| 8/8 [00:09<00:00,  1.13s/it]


Epoch 15 Loss 0.81 Accuracy 0.59


100%|██████████| 40/40 [00:09<00:00,  4.35it/s]
100%|██████████| 8/8 [00:09<00:00,  1.15s/it]


Epoch 16 Loss 0.76 Accuracy 0.60


100%|██████████| 40/40 [00:09<00:00,  4.23it/s]
100%|██████████| 8/8 [00:08<00:00,  1.10s/it]


Epoch 17 Loss 0.71 Accuracy 0.60


100%|██████████| 40/40 [00:09<00:00,  4.17it/s]
100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


Epoch 18 Loss 0.68 Accuracy 0.61


100%|██████████| 40/40 [00:09<00:00,  4.34it/s]
100%|██████████| 8/8 [00:08<00:00,  1.08s/it]

Epoch 19 Loss 0.64 Accuracy 0.62





## Validate results on test dataset

You must get accuracy above 0.65

In [9]:
test_dataset = datasets.CIFAR10("/datasets",
                                train=False,
                                transform=dataset.transform,  # Transforms stay the same
                                download=True)

test_loader = DataLoader(test_dataset, batch_size=batch_size)
model.eval()

accuracy = validate(model, test_loader, device)
print(f"Accuracy on test:{accuracy}")


Files already downloaded and verified


100%|██████████| 40/40 [00:03<00:00, 13.23it/s]

Accuracy on test:(0.6093, 1.182271096110344)





# Place for brief conclusion:

Получить аккураси >0.65 используя 2-3 сверточных слоя без тотального переобучения кажется непостижимой задачей.
Я пробовал более 70 запусков (посмотреть можно тут : https://wandb.ai/dmitysh/cv-convs?workspace=user-dmitysh)
Начал с двух сверточных слоев, потом добавил третий, что чуть улучшило качество.
Менял число эпох.
Пробовал различный learning rate, в какой-то момент прикрутил шедулер, но потом убрал, так как эффект был не супер сильный.
Менял линейные слои в classifier, добавлял батч нормы и дропауты, причем как в линейные слои, так и после сверток.
Лучшее, что у меня получилось было в районе 0.63.

Кажется, что модели просто не хватает данных для обучения (всего 10к).
И качество сильно меняется от сида к сиду при разбиении.
В какой-то момент обучил простейшую модель на 25к вместо 10к изображений и с легкостью получил почти 0.7 accuracy.




# Ideas for extra work

---
1. Evaluate the impact of the number and size of filters in convolutional layers on the accuracy. Кажется для такой маленькой картинки брать больше 3 на 3 точно не стоит. А вот число фильтров качество улучшало, пока не доходило до переобучения.

2. Evaluate the impact of the convolutional layers count on the accuracy. Попробовал, третий слой улучшил качество по сравнению с 2-мя при остальном неизменном.


3. Visualization something ... ссылка на запуски снизу
# https://wandb.ai/dmitysh/cv-convs?workspace=user-dmitysh

---







