In [1]:
train_batch_size = 64
val_batch_size = 32
num_workers = 4

num_epochs = 40

learning_rate = 0.001

log_dir = './logs'
train_file = '~/paulbahush/data/sign_mnist_train.csv'
test_file = '~/paulbahush/data/sign_mnist_test.csv'

In [3]:
import pandas as pd
import torch


### Задание 1.
#### Загрузите данные. Разделите исходный набор данных на обучающую и валидационную выборки.


In [4]:
train_csv = pd.read_csv(train_file)
test_csv = pd.read_csv(test_file)
train_csv.iloc[:, 0].describe()

### Задание 2.
#### Реализуйте глубокую нейронную сеть со сверточными слоями. Какое качество классификации получено? Какая архитектура сети была использована?

In [6]:
from torch.utils.data import Dataset
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms

class SignLanguageDataset(Dataset):
    def __init__(self, data, transforms=None):
        self.data = data
        self.transforms = transforms

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        label = self.data.iloc[idx, 0]
        image = self.data.iloc[idx, 1:].values.reshape((28,28))

        if self.transforms:
            image = self.transforms(np.uint8(image))
        else:
            image = image.astype(np.float32)

        return image, label

tfms = transforms.Compose([transforms.ToTensor()])
train_dataset = SignLanguageDataset(train_csv, transforms=tfms)
train_loader = DataLoader(train_dataset, num_workers=num_workers, batch_size=train_batch_size, shuffle=True)

test_tfms = transforms.Compose([transforms.ToTensor()])
test_dataset = SignLanguageDataset(test_csv, transforms=test_tfms)
test_loader = DataLoader(test_dataset, num_workers=num_workers, batch_size=val_batch_size, shuffle=False)

In [7]:
import torch.nn as nn


class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()

        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()

        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(32 * 7 * 7, 25)

    def forward(self, x):
        out = self.cnn1(x)
        out = self.relu1(out)

        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.relu2(out)

        out = self.maxpool2(out)

        out = out.view(out.size(0), -1)

        out = self.fc1(out)

        return out

model = CNNModel()
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

def train(model, train_loader, test_loader, num_epochs,
    optimizer, criterion, logs_writer):
    for epoch in tqdm(range(num_epochs)):

        for i, (images, labels) in enumerate(train_loader):
            images = images.requires_grad_()
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            logs_writer.add_scalar('Itearation Loss/train', loss, epoch*len(train_loader) + i)

        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.requires_grad_()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()

        accuracy = 100 * correct / total

train(model, train_loader, test_loader, num_epochs, 
    optimizer, criterion, logs_writer)

### Задание 3.
#### Примените дополнение данных (data augmentation). Как это повлияло на качество классификатора?

In [9]:
from torch.utils.data import DataLoader
from torchvision import transforms


tfms = transforms.Compose([
        transforms.ToPILImage(mode=None),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),
        transforms.ToTensor(),
    ])

train_dataset = SignLanguageDataset(train_csv, transforms=tfms)
train_loader = DataLoader(train_dataset, num_workers=num_workers, batch_size=train_batch_size, shuffle=True)

test_tfms = transforms.Compose([transforms.ToTensor()])
test_dataset = SignLanguageDataset(test_csv, transforms=test_tfms)
test_loader = DataLoader(test_dataset, num_workers=num_workers, batch_size=val_batch_size, shuffle=False)

In [10]:
import torch.nn as nn


model = CNNModel()
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

train(model, train_loader, test_loader, num_epochs, 
    optimizer, criterion, logs_writer)

### Задание 4.
#### Поэкспериментируйте с готовыми нейронными сетями (например, AlexNet, VGG16, Inception и т.п.), применив передаточное обучение. Как это повлияло на качество классификатора? Можно ли было обойтись без него? Какой максимальный результат удалось получить на контрольной выборке?


In [12]:
from torch.utils.data import DataLoader
from torchvision import transforms


tfms = transforms.Compose([
        transforms.ToPILImage(mode=None),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),
        transforms.ToTensor(),
    ])

train_dataset = SignLanguageDataset(train_csv, transforms=tfms)
train_loader = DataLoader(train_dataset, num_workers=num_workers, batch_size=train_batch_size, shuffle=True)

test_tfms = transforms.Compose([transforms.ToTensor()])
test_dataset = SignLanguageDataset(test_csv, transforms=test_tfms)
test_loader = DataLoader(test_dataset, num_workers=num_workers, batch_size=val_batch_size, shuffle=False)

In [13]:
from torchvision import models
from torch.optim import lr_scheduler


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 25)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = torch.optim.AdamW(model_ft.parameters(), lr=learning_rate)
exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, 60)

In [14]:
train(model, train_loader, test_loader, num_epochs,
    optimizer, criterion, logs_writer)

100%|██████████| 40/40 [18:37<00:00, 27.93s/it]


In [15]:
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 25)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()

optimizer_ft = torch.optim.AdamW(model_ft.parameters(), lr=learning_rate)
exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, 60)

In [16]:
train(model, train_loader, test_loader, num_epochs,
    optimizer, criterion, logs_writer)

100%|██████████| 40/40 [19:12<00:00, 28.82s/it]
