Вспомогательные функции для загрузки данных и правильного формирования тестовой выборки:

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

import os, sys
from urllib.request import urlretrieve
import pickle

def download_tinyImg200(path,
                     url='http://cs231n.stanford.edu/tiny-imagenet-200.zip',
                     tarname='tiny-imagenet-200.zip'):
    """Download the Tiny ImageNet archive into ``path`` and unpack it there.

    Args:
        path: Directory that receives both the archive and its extracted
            contents. Created (including parents) when it does not exist.
        url: Location of the zip archive.
        tarname: File name under which the archive is stored inside ``path``.
    """
    import zipfile

    # makedirs also handles nested targets and an already existing directory.
    os.makedirs(path, exist_ok=True)

    archive = os.path.join(path, tarname)
    urlretrieve(url, archive)

    # Extract next to the archive (not into the current working directory)
    # and let the context manager close the handle even if extraction fails.
    with zipfile.ZipFile(archive, 'r') as zip_ref:
        zip_ref.extractall(path)


def fix_test_data(data_path, root='tiny-imagenet-200/val'):
    """Regroup the flat validation images into one folder per class.

    Reads ``val_annotations.txt`` (tab-separated; the first two columns are
    the image file name and the class id) and copies every image from
    ``<root>/images/<name>`` to ``<root>/<class>/images/<class>_<name>``.
    The flat ``images`` directory is removed once all copies succeeded.

    Args:
        data_path: Directory that contains the extracted dataset.
        root: Validation directory, relative to ``data_path``.

    Raises:
        Exception: If the flat image directory is missing while the
            per-class layout is incomplete, or if creating/copying files
            fails. In the latter case the per-class folders created so far
            are removed again.
    """
    import shutil

    root = os.path.join(data_path, root)
    images_dir = os.path.join(root, 'images')

    with open(os.path.join(root, 'val_annotations.txt')) as f:
        # Skip blank lines; only the first two columns are needed.
        annotations = [line.rstrip('\r\n').split('\t')[0:2]
                       for line in f if line.strip()]
    classes = sorted({item[1] for item in annotations})

    if not os.path.isdir(images_dir):
        # The flat directory is deleted only after a successful run, so a
        # complete per-class layout means there is nothing left to do.
        if all(os.path.isdir(os.path.join(root, folder, 'images'))
               for folder in classes):
            return
        raise Exception('Flat validation image directory not found: ' + images_dir)

    try:
        for folder in classes:
            os.makedirs(os.path.join(root, folder, 'images'), exist_ok=True)

        for name, folder in annotations:
            new_name = folder + '_' + name
            shutil.copy(os.path.join(images_dir, name),
                        os.path.join(root, folder, 'images', new_name))

    except OSError as e:
        # Roll back the partially built layout; the originals are untouched.
        for folder in classes:
            shutil.rmtree(os.path.join(root, folder), ignore_errors=True)
        raise Exception('Something went wrong during copying val images. Check your access rights!') from e

    shutil.rmtree(images_dir, ignore_errors=True)

Загрузим данные:

In [None]:
# Keep the dataset in the current working directory.
data_path = '.'
# Fetch and unpack the archive, then regroup the validation images into
# per-class folders (the order of these two calls matters).
download_tinyImg200(data_path)
fix_test_data(data_path)

Импортируем нужные библиотеки:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision, torchvision.transforms as transforms
from torch.autograd import Variable
from torchsummary import summary
import torchvision.models as models

Разделим "tiny-imagenet-200/train" на тренировочную и валидационную выборки, а "tiny-imagenet-200/val" оставим в качестве тестовой выборки. Для тренировочного и валидационного набора данных будем производить случайное горизонтальное отражение:

In [None]:
# Evaluation pipeline: only convert the image to a tensor.
transform_augment_test = transforms.Compose([transforms.ToTensor()])

# Training pipeline: random horizontal flip, then conversion to a tensor.
transform_augment_train = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

In [None]:
# The official training folder is the source of both the train and the
# validation split, so both share the augmenting transform.
dataset = torchvision.datasets.ImageFolder('tiny-imagenet-200/train', transform=transform_augment_train)

# 80/20 split derived from the actual dataset size (80000/20000 for the full
# 100000 images); hard-coded sizes make random_split raise on any other size.
# The fixed seed keeps the split reproducible.
n_val = len(dataset) // 5
n_train = len(dataset) - n_val
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [n_train, n_val], generator=torch.Generator().manual_seed(42))

# The regrouped official validation folder serves as the held-out test set.
test_dataset = torchvision.datasets.ImageFolder('tiny-imagenet-200/val', transform=transform_augment_test)

In [None]:
# Mini-batch size shared by the loaders defined in the following cells.
batch_size = 32

# Training batches are drawn in shuffled order.
train_batch_gen = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=1,
)

In [None]:
# Validation batches (shuffling is kept as in the training loader).
val_batch_gen = DataLoader(
    val_dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=1,
)

In [None]:
# Test batches are read in a fixed order (no shuffling).
test_batch_gen = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
    num_workers=1,
)

Набор данных содержит 100000 цветных изображений размером 64x64. Всего представлено 200 классов.

Для решения задачи классификации я попробовал реализовать небольшую свёрточную сеть. Посмотрим, какое качество получится добиться с реализованной сетью:

In [None]:
# Small CNN for 3x64x64 inputs: four conv stages followed by an MLP head
# that outputs 200 class logits.
model = nn.Sequential()

# Each stage: 3x3 convolution (no padding) -> batch norm -> ReLU -> 2x2 max pool.
channels = [3, 64, 128, 256, 512]
for idx, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:]), start=1):
    model.add_module('conv_{}'.format(idx), nn.Conv2d(c_in, c_out, 3))
    model.add_module('bn_{}'.format(idx), nn.BatchNorm2d(c_out))
    model.add_module('relu_{}'.format(idx), nn.ReLU())
    model.add_module('pool_{}'.format(idx), nn.MaxPool2d(2, 2))

# For a 64x64 input the last stage yields 512 x 2 x 2 = 2048 features.
model.add_module('flatten_5', nn.Flatten())

model.add_module('linear_6', nn.Linear(2048, 1024))
model.add_module('dropout_6', nn.Dropout(p=0.1))
model.add_module('relu_6', nn.ReLU())

model.add_module('linear_7', nn.Linear(1024, 512))
# Name spelled consistently with 'dropout_6' (was 'droput_7').
model.add_module('dropout_7', nn.Dropout(p=0.1))
model.add_module('relu_7', nn.ReLU())

model.add_module('dense_8', nn.Linear(512, 200))


# Use the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.to(device)
device

device(type='cuda', index=0)

In [None]:
# Print a per-layer summary for a single 3x64x64 input. Run on the device
# selected earlier instead of forcing CUDA, so the cell also works on
# CPU-only machines.
summary(model.to(device), (3, 64, 64), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 62, 62]           1,792
       BatchNorm2d-2           [-1, 64, 62, 62]             128
              ReLU-3           [-1, 64, 62, 62]               0
         MaxPool2d-4           [-1, 64, 31, 31]               0
            Conv2d-5          [-1, 128, 29, 29]          73,856
       BatchNorm2d-6          [-1, 128, 29, 29]             256
              ReLU-7          [-1, 128, 29, 29]               0
         MaxPool2d-8          [-1, 128, 14, 14]               0
            Conv2d-9          [-1, 256, 12, 12]         295,168
      BatchNorm2d-10          [-1, 256, 12, 12]             512
             ReLU-11          [-1, 256, 12, 12]               0
        MaxPool2d-12            [-1, 256, 6, 6]               0
           Conv2d-13            [-1, 512, 4, 4]       1,180,160
      BatchNorm2d-14            [-1, 51

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [None]:
def compute_loss(X_batch, y_batch):
    """Return the mean cross-entropy of the global ``model`` on one batch.

    Args:
        X_batch: Batch of inputs; converted to a float32 tensor on ``device``.
        y_batch: Batch of integer class labels; converted to an int64 tensor
            on ``device``.

    Returns:
        A scalar tensor (``F.cross_entropy`` already averages over the batch)
        that supports ``backward()``.
    """
    # as_tensor replaces the legacy FloatTensor/LongTensor constructors and
    # converts dtype and device in a single step.
    X_batch = torch.as_tensor(X_batch, dtype=torch.float32, device=device)
    y_batch = torch.as_tensor(y_batch, dtype=torch.int64, device=device)
    # Kept from the original: guarantees the model sits on `device` even if
    # it was moved elsewhere between cells.
    logits = model.to(device)(X_batch)
    return F.cross_entropy(logits, y_batch)

In [None]:
import time

# Adam over all parameters of the network.
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

train_loss = []    # one value per training batch, accumulated over all epochs
val_accuracy = []  # one value per validation batch, accumulated over all epochs

num_epochs = 15

for epoch in range(num_epochs):
    start_time = time.time()

    # Training pass.
    model.train(True)
    for (X_batch, y_batch) in train_batch_gen:
        # Clear gradients first so stale ones (e.g. from an interrupted run)
        # never leak into this step.
        opt.zero_grad()
        loss = compute_loss(X_batch, y_batch)
        loss.backward()
        opt.step()
        train_loss.append(loss.item())

    # Validation pass: eval mode and no autograd bookkeeping.
    model.train(False)
    with torch.no_grad():
        for X_batch, y_batch in val_batch_gen:
            # Use the selected device instead of a hard-coded .cuda().
            logits = model(X_batch.to(device))
            y_pred = logits.max(1)[1].cpu()
            val_accuracy.append(np.mean((y_batch.cpu() == y_pred).numpy()))

    # Report the averages over the batches of the epoch that just finished.
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration): \t{:.6f}".format(np.mean(train_loss[-len(train_batch_gen):])))
    print("  validation accuracy: \t\t\t{:.2f} %".format(np.mean(val_accuracy[-len(val_batch_gen):]) * 100))

Epoch 1 of 15 took 59.039s
  training loss (in-iteration): 	4.411328
  validation accuracy: 			15.50 %
Epoch 2 of 15 took 58.095s
  training loss (in-iteration): 	3.679241
  validation accuracy: 			22.08 %
Epoch 3 of 15 took 58.160s
  training loss (in-iteration): 	3.365742
  validation accuracy: 			24.62 %
Epoch 4 of 15 took 57.539s
  training loss (in-iteration): 	3.134482
  validation accuracy: 			28.92 %
Epoch 5 of 15 took 58.416s
  training loss (in-iteration): 	2.954643
  validation accuracy: 			29.96 %
Epoch 6 of 15 took 57.894s
  training loss (in-iteration): 	2.798206
  validation accuracy: 			32.00 %
Epoch 7 of 15 took 58.303s
  training loss (in-iteration): 	2.656835
  validation accuracy: 			33.72 %
Epoch 8 of 15 took 56.955s
  training loss (in-iteration): 	2.529060
  validation accuracy: 			34.70 %
Epoch 9 of 15 took 57.213s
  training loss (in-iteration): 	2.411285
  validation accuracy: 			35.87 %
Epoch 10 of 15 took 56.660s
  training loss (in-iteration): 	2.297968
  v

Посчитаем качество на тестовой выборке:

In [None]:
# Evaluate on the held-out test set in eval mode.
model.train(False)
test_batch_acc = []  # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0

# no_grad avoids building autograd graphs during inference.
with torch.no_grad():
    for X_batch, y_batch in test_batch_gen:
        # Use the selected device instead of a hard-coded .cuda().
        logits = model(X_batch.to(device))
        y_pred = logits.max(1)[1].cpu()
        hits = (y_batch.cpu() == y_pred).numpy()
        test_batch_acc.append(np.mean(hits))
        n_correct += int(hits.sum())
        n_seen += hits.size

# Per-sample accuracy: averaging per-batch means would over-weight a
# shorter final batch.
test_accuracy = n_correct / max(n_seen, 1)

print("test accuracy:\t\t{:.2f} %".format(test_accuracy * 100))

test accuracy:		36.63 %


Как мы видим, результат не самый лучший, но это уже что-то.

Чтобы улучшить точность предсказания и не тратить много времени на разработку сети, я взял предварительно обученную свёрточную сеть - ResNet50. Сеть обучалась на датасете задачи классификации картинок ImageNet с 1000 классами. На вход данная сеть принимает цветные картинки размером 224x224. Чтобы мы могли применить ResNet50 к нашей задаче, необходимо изменить размер картинок в нашем датасете. Также мы не будем обучать сеть заново, а просто изменим последний слой, чтобы вместо предсказания на 1000 классов сеть производила предсказание на 200 классов.

In [None]:
# Per-channel mean/std used to normalise the inputs of the pretrained network.
imagenet_stats = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Evaluation pipeline: resize to 224, convert to a tensor, normalise.
transform_augment_test = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        imagenet_stats,
    ]
)

# Training pipeline: same as above plus a random horizontal flip after resizing.
transform_augment_train = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        imagenet_stats,
    ]
)

In [None]:
# Rebuild the datasets with the 224x224 transforms; the train and validation
# splits both come from the official training folder.
dataset = torchvision.datasets.ImageFolder('tiny-imagenet-200/train', transform=transform_augment_train)

# 80/20 split derived from the actual dataset size (80000/20000 for the full
# 100000 images); hard-coded sizes make random_split raise on any other size.
# The fixed seed keeps the split reproducible.
n_val = len(dataset) // 5
n_train = len(dataset) - n_val
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [n_train, n_val], generator=torch.Generator().manual_seed(42))

# The regrouped official validation folder serves as the held-out test set.
test_dataset = torchvision.datasets.ImageFolder('tiny-imagenet-200/val', transform=transform_augment_test)

In [None]:
# Mini-batch size shared by the loaders defined in the following cells.
batch_size = 32

# Training batches are drawn in shuffled order.
train_batch_gen = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=1,
)

In [None]:
# Validation batches (shuffling is kept as in the training loader).
val_batch_gen = DataLoader(
    val_dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=1,
)

In [None]:
# Test batches are read in a fixed order (no shuffling).
test_batch_gen = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
    num_workers=1,
)

Загрузим ResNet50:

In [None]:
# Load ResNet50 together with its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — confirm against the installed version
# before changing this call.
model = models.resnet50(pretrained=True)

In [None]:
# Use the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move the network's parameters and buffers to the selected device.
model.to(device)

Отключим обновления весов у всей сети:

In [None]:
# Freeze every weight of the network so the optimizer cannot change it.
for frozen in model.parameters():
    frozen.requires_grad_(False)

Изменим последний слой и только для него включим возможность обучения:

In [None]:
# Replace the 1000-class ImageNet head with a trainable 200-class head.
# The new layers are created on the CPU, so move them to the device the rest
# of the network already lives on.
model.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Linear(1024, 200)
    ).to(device)

# Fresh layers are trainable by default; set it explicitly so that only the
# head is updated regardless of what was frozen earlier.
for param in model.fc.parameters():
    param.requires_grad = True

In [None]:
def compute_loss(X_batch, y_batch):
    """Return the mean cross-entropy of the global ``model`` on one batch.

    Args:
        X_batch: Batch of inputs; converted to a float32 tensor on ``device``.
        y_batch: Batch of integer class labels; converted to an int64 tensor
            on ``device``.

    Returns:
        A scalar tensor (``F.cross_entropy`` already averages over the batch)
        that supports ``backward()``.
    """
    # as_tensor replaces the legacy FloatTensor/LongTensor constructors and
    # converts dtype and device in a single step.
    X_batch = torch.as_tensor(X_batch, dtype=torch.float32, device=device)
    y_batch = torch.as_tensor(y_batch, dtype=torch.int64, device=device)
    # Kept from the original: ensures sub-modules attached after the initial
    # move (such as a replaced head) are on `device` as well.
    output = model.to(device)(X_batch)
    return F.cross_entropy(output, y_batch)

In [None]:
import time

# Only the parameters of the `fc` head are handed to the optimizer.
opt = torch.optim.Adam(model.fc.parameters(), lr=1e-4)

train_loss = []    # one value per training batch, accumulated over all epochs
val_accuracy = []  # one value per validation batch, accumulated over all epochs

num_epochs = 15

for epoch in range(num_epochs):
    start_time = time.time()

    # Training pass.
    # NOTE(review): train mode also lets BatchNorm layers in the backbone
    # update their running statistics — confirm this is intended.
    model.train(True)
    for (X_batch, y_batch) in train_batch_gen:
        # Clear gradients first so stale ones (e.g. from an interrupted run)
        # never leak into this step.
        opt.zero_grad()
        loss = compute_loss(X_batch, y_batch)
        loss.backward()
        opt.step()
        train_loss.append(loss.item())

    # Validation pass: eval mode and no autograd bookkeeping.
    model.train(False)
    with torch.no_grad():
        for X_batch, y_batch in val_batch_gen:
            # Use the selected device instead of a hard-coded .cuda().
            output = model(X_batch.to(device))
            y_pred = output.max(1)[1].cpu()
            val_accuracy.append(np.mean((y_batch.cpu() == y_pred).numpy()))

    # Report the averages over the batches of the epoch that just finished.
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration): \t{:.6f}".format(np.mean(train_loss[-len(train_batch_gen):])))
    print("  validation accuracy: \t\t\t{:.2f} %".format(np.mean(val_accuracy[-len(val_batch_gen):]) * 100))

Epoch 1 of 15 took 358.107s
  training loss (in-iteration): 	2.841921
  validation accuracy: 			54.93 %
Epoch 2 of 15 took 356.986s
  training loss (in-iteration): 	1.789467
  validation accuracy: 			58.79 %
Epoch 3 of 15 took 357.016s
  training loss (in-iteration): 	1.629168
  validation accuracy: 			60.15 %
Epoch 4 of 15 took 357.073s
  training loss (in-iteration): 	1.551868
  validation accuracy: 			61.25 %
Epoch 5 of 15 took 356.687s
  training loss (in-iteration): 	1.494007
  validation accuracy: 			61.29 %
Epoch 6 of 15 took 356.825s
  training loss (in-iteration): 	1.452723
  validation accuracy: 			61.76 %
Epoch 7 of 15 took 356.563s
  training loss (in-iteration): 	1.416271
  validation accuracy: 			61.76 %
Epoch 8 of 15 took 356.819s
  training loss (in-iteration): 	1.391402
  validation accuracy: 			62.06 %
Epoch 9 of 15 took 356.900s
  training loss (in-iteration): 	1.364478
  validation accuracy: 			62.67 %
Epoch 10 of 15 took 356.703s
  training loss (in-iteration): 	1.

In [None]:
# Evaluate on the held-out test set in eval mode.
model.train(False)
test_batch_acc = []  # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0

# no_grad avoids building autograd graphs during inference.
with torch.no_grad():
    for X_batch, y_batch in test_batch_gen:
        # Use the selected device instead of a hard-coded .cuda().
        output = model(X_batch.to(device))
        y_pred = output.max(1)[1].cpu()
        hits = (y_batch.cpu() == y_pred).numpy()
        test_batch_acc.append(np.mean(hits))
        n_correct += int(hits.sum())
        n_seen += hits.size

# Per-sample accuracy: averaging per-batch means would over-weight a
# shorter final batch.
test_accuracy = n_correct / max(n_seen, 1)

print("test accuracy:\t\t{:.2f} %".format(test_accuracy * 100))

test accuracy:		63.25 %


Без особых усилий удалось увеличить точность предсказания в 1.7 раза. Это довольно неплохой результат :)

В качестве более глубокого исследования для достижения лучшего результата можно было бы увеличить количество "последних" слоев; рассмотреть различные функции активации; подобрать шаг обучения в зависимости от изменения значения функции потерь; увеличить количество эпох; попробовать различные алгоритмы оптимизации; попробовать различные размеры батча; провести аугментацию данных и др.