In [1]:
import torch
import torchvision.transforms as transforms
from torch import optim
import numpy as np
import torch.nn.functional as F
from torch import nn
import torchvision

In [2]:
train_dataset = torchvision.datasets.CIFAR10(root='data/',
                                             train=True,  
                                             transform=transforms.ToTensor(), 
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transforms.ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [3]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)

In [4]:
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=8,
                                          shuffle=False)

In [5]:
image, label = train_dataset[0]
print(image.size())
print(label)

torch.Size([3, 32, 32])
6


In [6]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [7]:
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 8 * hidden_dim)
        self.fc2 = nn.Linear(8 * hidden_dim, 4 * hidden_dim)
        self.fc3 = nn.Linear(4 * hidden_dim, 4 * hidden_dim)
        self.fc4 = nn.Linear(4 * hidden_dim, 2 * hidden_dim)
        self.fc5 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.fc6 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.elu(x)
        x = self.fc2(x)
        x = F.elu(x)
        x = self.fc3(x)
        x = F.elu(x)
        x = self.fc4(x)
        x = F.elu(x)
        x = self.fc5(x)
        x = F.elu(x)
        x = self.fc6(x)
        return x
    
    def predict(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.elu(x)
        x = self.fc2(x)
        x = F.elu(x)
        x = self.fc3(x)
        x = F.elu(x)       
        x = self.fc4(x)
        x = F.elu(x)
        x = self.fc5(x)
        x = F.elu(x)
        x = self.fc6(x)
        x = F.softmax(x)
        return x

net = Net(3072, 200, 10)
net.train()

Net(
  (fc1): Linear(in_features=3072, out_features=1600, bias=True)
  (fc2): Linear(in_features=1600, out_features=800, bias=True)
  (fc3): Linear(in_features=800, out_features=800, bias=True)
  (fc4): Linear(in_features=800, out_features=400, bias=True)
  (fc5): Linear(in_features=400, out_features=200, bias=True)
  (fc6): Linear(in_features=200, out_features=10, bias=True)
)

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.00001)

In [9]:
# вычисляет accuracy по всей выборке
# batch = 0 вычисляется accuracy только по первому бачу
def accuracy(net, loader, batch=None):
    net.eval()
    len_total = 0
    predict_ture = 0
    for i, data in enumerate(loader):
        images, labels = data[0], data[1]
        len_total += len(labels) 
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        predict_ture += (labels==predicted).sum().item()
        if batch == 0:
            break
    net.train()
    return predict_ture/len_total

По условию задачи необходимо оценивать сеть по первому батчу (что на мой взгляд не совсем корректно).
Поэтому в решении предложен поиск сразу двух вариантов
- обучение сети с максимальным accuracy по первому батчу
- обучение сети с максимальным accuracy по всей тестовой выборке

In [10]:
num_epochs = 10
acc_max = 0.0
acc_max_batch1 = 0.0
PATH_MODEL = './best_model.pth'
PATH_MODEL = './first_batch_model.pth'


for epoch in range(num_epochs):
    running_loss = 0.0
    running_items = 0.0

    for i, data in enumerate(train_loader):
        inputs, labels = data[0], data[1]

        # Обнуляем градиент
        optimizer.zero_grad()
        # Делаем предсказание
        outputs = net(inputs)
        # Рассчитываем лосс-функцию
        loss = criterion(outputs, labels)
        # Делаем шаг назад по лоссу
        loss.backward()
        # Делаем шаг нашего оптимайзера
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            acc = accuracy(net, test_loader)
            first_acc = accuracy(net, test_loader, 0)
            # сохраним модель если метрика на тесте лучшая
            if acc > acc_max:
                acc_max = acc
                torch.save(net, PATH_MODEL)
            # сохраним модель если метрика на первом батче лучшая
            if first_acc > acc_max_batch1:
                acc_max_batch1 = first_acc
                torch.save(net, PATH_MODEL_BATCH)
            print(f'Epoch [{epoch + 1:2}/{num_epochs}]. ' \
                  f'Step [{i + 1:3}/{len(train_loader)}]. ' \
                  f'Loss: {running_loss / running_items:.3f},  ' \
                  f'acc_train: {accuracy(net, train_loader):.4f},  ' \
                  f'acc_test: {acc:.4f},  ' \
                 f'acc_first: {first_acc:.3}')
            running_loss, running_items = 0.0, 0.0

print('Training is finished!')

Epoch [ 1/10]. Step [  1/782]. Loss: 0.036,  acc_train: 0.1002,  acc_test: 0.1011,  acc_first: 0.0
Epoch [ 1/10]. Step [301/782]. Loss: 0.034,  acc_train: 0.2712,  acc_test: 0.2736,  acc_first: 0.375
Epoch [ 1/10]. Step [601/782]. Loss: 0.030,  acc_train: 0.3097,  acc_test: 0.3031,  acc_first: 0.25
Epoch [ 2/10]. Step [  1/782]. Loss: 0.029,  acc_train: 0.3234,  acc_test: 0.3201,  acc_first: 0.5
Epoch [ 2/10]. Step [301/782]. Loss: 0.029,  acc_train: 0.3401,  acc_test: 0.3373,  acc_first: 0.375
Epoch [ 2/10]. Step [601/782]. Loss: 0.029,  acc_train: 0.3486,  acc_test: 0.3460,  acc_first: 0.375
Epoch [ 3/10]. Step [  1/782]. Loss: 0.027,  acc_train: 0.3561,  acc_test: 0.3508,  acc_first: 0.25
Epoch [ 3/10]. Step [301/782]. Loss: 0.028,  acc_train: 0.3690,  acc_test: 0.3658,  acc_first: 0.375
Epoch [ 3/10]. Step [601/782]. Loss: 0.028,  acc_train: 0.3749,  acc_test: 0.3707,  acc_first: 0.375
Epoch [ 4/10]. Step [  1/782]. Loss: 0.027,  acc_train: 0.3717,  acc_test: 0.3685,  acc_first: 0.

In [11]:
# загрузим модели
best_net = torch.load(PATH_MODEL)
first_batch_net = torch.load(PATH_MODEL_BATCH)

In [18]:
print(f'first_batch_net:\n'\
      f'   total_accuracy:       {accuracy(first_batch_net, test_loader)}\n' \
      f'   first_batch_accuracy: {accuracy(first_batch_net, test_loader, 0)}')
print(f'Best_net:\n'
      f'   total_accuracy:       {accuracy(best_net, test_loader)}\n' \
      f'   first_batch_accuracy: {accuracy(best_net, test_loader, 0)}')

first_batch_net:
   total_accuracy:       0.3867
   first_batch_accuracy: 0.625
Best_net:
   total_accuracy:       0.4264
   first_batch_accuracy: 0.625
