В коде я попытался реализовать архитектуру LeNet-5, описанную в статье https://medium.com/@siddheshb008/lenet-5-architecture-explained-3b559cb2d52b, для классификации изображений из набора данных CIFAR-10. Точность на тестовой выборке — 0.5993.

In [1]:
import numpy as np

import torch
from torch import nn
from torch.nn import functional as F

import torchvision
from torchvision import datasets, transforms

from matplotlib import pyplot as plt
from IPython.display import clear_output

In [None]:
# Fetch the CIFAR-10 train and test sets through torchvision (download enabled,
# files kept under ./cifar10_data); ToTensor is the only transform applied.
train_data = datasets.CIFAR10(
    root="./cifar10_data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_data = datasets.CIFAR10(
    root="./cifar10_data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Randomly hold out 20% of the training set for validation.
# NOTE: `train_data` is rebound to the 80% subset by the split below.
train_size = int(0.8 * len(train_data))
val_size = len(train_data) - train_size

train_data, val_data = torch.utils.data.random_split(
    dataset=train_data,
    lengths=[train_size, val_size],
)

# Batch generators: only the training loader reshuffles its samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, batch_size=128, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data, batch_size=128, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data, batch_size=128, shuffle=False
)

Definition of LeNet-5

In [3]:

class ConvNet(nn.Module):
    """LeNet-5-style CNN for 3-channel 32x32 images and 10 output classes.

    Layout: three 5x5 convolutions (6, 16 and 120 feature maps) with tanh
    activations and 2x2 max pooling after the first two, followed by two
    fully connected layers (120 -> 84 -> 10). ``forward`` returns raw logits.

    Args:
        use_batchnorm: When True, a ``BatchNorm2d`` layer is applied after
            each convolution, before the tanh. Defaults to False, which
            computes the plain conv -> tanh -> pool network; in that case
            ``norm1``/``norm2``/``norm3`` are parameter-free ``nn.Identity``
            placeholders, so they add no keys to ``state_dict()``.
    """

    def __init__(self, *, use_batchnorm=False):
        super().__init__()

        def make_norm(num_channels):
            # Identity keeps a single code path in forward() when batch
            # normalisation is switched off.
            if use_batchnorm:
                return nn.BatchNorm2d(num_channels)
            return nn.Identity()

        # Convolution layers (spatial sizes in the comments assume 32x32 input)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=(5, 5))  # 28x28
        self.norm1 = make_norm(6)
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))  # 14x14
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5, 5))  # 10x10
        self.norm2 = make_norm(16)
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))  # 5x5
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=(5, 5))  # 1x1
        # Each norm layer needs its own attribute: reusing `norm2` here would
        # replace the 16-channel layer that belongs to conv2.
        self.norm3 = make_norm(120)

        # Fully connected layers
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(1 * 1 * 120, 84)
        self.fc2 = nn.Linear(84, 10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for ``x`` of shape (batch, 3, 32, 32)."""
        x = self.pool1(F.tanh(self.norm1(self.conv1(x))))
        x = self.pool2(F.tanh(self.norm2(self.conv2(x))))
        x = F.tanh(self.norm3(self.conv3(x)))
        x = self.flatten(x)
        x = F.tanh(self.fc1(x))
        # No softmax here: the raw logits are returned.
        return self.fc2(x)

In [4]:
# Instantiate the network with its default constructor arguments.
# NOTE: a later cell (In [7]) builds a fresh ConvNet again before training,
# so this instance is replaced and never trained.
model = ConvNet()

In [5]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's parameters and buffers to the selected device.
model = model.to(device)

In [6]:
def evaluate(model, dataloader, loss_fn):
    """Compute accuracy and mean batch loss of ``model`` over ``dataloader``.

    The model is put into eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards. Batches are moved to the
    device of the model's first parameter (CPU if the model has none), so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: ``nn.Module`` that maps an input batch to per-class scores
            with the class axis at dim 1.
        dataloader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar
            tensor.

    Returns:
        Tuple ``(accuracy, mean_loss)``. ``accuracy`` is a 0-d float32 NumPy
        array (correct predictions / number of targets). ``mean_loss`` is the
        unweighted mean of the per-batch losses, so a smaller final batch
        counts as much as a full one.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    losses = []
    num_correct = 0
    num_elements = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X_batch, y_batch in dataloader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)

                logits = model(X_batch)
                losses.append(loss_fn(logits, y_batch).item())

                y_pred = torch.argmax(logits, dim=1)
                # .item() keeps the counters plain Python ints.
                num_correct += (y_pred == y_batch).sum().item()
                num_elements += len(y_batch)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if num_elements == 0:
        raise ValueError("evaluate() received a dataloader with no samples")

    accuracy = np.array(num_correct / num_elements, dtype=np.float32)
    return accuracy, np.mean(losses)

def train(model, loss_fn, optimizer, n_epoch=3, log_every=100):
    """Train ``model`` for ``n_epoch`` epochs, validating after each epoch.

    Reads the module-level ``train_loader`` and ``val_loader`` and calls the
    module-level ``evaluate`` for validation. Batches are moved to the device
    of the model's first parameter (CPU if the model has none).

    Args:
        model: ``nn.Module`` producing per-class scores with classes at dim 1.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer holding the model's parameters.
        n_epoch: Number of passes over ``train_loader``.
        log_every: Print the average train loss and accuracy every this many
            iterations, computed over exactly those iterations. A falsy value
            (``0`` or ``None``) disables the periodic printout.

    Returns:
        The same ``model`` object, left in eval mode after the last epoch.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(n_epoch):

        print("Epoch:", epoch+1)

        model.train(True)

        # Metrics of the current logging window only; cleared after each print
        # so the printed averages match what the message says.
        window_losses = []
        window_accuracies = []
        for i, (X_batch, y_batch) in enumerate(train_loader):
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            # Clear gradients first so nothing left over from before this
            # call (or from the previous step) leaks into the update.
            optimizer.zero_grad()
            logits = model(X_batch)
            loss = loss_fn(logits, y_batch)
            loss.backward()
            optimizer.step()

            window_losses.append(loss.item())
            model_answers = torch.argmax(logits, dim=1)
            # Store plain floats rather than tensors so np.mean works on them directly.
            window_accuracies.append((model_answers == y_batch).float().mean().item())

            if log_every and (i+1) % log_every == 0:
                print(f"Средние train лосс и accuracy на последних {log_every} итерациях:",
                      np.mean(window_losses), np.mean(window_accuracies), end='\n')
                window_losses.clear()
                window_accuracies.clear()

        model.train(False)

        val_accuracy, val_loss = evaluate(model, val_loader, loss_fn=loss_fn)
        print("Эпоха {}/{}: val лосс и accuracy:".format(epoch+1, n_epoch,),
                      val_loss, val_accuracy, end='\n')

    return model

In [7]:
# Fresh, untrained network placed on the GPU when CUDA is available (CPU otherwise).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)

# Cross-entropy loss for the classifier.
loss_fn = nn.CrossEntropyLoss()

# Adam over all model parameters with a fixed step size.
learning_rate = 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Run the training loop for 10 epochs. train() returns the model object it was
# given, so rebinding `model` keeps pointing at the same (now trained) network.
model = train(model, loss_fn, optimizer, n_epoch=10)

In [9]:
# Final check on the held-out test loader. evaluate() returns
# (accuracy, mean loss); only the accuracy is kept and printed here.
test_accuracy, _ = evaluate(model, test_loader, loss_fn)
print('Accuracy:', test_accuracy)

Accuracy: 0.5993
