Импорт библиотек

In [None]:
import os
from tqdm.notebook import tqdm, trange

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time

from PIL import Image

import seaborn as sns

Подготовка

In [None]:
# Show whether PyTorch can see a CUDA device (the value is the cell's output).
torch.cuda.is_available()

In [None]:
# Ask PyTorch's caching allocator to release GPU memory it holds but is not using.
torch.cuda.empty_cache()

In [None]:
# Latin-script labels for the 30 letter classes.
# NOTE(review): `class_names` is rebound further down from the training
# ImageFolder's `.classes`, so this literal only applies until that cell runs.
class_names = ['a', 'b', 'v', 'g', 'd', 'e', 'zh', 'z', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'f', 'h', 'c', 'ch', 'sh', 'shya', 'aie', 'znak', 'ae', 'you', 'ya']

In [None]:
# Cyrillic letters, 30 entries — presumably the display names for the classes,
# position by position; TODO confirm which class ordering this is meant to follow.
class_names_rus = ['а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'к', 'л', 'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ы', 'ь', 'э', 'ю', 'я']

Датасет

In [None]:
# Both splits share the same deterministic preprocessing: resize, take the
# central 244x244 crop, then convert the image to a tensor.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize(244),
        transforms.CenterCrop(244),
        transforms.ToTensor(),
    ])
    for split in ('train', 'val')
}

data_dir = './letters_full'

# One ImageFolder per split, rooted at <data_dir>/<split>.
img_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), pipeline)
    for split, pipeline in data_transforms.items()
}

# Shuffled mini-batches of 4 images, loaded by 2 worker processes.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=2
    )
    for split, dataset in img_datasets.items()
}
dataset_sizes = {split: len(dataset) for split, dataset in img_datasets.items()}

# Class labels as discovered by the training ImageFolder.
class_names = img_datasets['train'].classes

use_gpu = torch.cuda.is_available()

In [None]:
# Show the number of classes and the number of samples in each split.
len(class_names), dataset_sizes

In [None]:
# Show the shape of the image tensor of one training batch.
next(iter(dataloaders['train']))[0].size()

In [None]:
def imshow(inp, title=None):
    """Draw an image tensor on a new matplotlib figure.

    The tensor is turned into a NumPy array, its axes are reordered to
    (1, 2, 0), and the values are clipped to the range [0, 1] before drawing.

    Args:
        inp: Tensor to display; it must support ``.numpy()``.
        title: Optional figure title; nothing is set when it is ``None``.
    """
    array = inp.numpy()
    image = np.clip(np.transpose(array, (1, 2, 0)), 0, 1)

    plt.figure(figsize=(15, 12))
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Brief pause so the figure gets a chance to render.
    plt.pause(0.001)


# Grab one batch (images, labels) — note that it comes from the 'val' loader
inputs, classes = next(iter(dataloaders['val']))

# Tile the images of the batch into a single grid image
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

Код обучения и валидации

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders`` (sizes come from ``dataset_sizes``, device
    placement from ``use_gpu``). Per-epoch loss and accuracy are recorded for
    both phases. Whenever validation accuracy improves, the weights are
    copied in memory and also written to the file ``'stdict'``.

    Args:
        model: Network to train; it is modified in place.
        criterion: Loss callable used as ``criterion(outputs, labels)``. The
            per-epoch loss assumes it returns the mean over the batch (the
            default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after that
            epoch's training pass.
        num_epochs: Number of epochs to run.

    Returns:
        A tuple ``(model, losses, accuracies)``; ``losses`` and
        ``accuracies`` are dicts with keys ``'train'`` and ``'val'`` holding
        one float per epoch.
    """
    import copy  # local import: only needed to snapshot the best weights

    since = time.time()

    # state_dict() returns references to the live tensors, so a deep copy is
    # required; otherwise the "best" snapshot silently follows later updates.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # Per-epoch history
    losses = {'train': [], 'val': []}
    accuracies = {'train': [], 'val': []}
    pbar = trange(num_epochs, desc='Epoch:')

    for _ in pbar:
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables training behaviour, train(False) == eval()
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders[phase], leave=False, desc=f'{phase} iter:'):
                # Move the batch to the GPU when one is available
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                if is_train:
                    optimizer.zero_grad()

                # Track gradients only while training; validation needs no
                # autograd graph, which saves memory and time.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()

                preds = torch.argmax(outputs, -1)

                # Weight the batch-mean loss by the batch size so that the
                # division by the dataset size below yields a per-sample mean.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(torch.sum(preds == labels))

            # Advance the LR schedule only after this epoch's optimizer
            # steps, as PyTorch expects.
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            losses[phase].append(epoch_loss)
            accuracies[phase].append(epoch_acc)
            pbar.set_description('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc
            ))

            # Keep (and persist) the weights with the best validation accuracy
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, 'stdict')

    # Training summary
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best weights before handing the model back
    model.load_state_dict(best_model_wts)
    return model, losses, accuracies

In [None]:
def evaluate(model):
    """Return ``model``'s accuracy on the module-level validation loader.

    The model is switched to eval mode, every batch of ``dataloaders['val']``
    is classified (on the GPU when ``use_gpu`` is true), and the number of
    correct predictions is divided by ``dataset_sizes['val']``.

    Args:
        model: Network to evaluate; left in eval mode afterwards.

    Returns:
        Fraction of validation samples whose predicted class equals the label.
    """
    model.eval()

    running_correct = 0
    # Inference only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            output = model(inputs)
            # Index of the highest score along the class dimension
            _, predicted = torch.max(output, 1)

            running_correct += int(torch.sum(predicted == labels))

    return running_correct / dataset_sizes['val']

VGG16

In [None]:
# Build torchvision's VGG16 with the ImageNet-1k (V1) pretrained weights.
model_extractor = models.vgg16(weights='VGG16_Weights.IMAGENET1K_V1')

In [None]:
# Display the network's layer structure as the cell output.
model_extractor

In [None]:
# Transfer learning: freeze every pretrained weight so that only the new
# classifier head (created below, trainable by default) is updated.
for param in model_extractor.parameters():
    param.requires_grad = False

# Derive the head's sizes instead of hard-coding them: the first classifier
# layer's input width (previously written out as 25088) and one output per
# class (previously written out as 30).
num_features = model_extractor.classifier[0].in_features
num_classes = len(class_names)

# Replacement classifier head
model_extractor.classifier = nn.Sequential(
    nn.Linear(num_features, 868),
    nn.BatchNorm1d(num_features=868),
    nn.ReLU(),
    nn.Linear(868, num_classes)
)

if use_gpu:
    model_extractor = model_extractor.cuda()

# Loss function and optimizer; only the new head's parameters are optimised
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_extractor.classifier.parameters(), lr=1e-3)

# Multiply the learning rate by 0.1 every 3 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Run training; keep the trained model and the per-epoch loss/accuracy history
model_extractor, losses, accuracies = train_model(
    model=model_extractor,
    criterion=loss_fn,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

Графики обучения

In [None]:
# Loss curves for the training and validation phases
sns.set(style="whitegrid", font_scale=1.4)

plt.figure(figsize=(12, 8))
for split, legend_label in (('train', "train"), ('val', "validation")):
    plt.plot(losses[split], label=legend_label)
plt.title('Loss')
plt.legend()
plt.show()

In [None]:
# Accuracy curves for the training and validation phases
sns.set(style="whitegrid", font_scale=1.4)

plt.figure(figsize=(12, 8))
for split, legend_label in (('train', "train"), ('val', "validation")):
    plt.plot(accuracies[split], label=legend_label)
plt.title('Accuracy')
plt.legend()
plt.show()

In [None]:
# Reload the weights that train_model wrote to 'stdict' (its best-validation
# checkpoint), switch to eval mode and print the validation accuracy.
model_extractor.load_state_dict(torch.load('stdict'))
model_extractor.eval()
print(f"Accuracy: {evaluate(model_extractor)}")

In [None]:
# Save the whole model object (not just its state_dict) to disk.
# NOTE(review): the object is pickled, so loading it later presumably needs
# the same model classes to be importable — confirm for the target environment.
torch.save(model_extractor, 'new_dataset1_v7.2.pth')

Проверка

In [None]:
# Same preprocessing as the train/val pipelines: resize, central 244x244
# crop, conversion to a tensor.
img_transforms = transforms.Compose([
    transforms.Resize(244),
    transforms.CenterCrop(244),
    transforms.ToTensor(),
])
# Force three channels: a PNG may be grayscale, palettised or RGBA, whereas
# the network expects RGB input.
img = Image.open(r"./letters_for_hand_check/_1011.png").convert('RGB')
img_transformed = img_transforms(img)
# Use the GPU when there is one instead of requiring CUDA unconditionally.
img_transformed = img_transformed.to(
    torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

In [None]:
# NOTE(security): this file holds a pickled model object, and unpickling can
# execute arbitrary code — only load checkpoints you produced yourself.
# NOTE(review): newer PyTorch releases may need weights_only=False to load a
# whole-model pickle — confirm against the installed version.
# Map the checkpoint onto the GPU when available, otherwise onto the CPU,
# rather than requiring CUDA unconditionally.
model = torch.load(
    'new_dataset1_v7.2.pth',
    map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)
model.eval()

In [None]:
# Classify the single image as a batch of one. `img_transformed` itself is
# left untouched, so this cell can be re-run safely.
with torch.no_grad():  # inference only, no autograd graph needed
    batch = img_transformed.unsqueeze(0).to(next(model.parameters()).device)
    logits = model(batch)
# Take the argmax along the class dimension (not over the flattened output)
# and pick the only row of the batch.
outp = torch.argmax(logits, dim=1)[0]
print(class_names[outp], outp)

Точность по классам

In [None]:
# Same preprocessing as the train/val pipelines: resize, central 244x244
# crop, conversion to a tensor.
img_transforms = transforms.Compose([
    transforms.Resize(244),
    transforms.CenterCrop(244),
    transforms.ToTensor(),
])

# NOTE(security): the file holds a pickled model object, and unpickling can
# execute arbitrary code — only load checkpoints you produced yourself.
model = torch.load('new_dataset1_v7.2.pth', map_location=torch.device('cpu'))
model.eval()

# Number of hand-picked test images per class: files are numbered 1..10
samples_per_class = 10

# NOTE: this rebinds the global `accuracies` (the training history returned by
# train_model) to a plain list of per-class accuracies.
accuracies = []
wrongs = []
outp_wrongs = []

with torch.no_grad():  # inference only, no autograd graph needed
    for i, class_name in enumerate(class_names):
        corrects = 0
        for j in range(1, samples_per_class + 1):
            # Force RGB so grayscale/RGBA PNGs still match the network input
            img = Image.open(
                fr'./letters_accuracy_by_class/{i + 1}/{class_name} ({j}).png'
            ).convert('RGB')
            img_transformed = img_transforms(img)
            # Batch of one; argmax along the class dimension
            outp = int(torch.argmax(model(img_transformed.unsqueeze(0)), dim=1))
            if outp == i:
                corrects += 1
            else:
                wrongs.append(fr'{class_name}({j}).png')
                outp_wrongs.append(class_names[outp])
        accuracies.append(corrects / samples_per_class)

print('Accuracies of model v7.2 (with val accuracy 0.968):')
for class_name, class_accuracy in zip(class_names, accuracies):
    print(f'{class_name}: {class_accuracy}')
print(len(wrongs))
for wrong_file, wrong_output in zip(wrongs, outp_wrongs):
    print(f'file: {wrong_file}, output: {wrong_output}')