In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Directory for result files; passed to train() as its output_path argument below.
OUTPUT_ROOT = "output"

In [2]:
# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
import random
import numpy as np

def balancing(full_train_dataset, lables_num, train_len):
  """Build a random, class-balanced subset of a labelled dataset.

  Args:
    full_train_dataset: indexable dataset with a length whose items are
      (sample, label) pairs; every label must be an integer in
      range(lables_num) because it is used as a list index.
    lables_num: number of distinct labels (10 for the digits 0..9).
    train_len: desired size of the subset. It should be divisible by
      lables_num; any remainder is dropped, so each label contributes
      train_len // lables_num items.

  Returns:
    A torch.utils.data.Subset of full_train_dataset holding the same number
    of randomly chosen, non-repeating items for every label, in shuffled order.

  Raises:
    ValueError: if some label has fewer items than the requested per-label
      count (or the per-label count is negative).
  """
  # Number of items to keep for each label (integer floor division).
  label_group_num = int(train_len // lables_num)

  # Collect, per label, the indices of all items carrying that label.
  label_groups_index = [[] for _ in range(lables_num)]
  for i in range(len(full_train_dataset)):
    label = full_train_dataset[i][1]
    label_groups_index[label].append(i)

  # Draw the per-label samples without replacement and pool them together.
  picked = []
  for label, group in enumerate(label_groups_index):
    if label_group_num > len(group):
      raise ValueError(
        f"label {label} has only {len(group)} items, "
        f"but {label_group_num} per label were requested"
      )
    picked.extend(random.sample(group, label_group_num))

  # Build the index array once with an explicit integer dtype so that it stays
  # integral even when nothing was picked, then shuffle it in place.
  all_index = np.array(picked, dtype=int)
  np.random.shuffle(all_index)

  return torch.utils.data.Subset(full_train_dataset, all_index)

In [21]:
class Net(nn.Module):
    """Fully connected classifier with three hidden ReLU layers and 10 outputs.

    Args:
        neurons_num: width of each hidden layer.
        img_size: number of values in one flattened input image
            (the notebook passes 28*28 for MNIST).
    """

    def __init__(self, neurons_num, img_size):
        super().__init__()
        self.neurons_num = neurons_num
        self.img_size = img_size
        self.fc1 = nn.Linear(img_size, neurons_num)
        self.fc2 = nn.Linear(neurons_num, neurons_num)
        self.fc3 = nn.Linear(neurons_num, neurons_num)
        self.fc4 = nn.Linear(neurons_num, 10)

    def forward(self, x):
        """Flatten the input to rows of img_size values and return raw class scores."""
        hidden = x.view(-1, self.img_size)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = nn.functional.relu(layer(hidden))
        # The output layer has no activation; scores are returned as-is.
        return self.fc4(hidden)

    def info(self):
        """Return a multi-line text summary of the architecture."""
        summary_lines = (
            'layers: 4',
            f'neurons_num: {self.neurons_num}',
            f'img_size: {self.img_size}',
            'activation function: relu',
        )
        return '\n'.join(summary_lines)

In [5]:
def train_step(dataloader, model, loss_fn, optimizer):
    """Train the model for one epoch over the given dataloader.

    Each batch is moved to the module-level `device`, followed by a forward
    pass, a backward pass, an optimizer step and a gradient reset. The loss is
    printed on every 100th batch so progress is visible.
    """
    size = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and prediction error
        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagation, parameter update, then clear gradients for the next batch
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Progress output only on every 100th batch
        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = (step + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [6]:
def test(dataloader, model, loss_fn):
    """Evaluate the model on a dataloader without computing gradients.

    Batches are moved to the module-level `device`.

    Returns:
        A tuple (accuracy, loss): accuracy is the percentage of correctly
        classified samples, loss is the mean of the per-batch loss values.
    """
    model.eval()
    loss_total = 0
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            scores = model(inputs)
            loss_total += loss_fn(scores, targets).item()
            # The predicted class is the index of the highest score in each row
            matches = scores.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()
    mean_loss = loss_total / len(dataloader)
    accuracy = hits / len(dataloader.dataset)
    return 100*accuracy, mean_loss

In [7]:
import time
import numpy as np
import os
def train(model, epochs, train_dataloader, test_dataloader, optimizer, loss_fn, output_path):
    """Train the model and save per-epoch metrics for the training and test sets.

    After every epoch the model is evaluated on both dataloaders. When all
    epochs are done, the collected (accuracy, loss) pairs are written to
    `train.txt` and `test.txt` inside `output_path`, one line per epoch.

    Args:
        model: the network to train.
        epochs: number of passes over `train_dataloader`.
        train_dataloader: batches used for training and for the train metrics.
        test_dataloader: batches used for the test metrics.
        optimizer: optimizer updating the parameters of `model`.
        loss_fn: loss function called as loss_fn(prediction, target).
        output_path: directory for the result files; created if missing.
    """
    train_results = []
    test_results = []
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        epoch_start = time.time()
        train_step(train_dataloader, model, loss_fn, optimizer)
        print('learning took {:.2f} s'.format(time.time() - epoch_start))
        train_results.append(test(train_dataloader, model, loss_fn))
        test_results.append(test(test_dataloader, model, loss_fn))

    # Write into the directory that was actually requested (and just created),
    # not into a module-level default.
    os.makedirs(output_path, exist_ok=True)
    for file_name, results in (('train.txt', train_results), ('test.txt', test_results)):
        # reshape(-1, 2) keeps the two-column layout even when epochs == 0, so
        # the two-field format string always matches the array.
        table = np.array(results, dtype=float).reshape(-1, 2)
        np.savetxt(os.path.join(output_path, file_name), table, fmt='%.1f %.8f')
    print("Done!")

In [113]:
# Load MNIST (downloading into ./data if needed), with images converted to tensors
mnist_options = dict(root='./data', transform=transforms.ToTensor(), download=True)
train_dataset = datasets.MNIST(train=True, **mnist_options)
test_dataset = datasets.MNIST(train=False, **mnist_options)

In [118]:
# Replace the training set with a class-balanced random subset: 1000 items over 10 labels
train_dataset = balancing(train_dataset, lables_num=10, train_len=1000)

In [120]:
batch_size = 64

# Both loaders use the same batch size and reshuffle on every pass
loader_options = dict(batch_size=batch_size, shuffle=True)
train_dataloader = DataLoader(train_dataset, **loader_options)
test_dataloader = DataLoader(test_dataset, **loader_options)

# Peek at the first test batch to check tensor shapes and the label dtype
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [121]:
# Set up the model (800 neurons per hidden layer, 28*28 inputs), the loss and the optimizer
model = Net(neurons_num=800, img_size=28*28).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [122]:
# Run the full training; per-epoch metrics are written under OUTPUT_ROOT
train(
    model=model,
    epochs=1200,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    output_path=OUTPUT_ROOT,
)

Epoch 1
-------------------------------
loss: 2.304408  [   64/ 1000]
learning took 0.25 s
Epoch 2
-------------------------------
loss: 2.285063  [   64/ 1000]
learning took 0.18 s
Epoch 3
-------------------------------
loss: 2.256618  [   64/ 1000]
learning took 0.19 s
Epoch 4
-------------------------------
loss: 2.198928  [   64/ 1000]
learning took 0.22 s
Epoch 5
-------------------------------
loss: 2.050682  [   64/ 1000]
learning took 0.22 s
Epoch 6
-------------------------------
loss: 1.664004  [   64/ 1000]
learning took 0.36 s
Epoch 7
-------------------------------
loss: 1.062984  [   64/ 1000]
learning took 0.22 s
Epoch 8
-------------------------------
loss: 0.741294  [   64/ 1000]
learning took 0.28 s
Epoch 9
-------------------------------
loss: 0.579225  [   64/ 1000]
learning took 0.29 s
Epoch 10
-------------------------------
loss: 0.467049  [   64/ 1000]
learning took 0.23 s
Epoch 11
-------------------------------
loss: 0.412812  [   64/ 1000]
learning took 0.24