In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
OUTPUT_ROOT = "output"

In [2]:
# Pick the compute backend: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
import random
import numpy as np

# Class-balanced subsampling of the training data.
def balancing(full_train_dataset, lables_num, train_len, seed=None):
  """Return a random, class-balanced ``Subset`` of ``full_train_dataset``.

  Args:
    full_train_dataset: indexable dataset whose items are ``(sample, label)``
      pairs with integer labels in ``range(lables_num)``.
    lables_num: number of distinct labels (e.g. 10 for the digits 0-9).
    train_len: requested size of the subset. It should be divisible by
      ``lables_num``; any remainder is dropped.
    seed: optional seed. When given, sampling and shuffling use a private
      ``random.Random(seed)`` so the same subset is produced on every call.
      When ``None`` the global ``random`` state is used.

  Returns:
    A ``torch.utils.data.Subset`` holding ``train_len // lables_num`` randomly
    chosen, non-repeating samples of every label, in shuffled order. Its
    indices are stored as an integer numpy array.

  Raises:
    ValueError: if some label has fewer samples than requested per label.
  """
  # Number of samples to keep for each label.
  label_group_num = int(train_len // lables_num)

  # One RNG for both sampling and shuffling, so a single seed controls the result.
  rng = random.Random(seed) if seed is not None else random

  # Collect, per label, the indices of all samples carrying that label.
  label_groups_index = [[] for _ in range(lables_num)]
  for i in range(len(full_train_dataset)):
    label = int(full_train_dataset[i][1])
    label_groups_index[label].append(i)

  # Draw the same number of distinct indices from every group.
  all_index = []
  for label, group in enumerate(label_groups_index):
    if len(group) < label_group_num:
      raise ValueError(
        f"label {label} has only {len(group)} samples, "
        f"but {label_group_num} were requested"
      )
    all_index.extend(rng.sample(group, label_group_num))
  # Mix the labels so the subset is not ordered label by label.
  rng.shuffle(all_index)

  # Build the training subset from the selected indices.
  return torch.utils.data.Subset(full_train_dataset, np.array(all_index, dtype=int))

In [4]:
# Model definition
class Net4(nn.Module):
    """Fully connected classifier with four linear layers and ReLU activations.

    Args:
        neurons_num: number of neurons in every hidden layer.
        img_size: number of input features per sample (flattened image size);
            inputs are reshaped to ``(-1, img_size)`` before the first layer.

    The output layer always has 10 units.
    """

    def __init__(self, neurons_num, img_size):
        super().__init__()
        self.img_size = img_size
        self.neurons_num = neurons_num
        # Input layer, two hidden layers of equal width, then the 10-way output.
        self.fc1 = nn.Linear(img_size, neurons_num)
        self.fc2 = nn.Linear(neurons_num, neurons_num)
        self.fc3 = nn.Linear(neurons_num, neurons_num)
        self.fc4 = nn.Linear(neurons_num, 10)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, img_size)`` and return the raw output scores."""
        relu = nn.functional.relu
        hidden = x.view(-1, self.img_size)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = relu(layer(hidden))
        # No activation after the last layer.
        return self.fc4(hidden)

    def info(self):
        """Return a newline-terminated, multi-line description of the model."""
        lines = (
            'layers: 4',
            f'neurons_num: {self.neurons_num}',
            f'img_size: {self.img_size}',
            'activation function: relu',
        )
        return ''.join(line + '\n' for line in lines)

In [5]:
# Model definition
class Net6(nn.Module):
    """Fully connected classifier with six linear layers and ReLU activations.

    Args:
        neurons_num: number of neurons in every hidden layer.
        img_size: number of input features per sample (flattened image size);
            inputs are reshaped to ``(-1, img_size)`` before the first layer.

    The output layer always has 10 units.
    """

    def __init__(self, neurons_num, img_size):
        super().__init__()
        self.img_size = img_size
        self.neurons_num = neurons_num
        # Input layer, four hidden layers of equal width, then the 10-way output.
        self.fc1 = nn.Linear(img_size, neurons_num)
        self.fc2 = nn.Linear(neurons_num, neurons_num)
        self.fc3 = nn.Linear(neurons_num, neurons_num)
        self.fc4 = nn.Linear(neurons_num, neurons_num)
        self.fc5 = nn.Linear(neurons_num, neurons_num)
        self.fc6 = nn.Linear(neurons_num, 10)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, img_size)`` and return the raw output scores."""
        relu = nn.functional.relu
        hidden = x.view(-1, self.img_size)
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            hidden = relu(layer(hidden))
        # No activation after the last layer.
        return self.fc6(hidden)

    def info(self):
        """Return a newline-terminated, multi-line description of the model."""
        lines = (
            'layers: 6',
            f'neurons_num: {self.neurons_num}',
            f'img_size: {self.img_size}',
            'activation function: relu',
        )
        return ''.join(line + '\n' for line in lines)

In [6]:
# Training for a single epoch
def train_step(dataloader, model, loss_fn, optimizer, show_progress=False):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute.
        model: module to train; it is switched to training mode.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning the loss.
        optimizer: optimizer stepped once per batch.
        show_progress: when true, print the loss on every 100th batch to
            confirm that training is progressing.

    Batches are moved to the module-level ``device`` before the forward pass.
    """
    total_samples = len(dataloader.dataset)
    model.train()
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Prediction error
        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagation; gradients are cleared after the step
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Progress output
        if not show_progress or batch_idx % 100 != 0:
            continue
        seen = (batch_idx + 1) * len(inputs)
        print(f"loss: {batch_loss.item():>7f}  [{seen:>5d}/{total_samples:>5d}]")

In [7]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return 100*correct, test_loss

In [19]:
# Train the model and collect its results on the training and test sets.
def train(model, epochs, train_dataloader, test_dataloader, optimizer, loss_fn, dont_skip=-1, test_every=1, initital_epoch=1):
    """Train for ``epochs`` epochs, evaluating on a configurable schedule.

    Args:
        model: module to train.
        epochs: number of epochs to run.
        train_dataloader: batches used for training and for the training-set
            evaluation.
        test_dataloader: batches used for the test-set evaluation.
        optimizer: optimizer passed to ``train_step``.
        loss_fn: loss callable passed to ``train_step`` and ``test``.
        dont_skip: evaluation is never skipped before this (0-based) epoch;
            a negative value means evaluation is never skipped.
        test_every: after ``dont_skip``, evaluate only on epochs whose 0-based
            index is a multiple of this value; the last epoch is always
            evaluated.
        initital_epoch: number recorded for the first epoch in the results.

    Returns:
        ``(train_results, test_results)``: two lists of
        ``(epoch_number, accuracy, loss)`` tuples, one entry per evaluated epoch.
    """
    always_until = epochs if dont_skip < 0 else dont_skip
    final_epoch = epochs - 1
    train_results, test_results = [], []
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")
        train_step(train_dataloader, model, loss_fn, optimizer)

        should_eval = epoch < always_until or epoch % test_every == 0 or epoch == final_epoch
        if not should_eval:
            continue

        epoch_number = initital_epoch + epoch
        # Training set first, then test set.
        for loader, results in ((train_dataloader, train_results), (test_dataloader, test_results)):
            accuracy, loss = test(loader, model, loss_fn)
            results.append((epoch_number, accuracy, loss))
    print("Done!")
    return train_results, test_results

In [13]:
# Load the MNIST training and test splits from ./data (download=True is passed
# to torchvision); every image is converted with transforms.ToTensor().
# NOTE: train_num is reassigned in the following cell before it is used.
train_num = 60000
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

In [14]:
# Replace train_dataset with a class-balanced random subset of train_num
# samples spread over 10 labels.
# NOTE: this rebinds train_dataset, so re-running the cell feeds the already
# reduced subset back into balancing(); re-run the loading cell first.
train_num = 100
train_dataset = balancing(train_dataset, 10, train_num)

In [15]:
batch_size = 128

# Both loaders are created with shuffle=True, including the test loader.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Sanity check: print the shapes and label dtype of the first test batch only.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([128, 1, 28, 28])
Shape of y: torch.Size([128]) torch.int64


In [16]:
import os
import numpy as np
import time
# First training run: train a freshly initialised Net4 with SGD, then write the
# results, run metadata, model and training subset to a new folder under
# OUTPUT_ROOT.
# 60 * (60000 // train_num) epochs; presumably sized so the number of samples
# processed matches 60 passes over 60000 samples -- TODO confirm.
epochs = 60*(60000//train_num)
# epochs = 1200
# Evaluate after each of the first `dont_skip` epochs; afterwards only when the
# 0-based epoch index is a multiple of `test_every`, plus the last epoch.
dont_skip = 200
test_every = 200
learning_rate = 0.1
# Model initialisation
model = Net4(800, 28*28).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_results, test_results = train(
      model=model, 
      epochs=epochs, 
      test_dataloader=test_dataloader, 
      train_dataloader=train_dataloader, 
      optimizer=optimizer, 
      loss_fn=loss_fn,
      dont_skip=dont_skip,
      test_every=test_every
      )

# Generate a unique folder name from the current timestamp
# ('.' in the timestamp is replaced by '_').
cur_time = (str(time.time())).replace('.', '_')
folder_path_base = os.path.join(OUTPUT_ROOT, 'treck_' + cur_time)
# Make sure the name is not taken: append an increasing counter while a folder
# with the candidate name already exists.
cnt = 0 
folder_path = folder_path_base
while os.path.isdir(folder_path):
      folder_path = folder_path_base + '_' + str(cnt) 
      cnt += 1 
os.makedirs(folder_path, exist_ok=True)
# One row per evaluated epoch: epoch number, accuracy in percent, loss.
np.savetxt(os.path.join(folder_path, 'train_results.txt'), np.array(train_results), fmt='%d %.2f %.8f')
np.savetxt(os.path.join(folder_path, 'test_results.txt'), np.array(test_results), fmt='%d %.2f %.8f')
# Human-readable description of the run configuration.
with open(os.path.join(folder_path, 'info.txt'), 'w+') as f:
      f.write(model.info())
      f.write(f'Epochs: {epochs}\n')
      f.write(f'unskippable epochs: {dont_skip}\n')
      f.write(f'test every {test_every} epochs\n')
      f.write(f'loss function: {loss_fn}\n')
      f.write(f'optimizer: {optimizer}\n')
      f.write(f'batch size: {batch_size}\n')
      f.write(f'train data size: {train_num}\n')
# Save the model (the whole module object is passed to torch.save, not a
# state_dict).
# NOTE(review): loading it back presumably needs the Net4 class to be
# importable -- confirm before relying on these files elsewhere.
torch.save(model, os.path.join(folder_path, 'model.pt'))
# Save the training subset that was used
torch.save(train_dataset, os.path.join(folder_path, 'train.pt'))


Epoch 1
-------------------------------
Epoch 2
-------------------------------
Epoch 3
-------------------------------
Epoch 4
-------------------------------
Epoch 5
-------------------------------
Epoch 6
-------------------------------
Epoch 7
-------------------------------
Epoch 8
-------------------------------
Epoch 9
-------------------------------
Epoch 10
-------------------------------
Epoch 11
-------------------------------
Epoch 12
-------------------------------
Epoch 13
-------------------------------
Epoch 14
-------------------------------
Epoch 15
-------------------------------
Epoch 16
-------------------------------
Epoch 17
-------------------------------
Epoch 18
-------------------------------
Epoch 19
-------------------------------
Epoch 20
-------------------------------
Epoch 21
-------------------------------
Epoch 22
-------------------------------
Epoch 23
-------------------------------
Epoch 24
-------------------------------
Epoch 25
----------------

In [20]:
import os
import numpy as np
import time
# Continuation run: keeps training the `model` already present in the kernel
# (model creation is commented out below) and reuses `learning_rate` from an
# earlier cell, so this cell fails on a fresh kernel unless those cells ran.
# epochs = 20*(60000//train_num)
epochs = 36000
# dont_skip = 0: no epoch is force-evaluated; evaluation happens when the
# 0-based epoch index is a multiple of test_every, plus the last epoch.
dont_skip = 0
test_every = 400
# Offset for the epoch numbers written to the result files.
# NOTE(review): the 12000 term presumably accounts for an intermediate run that
# is not part of this notebook -- confirm.
initital_epoch = 60*(60000//train_num) + 12000 + 1
#learning_rate = 0.05
# Model initialisation (disabled: the existing model is trained further)
#model = Net4(800, 28*28).to(device)
loss_fn = nn.CrossEntropyLoss()
# A new SGD optimizer instance is created for this run.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_results, test_results = train(
      model=model, 
      epochs=epochs, 
      test_dataloader=test_dataloader, 
      train_dataloader=train_dataloader, 
      optimizer=optimizer, 
      loss_fn=loss_fn,
      dont_skip=dont_skip,
      test_every=test_every,
      initital_epoch=initital_epoch
      )

# Generate a unique folder name from the current timestamp
# ('.' in the timestamp is replaced by '_').
cur_time = (str(time.time())).replace('.', '_')
folder_path_base = os.path.join(OUTPUT_ROOT, 'treck_' + cur_time)
# Make sure the name is not taken: append an increasing counter while a folder
# with the candidate name already exists.
cnt = 0 
folder_path = folder_path_base
while os.path.isdir(folder_path):
      folder_path = folder_path_base + '_' + str(cnt) 
      cnt += 1 
os.makedirs(folder_path, exist_ok=True)
# One row per evaluated epoch: epoch number, accuracy in percent, loss.
np.savetxt(os.path.join(folder_path, 'train_results.txt'), np.array(train_results), fmt='%d %.2f %.8f')
np.savetxt(os.path.join(folder_path, 'test_results.txt'), np.array(test_results), fmt='%d %.2f %.8f')
# Human-readable description of the run configuration
# (initital_epoch is not recorded here).
with open(os.path.join(folder_path, 'info.txt'), 'w+') as f:
      f.write(model.info())
      f.write(f'Epochs: {epochs}\n')
      f.write(f'unskippable epochs: {dont_skip}\n')
      f.write(f'test every {test_every} epochs\n')
      f.write(f'loss function: {loss_fn}\n')
      f.write(f'optimizer: {optimizer}\n')
      f.write(f'batch size: {batch_size}\n')
      f.write(f'train data size: {train_num}\n')
# Save the model (the whole module object is passed to torch.save, not a
# state_dict).
torch.save(model, os.path.join(folder_path, 'model.pt'))
# Save the training subset that was used
torch.save(train_dataset, os.path.join(folder_path, 'train.pt'))


Epoch 1
-------------------------------
Epoch 2
-------------------------------
Epoch 3
-------------------------------
Epoch 4
-------------------------------
Epoch 5
-------------------------------
Epoch 6
-------------------------------
Epoch 7
-------------------------------
Epoch 8
-------------------------------
Epoch 9
-------------------------------
Epoch 10
-------------------------------
Epoch 11
-------------------------------
Epoch 12
-------------------------------
Epoch 13
-------------------------------
Epoch 14
-------------------------------
Epoch 15
-------------------------------
Epoch 16
-------------------------------
Epoch 17
-------------------------------
Epoch 18
-------------------------------
Epoch 19
-------------------------------
Epoch 20
-------------------------------
Epoch 21
-------------------------------
Epoch 22
-------------------------------
Epoch 23
-------------------------------
Epoch 24
-------------------------------
Epoch 25
----------------

In [51]:
import os
import numpy as np
import time
# Further run on the `model` already present in the kernel (model creation is
# commented out below), now with learning_rate = 0.01. initital_epoch is not
# passed, so recorded epoch numbers start again at train()'s default of 1.
# epochs = 20*(60000//train_num)
epochs = 600
# Evaluate after each of the first `dont_skip` epochs; afterwards only when the
# 0-based epoch index is a multiple of `test_every`, plus the last epoch.
dont_skip = 40
test_every = 40
learning_rate = 0.01
# Model initialisation (disabled: the existing model is trained further)
#model = Net4(800, 28*28).to(device)
loss_fn = nn.CrossEntropyLoss()
# A new SGD optimizer instance is created for this run.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_results, test_results = train(
      model=model, 
      epochs=epochs, 
      test_dataloader=test_dataloader, 
      train_dataloader=train_dataloader, 
      optimizer=optimizer, 
      loss_fn=loss_fn,
      dont_skip=dont_skip,
      test_every=test_every
      )

# Generate a unique folder name from the current timestamp
# ('.' in the timestamp is replaced by '_').
cur_time = (str(time.time())).replace('.', '_')
folder_path_base = os.path.join(OUTPUT_ROOT, 'treck_' + cur_time)
# Make sure the name is not taken: append an increasing counter while a folder
# with the candidate name already exists.
cnt = 0 
folder_path = folder_path_base
while os.path.isdir(folder_path):
      folder_path = folder_path_base + '_' + str(cnt) 
      cnt += 1 
os.makedirs(folder_path, exist_ok=True)
# One row per evaluated epoch: epoch number, accuracy in percent, loss.
np.savetxt(os.path.join(folder_path, 'train_results.txt'), np.array(train_results), fmt='%d %.2f %.8f')
np.savetxt(os.path.join(folder_path, 'test_results.txt'), np.array(test_results), fmt='%d %.2f %.8f')
# Human-readable description of the run configuration.
with open(os.path.join(folder_path, 'info.txt'), 'w+') as f:
      f.write(model.info())
      f.write(f'Epochs: {epochs}\n')
      f.write(f'unskippable epochs: {dont_skip}\n')
      f.write(f'test every {test_every} epochs\n')
      f.write(f'loss function: {loss_fn}\n')
      f.write(f'optimizer: {optimizer}\n')
      f.write(f'batch size: {batch_size}\n')
      f.write(f'train data size: {train_num}\n')
# Save the model (the whole module object is passed to torch.save, not a
# state_dict).
torch.save(model, os.path.join(folder_path, 'model.pt'))
# Save the training subset that was used
torch.save(train_dataset, os.path.join(folder_path, 'train.pt'))


Epoch 1
-------------------------------
Epoch 2
-------------------------------
Epoch 3
-------------------------------
Epoch 4
-------------------------------
Epoch 5
-------------------------------
Epoch 6
-------------------------------
Epoch 7
-------------------------------
Epoch 8
-------------------------------
Epoch 9
-------------------------------
Epoch 10
-------------------------------
Epoch 11
-------------------------------
Epoch 12
-------------------------------
Epoch 13
-------------------------------
Epoch 14
-------------------------------
Epoch 15
-------------------------------
Epoch 16
-------------------------------
Epoch 17
-------------------------------
Epoch 18
-------------------------------
Epoch 19
-------------------------------
Epoch 20
-------------------------------
Epoch 21
-------------------------------
Epoch 22
-------------------------------
Epoch 23
-------------------------------
Epoch 24
-------------------------------
Epoch 25
----------------

In [49]:
# Final check of the current model: accuracy (percent) and mean per-batch loss
# on the training subset, then on the test set.
train_acc, train_loss = test(model=model, loss_fn=loss_fn, dataloader=train_dataloader)
test_acc, test_loss = test(model=model, loss_fn=loss_fn, dataloader=test_dataloader)
print(f'{train_acc} {train_loss:>.8f}')
print(f'{test_acc} {test_loss:>.8f}')

100.0 0.00000241
88.12 1.09070744
