In [None]:
import torch, torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Mini-batch sizes for the training and test loaders.
batch_size_train, batch_size_test = 32, 10000
# Presumably the fraction of training data held out for validation;
# it is not used by any code visible in this notebook -- TODO confirm.
valid_size = 0.2

In [None]:
# Both MNIST splits share the same root directory, download flag and transform;
# only the `train` flag differs.
mnist_kwargs = dict(root='data', download=True, transform=ToTensor())
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
# Show the dataset summaries first, then the raw image tensor shapes
# (train split before test split in both cases).
for dataset in (train_data, test_data):
    print(dataset)
for dataset in (train_data, test_data):
    print(dataset.data.size())

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()
torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


In [None]:
def choose_batch(batch_size_trn, batch_size_tst, train_set=None, test_set=None):
    """Build shuffled train and test data loaders with the given batch sizes.

    Args:
        batch_size_trn: Batch size for the training loader.
        batch_size_tst: Batch size for the test loader.
        train_set: Dataset for the training loader. Defaults to the
            notebook-level ``train_data``.
        test_set: Dataset for the test loader. Defaults to the
            notebook-level ``test_data``.

    Returns:
        A ``(train_loader, test_loader)`` tuple of ``DataLoader`` objects.
    """
    if train_set is None:
        train_set = train_data
    if test_set is None:
        # The test loader must wrap the held-out split; wrapping the training
        # data here would make every "test" metric a training metric.
        test_set = test_data

    train_loader = DataLoader(train_set, batch_size=batch_size_trn, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size_tst, shuffle=True)

    return train_loader, test_loader

In [None]:
# Model with 2 convolutional layers and 3 fully connected layers
class Net(nn.Module):
    """Small CNN classifier that outputs per-class probabilities.

    The convolutional stack is two (5x5 conv -> 2x2 max-pool -> ReLU) stages
    with 10 and 20 output channels. The classifier head is
    320 -> 128 -> 64 -> 10 with ReLU in between and a softmax over dim 1,
    so ``forward`` returns probabilities rather than logits or log-probs.
    """

    def __init__(self):
        super().__init__()
        feature_stages = [
            nn.Conv2d(1, 10, kernel_size=5),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.MaxPool2d(2),
            nn.ReLU(),
        ]
        self.conv_layers = nn.Sequential(*feature_stages)

        classifier_stages = [
            nn.Linear(320, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
            nn.Softmax(dim=1),
        ]
        self.fc_layers = nn.Sequential(*classifier_stages)

    def forward(self, x):
        """Return softmax class probabilities for a batch of images ``x``."""
        features = self.conv_layers(x)
        # Flatten to rows of 320 features; for 1x28x28 inputs the conv stack
        # yields 20 channels of 4x4, i.e. 320 values per sample.
        flat = features.view(-1, 320)
        return self.fc_layers(flat)

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch, printing the loss every 100 batches.

    Args:
        model: Module whose forward pass returns class probabilities
            (the log is taken here before the NLL loss).
        device: Device the input batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    for batch_indx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # A probability that underflows to 0 would give log(0) = -inf and
        # NaN gradients that poison every parameter. Clamping to the smallest
        # normal float keeps the loss finite and leaves ordinary values untouched.
        log_probs = output.clamp_min(torch.finfo(output.dtype).tiny).log()
        loss = F.nll_loss(log_probs, target)
        loss.backward()
        optimizer.step()
        if batch_indx % 100 == 0:
            print('Epoch {0}  Loss {1} '.format(epoch, loss.item()))

In [None]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output.log(), target).item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    correct /= len(test_loader.dataset)
    print('Avg loss {0} Avg acc {1}'.format(test_loss, correct))

In [None]:
# Run configuration: mini-batch size and number of epochs for the experiments,
# all executed on the CPU.
batch_size, epochs = 128, 3
device = torch.device("cpu")

In [None]:
# Shuffled loaders over the MNIST train and test splits stored under
# 'mnist_data'; images are converted to tensors with no further transforms.
mnist_transform = transforms.Compose([transforms.ToTensor()])
mnist_train = datasets.MNIST('mnist_data', train=True, download=True, transform=mnist_transform)
mnist_test = datasets.MNIST('mnist_data', train=False, download=True, transform=mnist_transform)
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=batch_size, shuffle=True)


In [None]:
# Create 5 models with different optimizers (the optimizer type and the
# learning rate (lr) differ between them).
# First configuration: SGD with momentum, lr=0.01.
model = Net()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)

In [None]:
# Second configuration: SGD with momentum, lr=0.001.
model2 = Net().to(device)
# The optimizer must be built from model2's own parameters; binding it to
# another model's parameters means model2 is never updated during training.
optimizer2 = optim.SGD(model2.parameters(), lr = 0.001, momentum = 0.5)

In [None]:
# Third configuration: SGD with momentum, lr=0.1.
model3 = Net().to(device)
# The optimizer must be built from model3's own parameters; binding it to
# another model's parameters means model3 is never updated during training.
optimizer3 = optim.SGD(model3.parameters(), lr = 0.1, momentum = 0.5)

In [None]:
# Fourth configuration: Adam, lr=0.01.
model4 = Net().to(device)
# The optimizer must be built from model4's own parameters; binding it to
# another model's parameters means model4 is never updated during training.
optimizer4 = optim.Adam(model4.parameters(), lr = 0.01)

In [None]:
# Fifth configuration: RMSprop with momentum, lr=0.01.
model5 = Net().to(device)
# The optimizer must be built from model5's own parameters; binding it to
# another model's parameters means model5 is never updated during training.
optimizer5 = optim.RMSprop(model5.parameters(), lr = 0.01, momentum = 0.5)

In [None]:
# Train the first model for `epochs` epochs, evaluating on the test loader
# after every epoch (epochs are numbered from 1).
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)

Epoch 1  Loss 2.3066132068634033 
Epoch 1  Loss 2.2918484210968018 
Epoch 1  Loss 2.2913944721221924 
Epoch 1  Loss 2.293863296508789 
Epoch 1  Loss 2.2911133766174316 
Avg loss 0.01804625790119171 Avg acc 0.1135
Epoch 2  Loss 2.290665864944458 
Epoch 2  Loss 2.271789312362671 
Epoch 2  Loss 2.20024037361145 
Epoch 2  Loss 1.7083793878555298 
Epoch 2  Loss 0.695245623588562 
Avg loss 0.004125655797123909 Avg acc 0.8384
Epoch 3  Loss 0.4633777439594269 
Epoch 3  Loss 0.3728172183036804 
Epoch 3  Loss 0.3551027774810791 
Epoch 3  Loss 0.3563787639141083 
Epoch 3  Loss 0.2076406329870224 
Avg loss 0.0019362385407090186 Avg acc 0.9229


In [None]:
# Train the second model for `epochs` epochs, evaluating on the test loader
# after every epoch (epochs are numbered from 1).
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(model=model2, device=device, train_loader=train_loader,
          optimizer=optimizer2, epoch=epoch)
    test(model=model2, device=device, test_loader=test_loader)

Epoch 1  Loss 2.2904775142669678 
Epoch 1  Loss 2.31174635887146 
Epoch 1  Loss 2.305121421813965 
Epoch 1  Loss 2.3018758296966553 
Epoch 1  Loss 2.3051257133483887 
Avg loss 0.01821616551876068 Avg acc 0.1009
Epoch 2  Loss 2.30610728263855 
Epoch 2  Loss 2.2997233867645264 
Epoch 2  Loss 2.301445960998535 
Epoch 2  Loss 2.3103220462799072 
Epoch 2  Loss 2.3135461807250977 
Avg loss 0.018219946455955507 Avg acc 0.1009
Epoch 3  Loss 2.3080084323883057 
Epoch 3  Loss 2.3108415603637695 
Epoch 3  Loss 2.2995717525482178 
Epoch 3  Loss 2.3128790855407715 
Epoch 3  Loss 2.3056411743164062 
Avg loss 0.01821963655948639 Avg acc 0.1009


In [None]:
# Train the third model for `epochs` epochs, evaluating on the test loader
# after every epoch (epochs are numbered from 1).
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(model=model3, device=device, train_loader=train_loader,
          optimizer=optimizer3, epoch=epoch)
    test(model=model3, device=device, test_loader=test_loader)

Epoch 1  Loss 2.307481527328491 
Epoch 1  Loss 2.312453031539917 
Epoch 1  Loss 2.2994372844696045 
Epoch 1  Loss 2.3138155937194824 
Epoch 1  Loss 2.3047173023223877 
Avg loss 0.018226488852500916 Avg acc 0.0982
Epoch 2  Loss 2.3061587810516357 
Epoch 2  Loss 2.319164991378784 
Epoch 2  Loss 2.314134120941162 
Epoch 2  Loss 2.3055810928344727 
Epoch 2  Loss 2.2988760471343994 
Avg loss 0.01822683675289154 Avg acc 0.0982
Epoch 3  Loss 2.300570487976074 
Epoch 3  Loss 2.3016722202301025 
Epoch 3  Loss 2.306119203567505 
Epoch 3  Loss 2.297546625137329 
Epoch 3  Loss 2.3094191551208496 
Avg loss 0.01822712137699127 Avg acc 0.0982


In [None]:
# Train the fourth model for `epochs` epochs, evaluating on the test loader
# after every epoch (epochs are numbered from 1).
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(model=model4, device=device, train_loader=train_loader,
          optimizer=optimizer4, epoch=epoch)
    test(model=model4, device=device, test_loader=test_loader)

Epoch 1  Loss 2.3166301250457764 
Epoch 1  Loss 2.3039121627807617 
Epoch 1  Loss 2.310493230819702 
Epoch 1  Loss 2.303271532058716 
Epoch 1  Loss 2.30814528465271 
Avg loss 0.018244272089004518 Avg acc 0.0909
Epoch 2  Loss 2.295989751815796 
Epoch 2  Loss 2.308619737625122 
Epoch 2  Loss 2.3069348335266113 
Epoch 2  Loss 2.307103157043457 
Epoch 2  Loss 2.3053982257843018 
Avg loss 0.01824000401496887 Avg acc 0.0909
Epoch 3  Loss 2.309016704559326 
Epoch 3  Loss 2.3128795623779297 
Epoch 3  Loss 2.30873441696167 
Epoch 3  Loss 2.294670820236206 
Epoch 3  Loss 2.299774646759033 
Avg loss 0.01824345269203186 Avg acc 0.0909


In [None]:
# Train the fifth model for `epochs` epochs, evaluating on the test loader
# after every epoch (epochs are numbered from 1).
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(model=model5, device=device, train_loader=train_loader,
          optimizer=optimizer5, epoch=epoch)
    test(model=model5, device=device, test_loader=test_loader)

Epoch 1  Loss 2.3235738277435303 
Epoch 1  Loss 2.302422523498535 
Epoch 1  Loss 2.2882039546966553 
Epoch 1  Loss 2.304358720779419 
Epoch 1  Loss 2.2983908653259277 
Avg loss 0.018205712366104125 Avg acc 0.105
Epoch 2  Loss 2.292454719543457 
Epoch 2  Loss 2.3124310970306396 
Epoch 2  Loss 2.310509443283081 
Epoch 2  Loss 2.2995877265930176 
Epoch 2  Loss 2.2937326431274414 
Avg loss 0.018203898692131043 Avg acc 0.105
Epoch 3  Loss 2.3046927452087402 
Epoch 3  Loss 2.3006303310394287 
Epoch 3  Loss 2.3095858097076416 
Epoch 3  Loss 2.3039369583129883 
Epoch 3  Loss 2.294588565826416 
Avg loss 0.018205688786506653 Avg acc 0.105


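ВЫВОД: исходя из значений точности и ошибки, полученных при обучении моделей с разными оптимизаторами, можно сделать вывод, что лучший результат показала модель с оптимизатором SGD и скоростью обучения (lr), равной 0.01 (точность 0.9229 после 3 эпох). Однако в приведённых выше результатах остальные четыре модели остались на уровне случайного угадывания (точность около 0.1, ошибка практически не менялась), то есть фактически не обучались, поэтому сравнение оптимизаторов следует перепроверить.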


In [None]:
#