In [1]:
import torch
from torch import nn, optim, cuda
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision import transforms
from torchvision.datasets import MNIST

base_file_path = './datasets/mnist-linear'
is_cuda_available = cuda.is_available()

In [2]:
batch_size = 64

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (1.0,))])

In [3]:
train_set = MNIST(root=base_file_path, train=True, download=True, transform=transform)
test_set = MNIST(root=base_file_path, train=False, transform=transform)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2)

In [4]:
print(f'train batch length: {len(train_loader)}')
print(f'test batch length: {len(test_loader)}')

train batch length: 938
test batch length: 157


In [5]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
            nn.Sigmoid(),
        )
    
    def forward(self, input):
        output = self.main(input.view(-1, 28*28))
        return output
    
net = Model()
if is_cuda_available:
    net.cuda()

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0002)

In [7]:
for epoch in range(10):
    average_loss = 0
    for i, (x, target) in enumerate(train_loader):
        if is_cuda_available:
            x, target = x.cuda(), target.cuda()
        
        optimizer.zero_grad()
        x, target = Variable(x), Variable(target)
        out = net(x)
        loss = criterion(out, target)
        average_loss = average_loss * 0.9 + loss.data[0] * 0.1
        loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print('epoch: {}, batch index: {}, loss: {:.6f}'.format(epoch, i, average_loss))
        

epoch: 0, batch index: 0, loss: 0.230135
epoch: 0, batch index: 100, loss: 1.822987
epoch: 0, batch index: 200, loss: 1.684910
epoch: 0, batch index: 300, loss: 1.638735
epoch: 0, batch index: 400, loss: 1.619424
epoch: 0, batch index: 500, loss: 1.595652
epoch: 0, batch index: 600, loss: 1.592176
epoch: 0, batch index: 700, loss: 1.578991
epoch: 0, batch index: 800, loss: 1.581174
epoch: 0, batch index: 900, loss: 1.562377
epoch: 1, batch index: 0, loss: 0.159342
epoch: 1, batch index: 100, loss: 1.557920
epoch: 1, batch index: 200, loss: 1.557756
epoch: 1, batch index: 300, loss: 1.544618
epoch: 1, batch index: 400, loss: 1.548617
epoch: 1, batch index: 500, loss: 1.534168
epoch: 1, batch index: 600, loss: 1.539785
epoch: 1, batch index: 700, loss: 1.542710
epoch: 1, batch index: 800, loss: 1.542438
epoch: 1, batch index: 900, loss: 1.531007
epoch: 2, batch index: 0, loss: 0.153298
epoch: 2, batch index: 100, loss: 1.527688
epoch: 2, batch index: 200, loss: 1.530555
epoch: 2, batch i

In [10]:

total_count = 0
correct_count = 0
average_loss = 0
for i, (x, target) in enumerate(test_loader):
    if is_cuda_available:
        x, target = x.cuda(), target.cuda()
        
    x, target = Variable(x), Variable(target)
    out = net(x)
    loss = criterion(out, target)
    _, predicted_label = torch.max(out.data, 1)
    total_count += x.data.size()[0]
    correct_count += (predicted_label == target.data).sum()
    average_loss = average_loss * 0.9 + loss.data[0] * 0.1
    

print('test loss: {:.6f}, acc: {:.3f}'.format(average_loss, correct_count * 1.0 / total_count))

test loss: 1.487247, acc: 0.972
