In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
# Pick the compute device: the GPU only when CUDA is actually usable,
# otherwise the CPU.
# `is_available` has to be *called* -- the bare function object is always
# truthy, which would select 'cuda' even on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# --- Model dimensions ---
input_size = 28 * 28     # length of one flattened image vector fed to the first layer
hidden_size = 1000       # number of units in the hidden layer
num_classes = 10         # number of output logits

# --- Training settings ---
num_epochs = 5           # full passes over the training loader
batch_size = 100         # samples per batch in both data loaders
learning_rate = 1e-3     # learning rate handed to the Adam optimizer

In [4]:
# MNIST train / test splits, each sample converted to a tensor by ToTensor().
# download=True fetches the files into `root` when they are not there yet, so
# the notebook also runs on a fresh machine; already-downloaded data is reused.
train_dataset = torchvision.datasets.MNIST(root='mnist_data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='mnist_data/',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# Batched iterators: the training data is reshuffled every epoch, the test
# data is read in a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [5]:
class NeuralNet(nn.Module):
    """Fully connected network with a single hidden layer.

    The layers are applied in the order Linear -> ReLU -> Linear. The output
    of the second linear layer is returned as-is (no softmax is applied here).

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of units in the hidden layer.
        num_classes: number of values produced per input vector.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names double as the state_dict key prefixes
        # ('fc1.*', 'fc2.*'), so they are part of the saved-checkpoint format.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map `x` (last dimension of size `input_size`) to `num_classes` scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [6]:
# Build the network from the configured sizes and move its parameters to
# the selected device.
model = NeuralNet(input_size=input_size,
                  hidden_size=hidden_size,
                  num_classes=num_classes)
model = model.to(device)

# Cross-entropy loss on the network outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [7]:
# Train for `num_epochs` passes over `train_loader`, printing the loss of
# every 100th batch.
total_step = len(train_loader)
log_template = 'Epoch: [{}/{}]\tStep: [{}/{}]\tLoss:{:.4f}'

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image into a vector of length `input_size`.
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        step = i + 1  # 1-based batch number used for logging
        if step % 100 != 0:
            continue
        print(log_template.format(epoch + 1, num_epochs, step, total_step, loss.item()))

Epoch: [1/5]	Step: [100/600]	Loss:0.4719
Epoch: [1/5]	Step: [200/600]	Loss:0.1799
Epoch: [1/5]	Step: [300/600]	Loss:0.1398
Epoch: [1/5]	Step: [400/600]	Loss:0.1177
Epoch: [1/5]	Step: [500/600]	Loss:0.2759
Epoch: [1/5]	Step: [600/600]	Loss:0.1443
Epoch: [2/5]	Step: [100/600]	Loss:0.0682
Epoch: [2/5]	Step: [200/600]	Loss:0.1516
Epoch: [2/5]	Step: [300/600]	Loss:0.1677
Epoch: [2/5]	Step: [400/600]	Loss:0.0662
Epoch: [2/5]	Step: [500/600]	Loss:0.0488
Epoch: [2/5]	Step: [600/600]	Loss:0.0522
Epoch: [3/5]	Step: [100/600]	Loss:0.1072
Epoch: [3/5]	Step: [200/600]	Loss:0.0440
Epoch: [3/5]	Step: [300/600]	Loss:0.0304
Epoch: [3/5]	Step: [400/600]	Loss:0.0067
Epoch: [3/5]	Step: [500/600]	Loss:0.0361
Epoch: [3/5]	Step: [600/600]	Loss:0.0810
Epoch: [4/5]	Step: [100/600]	Loss:0.0769
Epoch: [4/5]	Step: [200/600]	Loss:0.0358
Epoch: [4/5]	Step: [300/600]	Loss:0.0225
Epoch: [4/5]	Step: [400/600]	Loss:0.0908
Epoch: [4/5]	Step: [500/600]	Loss:0.0454
Epoch: [4/5]	Step: [600/600]	Loss:0.0112
Epoch: [5/5]	Ste

In [17]:
# Measure classification accuracy on the test loader.
was_training = model.training   # remember the mode so it can be restored
model.eval()                    # inference mode (matters once dropout/batch-norm layers exist)
try:
    with torch.no_grad():       # no gradients needed for evaluation
        correct = 0
        # Count the samples actually seen: the last batch may hold fewer
        # than `batch_size` items, so batch_size * len(test_loader) would
        # overcount and understate the accuracy.
        total = 0

        for images, labels in test_loader:
            images = images.reshape(-1, input_size).to(device)
            labels = labels.to(device)

            outputs = model(images)
            # Index of the largest score per row is the predicted class.
            predicted = torch.max(outputs, dim=1).indices

            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    model.train(was_training)

# Guard against an empty loader instead of dividing by zero.
accuracy = correct / total if total else float('nan')
print('Accuracy: {:.4f}'.format(accuracy))

Accuracy: 0.9794


In [18]:
# Persist only the parameters (the state_dict), not the whole module object.
checkpoint_path = 'perception.ckpt'
torch.save(model.state_dict(), checkpoint_path)

In [19]:
# Show the model's parameter tensors, keyed by '<layer>.<weight|bias>',
# as this cell's output.
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[-0.0049, -0.0236, -0.0304,  ...,  0.0350, -0.0177,  0.0024],
                      [-0.0081, -0.0236,  0.0084,  ..., -0.0350,  0.0133,  0.0306],
                      [-0.0240, -0.0143, -0.0084,  ...,  0.0103, -0.0105, -0.0173],
                      ...,
                      [ 0.0318,  0.0232,  0.0097,  ...,  0.0293, -0.0032,  0.0230],
                      [ 0.0265, -0.0015,  0.0119,  ...,  0.0226,  0.0018,  0.0006],
                      [-0.0353,  0.0238,  0.0215,  ...,  0.0206, -0.0117,  0.0248]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([-4.4795e-02, -5.4820e-02,  2.9204e-02, -1.7473e-02,  1.0371e-02,
                       6.7405e-02,  1.9405e-02, -1.4014e-02,  3.6928e-02,  6.3212e-03,
                       1.7157e-02,  1.9340e-02,  2.8342e-02, -7.3563e-03,  8.9394e-03,
                       6.1044e-03,  3.6285e-02,  2.4531e-02,  6.0869e-03, -4.0040e-02,
                       1.4