In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

  warn(f"Failed to load image Python extension: {e}")


In [2]:
class MnistModel(nn.Module):
    def __init__(self):
        super(MnistModel, self).__init__()
        # input is 28x28
        # padding=2 for same padding
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)
        # feature map size is 14*14 by pooling
        # padding=2 for same padding
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)
        # feature map size is 7*7 by pooling
        self.fc1 = nn.Linear(64*7*7, 1024)
        self.fc2 = nn.Linear(1024, 10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 64*7*7)   # reshape Variable
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)
    
model = MnistModel()
model

MnistModel(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (fc1): Linear(in_features=3136, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)

In [3]:
batch_size = 50
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

In [4]:
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
    batch_size=1000)

In [5]:
# Define the regularization strength
weight_decay = 0.001
#optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=weight_decay)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [6]:
model.train()
train_loss = []
train_accu = []
i = 0

#loss_fn = nn.MSELoss()
loss_fn = nn.CrossEntropyLoss()
for epoch in range(5):
    for data, target in train_loader:
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        #loss = F.nll_loss(output, target)
        loss= loss_fn(output, target)
        loss.backward()    # calc gradients
        train_loss.append(loss.item())
        optimizer.step()   # update gradients
        prediction = output.data.max(1)[1]   # first column has actual prob.
        accuracy = prediction.eq(target.data).sum()/batch_size*100
        train_accu.append(accuracy)
        if i % 1000 == 0:
            print('Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(i, loss.item(), accuracy))
        i += 1

  return F.log_softmax(x)


Train Step: 0	Loss: 2.305	Accuracy: 12.000
Train Step: 1000	Loss: 0.375	Accuracy: 86.000
Train Step: 2000	Loss: 0.077	Accuracy: 96.000
Train Step: 3000	Loss: 0.089	Accuracy: 96.000
Train Step: 4000	Loss: 0.152	Accuracy: 96.000
Train Step: 5000	Loss: 0.079	Accuracy: 98.000


In [7]:
model.eval()
correct = 0
for data, target in test_loader:
    data, target = Variable(data, volatile=True), Variable(target)
    output = model(data)
    loss = F.nll_loss(output, target)
    prediction = output.data.max(1)[1]
    correct += prediction.eq(target.data).sum()

print('\nTest set: \tLoss: {:.3f}\tAccuracy: {:.3f}'.format(loss, 100. * correct / len(test_loader.dataset)))

  data, target = Variable(data, volatile=True), Variable(target)
  return F.log_softmax(x)



Test set: 	Loss: 0.065	Accuracy: 98.030
