In [3]:
import torch
import torch.nn as nn # Base model for all neural nets
import torchvision.datasets as dsets # Contains MNIST dataset
import torchvision.transforms as transforms # Common image transforms
from torch.autograd import Variable # Used for auto differential on vectors

In [5]:
# Seed PyTorch's global RNG first so that weight initialisation and the
# shuffled batch order are repeatable across "Restart & Run All".
seed = 0
torch.manual_seed(seed)

# Hyperparameters
input_size = 784  # Images are 28x28=784 pixels once flattened
hidden_size = 500  # Number of units in the hidden layer
num_classes = 10  # Number of output classes
num_epochs = 5  # Full passes over the training set
batch_size = 100  # Samples per batch
learning_rate = 0.0001  # Learning rate handed to the optimizer

In [6]:
# MNIST training split
train_dataset = dsets.MNIST(
    root='./data',                    # directory the files are downloaded to / read from
    train=True,                       # select the training split rather than the test split
    download=True,                    # fetch the files if they are not already under root
    transform=transforms.ToTensor(),  # convert each PIL image to a tensor
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [8]:
# MNIST test split.
# download=True mirrors the training cell: it is a no-op when the files are
# already under ./data, and it keeps this cell from raising when it is run on
# its own against an empty data directory.
test_dataset = dsets.MNIST(root='./data',
                          train=False,  # select the test split
                          transform=transforms.ToTensor(),  # PIL image -> tensor
                          download=True)

In [27]:
# Batch iterators over the two splits
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,   # reshuffle the training samples every epoch
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,  # keep the test samples in dataset order
)

In [17]:
# Feed-forward network with one hidden layer
class Net(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Data flow: ``fc1`` (linear, input_size -> hidden_size), then ReLU, then
    ``fc2`` (linear, hidden_size -> num_classes). The output of ``fc2`` is
    returned as-is; no softmax is applied inside the module.

    Args:
        input_size: number of features in each input row.
        hidden_size: number of units in the hidden layer.
        num_classes: number of values produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        # Attribute names double as state_dict key prefixes, so they are fixed.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Apply fc1, ReLU and fc2 in that order and return the fc2 output."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [18]:
# Instantiate the network from the configured layer sizes
net = Net(input_size=input_size,
          hidden_size=hidden_size,
          num_classes=num_classes)

In [21]:
# Training objective and parameter-update rule
criterion = nn.CrossEntropyLoss()  # compares the network outputs with integer class labels
optimizer = torch.optim.Adam(
    params=net.parameters(),  # every trainable tensor registered on net
    lr=learning_rate,
)

In [23]:
# Training the Model
# len(train_loader) is the real number of batches per epoch; it also counts a
# final partial batch, which len(train_dataset)//batch_size would miss.
steps_per_epoch = len(train_loader)
net.train()  # make sure the module is in training mode
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image in the batch into a row of input_size features.
        # Tensors take part in autograd directly, so no Variable wrapper is needed.
        images = images.view(-1, input_size)

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 100 steps.
        if (i + 1) % 100 == 0:
            # loss is a 0-dim tensor: .item() extracts the Python float, whereas
            # indexing it (loss.data[0]) raises IndexError on PyTorch >= 0.5.
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, steps_per_epoch, loss.item()))

Epoch [1/5], Step [100/600], Loss: 1.2813
Epoch [1/5], Step [200/600], Loss: 0.6801
Epoch [1/5], Step [300/600], Loss: 0.4665
Epoch [1/5], Step [400/600], Loss: 0.5080
Epoch [1/5], Step [500/600], Loss: 0.4384
Epoch [1/5], Step [600/600], Loss: 0.2955
Epoch [2/5], Step [100/600], Loss: 0.2986
Epoch [2/5], Step [200/600], Loss: 0.3982
Epoch [2/5], Step [300/600], Loss: 0.2041
Epoch [2/5], Step [400/600], Loss: 0.2661
Epoch [2/5], Step [500/600], Loss: 0.3060
Epoch [2/5], Step [600/600], Loss: 0.2675
Epoch [3/5], Step [100/600], Loss: 0.2830
Epoch [3/5], Step [200/600], Loss: 0.2656
Epoch [3/5], Step [300/600], Loss: 0.2207
Epoch [3/5], Step [400/600], Loss: 0.3658
Epoch [3/5], Step [500/600], Loss: 0.2277
Epoch [3/5], Step [600/600], Loss: 0.2368
Epoch [4/5], Step [100/600], Loss: 0.2691
Epoch [4/5], Step [200/600], Loss: 0.1961
Epoch [4/5], Step [300/600], Loss: 0.1871
Epoch [4/5], Step [400/600], Loss: 0.2684
Epoch [4/5], Step [500/600], Loss: 0.2801
Epoch [4/5], Step [600/600], Loss:

In [28]:
# Test Model
# Count how many test images are classified correctly.
net.eval()  # evaluation mode; defensive here, Net only holds Linear and ReLU layers
correct = 0  # number of correct predictions so far (plain Python int)
total = 0    # number of test samples seen so far
with torch.no_grad():  # inference only: skip autograd bookkeeping
    for images, labels in test_loader:
        images = images.view(-1, input_size)  # flatten each image to one row
        outputs = net(images)
        # Index of the largest output along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` an int; accumulating the tensor returned by
        # .sum() makes the later `100 * correct / total` version-dependent.
        correct += (predicted == labels).sum().item()

In [29]:
# Report the share of correct predictions as a whole-number percentage
print('Accuracy of the network on the 10000 test images: %d %%'
      % (100 * correct / total,))

Accuracy of the network on the 10000 test images: 94 %


In [30]:
# Persist the trained parameters (the state_dict only, not the Net object) to model.pkl
torch.save(obj=net.state_dict(), f='model.pkl')