In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('using device:', device)
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**2,1), 'MB')

using device: cuda
TITAN Xp
Memory usage:
Allocated: 0.0 MB


In [8]:
batch_size = 64

train_dataset = datasets.MNIST( root='./mnist_data/',
                              train=True,
                              transform=transforms.ToTensor(),
                              download=True)
test_dataset = datasets.MNIST( root='./mnist_data/',
                             train = False,
                             transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader( dataset=train_dataset,
                                          batch_size = batch_size,
                                          shuffle=True)
test_loader = torch.utils.data.DataLoader( dataset=test_dataset,
                                          batch_size = batch_size,
                                          shuffle=False)
# train_loader has (x, y) where x.size()=[64, 1, 28, 28] and y.size()=[64]

In [9]:
#class NNModel(torch.nn.Module):
#    def __init__(self):
#        super(NNModel, self).__init__()
#        self.conv1 = nn.Conv2d(1, 6, 5) # input channel, output channels, and filter size
#        self.conv2 = nn.Conv2d(6, 16, 5)
#        self.fc1 = nn.Linear(256,64)
#        self.fc2 = nn.Linear(64,10)
#        
#    def forward(self, x):
#        x = F.max_pool2d( F.relu(self.conv1(x)), 2)
#        x = F.max_pool2d( F.relu(self.conv2(x)), 2)
#        x = x.view(-1, 256)
#        x = F.relu(self.fc1(x))
#        x = self.fc2(x)
#        return F.softmax(x)

class NNModel(torch.nn.Module):
    def __init__(self):
        super(NNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, padding=(2,2)) # input channel, output channels, and filter size
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, 5, padding=(2,2))
        self.pool2 = nn.MaxPool2d(2, stride=2)
        self.fc1 = nn.Linear(3136,1024)
        self.fc2 = nn.Linear(1024,10)
        
    def forward(self, x):
        x = self.pool1( F.relu(self.conv1(x)) )
        x = self.pool2( F.relu(self.conv2(x)) )
        x = x.view(-1, 3136)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x)    
    

In [10]:
model = NNModel()

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)
        torch.nn.init.zeros_(m.bias)

model.apply(weights_init)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

model = model.to(device)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        
        output = model(data)
        optimizer.zero_grad()
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx%50==0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, 
                batch_idx*len(data), 
                len(train_loader.dataset),
                100. * batch_idx / len(train_loader), 
                loss))

def test():
    model.eval()
    with torch.no_grad():
        test_loss=0
        correct = 0
        for data, target in train_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            test_loss += criterion(output,target)
            pred = output.data.max(1,keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    
        test_loss /=len(train_loader.dataset)
        print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            test_loss, correct, 
            len(train_loader.dataset),
            100. * correct / len(train_loader.dataset)))

        test_loss=0
        correct = 0
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            test_loss += criterion(output,target)
            pred = output.data.max(1,keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    
        test_loss /=len(test_loader.dataset)
        print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, 
            len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))

for epoch in range(0,100):
    train(epoch)
    if epoch%10==0:
        test()
test()



Train set: Average loss: 0.0358, Accuracy: 20603/60000 (34%)
Test set: Average loss: 0.0360, Accuracy: 3507/10000 (35%)



Train set: Average loss: 0.0233, Accuracy: 58332/60000 (97%)
Test set: Average loss: 0.0234, Accuracy: 9733/10000 (97%)



Train set: Average loss: 0.0231, Accuracy: 59134/60000 (98%)
Test set: Average loss: 0.0232, Accuracy: 9828/10000 (98%)



Train set: Average loss: 0.0230, Accuracy: 59412/60000 (99%)
Test set: Average loss: 0.0232, Accuracy: 9848/10000 (98%)



Train set: Average loss: 0.0230, Accuracy: 59525/60000 (99%)
Test set: Average loss: 0.0232, Accuracy: 9869/10000 (98%)





Train set: Average loss: 0.0229, Accuracy: 59657/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9884/10000 (98%)



Train set: Average loss: 0.0229, Accuracy: 59715/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9894/10000 (98%)



Train set: Average loss: 0.0229, Accuracy: 59720/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9902/10000 (99%)



Train set: Average loss: 0.0229, Accuracy: 59748/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9893/10000 (98%)



Train set: Average loss: 0.0229, Accuracy: 59792/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9896/10000 (98%)



Train set: Average loss: 0.0229, Accuracy: 59802/60000 (99%)
Test set: Average loss: 0.0231, Accuracy: 9899/10000 (98%)

