In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from torch.autograd import Variable

from torchvision import datasets, transforms

from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm

In [2]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 64

# Both MNIST splits live under the same root and use the same ToTensor transform.
mnist_root = '../data/'
to_tensor = transforms.ToTensor()

# Only the training split is constructed with download=True; the test split
# is built from the same root directory without that flag.
train_dataset = datasets.MNIST(root=mnist_root, train=True, transform=to_tensor, download=True)
test_dataset = datasets.MNIST(root=mnist_root, train=False, transform=to_tensor)

# Training batches are reshuffled; evaluation keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [3]:
class Net(nn.Module):
    """Fully connected classifier with layer widths 784-520-320-240-120-10.

    The input is reshaped to rows of 784 features, passed through four
    ReLU-activated linear layers (``L1``..``L4``) and a final linear layer
    (``L5``) whose 10 outputs are returned without any activation.
    """

    def __init__(self):
        super().__init__()
        widths = (784, 520, 320, 240, 120, 10)
        # Layers are registered as L1..L5, in that order, so parameter names
        # (and the order in which they are initialised) stay the same.
        for index, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, 'L{}'.format(index), nn.Linear(n_in, n_out))

    def forward(self, x):
        """Return the 10 raw output scores for each row of the flattened input."""
        hidden = x.view(-1, 784)
        for layer in (self.L1, self.L2, self.L3, self.L4):
            hidden = F.relu(layer(hidden))
        # No activation on the last layer: raw scores are returned.
        return self.L5(hidden)
    
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = Net()

# When more than one GPU is visible, wrap the network in DataParallel.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    model = nn.DataParallel(model)

model.to(device)

# Cross-entropy loss on the network outputs, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

Let's use 2 GPUs!


In [4]:
def train(epoch, log_interval=100):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Relies on the notebook-level ``model``, ``train_loader``, ``device``,
    ``criterion`` and ``optimizer``.

    Args:
        epoch: Epoch number; it is only used in the progress message.
        log_interval: Print a progress line every this many batches
            (defaults to 100, the value that used to be hard-coded).
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors go to the model directly; the Variable wrapper is not needed.
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            # .item() reads the scalar loss as a Python float; indexing the
            # 0-dim loss tensor with [0] is rejected by current PyTorch.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t Loss:{:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [5]:
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True),Variable(target)
        data, target = data.to(device), target.to(device)
        
        output = model(data)
        
        test_loss += criterion(output, target).data[0]
        
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [6]:
# Train for nine epochs (numbered 1 through 9), then evaluate once on the
# test set after the final epoch.
NUM_EPOCHS = 9
for epoch in range(1, NUM_EPOCHS + 1):
    train(epoch)
test()





  
  # This is added back by InteractiveShellApp.init_path()



Test set: Average loss: 0.0017, Accuracy: 9665/10000 (96%)

