In [3]:
import torch

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import visdom
import time
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [12]:
def train(model, train_loader, valid_loader, optimizer, epochs, log_interval):
    since = time.time()
    model.train()
    best_model = model.load_state_dict(torch.load('./mnist/models/best_model.pt'))
    best_loss = float("inf")
    train_losses = []
    train_loss = 0 
    valid_losses = []
    for epoch in range(1, epochs + 1):
        with torch.set_grad_enabled(True):
            for batch_idx, (data, target) in enumerate(train_loader):
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                train_loss+=loss
                loss.backward()
                optimizer.step()
                    #torch.save(the_model, './models/best_model.pt')
                if batch_idx % log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item()))
        train_losses.append(train_loss/len(train_loader.dataset))
        valid_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in valid_loader:
                output = model(data)
                valid_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
                pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()
                #print(output)

        valid_loss /= len(valid_loader.dataset)
        train_losses.append(valid_loss)
        print('\n Validation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            valid_loss, correct, len(valid_loader.dataset),
            100. * correct / len(valid_loader.dataset)))

        if valid_loss <best_loss:
            best_loss = valid_loss
            best_model = model
        valid_losses.append(valid_loss)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(best_loss))
    torch.save(best_model.state_dict(), './mnist/models/best_model.pt')
    train_counter = [i for i in range(len(train_losses))]
    valid_counter = [i for i in range(len(valid_losses))]
    fig = plt.figure()
    plt.plot(train_counter, train_losses, color='blue')
    plt.scatter(valid_counter, test_losses, color='red')
    plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
    plt.xlabel('number of training examples seen')
    plt.ylabel('negative log likelihood loss')
    fig


def test(test_loader, model=None):
    if (model==None):
        model = Net()
        model = model.load_state_dict(torch.load('./models/best_model.pt'))
        #model = torch.load('./models/best_model.pt')
    model.eval()
    test_loss = 0
    correct = 0
    outputs = []
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            outputs.append(output)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            #print(output)

    test_loss /= len(test_loader.dataset)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    return test_loss, test_model, outputs

In [None]:
train_batch_size = 300
test_batch_size = 200

lr = 0.1
momentum = 0.9

epochs = 3

valid_size = 0.2

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./mnist/data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=train_batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./mnist/data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=test_batch_size, shuffle=True)

#normalize = transforms.Normalize((0.1307,), (0.3081,))  # MNIST

    # define transforms
#valid_transform = transforms.Compose([
#            transforms.ToTensor(),
#            normalize
#        ])
#train_transform = transforms.Compose([
#            transforms.ToTensor(),
#            normalize
#        ])

    # load the dataset
#train_dataset = datasets.MNIST(root='./data', train=True, 
#                download=True, transform=train_transform)

#valid_dataset = datasets.MNIST(root='./data', train=True, 
#                download=True, transform=valid_transform)
    
#num_train = len(train_dataset)
##num_train = 400
#indices = list(range(num_train))
#split = int(np.floor(valid_size * num_train))

##if shuffle == True:
#np.random.seed(30)
##        np.random.shuffle(indices)

#train_idx, valid_idx = indices[split:], indices[:split]
    
#train_sampler = SubsetRandomSampler(train_idx)
#valid_sampler = SubsetRandomSampler(valid_idx)
#train_loader = torch.utils.data.DataLoader(
#        train_dataset, batch_size=batch_size, sampler=train_sampler)
#valid_loader = torch.utils.data.DataLoader(
#        valid_dataset, batch_size=batch_size, sampler=valid_sampler)
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('.\mnist\data', train=True, download=True,
                             transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=train_batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
datasets.MNIST('.\mnist\data', train=False, download=True,
                             transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=test_batch_size, shuffle=True)



log_interval = int(len(train_loader.dataset)/train_batch_size)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
train(model, train_loader, test_loader, optimizer, epochs, log_interval)

        


 Validation set: Average loss: 2.3020, Accuracy: 1135/10000 (11%)

