# Step 1
Import modules

In [1]:
''' import modules '''
import time
import argparse
import os.path as osp

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


from torchvision import datasets, transforms
from logger import Logger



# Step 2
Define a simple network

In [2]:
# Fully connected neural network with four hidden layers
class Net(nn.Module):
    """Fully connected classifier for flattened 28x28 inputs.

    Layer widths are 784 -> 320 -> 160 -> 80 -> 40 -> 10. A ReLU follows each
    of the four hidden layers and the 10 outputs go through a log-softmax.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 320)
        self.fc2 = nn.Linear(320, 160)
        self.fc3 = nn.Linear(160, 80)
        self.fc4 = nn.Linear(80, 40)
        self.fc5 = nn.Linear(40, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            x: Tensor whose elements can be viewed as rows of 28*28 values.

        Returns:
            Tensor of shape (rows, 10) holding log-probabilities; each row
            exponentiates to a distribution that sums to 1.
        """
        # Flatten everything but the batch dimension into 784 features.
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)
    

# Build the network and move its parameters to the GPU (requires CUDA).
model = Net() 
# Trailing bare expression: in a notebook cell this also displays the
# module summary returned by .cuda().
model.cuda()

Net(
  (fc1): Linear(in_features=784, out_features=320, bias=True)
  (fc2): Linear(in_features=320, out_features=160, bias=True)
  (fc3): Linear(in_features=160, out_features=80, bias=True)
  (fc4): Linear(in_features=80, out_features=40, bias=True)
  (fc5): Linear(in_features=40, out_features=10, bias=True)
)

# Step 3
Hyper-parameters and data

In [3]:
''' Hyper-parameters'''
# Number of passes over the training set made by the training loop.
epochs = 10
# Mini-batch size shared by the train and test data loaders.
batch_size = 64
# SGD learning rate.
lr = 0.01
# SGD momentum coefficient.
momentum = 0.5
# SGD with momentum over every parameter of the module-level model.
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [4]:
''' Prepare data'''
# Options passed to both DataLoaders: one worker process, pinned host memory.
kwargs = {'num_workers': 1, 'pin_memory': True}

# Preprocessing applied to both splits: convert to a tensor, then normalise
# with the fixed constants below (presumably the MNIST mean/std -- confirm).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split; downloaded into ../data if it is not already there.
train_set = datasets.MNIST(
    '../data', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)

# Test split; read from the same directory, also served in shuffled order.
test_set = datasets.MNIST(
    '../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


# Step 4
Define the training and evaluation functions

In [5]:
''' Train and test'''

def train(epoch):
    """Run one optimisation pass over ``train_loader`` and print progress.

    Uses the module-level ``model``, ``optimizer`` and ``train_loader``.
    Every 100th batch, the position in the epoch and that batch's loss are
    printed.

    Args:
        epoch: Epoch number; used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # A full image batch has shape [64, 1, 28, 28].
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # The loss is a 0-dim tensor: .item() extracts the Python number
            # (indexing it with [0] raises on current PyTorch).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(epoch):
    """Evaluate the module-level ``model`` on ``test_loader`` and print a summary.

    Args:
        epoch: Unused; kept so the signature mirrors ``train``.

    Returns:
        Tuple ``(test_loss, accuracy, data)`` where ``test_loss`` is the mean
        of the per-batch NLL losses (float), ``accuracy`` is the percentage of
        correct predictions (float) and ``data`` is the last input batch
        drawn from the loader (still on the GPU).
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    # Disable autograd for evaluation; the old ``volatile=True`` flag no
    # longer has any effect, so gradients were being tracked needlessly.
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(output, target).item()
            # get the index of the max log-probability
            pred = output.max(1)[1]
            # .item() keeps ``correct`` a Python int, so the accuracy below is
            # true float division rather than truncating tensor arithmetic.
            correct += pred.eq(target).sum().item()

    # loss function already averages over batch size
    test_loss /= len(test_loader)
    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, '
          'Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss,
                                               correct,
                                               len(test_loader.dataset),
                                               accuracy))
    return test_loss, accuracy, data

def test_train(epoch):
    """Evaluate the module-level ``model`` on ``train_loader`` and print a summary.

    Args:
        epoch: Unused; kept so the signature mirrors ``train``.

    Returns:
        Tuple ``(train_loss, accuracy, data)`` where ``train_loss`` is the
        mean of the per-batch NLL losses (float), ``accuracy`` is the
        percentage of correct predictions (float) and ``data`` is the last
        input batch drawn from the loader (still on the GPU).
    """
    model.eval()
    train_loss = 0.0
    correct = 0
    # Disable autograd for evaluation; the old ``volatile=True`` flag no
    # longer has any effect, so gradients were being tracked needlessly.
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            train_loss += F.nll_loss(output, target).item()
            # get the index of the max log-probability
            pred = output.max(1)[1]
            # .item() keeps ``correct`` a Python int, so the accuracy below is
            # true float division rather than truncating tensor arithmetic.
            correct += pred.eq(target).sum().item()

    # loss function already averages over batch size
    train_loss /= len(train_loader)
    accuracy = 100. * correct / len(train_loader.dataset)
    print('\nTrain set: Average loss: {:.4f}, '
          'Accuracy: {}/{} ({:.0f}%)\n'.format(train_loss,
                                               correct,
                                               len(train_loader.dataset),
                                               accuracy))
    return train_loss, accuracy, data


# Step 5
Train and record

In [None]:
''' train'''
# Lowest test loss seen so far; None until the first epoch has been evaluated.
best_loss = None
# NOTE(review): 'trian' in the path below looks like a typo for 'train'; it is
# left unchanged so that existing log directories keep being used.
logger_train = Logger('./logs/Momentum/0.5_trian')
logger_val = Logger('./logs/Momentum/0.5_val')
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(epoch)
    test_loss, test_acc, images = test(epoch)
    train_loss, train_acc, _ = test_train(epoch)
    elapsed = time.time() - epoch_start_time
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s '.format(epoch, elapsed))
    print('-' * 89)

    # Checkpoint only when the test loss improves, so model.pt always holds
    # the best weights seen so far rather than simply the latest ones.
    if best_loss is None or test_loss < best_loss:
        best_loss = test_loss
        with open('model.pt', 'wb') as fp:
            torch.save(model.state_dict(), fp)

    # ================================================================== #
    #                        Tensorboard Logging                         #
    # ================================================================== #
    # ``epoch`` is already 1-based, so it is used directly as the step.

    # 1. Log scalar values (scalar summary)
    val_scalars = {'loss': test_loss, 'accuracy': test_acc}
    for tag, value in val_scalars.items():
        logger_val.scalar_summary(tag, value, epoch)
    train_scalars = {'loss': train_loss, 'accuracy': train_acc}
    for tag, value in train_scalars.items():
        logger_train.scalar_summary(tag, value, epoch)

    # 2. Log values and gradients of the parameters (histogram summary)
    for tag, param in model.named_parameters():
        tag = tag.replace('.', '/')
        logger_train.histo_summary(tag, param.data.cpu().numpy(), epoch)
        # A parameter that has not received a gradient has grad None.
        if param.grad is not None:
            logger_train.histo_summary(
                tag + '/grad', param.grad.data.cpu().numpy(), epoch)

    # 3. Log the first 10 images of the last batch returned by test()
    #    (image summary)
    sample_images = images.view(-1, 28, 28)[:10].cpu().numpy()
    logger_val.image_summary('images', sample_images, epoch)









Test set: Average loss: 0.3786, Accuracy: 8868/10000 (88%)






Train set: Average loss: 0.3850, Accuracy: 53152/60000 (88%)

-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 14.01s 
-----------------------------------------------------------------------------------------


In [None]:
# Flag set in the final cell; nothing in the visible source reads it.
done = True