# Using LBFGS

In [1]:
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Net(nn.Module):
    """Small convolutional classifier producing log-probabilities over 10 classes.

    Architecture: two 5x5 convolutions (1 -> 10 -> 20 channels), each followed
    by 2x2 max-pooling and ReLU, then two fully connected layers (320 -> 50 -> 10).
    Dropout is applied after the second convolution and after the first
    fully connected layer.

    The flattening step assumes 320 features per sample, which is what a
    single-channel 28x28 input yields (20 channels x 4 x 4).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: input batch; must flatten to 320 features per sample after the
                two conv/pool stages.

        Returns:
            Tensor of shape ``(batch, 10)`` holding log-softmax scores.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class dimension explicitly; omitting ``dim``
        # relies on a deprecated implicit choice and emits a warning.
        return F.log_softmax(x, dim=1)

In [8]:
# Hyperparameter grid for the LBFGS experiment.
learning_rates = [1, 0.1, 0.05]  # optimizer learning rates to sweep
batch_sizes = [64]               # training mini-batch sizes to sweep
epochs = 9                       # maximum number of epochs per configuration

In [9]:
def train(train_loader):
    """Train the global ``model`` for one epoch and evaluate it on the same data.

    Uses the module-level ``model`` and ``optimizer``.

    Args:
        train_loader: iterable yielding ``(data, target)`` mini-batches.

    Returns:
        The ``(avg_loss, error)`` pair returned by ``test(train_loader)``.
    """
    model.train()
    for data, target in train_loader:

        def closure():
            # The optimizer is handed this closure so it can recompute the
            # loss and gradients itself (LBFGS may evaluate it more than once
            # per step), hence the full forward/backward pass lives here.
            optimizer.zero_grad()
            loss = F.nll_loss(model(data), target)
            loss.backward()
            return loss

        optimizer.step(closure)
    return test(train_loader)


def test(test_loader):
    """Evaluate the global ``model`` on every batch of ``test_loader``.

    The model is switched to evaluation mode (dropout disabled) for the
    duration of the call and its previous train/eval mode is restored
    afterwards. No gradients are tracked.

    Args:
        test_loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(avg_loss, error)`` of Python floats: the mean negative
        log-likelihood per sample and the classification error in percent.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    correct = 0
    total = 0
    total_loss = 0.0

    was_training = model.training
    model.eval()  # dropout must be off, otherwise the metrics are noisy
    try:
        with torch.no_grad():
            for images, labels in test_loader:
                output = model(images)
                # Sum (not mean) per batch so that a smaller final batch does
                # not get over-weighted in the overall average.
                total_loss += F.nll_loss(output, labels, reduction='sum').item()
                _, prediction = torch.max(output, 1)
                # .item() keeps the counters plain Python numbers.
                correct += (prediction == labels).sum().item()
                total += labels.size(0)
    finally:
        model.train(was_training)

    if total == 0:
        raise ValueError('test_loader yielded no samples')

    acc = 100.0 * correct / total
    error = 100.0 - acc
    avg_loss = total_loss / total
    return avg_loss, error

In [10]:
# Grid search: train a fresh model with LBFGS for every
# (batch_size, learning rate) combination and report the final metrics.
for batch_size in batch_sizes:
    for lr in learning_rates:
        # Per-run settings; log_interval is not used in the visible code.
        seed, log_interval, test_batch_size = 1, 10, 1000
        # Re-seed so that every configuration starts from the same RNG state.
        torch.manual_seed(seed)

        # Identical preprocessing for both splits: tensor conversion followed
        # by normalisation with mean 0.1307 and std 0.3081.
        mnist_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        train_set = datasets.MNIST('../data', train=True, download=True,
                                   transform=mnist_transform)
        test_set = datasets.MNIST('../data', train=False,
                                  transform=mnist_transform)

        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=batch_size, shuffle=True)
        test_loader = torch.utils.data.DataLoader(
            test_set, batch_size=test_batch_size, shuffle=True)

        # Fresh model and optimizer for this configuration.
        model = Net()
        optimizer = optim.LBFGS(model.parameters(), lr=lr)

        for epoch in range(1, epochs + 1):
            train_loss, train_error = train(train_loader)
            # Stop early once the training loss drops below 0.13.
            if train_loss < 0.13:
                break

        test_loss, test_error = test(test_loader)
        summary = 'Final epoch: {0}, train loss: {5}, train error: {1}, test error: {2} for parameters: batch_size = {3}, lr = {4}'
        print(summary.format(epoch, train_error, test_error,
                             batch_size, lr, train_loss))
        
        



KeyboardInterrupt: 

## Observations

- LBFGS is a computationally heavy and memory-intensive optimizer.
- It also has many parameters to choose from, but tuning them is difficult because each run takes so long.
- Hence it is not used much in practice now.
- As of submission, it has not completed its first run.