In [0]:
try:
    import torch
except:
    from os.path import exists
    from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
    platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
    cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
    accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

    !pip install -q http://download.pytorch.org/whl/{accelerator}/torch-1.0.0-{platform}-linux_x86_64.whl torchvision

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    """Fully connected encoder: ``input_dim -> 500 -> 250 -> latent_dim``.

    A sigmoid follows each of the first two linear layers; the last layer is
    left linear, so the latent code is not squashed.

    Args:
        input_dim (int): size of each flattened input vector.
        latent_dim (int, optional): size of the produced latent code
            (default: 30).
    """

    def __init__(self, input_dim, latent_dim=30):
        super(Encoder, self).__init__()
        # Attribute names (fc1, fc3, fc4) are kept unchanged so that existing
        # state_dict keys still match; there is no fc2 layer in this model.
        self.fc1 = nn.Linear(input_dim, 500)
        self.fc3 = nn.Linear(500, 250)
        self.fc4 = nn.Linear(250, latent_dim)

    def forward(self, x):
        """Map a batch of inputs ``x`` to latent codes.

        Args:
            x (Tensor): tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor: latent codes whose last dimension equals ``latent_dim``.
        """
        out = torch.sigmoid(self.fc1(x))
        out = torch.sigmoid(self.fc3(out))
        # No activation on the final layer.
        return self.fc4(out)

class Decoder(nn.Module):
    """Fully connected decoder: ``input_dim -> 250 -> 500 -> output_dim``.

    Every linear layer, including the last one, is followed by a sigmoid, so
    each reconstructed value lies in ``[0, 1]``.

    Args:
        input_dim (int): size of the latent code fed to the decoder.
        output_dim (int, optional): size of each reconstructed vector
            (default: 784).
    """

    def __init__(self, input_dim, output_dim=784):
        super(Decoder, self).__init__()
        # Attribute names (fc1, fc2, fc4) are kept unchanged so that existing
        # state_dict keys still match; there is no fc3 layer in this model.
        self.fc1 = nn.Linear(input_dim, 250)
        self.fc2 = nn.Linear(250, 500)
        self.fc4 = nn.Linear(500, output_dim)

    def forward(self, x):
        """Map a batch of latent codes ``x`` to reconstructions.

        Args:
            x (Tensor): tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor: reconstructions whose last dimension equals ``output_dim``.
        """
        out = torch.sigmoid(self.fc1(x))
        out = torch.sigmoid(self.fc2(out))
        return torch.sigmoid(self.fc4(out))

In [0]:
from torch.optim.optimizer import Optimizer, required
import copy

class AccSGD(Optimizer):
    r"""Implements the algorithm proposed in https://arxiv.org/pdf/1704.08227.pdf, which is a provably accelerated method 
    for stochastic optimization. This has been employed in https://openreview.net/forum?id=rJTutzbA- for training several 
    deep learning models of practical interest. This code has been implemented by building on the construction of the SGD 
    optimization module found in pytorch codebase.
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float): learning rate (required)
        kappa (float, optional): ratio of long to short step (default: 1000)
        xi (float, optional): statistical advantage parameter (default: 10)
        smallConst (float, optional): any value <=1 (default: 0.7)
        weight_decay (float, optional): coefficient of the parameter term
            added to the gradient before the update (default: 0)
    Example:
        >>> from AccSGD import *
        >>> optimizer = AccSGD(model.parameters(), lr=0.1, kappa = 1000.0, xi = 10.0)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
    """

    def __init__(self, params, lr=required, kappa = 1000.0, xi = 10.0, smallConst = 0.7, weight_decay=0):
        defaults = dict(lr=lr, kappa=kappa, xi=xi, smallConst=smallConst,
                        weight_decay=weight_decay)
        super(AccSGD, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AccSGD, self).__setstate__(state)

    def step(self, closure=None):
        """ Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Returns:
            The value returned by ``closure``, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            small_const = group['smallConst']
            weight_decay = group['weight_decay']
            # Per-group constants derived from the hyper-parameters.
            large_lr = (lr * group['kappa']) / small_const
            alpha = 1.0 - ((small_const * small_const * group['xi']) / group['kappa'])
            beta = 1.0 - alpha
            zeta = small_const / (small_const + beta)
            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    # Out-of-place add: the stored gradient must not be
                    # modified, otherwise a later step() on the same gradient
                    # would apply the decay term again.
                    d_p = d_p.add(p.data, alpha=weight_decay)
                param_state = self.state[p]
                if 'momentum_buffer' not in param_state:
                    # The buffer starts as an independent copy of the parameter.
                    param_state['momentum_buffer'] = p.data.clone()
                buf = param_state['momentum_buffer']
                # The four in-place operations below are kept in their original
                # order so results stay numerically identical:
                # buf <- beta * ((1/beta - 1) * buf - large_lr * d_p + p)
                buf.mul_((1.0 / beta) - 1.0)
                buf.add_(d_p, alpha=-large_lr)
                buf.add_(p.data)
                buf.mul_(beta)

                # p <- zeta * (p - lr * d_p) + (1 - zeta) * buf
                p.data.add_(d_p, alpha=-lr)
                p.data.mul_(zeta)
                p.data.add_(buf, alpha=1.0 - zeta)

        return loss

In [0]:
# Use GPU index 3 when the host has at least four GPUs; otherwise fall back to
# the first GPU, or to the CPU when CUDA is unavailable, so the notebook still runs.
if torch.cuda.is_available():
    device = torch.device("cuda:3" if torch.cuda.device_count() > 3 else "cuda:0")
else:
    device = torch.device("cpu")

In [0]:
# Seed torch's global RNG so weight initialisation is repeatable.
seed = 7
torch.manual_seed(seed)
# Ask cuDNN for deterministic kernels and switch off its auto-tuner, which
# could otherwise select different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [0]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.gridspec as gridspec    
import os
import torch.optim as optim
import numpy as np

from tqdm import tqdm
from itertools import chain

# Use GPU index 3 when the host has at least four GPUs; otherwise fall back to
# the first GPU, or to the CPU when CUDA is unavailable, so the notebook still runs.
if torch.cuda.is_available():
    device = torch.device("cuda:3" if torch.cuda.device_count() > 3 else "cuda:0")
else:
    device = torch.device("cpu")


def MN_SGD(batch_size, nEpoch, lr):
    """Train the FashionMNIST autoencoder with plain SGD.

    A fresh ``Encoder``/``Decoder`` pair is built on the module-level
    ``device``. In every epoch the loader is read from its start and training
    stops once at least 10000 images have been consumed (no ``shuffle``
    argument is passed to the loader).

    Args:
        batch_size (int): number of images per optimisation step.
        nEpoch (int): number of epochs to run.
        lr (float): learning rate given to ``optim.SGD``.

    Returns:
        list: one value per epoch, the square root of the mean batch loss,
        where a batch loss is the summed squared reconstruction error divided
        by the number of images in the batch.
    """
    enc_dim = 30
    image_dim = 784  # [flattened]
    samples_per_epoch = 10000  # training budget per epoch, in images

    # dataset construction
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert to tensor
        transforms.Lambda(lambda x: x.view(image_dim)),  # flatten into vector
    ])
    train_set = torchvision.datasets.FashionMNIST(
        root='./data/FashionMNIST',
        train=True,
        download=True,
        transform=transform,
    )
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)

    # construct the encoder, decoder and optimiser
    enc = Encoder(image_dim).to(device)
    dec = Decoder(enc_dim).to(device)
    optimizer = optim.SGD(chain(enc.parameters(), dec.parameters()), lr=lr)

    # training loop
    loss_all2 = []
    for epoch in range(nEpoch):
        losses = []
        running_total = 0.0  # lets the progress bar show the mean without re-averaging the list
        trainloader = tqdm(train_loader)

        for i, data in enumerate(trainloader, 0):
            if i * batch_size >= samples_per_epoch:
                break

            inputs, _ = data
            optimizer.zero_grad()

            inputs = inputs.to(device)
            outputs = dec(enc(inputs))

            # Summed squared error, normalised by the batch size only.
            loss = F.mse_loss(outputs, inputs, reduction='sum') / inputs.shape[0]
            loss.backward()
            optimizer.step()

            # keep track of the loss and update the stats
            batch_loss = loss.item()
            losses.append(batch_loss)
            running_total += batch_loss
            trainloader.set_postfix(loss=running_total / len(losses), epoch=epoch)
        loss_all2.append(np.sqrt(np.mean(losses)))
    return loss_all2
    


In [0]:
def MN_ASGD(batch_size, nEpoch, lr, kappa, xi):
    """Train the FashionMNIST autoencoder with the ``AccSGD`` optimizer.

    A fresh ``Encoder``/``Decoder`` pair is built on the module-level
    ``device``. In every epoch the loader is read from its start and training
    stops once at least 10000 images have been consumed (no ``shuffle``
    argument is passed to the loader).

    Args:
        batch_size (int): number of images per optimisation step.
        nEpoch (int): number of epochs to run.
        lr (float): learning rate given to ``AccSGD``.
        kappa (float): ``kappa`` argument given to ``AccSGD``.
        xi (float): ``xi`` argument given to ``AccSGD``.

    Returns:
        list: one value per epoch, the square root of the mean batch loss,
        where a batch loss is the summed squared reconstruction error divided
        by the number of images in the batch.
    """
    enc_dim = 30
    image_dim = 784  # [flattened]
    samples_per_epoch = 10000  # training budget per epoch, in images

    # dataset construction
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert to tensor
        transforms.Lambda(lambda x: x.view(image_dim)),  # flatten into vector
    ])
    train_set = torchvision.datasets.FashionMNIST(
        root='./data/FashionMNIST',
        train=True,
        download=True,
        transform=transform,
    )
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)

    # construct the encoder, decoder and optimiser
    enc = Encoder(image_dim).to(device)
    dec = Decoder(enc_dim).to(device)
    optimizer = AccSGD(chain(enc.parameters(), dec.parameters()), lr=lr, kappa=kappa, xi=xi)

    # training loop
    loss_all2 = []
    for epoch in range(nEpoch):
        losses = []
        running_total = 0.0  # lets the progress bar show the mean without re-averaging the list
        trainloader = tqdm(train_loader)

        for i, data in enumerate(trainloader, 0):
            if i * batch_size >= samples_per_epoch:
                break

            inputs, _ = data
            optimizer.zero_grad()

            inputs = inputs.to(device)
            outputs = dec(enc(inputs))

            # Summed squared error, normalised by the batch size only.
            loss = F.mse_loss(outputs, inputs, reduction='sum') / inputs.shape[0]
            loss.backward()
            optimizer.step()

            # keep track of the loss and update the stats
            batch_loss = loss.item()
            losses.append(batch_loss)
            running_total += batch_loss
            trainloader.set_postfix(loss=running_total / len(losses), epoch=epoch)
        loss_all2.append(np.sqrt(np.mean(losses)))
    return loss_all2

In [0]:
def MN_NAG(batch_size, nEpoch, lr, momentum):
    """Train the FashionMNIST autoencoder with SGD using Nesterov momentum.

    A fresh ``Encoder``/``Decoder`` pair is built on the module-level
    ``device``. In every epoch the loader is read from its start and training
    stops once at least 10000 images have been consumed (no ``shuffle``
    argument is passed to the loader).

    Args:
        batch_size (int): number of images per optimisation step.
        nEpoch (int): number of epochs to run.
        lr (float): learning rate given to ``optim.SGD``.
        momentum (float): momentum factor given to ``optim.SGD``
            (used with ``nesterov=True``).

    Returns:
        list: one value per epoch, the square root of the mean batch loss,
        where a batch loss is the summed squared reconstruction error divided
        by the number of images in the batch.
    """
    enc_dim = 30
    image_dim = 784  # [flattened]
    samples_per_epoch = 10000  # training budget per epoch, in images

    # dataset construction
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert to tensor
        transforms.Lambda(lambda x: x.view(image_dim)),  # flatten into vector
    ])
    train_set = torchvision.datasets.FashionMNIST(
        root='./data/FashionMNIST',
        train=True,
        download=True,
        transform=transform,
    )
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)

    # construct the encoder, decoder and optimiser
    enc = Encoder(image_dim).to(device)
    dec = Decoder(enc_dim).to(device)
    optimizer = optim.SGD(
        chain(enc.parameters(), dec.parameters()),
        lr=lr,
        momentum=momentum,
        nesterov=True,
    )

    # training loop
    loss_all2 = []
    for epoch in range(nEpoch):
        losses = []
        running_total = 0.0  # lets the progress bar show the mean without re-averaging the list
        trainloader = tqdm(train_loader)

        for i, data in enumerate(trainloader, 0):
            if i * batch_size >= samples_per_epoch:
                break

            inputs, _ = data
            optimizer.zero_grad()

            inputs = inputs.to(device)
            outputs = dec(enc(inputs))

            # Summed squared error, normalised by the batch size only.
            loss = F.mse_loss(outputs, inputs, reduction='sum') / inputs.shape[0]
            loss.backward()
            optimizer.step()

            # keep track of the loss and update the stats
            batch_loss = loss.item()
            losses.append(batch_loss)
            running_total += batch_loss
            trainloader.set_postfix(loss=running_total / len(losses), epoch=epoch)
        loss_all2.append(np.sqrt(np.mean(losses)))
    return loss_all2

In [0]:
def MN_HB(batch_size, nEpoch, lr, momentum):
    """Train the FashionMNIST autoencoder with SGD plus (heavy-ball) momentum.

    A fresh ``Encoder``/``Decoder`` pair is built on the module-level
    ``device``. In every epoch the loader is read from its start and training
    stops once at least 10000 images have been consumed (no ``shuffle``
    argument is passed to the loader).

    Args:
        batch_size (int): number of images per optimisation step.
        nEpoch (int): number of epochs to run.
        lr (float): learning rate given to ``optim.SGD``.
        momentum (float): momentum factor given to ``optim.SGD``.

    Returns:
        list: one value per epoch, the square root of the mean batch loss,
        where a batch loss is the summed squared reconstruction error divided
        by the number of images in the batch.
    """
    enc_dim = 30
    image_dim = 784  # [flattened]
    samples_per_epoch = 10000  # training budget per epoch, in images

    # dataset construction
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert to tensor
        transforms.Lambda(lambda x: x.view(image_dim)),  # flatten into vector
    ])
    train_set = torchvision.datasets.FashionMNIST(
        root='./data/FashionMNIST',
        train=True,
        download=True,
        transform=transform,
    )
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)

    # construct the encoder, decoder and optimiser
    enc = Encoder(image_dim).to(device)
    dec = Decoder(enc_dim).to(device)
    optimizer = optim.SGD(
        chain(enc.parameters(), dec.parameters()),
        lr=lr,
        momentum=momentum,
    )

    # training loop
    loss_all2 = []
    for epoch in range(nEpoch):
        losses = []
        running_total = 0.0  # lets the progress bar show the mean without re-averaging the list
        trainloader = tqdm(train_loader)

        for i, data in enumerate(trainloader, 0):
            if i * batch_size >= samples_per_epoch:
                break

            inputs, _ = data
            optimizer.zero_grad()

            inputs = inputs.to(device)
            outputs = dec(enc(inputs))

            # Summed squared error, normalised by the batch size only.
            loss = F.mse_loss(outputs, inputs, reduction='sum') / inputs.shape[0]
            loss.backward()
            optimizer.step()

            # keep track of the loss and update the stats
            batch_loss = loss.item()
            losses.append(batch_loss)
            running_total += batch_loss
            trainloader.set_postfix(loss=running_total / len(losses), epoch=epoch)
        loss_all2.append(np.sqrt(np.mean(losses)))
    return loss_all2

In [0]:
# Run the four optimizers on the same task and collect their per-epoch loss curves.
seed = 7
torch.backends.cudnn.deterministic = True

nEpoch = 30
Batch_size = 1

# Re-seed before every run: each MN_* call builds a new Encoder/Decoder, and
# seeding only once would give each optimizer different initial weights,
# which would make the curves not directly comparable.
torch.manual_seed(seed)
loss_SGD = MN_SGD(Batch_size, nEpoch, 0.00001*np.sqrt(10))
torch.manual_seed(seed)
loss_NAG = MN_NAG(Batch_size, nEpoch, 0.00002, 0.52)
torch.manual_seed(seed)
loss_HB = MN_HB(Batch_size, nEpoch, 0.00002, 0.5)
torch.manual_seed(seed)
loss_ASGD = MN_ASGD(Batch_size, nEpoch, 0.0001, 100, 0.25)


In [0]:
# loss_init = max(loss_SGD[0], loss_NAG[0], loss_HB[0], loss_ASGD[0])
# dif = np.zeros(4)
# dif = loss_init * np.ones(4) - [loss_SGD[0], loss_NAG[0], loss_HB[0], loss_ASGD[0]]

# loss_SGD = loss_SGD + dif[0]
# loss_NAG = loss_NAG + dif[1]
# loss_HB = loss_HB + dif[2]
# loss_ASGD = loss_ASGD + dif[3]
# loss_SGD[0] = loss_init
# loss_NAG[0] = loss_init
# loss_HB[0] = loss_init
# loss_ASGD[0] = loss_init

import matplotlib.pyplot as plt

# Plot one curve per optimizer; each list holds one value per epoch
# (the square root of that epoch's mean batch loss, as returned by the MN_* functions).
plt.figure()
plt.title('Comparison of different methods (FashionMNIST)')
plt.xlabel('Epochs')
plt.ylabel('MSE loss')
# Integer epoch numbers 1..nEpoch, so every point sits exactly on its epoch.
x = np.arange(1, nEpoch + 1)
plt.plot(x, loss_SGD, label = 'SGD')
plt.plot(x, loss_NAG, label = 'NAG')
plt.plot(x, loss_HB, label = 'HB')
plt.plot(x, loss_ASGD, label = 'AccSGD')
plt.legend()
# Save before showing: the figure may be cleared once it has been displayed.
plt.savefig('Compare(New dataset).png')
plt.show()