In [None]:
try:
    import torch
except:
    from os.path import exists
    from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
    platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
    cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
    accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

    !pip install -q http://download.pytorch.org/whl/{accelerator}/torch-1.0.0-{platform}-linux_x86_64.whl torchvision

try: 
    import torchbearer
except:
    !pip install torchbearer
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchbearer
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchbearer import Trial
import os
import numpy as np
# Use the first CUDA device when PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

In [3]:
class CNN(nn.Module):
    """Small two-convolution image classifier that returns 10 raw class scores.

    Two unpadded 3x3 convolutions (3 -> 48 -> 48 channels), each followed by a
    ReLU, are reduced to an 8x8 grid by adaptive max pooling, flattened, and
    passed through a 128-unit hidden layer to a 10-way linear output. No
    softmax is applied in ``forward``.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 48, (3, 3), stride=1, padding=0)
        self.conv2 = nn.Conv2d(48, 48, (3, 3), stride=1, padding=0)
        # Built once here instead of on every forward call. The layer has no
        # parameters, so it adds no entries to the state dict.
        self.pool = nn.AdaptiveMaxPool2d(8)
        self.fc1 = nn.Linear(48 * 8 ** 2, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of 3-channel images ``x``."""
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        # Pool to a fixed 8x8 grid so the flattened size always matches fc1.
        out = self.pool(out)
        out = out.view(out.shape[0], -1)
        out = F.relu(self.fc1(out))
        return self.fc2(out)
  

In [4]:
import torch
from torch.optim import Optimizer


class MySGD(Optimizer):
    """SGD variant that couples every parameter with an auxiliary buffer.

    For each parameter ``p`` with gradient ``g`` (plus ``weight_decay * p``
    when weight decay is non-zero) one step performs::

        buf <- g                                            (first step for p)
        buf <- (1 - alpha) * buf - delta * g + alpha * p    (later steps)
        p   <- (p - lr * g + alpha * buf) / (1 + alpha)

    where ``delta = lr / alpha / kappa_t``. The buffer is stored per parameter
    under the ``'momentum_buffer'`` state key.

    NOTE(review): the buffer is seeded with the gradient on the first step
    (as in a classic momentum buffer). If this is meant to be a MaSS-style
    scheme, the auxiliary sequence may have been intended to start from the
    parameter values instead; confirm before changing, since it alters results.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: learning rate (must not be negative).
        weight_decay: L2 penalty coefficient (must not be negative).
        alpha: coupling weight between parameter and buffer (must not be
            negative). With ``alpha == 0`` the buffer has zero weight in the
            parameter update, which then reduces to ``p <- p - lr * g``.
        kappa_t: divisor used in ``delta`` (must not be negative, and must be
            non-zero whenever ``alpha`` is non-zero).

    Raises:
        ValueError: on a negative hyper-parameter at construction, or in
            ``step`` when ``alpha != 0`` and ``kappa_t == 0``.
    """

    def __init__(self, params, lr=0, weight_decay=0, alpha=0, kappa_t=0):
        # `lr` may be passed as None by callers; only reject real negatives.
        if lr is not None and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if weight_decay < 0.0:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if alpha < 0.0:
            raise ValueError("Invalid alpha value: {}".format(alpha))
        if kappa_t < 0.0:
            # The original message wrongly blamed `alpha` here.
            raise ValueError("Invalid kappa_t value: {}".format(kappa_t))

        defaults = dict(lr=lr, weight_decay=weight_decay, alpha=alpha, kappa_t=kappa_t)
        super(MySGD, self).__init__(params, defaults)

    @staticmethod
    def _buffer_step_size(lr, alpha, kappa_t):
        """Return ``lr / alpha / kappa_t`` without dividing by zero."""
        if alpha == 0:
            # The buffer enters the parameter update with weight
            # alpha / (1 + alpha) == 0, so its own step size is irrelevant.
            return 0.0
        if kappa_t == 0:
            raise ValueError("kappa_t must be non-zero when alpha is non-zero")
        return lr / alpha / kappa_t

    @torch.no_grad()
    def step(self, closure=None):
        """Perform one optimisation step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if none was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            lr = group['lr']
            alpha = group['alpha']
            kappa_t = group['kappa_t']
            # Constant for the whole group, so compute (and validate) it once.
            delta = self._buffer_step_size(lr, alpha, kappa_t)

            for param in group['params']:
                if param.grad is None:
                    continue

                d_p = param.grad
                if weight_decay != 0:
                    # Out-of-place add: leave the stored gradient untouched.
                    d_p = d_p.add(param, alpha=weight_decay)

                state = self.state[param]
                buf = state.get('momentum_buffer')
                if buf is None:
                    # First step for this parameter: seed the buffer.
                    buf = torch.clone(d_p).detach()
                    state['momentum_buffer'] = buf
                else:
                    # Uses the parameter value from *before* this step's update.
                    buf.mul_(1 - alpha).add_(d_p, alpha=-delta).add_(param, alpha=alpha)

                param.mul_(1 / (1 + alpha)) \
                     .add_(d_p, alpha=-(lr / (1 + alpha))) \
                     .add_(buf, alpha=alpha / (1 + alpha))

        return loss

In [10]:
import torch
import torchvision
import torchvision.transforms as transforms
 
# Preprocessing shared by both splits: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training and held-out splits, stored under ./data (download=True).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

# Batches of 100 with two worker processes; only the training data is shuffled.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2
)

# Class names listed by label index.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torchbearer

# Train a fresh CNN with the custom MySGD optimiser.
model = CNN()
loss_function = nn.CrossEntropyLoss()
optimiser = MySGD(model.parameters(), lr=0.1, alpha=0.1, kappa_t=2)

# Construct a trial object with the model, optimiser and loss.
# Also specify metrics we wish to compute.
trial = torchbearer.Trial(model, optimiser, loss_function, metrics=['loss', 'accuracy']).to(device)

# Provide the data to the trial. The held-out split is registered as both the
# validation and the test generator, so the TEST_DATA evaluation below has
# data to run on (previously no test generator was registered).
trial.with_generators(trainloader, val_generator=testloader, test_generator=testloader)

# Run 60 epochs of training
history = trial.run(epochs=60)
print(history)
# Evaluate on the test generator
results = trial.evaluate(data_key=torchbearer.TEST_DATA)
print(results)

In [14]:
# Per-epoch training loss, training accuracy and validation loss of the MySGD
# run. Iterating over `history` itself avoids a hard-coded epoch count that
# would have to be kept in sync with `epochs=` in the training cell.
Loss1 = [epoch_metrics['loss'] for epoch_metrics in history]
Acc1 = [epoch_metrics['acc'] for epoch_metrics in history]
Val_loss1 = [epoch_metrics['val_loss'] for epoch_metrics in history]
print(Loss1)
print(Acc1)


[1.6595302820205688, 1.089608907699585, 0.8624507784843445, 0.715684175491333, 0.5968138575553894, 0.49000096321105957, 0.39606109261512756, 0.32508528232574463, 0.2593444883823395, 0.21017181873321533, 0.18876521289348602, 0.16819941997528076, 0.16294121742248535, 0.15155437588691711, 0.12669378519058228, 0.12201657146215439, 0.11998090893030167, 0.1255798637866974, 0.10911770910024643, 0.1059427261352539, 0.11743929982185364, 0.11702372878789902, 0.1035865992307663, 0.11321404576301575, 0.10667584091424942, 0.10058553516864777, 0.09835244715213776, 0.09684310853481293, 0.112906813621521, 0.14667607843875885, 0.12447988986968994, 0.10417234897613525, 0.10207054018974304, 0.10060425847768784, 0.11114154756069183, 0.13306912779808044, 0.14495858550071716, 0.10528937727212906, 0.12258496880531311, 0.13374245166778564, 0.12920373678207397, 0.15796063840389252, 0.13962580263614655, 0.16657689213752747, 0.1585366427898407, 0.1680462658405304, 0.16857866942882538, 0.15619727969169617, 0.1957

In [None]:
# Train a fresh CNN with plain SGD as a baseline.
model = CNN()
loss_function = nn.CrossEntropyLoss()
optimiser = optim.SGD(model.parameters(), lr=0.1)

# Construct a trial object with the model, optimiser and loss.
# Also specify metrics we wish to compute.
trial = torchbearer.Trial(model, optimiser, loss_function, metrics=['loss', 'accuracy']).to(device)

# Provide the data to the trial. The held-out split is registered as both the
# validation and the test generator, so the TEST_DATA evaluation below has
# data to run on (previously no test generator was registered).
trial.with_generators(trainloader, val_generator=testloader, test_generator=testloader)

# Run 60 epochs of training
history1 = trial.run(epochs=60)
# Print this run's history (the original printed `history` from the MySGD run).
print(history1)
# Evaluate on the test generator
results1 = trial.evaluate(data_key=torchbearer.TEST_DATA)
print(results1)

In [10]:
# Per-epoch training loss of the plain SGD run. Iterating over `history1`
# avoids a hard-coded epoch count.
Loss2 = [epoch_metrics['loss'] for epoch_metrics in history1]
print(Loss2)
np.size(Loss2)

[1.6765738725662231, 1.2009621858596802, 0.9952007532119751, 0.8623884916305542, 0.7522647380828857, 0.6593022346496582, 0.5731284022331238, 0.49313122034072876, 0.41510865092277527, 0.3448365032672882, 0.28007635474205017, 0.21756933629512787, 0.1654188334941864, 0.133222758769989, 0.11244583874940872, 0.08245682716369629, 0.06250142306089401, 0.034286778420209885, 0.031165331602096558, 0.021016495302319527, 0.00478616077452898, 0.0017345347441732883, 0.001032016589306295, 0.0007828242378309369, 0.000658349716104567, 0.0005736048333346844, 0.0005072026397101581, 0.0004568419244606048, 0.00041555846109986305, 0.0003822885046247393, 0.00035371549893170595, 0.0003285490965936333, 0.00030735431937500834, 0.0002889794996008277, 0.0002726642997004092, 0.0002581454173196107, 0.0002445896971039474, 0.00023309704556595534, 0.00022218325466383249, 0.0002123851445503533, 0.0002034950884990394, 0.0001950357691384852, 0.0001875679736258462, 0.00018040501163341105, 0.00017382549413014203, 0.0001678

60

In [None]:
# Train a fresh CNN with SGD plus Nesterov momentum.
model = CNN()
loss_function = nn.CrossEntropyLoss()
optimiser = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, nesterov=True)

# Construct a trial object with the model, optimiser and loss.
# Also specify metrics we wish to compute.
trial = torchbearer.Trial(model, optimiser, loss_function, metrics=['loss', 'accuracy']).to(device)

# Provide the data to the trial. The held-out split is registered as both the
# validation and the test generator, so the TEST_DATA evaluation below has
# data to run on (previously no test generator was registered).
trial.with_generators(trainloader, val_generator=testloader, test_generator=testloader)

# Run 60 epochs of training
history3 = trial.run(epochs=60)
# Print this run's history (the original printed `history` from the MySGD run).
print(history3)
# Evaluate on the test generator
results3 = trial.evaluate(data_key=torchbearer.TEST_DATA)
# Print this run's results (the original printed `results` from the MySGD run).
print(results3)

In [13]:
# Per-epoch training loss of the SGD + Nesterov run. That run is stored in
# `history3`; the original read `history2`, which no cell defines.
Loss3 = [epoch_metrics['loss'] for epoch_metrics in history3]
print(Loss3)
np.size(Loss3)

[1.4680876731872559, 1.1098555326461792, 0.9701194167137146, 0.8800005912780762, 0.8325933218002319, 0.7760983109474182, 0.7314146757125854, 0.7388599514961243, 0.7045684456825256, 0.6835907697677612, 0.6802206635475159, 0.706415057182312, 0.7283248901367188, 0.728633463382721, 0.7593652606010437, 0.7576196193695068, 0.879523754119873, 0.8105671405792236, 0.8322906494140625, 0.9014932513237, 0.986574113368988, 0.9778113961219788, 1.0157983303070068, 1.096746802330017, 1.1240644454956055, 1.1506317853927612, 1.2120808362960815, 1.225786566734314, 1.3148812055587769, 1.4331473112106323, 1.761904239654541, 2.157270908355713, 2.267277717590332, 2.304593563079834, 2.305290937423706, 2.3046748638153076, 2.304579973220825, 2.304532766342163, 2.304641008377075, 2.3044843673706055, 2.3049025535583496, 2.304755926132202, 2.3048110008239746, 2.3048255443573, 2.304434299468994, 2.3046817779541016, 2.304640054702759, 2.3048062324523926, 2.3045578002929688, 2.304250478744507, 2.3043854236602783, 2.3

60

In [None]:
import matplotlib.pyplot as plt
import matplotlib
# x-axis: one point per recorded epoch, taken from the data rather than a
# hard-coded count. All three runs must have the same number of epochs.
epoch = list(range(len(Loss1)))

# Training-loss curves of the three optimisers on a single set of axes.
fig, ax = plt.subplots()
for label, losses in (('MaSS', Loss1), ('SGD', Loss2), ('SGD+nesterov', Loss3)):
    ax.plot(epoch, losses, label=label)
ax.legend()
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title("CNN on CIFAR-10")
# Save before showing so the file is written from the fully drawn figure.
fig.savefig('curve.png')
plt.show()