In [43]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [44]:
transformer = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

In [45]:
trainset = datasets.MNIST('./data/MNIST_data/', download=True, train=True, transform=transformer)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

In [46]:
from torch import nn, optim
import torch.nn.functional as F

class ClassifierNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.ly1 = nn.Linear(784, 128)
        self.ly2 = nn.Linear(128,64)
        self.ly3 = nn.Linear(64, 10)

    
    def forward(self, x):
        x = x.view(x.shape[0], -1) #flatten

        x = F.relu(self.ly1(x))
        x = F.relu(self.ly2(x))

        return F.log_softmax(self.ly3(x), dim=1)


EPOCHS = 70
criterion = nn.NLLLoss()

In [47]:
def train(model, optimizer, log_title=""):
    j_history = []
    for e in range(EPOCHS):
        epoch_loss = 0
        for imgs, labels in trainloader:
            probab = model(imgs)
            loss = criterion(probab, labels)
            epoch_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        
        if log_title:
            print(f'{log_title} @ epoch {e+1} :: loss = {epoch_loss/len(trainloader)}')
        j_history.append(epoch_loss/len(trainloader))
    
    return j_history

#### SGD


In [48]:
model = ClassifierNN()

optimizer = optim.SGD(model.parameters(), lr=0.03)
sgd_losses = train(model, optimizer, "SGD")



SGD @ epoch 1 :: loss = 0.6152523378732362
SGD @ epoch 2 :: loss = 0.2807812547243671
SGD @ epoch 3 :: loss = 0.21742715007031777
SGD @ epoch 4 :: loss = 0.17581133170327398
SGD @ epoch 5 :: loss = 0.14681547358910094
SGD @ epoch 6 :: loss = 0.12500252016285843
SGD @ epoch 7 :: loss = 0.11007254887054534
SGD @ epoch 8 :: loss = 0.09772952987220702
SGD @ epoch 9 :: loss = 0.08768878578802129
SGD @ epoch 10 :: loss = 0.07914540283818806
SGD @ epoch 11 :: loss = 0.07235865304464979
SGD @ epoch 12 :: loss = 0.06655947653763393
SGD @ epoch 13 :: loss = 0.06117905895096629
SGD @ epoch 14 :: loss = 0.056332898391748286
SGD @ epoch 15 :: loss = 0.05182529879567116
SGD @ epoch 16 :: loss = 0.04870602336458004
SGD @ epoch 17 :: loss = 0.04445923399229818
SGD @ epoch 18 :: loss = 0.041548487062363436
SGD @ epoch 19 :: loss = 0.03848372612209089
SGD @ epoch 20 :: loss = 0.035908889348012035
SGD @ epoch 21 :: loss = 0.03312955668525222
SGD @ epoch 22 :: loss = 0.030855967105601602
SGD @ epoch 23 ::

##### Momentum


In [49]:
model = ClassifierNN()
optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)
mtm_losses = train(model, optimizer, "Momentum")


Momentum @ epoch 1 :: loss = 0.3506138671789247
Momentum @ epoch 2 :: loss = 0.1687912673228728
Momentum @ epoch 3 :: loss = 0.133046903224515
Momentum @ epoch 4 :: loss = 0.11100080851783185
Momentum @ epoch 5 :: loss = 0.09949152359216691
Momentum @ epoch 6 :: loss = 0.08498761556627574
Momentum @ epoch 7 :: loss = 0.07483103296740341
Momentum @ epoch 8 :: loss = 0.06869970910284154
Momentum @ epoch 9 :: loss = 0.061800605370174115
Momentum @ epoch 10 :: loss = 0.05942660960505209
Momentum @ epoch 11 :: loss = 0.056431063069978846
Momentum @ epoch 12 :: loss = 0.05278043846440524
Momentum @ epoch 13 :: loss = 0.05138230281383065
Momentum @ epoch 14 :: loss = 0.04863370526810366
Momentum @ epoch 15 :: loss = 0.043976276353492015
Momentum @ epoch 16 :: loss = 0.04585115477683329
Momentum @ epoch 17 :: loss = 0.04103133875273738
Momentum @ epoch 18 :: loss = 0.0412435893109211
Momentum @ epoch 19 :: loss = 0.03707508980534533
Momentum @ epoch 20 :: loss = 0.034315812059623745
Momentum @

##### Nesterov

In [50]:
model = ClassifierNN()
optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9, nesterov=True)
nestv_losses = train(model, optimizer, "Nesterov")

Nesterov @ epoch 1 :: loss = 0.32700309465760424
Nesterov @ epoch 2 :: loss = 0.15528091838134567
Nesterov @ epoch 3 :: loss = 0.1229873645588009
Nesterov @ epoch 4 :: loss = 0.10092318460751357
Nesterov @ epoch 5 :: loss = 0.0880665971608614
Nesterov @ epoch 6 :: loss = 0.07713217000996542
Nesterov @ epoch 7 :: loss = 0.07247735617899778
Nesterov @ epoch 8 :: loss = 0.06359403644430775
Nesterov @ epoch 9 :: loss = 0.05971834635522628
Nesterov @ epoch 10 :: loss = 0.057002115394122824
Nesterov @ epoch 11 :: loss = 0.05106404230393507
Nesterov @ epoch 12 :: loss = 0.05221546721423908
Nesterov @ epoch 13 :: loss = 0.05182842702497568
Nesterov @ epoch 14 :: loss = 0.04466537788274141
Nesterov @ epoch 15 :: loss = 0.042764380485774627
Nesterov @ epoch 16 :: loss = 0.03856047143492458
Nesterov @ epoch 17 :: loss = 0.03688265363974684
Nesterov @ epoch 18 :: loss = 0.038468587592150195
Nesterov @ epoch 19 :: loss = 0.04187263135745156
Nesterov @ epoch 20 :: loss = 0.04083148409586538
Nesterov

##### Adagrad

In [51]:
model = ClassifierNN()
optimizer = optim.Adagrad(model.parameters(), lr=0.03, eps=1e-8 )
adgd_losses = train(model, optimizer, "Adagrad")

Adagrad @ epoch 1 :: loss = 0.39952713315849747
Adagrad @ epoch 2 :: loss = 0.16495432685226646
Adagrad @ epoch 3 :: loss = 0.128333741848641
Adagrad @ epoch 4 :: loss = 0.10873087396376582
Adagrad @ epoch 5 :: loss = 0.09648636189837462
Adagrad @ epoch 6 :: loss = 0.08684126697758686
Adagrad @ epoch 7 :: loss = 0.0788474696328931
Adagrad @ epoch 8 :: loss = 0.07305026507831172
Adagrad @ epoch 9 :: loss = 0.06780134544753508
Adagrad @ epoch 10 :: loss = 0.06358407477864118
Adagrad @ epoch 11 :: loss = 0.058914115763892896
Adagrad @ epoch 12 :: loss = 0.0551839428606318
Adagrad @ epoch 13 :: loss = 0.05258623373072678
Adagrad @ epoch 14 :: loss = 0.048664606813498254
Adagrad @ epoch 15 :: loss = 0.04621072389733499
Adagrad @ epoch 16 :: loss = 0.04377545394325581
Adagrad @ epoch 17 :: loss = 0.0419877212676309
Adagrad @ epoch 18 :: loss = 0.039124745799703646
Adagrad @ epoch 19 :: loss = 0.03718797534407337
Adagrad @ epoch 20 :: loss = 0.03568633419383607
Adagrad @ epoch 21 :: loss = 0.

##### RMSProp

In [52]:
model = ClassifierNN()
optimizer = optim.RMSprop(model.parameters(), lr=0.03, momentum=0.9, eps=1e-8 )
rms_losses = train(model, optimizer, "RMSProp")

RMSProp @ epoch 1 :: loss = 13.951126111977135
RMSProp @ epoch 2 :: loss = 2.2638618860925948
RMSProp @ epoch 3 :: loss = 2.304280276491698
RMSProp @ epoch 4 :: loss = 2.304263625317799
RMSProp @ epoch 5 :: loss = 2.3041053001306206
RMSProp @ epoch 6 :: loss = 2.3040398011075407
RMSProp @ epoch 7 :: loss = 2.304174480407731
RMSProp @ epoch 8 :: loss = 2.304006122322733
RMSProp @ epoch 9 :: loss = 2.303946364663049
RMSProp @ epoch 10 :: loss = 2.3040899788138707
RMSProp @ epoch 11 :: loss = 2.3041130677977604
RMSProp @ epoch 12 :: loss = 2.304160317632435
RMSProp @ epoch 13 :: loss = 2.3042141994687793
RMSProp @ epoch 14 :: loss = 2.304062442484695
RMSProp @ epoch 15 :: loss = 2.3039230822500136
RMSProp @ epoch 16 :: loss = 2.304133212642629
RMSProp @ epoch 17 :: loss = 2.303661875379111
RMSProp @ epoch 18 :: loss = 2.3043538733586066
RMSProp @ epoch 19 :: loss = 2.30385009439261
RMSProp @ epoch 20 :: loss = 2.30402466762803
RMSProp @ epoch 21 :: loss = 2.3039864713449214
RMSProp @ epoc

##### Adam

In [53]:
model = ClassifierNN()
optimizer = optim.Adam(model.parameters(), lr=0.03, betas=(0.9, 0.998))
adam_losses = train(model, optimizer, "Adam")


Adam @ epoch 1 :: loss = 0.7008632733019938
Adam @ epoch 2 :: loss = 0.5501073752956858
Adam @ epoch 3 :: loss = 0.577424107838287
Adam @ epoch 4 :: loss = 0.542257477336728
Adam @ epoch 5 :: loss = 0.5552074499348841
Adam @ epoch 6 :: loss = 0.5593465002202022
Adam @ epoch 7 :: loss = 0.5763628329517745
Adam @ epoch 8 :: loss = 0.5655600128969404
Adam @ epoch 9 :: loss = 0.5851348993906589
Adam @ epoch 10 :: loss = 0.578043876362762
Adam @ epoch 11 :: loss = 0.560443198280548
Adam @ epoch 12 :: loss = 0.5546745009767984
Adam @ epoch 13 :: loss = 0.5660284589856927
Adam @ epoch 14 :: loss = 0.558677095514752
Adam @ epoch 15 :: loss = 0.5659635899735412
Adam @ epoch 16 :: loss = 0.5698063774927974
Adam @ epoch 17 :: loss = 0.5939882439749835
Adam @ epoch 18 :: loss = 0.6608983554533804
Adam @ epoch 19 :: loss = 0.6551227209442205
Adam @ epoch 20 :: loss = 0.6565487606566089
Adam @ epoch 21 :: loss = 0.6485524595355683
Adam @ epoch 22 :: loss = 0.6248300408106496
Adam @ epoch 23 :: loss 

In [1]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10,6))
plt.plot(sgd_losses, label='SGD', color='r')
plt.plot(mtm_losses, label='Momentum', color='g')
plt.plot(nestv_losses, label='Nesterov', color='b')
plt.plot(adgd_losses, label='Adagrad', color='y')
plt.plot(rms_losses, label='RMSProp', color='m')
plt.plot(adam_losses, label='Adam', color='k')

plt.title('Loss trends')
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.legend()
plt.show()

NameError: name 'sgd_losses' is not defined

<Figure size 1000x600 with 0 Axes>

In [55]:
import numpy as np

keep_prob = 0.8   # 0 <= keep_prob <= 1
l = 3  # this code is only for layer 3
# the generated number that are less than 0.8 will be dropped. 80% stay, 20% dropped
d3 = np.random.rand(a[l].shape[0], a[l].shape[1]) < keep_prob

NameError: name 'a' is not defined