In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.optim.lr_scheduler import StepLR

In [35]:
# Set seed
torch.manual_seed(0)

<torch._C.Generator at 0x13f7b7e50>

In [36]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

In [37]:
train_dataset = dsets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

# train_dataset = train_dataset.data[:100]
# test_dataset = test_dataset.data[:20]

In [38]:
train_dataset.data.shape, test_dataset.data.shape

(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))

In [39]:
batch_size = 8
num_epochs = 5

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True) 

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False) 

len(train_loader)


7500

In [40]:
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        # Linear function
        self.fc1 = nn.Linear(input_dim, hidden_dim) 
        # Non-linearity
        self.relu = nn.ReLU()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)  

    def forward(self, x):
        # Linear function
        out = self.fc1(x)
        # Non-linearity
        out = self.relu(out)
        # Linear function (readout)
        out = self.fc2(out)
        return out

In [41]:
input_dim = 28*28
hidden_dim = 100
output_dim = 10
learning_rate = 0.1

In [42]:
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
model

FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)

In [43]:
criterion = nn.CrossEntropyLoss()

In [44]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)

In [45]:
# step_size: at how many multiples of epoch you decay
# step_size = 1, after every 1 epoch, new_lr = lr * gamma 
# step_size = 2, after every 2 epoch, new_lr = lr * gamma 

# gamma = decaying factor
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

In [46]:
for i, (images, labels) in enumerate(train_loader):
    # print(images.view(-1, 28*28).shape)
    images = images.view(-1, 28*28).requires_grad_() # torch.Size(1,784)

    # Clear gradients w.r.t. parameters
    optimizer.zero_grad()

    # Forward pass to get output/logits
    outputs = model(images)
    loss = criterion(outputs, labels)
    break

In [47]:
print(outputs.shape)

torch.Size([8, 10])


In [48]:
iter = 0
for epoch in range(num_epochs):
    # Print Learning Rate
    print('Epoch:', epoch,'LR:', scheduler.get_lr())
    for i, (images, labels) in enumerate(train_loader):
        # Load images
        images = images.view(-1, 28*28).requires_grad_() # torch.Size(2,784)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images) # logits, dimension 10

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Load images to a Torch Variable
                images = images.view(-1, 28*28)

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item()/8, accuracy))
        
    # Decay Learning Rate
    scheduler.step()

Epoch: 0 LR: [0.1]
Iteration: 500. Loss: 0.20129147171974182. Accuracy: 0.6754999756813049
Iteration: 1000. Loss: 0.041485805064439774. Accuracy: 0.6621000170707703
Iteration: 1500. Loss: 0.25040629506111145. Accuracy: 0.5716000199317932
Iteration: 2000. Loss: 0.195490762591362. Accuracy: 0.5327000021934509
Iteration: 2500. Loss: 0.133193239569664. Accuracy: 0.6402999758720398
Iteration: 3000. Loss: 0.169741690158844. Accuracy: 0.6416000127792358
Iteration: 3500. Loss: 0.22962498664855957. Accuracy: 0.5105000138282776
Iteration: 4000. Loss: 0.04928706958889961. Accuracy: 0.628600001335144
Iteration: 4500. Loss: 0.10088468343019485. Accuracy: 0.5314000248908997
Iteration: 5000. Loss: 0.14670239388942719. Accuracy: 0.5123999714851379
Iteration: 5500. Loss: 0.223624587059021. Accuracy: 0.5368000268936157
Iteration: 6000. Loss: 0.19441582262516022. Accuracy: 0.40860000252723694
Iteration: 6500. Loss: 0.17741283774375916. Accuracy: 0.4999000132083893
Iteration: 7000. Loss: 0.247692644596099