In [1]:
# Setup environment
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.manual_seed(1)

# Build MLP
class MLP(nn.Module):
    """Fully connected classifier with two hidden layers.

    Each sample is flattened to 784 features and passed through linear layers
    of 300 and 100 units (each followed by ReLU) and a final 10-unit linear
    layer. The output is the raw result of the last linear layer; no softmax
    is applied inside the module.
    """

    def __init__(self):
        super().__init__()

        # Layer order (and therefore the ``layers.N`` parameter names) is kept
        # unchanged so existing state dicts remain loadable.
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 300),
            nn.ReLU(),
            nn.Linear(300, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Return the 10 output scores for every sample in ``x``.

        ``x`` must hold 784 elements per sample once flattened from dim 1,
        e.g. a tensor of shape ``(batch, 1, 28, 28)``.
        """
        return self.layers(x)


# Instantiate the network and print its layer layout as a quick sanity check.
mlp = MLP()
print(f'The MLP structure you built is as follow: \n{mlp}')

The MLP structure you built is as follow: 
MLP(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=300, bias=True)
    (2): ReLU()
    (3): Linear(in_features=300, out_features=100, bias=True)
    (4): ReLU()
    (5): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [2]:
# Setup environment
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
torch.manual_seed(1)

# Prepare dataset: MNIST train/test splits kept under data/, with every image
# converted to a tensor by ToTensor().
train_set = torchvision.datasets.MNIST(
    root="data/",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_set = torchvision.datasets.MNIST(
    root="data/",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)


#######################
# Mini-batches of 25 samples for training and 100 for testing; neither loader
# requests shuffling.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=25)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=100, shuffle=False)
#######################



# Build MLP
mlp = MLP()
print(f'The MLP structure you built is as follow: \n{mlp}')

# Train MLP: cross-entropy loss minimised with SGD (lr 0.01, momentum 0.9)
lossFunc = nn.CrossEntropyLoss()
optimizer = optim.SGD(mlp.parameters(), lr=0.01, momentum=0.9)


for epoch in range(3):
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0  # loss accumulated since the last progress report
    for batch_no, (inputs, targets) in enumerate(train_loader, start=1):
        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = mlp(inputs)
        loss = lossFunc(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
        # Report the mean loss of each completed group of 500 mini-batches.
        if batch_no % 500 == 0:
            print('Loss after mini-batch %5d: %.3f' % (batch_no, current_loss / 500))
            current_loss = 0.0

# Evaluate MLP
# Put the network into evaluation mode before measuring accuracy.
mlp.eval() # toggle evaluation mode
# define an evaluation function
def evaluate_a_model(model, dataloader):
    """Compute the classification accuracy of ``model`` in percent.

    Args:
        model: callable mapping a batch of inputs to per-class scores of
            shape ``(batch, num_classes)``.
        dataloader: iterable of ``(inputs, labels)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.

    Returns:
        ``100 * correct / len(dataloader.dataset)``, where ``correct`` counts
        the samples whose highest-scoring class equals the label. The result
        is a 0-dim tensor once at least one batch has been processed.
    """
    n_hits = 0
    # Gradient tracking is not needed while scoring.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            scores = model(batch_inputs)
            # Index of the largest score per sample, kept as a column vector.
            top_class = scores.argmax(dim=1, keepdim=True)
            n_hits += (top_class == batch_labels.view_as(top_class)).sum()
    return 100. * n_hits / len(dataloader.dataset)
# evaluate on training data
train_acc = evaluate_a_model(mlp, train_loader)
# evaluate on testing data
test_acc = evaluate_a_model(mlp, test_loader)
# report both accuracies as percentages with two decimals
print(f'\nFinal training accuracy: {train_acc:.2f}%\nFinal testing accuracy: {test_acc:.2f}%')

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw

The MLP structure you built is as follow: 
MLP(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=300, bias=True)
    (2): ReLU()
    (3): Linear(in_features=300, out_features=100, bias=True)
    (4): ReLU()
    (5): Linear(in_features=100, out_features=10, bias=True)
  )
)
Starting epoch 1
Loss after mini-batch   500: 0.770
Loss after mini-batch  1000: 0.328
Loss after mini-batch  1500: 0.235
Loss after mini-batch  2000: 0.204
Starting epoch 2
Loss after mini-batch   500: 0.138
Loss after mini-batch  1000: 0.129
Loss after mini-batch  1500: 0.115
Loss after mini-batch  2000: 0.115
Starting epoch 3
Loss after mini-batch   500: 0.085
Loss after mini-batch  1000: 0.079
Loss after mini-batch  1500: 0.076
Loss after mini-batch  2000: 0.078

Final training accuracy: 96.94%
Final testing accuracy: 96.09%


In [3]:
# Per-layer output shapes and parameter counts of the MLP for a (1, 28, 28) input.
summary(mlp, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 784]               0
            Linear-2                  [-1, 300]         235,500
              ReLU-3                  [-1, 300]               0
            Linear-4                  [-1, 100]          30,100
              ReLU-5                  [-1, 100]               0
            Linear-6                   [-1, 10]           1,010
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 1.02
Estimated Total Size (MB): 1.03
----------------------------------------------------------------


In [6]:
# Setup environment
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(1)

# Build Convolutional Neural Network (CNN)
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Feature extractor: Conv(1->6, 5x5, padding 2) -> ReLU -> MaxPool(2)
    -> Conv(6->16, 5x5) -> ReLU -> MaxPool(2), which yields 16 feature maps
    of size 5x5 for a 28x28 input. The classifier head maps those
    16*5*5 = 400 features through linear layers of 120 and 84 units (each
    followed by ReLU) to 10 output scores. No softmax is applied inside the
    module.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Fully connected classifier head.
        self.linear_layers = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Return the 10 output scores for every image in ``x``.

        ``x`` is expected as ``(batch, 1, 28, 28)`` so that the feature
        extractor produces the 400 features the first linear layer requires.
        """
        x = self.cnn_layers(x)
        # flatten() rather than view(): view() raises when the feature maps
        # are not contiguous (e.g. with channels_last memory format), whereas
        # flatten() copies only when it has to and keeps the logical order.
        x = x.flatten(start_dim=1)
        return self.linear_layers(x)

# Instantiate the network and print its layer layout as a quick sanity check.
cnn = CNN()
print(f'The CNN structure you built is as follow: \n{cnn}')

The CNN structure you built is as follow: 
CNN(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [7]:
# Setup environment
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
torch.manual_seed(1)

# Prepare dataset: MNIST train/test splits kept under data/, with every image
# converted to a tensor by ToTensor().
train_set = torchvision.datasets.MNIST(
    root="data/",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_set = torchvision.datasets.MNIST(
    root="data/",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

#######################
# Mini-batches of 200 samples for training and 1000 for testing; neither
# loader requests shuffling.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=200)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=1000, shuffle=False)
#######################


# Build Convolutional Neural Network (CNN)
cnn = CNN()
print(f'The CNN structure you built is as follow: \n{cnn}')

# Train CNN: cross-entropy loss minimised with SGD (lr 0.01, momentum 0.9)
lossFunc = nn.CrossEntropyLoss()
optimizer = optim.SGD(cnn.parameters(), lr=0.01, momentum=0.9)

for epoch in range(3):
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0  # loss accumulated since the last progress report
    for batch_no, (inputs, targets) in enumerate(train_loader, start=1):
        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = cnn(inputs)
        loss = lossFunc(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()

        # Report the mean loss of each completed group of 50 mini-batches.
        if batch_no % 50 == 0:
            print('Loss after mini-batch %5d: %.3f' % (batch_no, current_loss / 50))
            current_loss = 0.0


# Evaluate CNN
# Put the network into evaluation mode before measuring accuracy.
cnn.eval() # toggle evaluation mode
# define an evaluation function
def evaluate_a_model(model, dataloader):
    """Compute the classification accuracy of ``model`` in percent.

    Args:
        model: callable mapping a batch of inputs to per-class scores of
            shape ``(batch, num_classes)``.
        dataloader: iterable of ``(inputs, labels)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.

    Returns:
        ``100 * correct / len(dataloader.dataset)``, where ``correct`` counts
        the samples whose highest-scoring class equals the label. The result
        is a 0-dim tensor once at least one batch has been processed.
    """
    n_hits = 0
    # Gradient tracking is not needed while scoring.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            scores = model(batch_inputs)
            # Index of the largest score per sample, kept as a column vector.
            top_class = scores.argmax(dim=1, keepdim=True)
            n_hits += (top_class == batch_labels.view_as(top_class)).sum()
    return 100. * n_hits / len(dataloader.dataset)
# evaluate on training data
train_acc = evaluate_a_model(cnn, train_loader)
# evaluate on testing data
test_acc = evaluate_a_model(cnn, test_loader)
# report both accuracies as percentages with two decimals
print(f'\nFinal training accuracy: {train_acc:.2f}%\nFinal testing accuracy: {test_acc:.2f}%')

The CNN structure you built is as follow: 
CNN(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)
Starting epoch 1
Loss after mini-batch    50: 2.296
Loss after mini-batch   100: 2.211
Loss after mini-batch   150: 1.113
Loss after mini-batch   200: 0.501
Loss after mini-batch   250: 0.367
Loss after mini-batch   300: 0.239
Starting epoch 2
Loss after mini-batch    50: 0.212
Loss after mini-batch   100: 0.231
Loss after mini-batch  

In [8]:
# Per-layer output shapes and parameter counts of the CNN for a (1, 28, 28) input.
summary(cnn, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
              ReLU-2            [-1, 6, 28, 28]               0
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              ReLU-5           [-1, 16, 10, 10]               0
         MaxPool2d-6             [-1, 16, 5, 5]               0
            Linear-7                  [-1, 120]          48,120
              ReLU-8                  [-1, 120]               0
            Linear-9                   [-1, 84]          10,164
             ReLU-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/ba