# CONFIG

In [90]:
!nvidia-smi

Wed Nov  8 06:13:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P8     9W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [91]:
!pip show torch

Name: torch
Version: 2.1.0+cu118
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /usr/local/lib/python3.10/dist-packages
Requires: filelock, fsspec, jinja2, networkx, sympy, triton, typing-extensions
Required-by: fastai, torchaudio, torchdata, torchtext, torchvision


In [92]:
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data

import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [93]:
torch.manual_seed(101)

print("Using torch", torch.__version__)
print(f"Is the GPU available? {torch.cuda.is_available()}")

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device", device)

# GPU operations have a separate seed we also want to set
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Additionally, some operations on a GPU are implemented stochastic for efficiency
# We want to ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

Using torch 2.1.0+cu118
Is the GPU available? True
Device cuda


# MNIST

In [94]:
batch_size = 128

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

#test_x = Variable(torch.unsqueeze(test_dataset.data, dim=1)).type(torch.FloatTensor)
test_x = torch.unsqueeze(test_dataset.data, dim=1).type(torch.FloatTensor)
test_y = test_dataset.targets

# NN

In [95]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

        self.hidden_size = hidden_size

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [96]:
def rightness(predictions, labels):
    '''
    Calculate the prediction error rate
    1. "predictions" gives a series of predictions, which is a  "batch_size" x "num_classes" matrix.
    2. "labels" are correct answer
    '''
    pred = torch.max(predictions.data, 1)[1]
    # For the first dimension of every row (every image), ouput the index of the biggest elements in every row.
    rights = pred.eq(labels.data.view_as(pred)).sum()
    # Compare the indexs with categories in "labels", and get the accumulated correct numbers.
    return rights, len(labels)
    # Return the correct numbers and all samples.


In [97]:
def train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer):
  record = [] # A container recording the training accuracies

  train_start_time = time.time()
  for epoch in range(num_epochs):

      train_rights = [] # Record the training accuracies

      for i, (images, labels) in enumerate(train_loader):

          # Convert torch tensor to Variable
          #images = Variable(images.view(-1, 28*28))
          images = images.view(-1, 28*28)
          #labels = Variable(labels)
          labels = labels

          MLP.train() # Indicate the model that it's training time

          # Forward + Backward + Optimizer
          optimizer.zero_grad()  # zero the gradient buffer
          #outputs = net(images)
          outputs = MLP(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          right = rightness(outputs, labels) # (outputs, labels) = (correct numbers, all samples)
          train_rights.append(right)

          if (i+1) % 200 == 0:

            MLP.eval() # Indicate the model that it's validation/test time

            train_r = (sum([tup[0] for tup in train_rights]), sum([tup[1] for tup in train_rights]))
            train_accuracy = 100. * train_r[0].numpy() / train_r[1]
            total_step = len(train_dataset)//batch_size

            print ('Epoch [{:d}/{:d}], Step [{:3d}/{:d}], Loss: {:.4f} | training accuracy: {:5.2f} %'.format(
                  epoch+1, num_epochs, i+1, total_step, loss.data, train_accuracy))

            record.append(100 - 100. * train_r[0] / train_r[1])
  train_end_time = time.time()

  correct = 0
  total = 0
  for images, labels in test_loader:
      #images = Variable(images.view(-1, 28*28))
      images = images.view(-1, 28*28)
      outputs = MLP(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum()

  print('Accuracy of the MLP on the 60000 training images: %.4f %%' %
        (100 - record[-1]))

  print('Accuracy of the MLP on the 10000 test     images: %.4f %%' %
        (100 * torch.true_divide(correct,total)))

  return [MLP.hidden_size, num_epochs, batch_size, learning_rate, type(criterion), type(optimizer), '%.4f %%' %(100 - record[-1]), '%.4f %%' %(100 * torch.true_divide(correct,total)), f"{(train_end_time - train_start_time):6.5f}s"]

# Testes

In [98]:
outputs = []

In [99]:
num_epochs = 5
batch_size = 128
learning_rate = 0.001

input_size = 784
hidden_size = 500
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 2.2560 | training accuracy: 17.14 %
Epoch [1/5], Step [400/468], Loss: 2.2050 | training accuracy: 25.01 %
Epoch [2/5], Step [200/468], Loss: 2.1560 | training accuracy: 51.09 %
Epoch [2/5], Step [400/468], Loss: 2.0959 | training accuracy: 55.07 %
Epoch [3/5], Step [200/468], Loss: 2.0192 | training accuracy: 66.54 %
Epoch [3/5], Step [400/468], Loss: 1.9621 | training accuracy: 67.95 %
Epoch [4/5], Step [200/468], Loss: 1.8917 | training accuracy: 71.90 %
Epoch [4/5], Step [400/468], Loss: 1.7818 | training accuracy: 72.32 %
Epoch [5/5], Step [200/468], Loss: 1.6827 | training accuracy: 74.11 %
Epoch [5/5], Step [400/468], Loss: 1.6325 | training accuracy: 74.40 %
Accuracy of the MLP on the 60000 training images: 74.3965 %
Accuracy of the MLP on the 10000 test     images: 76.0200 %


In [100]:
num_epochs = 5
batch_size = 128
learning_rate = 0.001

input_size = 784
hidden_size = 500
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.3535 | training accuracy: 87.27 %
Epoch [1/5], Step [400/468], Loss: 0.1768 | training accuracy: 90.57 %
Epoch [2/5], Step [200/468], Loss: 0.1069 | training accuracy: 95.98 %
Epoch [2/5], Step [400/468], Loss: 0.0822 | training accuracy: 96.21 %
Epoch [3/5], Step [200/468], Loss: 0.0899 | training accuracy: 97.39 %
Epoch [3/5], Step [400/468], Loss: 0.1696 | training accuracy: 97.50 %
Epoch [4/5], Step [200/468], Loss: 0.0324 | training accuracy: 98.26 %
Epoch [4/5], Step [400/468], Loss: 0.0474 | training accuracy: 98.18 %
Epoch [5/5], Step [200/468], Loss: 0.0782 | training accuracy: 98.72 %
Epoch [5/5], Step [400/468], Loss: 0.0514 | training accuracy: 98.76 %
Accuracy of the MLP on the 60000 training images: 98.7559 %
Accuracy of the MLP on the 10000 test     images: 97.9100 %


In [101]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 500
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 1.7441 | training accuracy: 56.15 %
Epoch [1/5], Step [400/468], Loss: 1.1253 | training accuracy: 66.00 %
Epoch [2/5], Step [200/468], Loss: 0.7091 | training accuracy: 82.97 %
Epoch [2/5], Step [400/468], Loss: 0.6235 | training accuracy: 83.91 %
Epoch [3/5], Step [200/468], Loss: 0.5103 | training accuracy: 86.62 %
Epoch [3/5], Step [400/468], Loss: 0.4473 | training accuracy: 87.09 %
Epoch [4/5], Step [200/468], Loss: 0.4440 | training accuracy: 87.70 %
Epoch [4/5], Step [400/468], Loss: 0.3815 | training accuracy: 88.22 %
Epoch [5/5], Step [200/468], Loss: 0.3546 | training accuracy: 88.80 %
Epoch [5/5], Step [400/468], Loss: 0.3500 | training accuracy: 89.17 %
Accuracy of the MLP on the 60000 training images: 89.1738 %
Accuracy of the MLP on the 10000 test     images: 90.1400 %


In [102]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 500
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.0997 | training accuracy: 91.09 %
Epoch [1/5], Step [400/468], Loss: 0.3076 | training accuracy: 93.17 %
Epoch [2/5], Step [200/468], Loss: 0.1349 | training accuracy: 96.75 %
Epoch [2/5], Step [400/468], Loss: 0.1532 | training accuracy: 96.65 %
Epoch [3/5], Step [200/468], Loss: 0.0608 | training accuracy: 97.52 %
Epoch [3/5], Step [400/468], Loss: 0.0735 | training accuracy: 97.49 %
Epoch [4/5], Step [200/468], Loss: 0.0993 | training accuracy: 97.86 %
Epoch [4/5], Step [400/468], Loss: 0.3198 | training accuracy: 97.75 %
Epoch [5/5], Step [200/468], Loss: 0.0646 | training accuracy: 97.95 %
Epoch [5/5], Step [400/468], Loss: 0.0885 | training accuracy: 97.96 %
Accuracy of the MLP on the 60000 training images: 97.9570 %
Accuracy of the MLP on the 10000 test     images: 97.3500 %


In [103]:
num_epochs = 5
batch_size = 128
learning_rate = 0.001

input_size = 784
hidden_size = 256
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 2.2707 | training accuracy: 13.31 %
Epoch [1/5], Step [400/468], Loss: 2.2388 | training accuracy: 18.28 %
Epoch [2/5], Step [200/468], Loss: 2.1884 | training accuracy: 36.75 %
Epoch [2/5], Step [400/468], Loss: 2.1406 | training accuracy: 42.46 %
Epoch [3/5], Step [200/468], Loss: 2.0722 | training accuracy: 59.95 %
Epoch [3/5], Step [400/468], Loss: 2.0131 | training accuracy: 62.79 %
Epoch [4/5], Step [200/468], Loss: 1.9428 | training accuracy: 69.64 %
Epoch [4/5], Step [400/468], Loss: 1.8804 | training accuracy: 70.36 %
Epoch [5/5], Step [200/468], Loss: 1.7970 | training accuracy: 73.28 %
Epoch [5/5], Step [400/468], Loss: 1.7314 | training accuracy: 73.74 %
Accuracy of the MLP on the 60000 training images: 73.7422 %
Accuracy of the MLP on the 10000 test     images: 75.5300 %


In [104]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 256
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.1676 | training accuracy: 91.14 %
Epoch [1/5], Step [400/468], Loss: 0.2404 | training accuracy: 93.06 %
Epoch [2/5], Step [200/468], Loss: 0.0601 | training accuracy: 96.49 %
Epoch [2/5], Step [400/468], Loss: 0.0583 | training accuracy: 96.61 %
Epoch [3/5], Step [200/468], Loss: 0.0326 | training accuracy: 97.52 %
Epoch [3/5], Step [400/468], Loss: 0.0422 | training accuracy: 97.46 %
Epoch [4/5], Step [200/468], Loss: 0.0262 | training accuracy: 97.93 %
Epoch [4/5], Step [400/468], Loss: 0.1305 | training accuracy: 97.75 %
Epoch [5/5], Step [200/468], Loss: 0.0304 | training accuracy: 98.11 %
Epoch [5/5], Step [400/468], Loss: 0.1057 | training accuracy: 97.97 %
Accuracy of the MLP on the 60000 training images: 97.9746 %
Accuracy of the MLP on the 10000 test     images: 96.9900 %


In [105]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 256
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.2997 | training accuracy: 88.65 %
Epoch [1/5], Step [400/468], Loss: 0.2161 | training accuracy: 91.19 %
Epoch [2/5], Step [200/468], Loss: 0.1248 | training accuracy: 95.21 %
Epoch [2/5], Step [400/468], Loss: 0.1197 | training accuracy: 95.35 %
Epoch [3/5], Step [200/468], Loss: 0.1161 | training accuracy: 96.19 %
Epoch [3/5], Step [400/468], Loss: 0.1516 | training accuracy: 96.27 %
Epoch [4/5], Step [200/468], Loss: 0.0521 | training accuracy: 96.73 %
Epoch [4/5], Step [400/468], Loss: 0.1526 | training accuracy: 96.74 %
Epoch [5/5], Step [200/468], Loss: 0.1472 | training accuracy: 97.16 %
Epoch [5/5], Step [400/468], Loss: 0.1113 | training accuracy: 97.12 %
Accuracy of the MLP on the 60000 training images: 97.1172 %
Accuracy of the MLP on the 10000 test     images: 96.8400 %


In [106]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 128
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.1212 | training accuracy: 90.25 %
Epoch [1/5], Step [400/468], Loss: 0.1105 | training accuracy: 92.84 %
Epoch [2/5], Step [200/468], Loss: 0.0485 | training accuracy: 96.61 %
Epoch [2/5], Step [400/468], Loss: 0.0641 | training accuracy: 96.57 %
Epoch [3/5], Step [200/468], Loss: 0.0662 | training accuracy: 97.36 %
Epoch [3/5], Step [400/468], Loss: 0.1493 | training accuracy: 97.30 %
Epoch [4/5], Step [200/468], Loss: 0.0106 | training accuracy: 97.89 %
Epoch [4/5], Step [400/468], Loss: 0.1161 | training accuracy: 97.89 %
Epoch [5/5], Step [200/468], Loss: 0.0536 | training accuracy: 98.00 %
Epoch [5/5], Step [400/468], Loss: 0.0242 | training accuracy: 97.91 %
Accuracy of the MLP on the 60000 training images: 97.9102 %
Accuracy of the MLP on the 10000 test     images: 96.5700 %


In [107]:
num_epochs = 5
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 64
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/5], Step [200/468], Loss: 0.1517 | training accuracy: 89.80 %
Epoch [1/5], Step [400/468], Loss: 0.2179 | training accuracy: 92.19 %
Epoch [2/5], Step [200/468], Loss: 0.2122 | training accuracy: 96.11 %
Epoch [2/5], Step [400/468], Loss: 0.2623 | training accuracy: 96.28 %
Epoch [3/5], Step [200/468], Loss: 0.1208 | training accuracy: 96.89 %
Epoch [3/5], Step [400/468], Loss: 0.2268 | training accuracy: 96.94 %
Epoch [4/5], Step [200/468], Loss: 0.0714 | training accuracy: 97.59 %
Epoch [4/5], Step [400/468], Loss: 0.0512 | training accuracy: 97.54 %
Epoch [5/5], Step [200/468], Loss: 0.0471 | training accuracy: 97.87 %
Epoch [5/5], Step [400/468], Loss: 0.0990 | training accuracy: 97.64 %
Accuracy of the MLP on the 60000 training images: 97.6426 %
Accuracy of the MLP on the 10000 test     images: 96.6700 %


In [108]:
num_epochs = 10
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 64
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/10], Step [200/468], Loss: 0.1471 | training accuracy: 89.57 %
Epoch [1/10], Step [400/468], Loss: 0.1946 | training accuracy: 92.17 %
Epoch [2/10], Step [200/468], Loss: 0.1131 | training accuracy: 96.27 %
Epoch [2/10], Step [400/468], Loss: 0.0689 | training accuracy: 96.34 %
Epoch [3/10], Step [200/468], Loss: 0.1179 | training accuracy: 97.04 %
Epoch [3/10], Step [400/468], Loss: 0.2260 | training accuracy: 97.00 %
Epoch [4/10], Step [200/468], Loss: 0.0819 | training accuracy: 97.43 %
Epoch [4/10], Step [400/468], Loss: 0.1080 | training accuracy: 97.37 %
Epoch [5/10], Step [200/468], Loss: 0.1147 | training accuracy: 97.83 %
Epoch [5/10], Step [400/468], Loss: 0.0570 | training accuracy: 97.72 %
Epoch [6/10], Step [200/468], Loss: 0.0421 | training accuracy: 98.05 %
Epoch [6/10], Step [400/468], Loss: 0.1018 | training accuracy: 98.01 %
Epoch [7/10], Step [200/468], Loss: 0.0930 | training accuracy: 98.30 %
Epoch [7/10], Step [400/468], Loss: 0.1055 | training accuracy: 

In [109]:
num_epochs = 15
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 64
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/15], Step [200/468], Loss: 0.2347 | training accuracy: 89.41 %
Epoch [1/15], Step [400/468], Loss: 0.1045 | training accuracy: 92.15 %
Epoch [2/15], Step [200/468], Loss: 0.0719 | training accuracy: 96.21 %
Epoch [2/15], Step [400/468], Loss: 0.1264 | training accuracy: 96.32 %
Epoch [3/15], Step [200/468], Loss: 0.0997 | training accuracy: 97.17 %
Epoch [3/15], Step [400/468], Loss: 0.0317 | training accuracy: 97.11 %
Epoch [4/15], Step [200/468], Loss: 0.0911 | training accuracy: 97.45 %
Epoch [4/15], Step [400/468], Loss: 0.1379 | training accuracy: 97.46 %
Epoch [5/15], Step [200/468], Loss: 0.0518 | training accuracy: 97.88 %
Epoch [5/15], Step [400/468], Loss: 0.0848 | training accuracy: 97.77 %
Epoch [6/15], Step [200/468], Loss: 0.0401 | training accuracy: 98.12 %
Epoch [6/15], Step [400/468], Loss: 0.1488 | training accuracy: 97.86 %
Epoch [7/15], Step [200/468], Loss: 0.0456 | training accuracy: 98.37 %
Epoch [7/15], Step [400/468], Loss: 0.1296 | training accuracy: 

In [110]:
num_epochs = 2
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 1028
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/2], Step [200/468], Loss: 0.3321 | training accuracy: 90.83 %
Epoch [1/2], Step [400/468], Loss: 0.2617 | training accuracy: 93.06 %
Epoch [2/2], Step [200/468], Loss: 0.0788 | training accuracy: 96.56 %
Epoch [2/2], Step [400/468], Loss: 0.2429 | training accuracy: 96.59 %
Accuracy of the MLP on the 60000 training images: 96.5938 %
Accuracy of the MLP on the 10000 test     images: 96.8300 %


In [111]:
num_epochs = 2
batch_size = 64
learning_rate = 0.01

input_size = 784
hidden_size = 1028
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/2], Step [200/937], Loss: 0.0960 | training accuracy: 90.68 %
Epoch [1/2], Step [400/937], Loss: 0.1848 | training accuracy: 93.07 %
Epoch [2/2], Step [200/937], Loss: 0.2602 | training accuracy: 96.59 %
Epoch [2/2], Step [400/937], Loss: 0.1539 | training accuracy: 96.69 %
Accuracy of the MLP on the 60000 training images: 96.6875 %
Accuracy of the MLP on the 10000 test     images: 96.8500 %


In [112]:
num_epochs = 10
batch_size = 512
learning_rate = 0.01

input_size = 784
hidden_size = 128
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/10], Step [200/117], Loss: 0.2271 | training accuracy: 90.45 %
Epoch [1/10], Step [400/117], Loss: 0.0924 | training accuracy: 92.74 %
Epoch [2/10], Step [200/117], Loss: 0.0911 | training accuracy: 96.72 %
Epoch [2/10], Step [400/117], Loss: 0.0606 | training accuracy: 96.63 %
Epoch [3/10], Step [200/117], Loss: 0.1783 | training accuracy: 97.36 %
Epoch [3/10], Step [400/117], Loss: 0.0818 | training accuracy: 97.38 %
Epoch [4/10], Step [200/117], Loss: 0.0904 | training accuracy: 97.89 %
Epoch [4/10], Step [400/117], Loss: 0.0917 | training accuracy: 97.77 %
Epoch [5/10], Step [200/117], Loss: 0.0746 | training accuracy: 97.96 %
Epoch [5/10], Step [400/117], Loss: 0.0690 | training accuracy: 98.00 %
Epoch [6/10], Step [200/117], Loss: 0.0573 | training accuracy: 98.18 %
Epoch [6/10], Step [400/117], Loss: 0.1043 | training accuracy: 98.05 %
Epoch [7/10], Step [200/117], Loss: 0.0847 | training accuracy: 98.63 %
Epoch [7/10], Step [400/117], Loss: 0.0215 | training accuracy: 

In [113]:
num_epochs = 10
batch_size = 32
learning_rate = 0.01

input_size = 784
hidden_size = 64
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/10], Step [200/1875], Loss: 0.1858 | training accuracy: 89.31 %
Epoch [1/10], Step [400/1875], Loss: 0.1679 | training accuracy: 91.95 %
Epoch [2/10], Step [200/1875], Loss: 0.0926 | training accuracy: 95.86 %
Epoch [2/10], Step [400/1875], Loss: 0.1006 | training accuracy: 96.23 %
Epoch [3/10], Step [200/1875], Loss: 0.0301 | training accuracy: 97.22 %
Epoch [3/10], Step [400/1875], Loss: 0.0239 | training accuracy: 97.01 %
Epoch [4/10], Step [200/1875], Loss: 0.1009 | training accuracy: 97.48 %
Epoch [4/10], Step [400/1875], Loss: 0.0168 | training accuracy: 97.39 %
Epoch [5/10], Step [200/1875], Loss: 0.0912 | training accuracy: 97.77 %
Epoch [5/10], Step [400/1875], Loss: 0.0363 | training accuracy: 97.64 %
Epoch [6/10], Step [200/1875], Loss: 0.1207 | training accuracy: 97.95 %
Epoch [6/10], Step [400/1875], Loss: 0.0725 | training accuracy: 97.95 %
Epoch [7/10], Step [200/1875], Loss: 0.0219 | training accuracy: 98.32 %
Epoch [7/10], Step [400/1875], Loss: 0.0175 | train

In [114]:
num_epochs = 30
batch_size = 256
learning_rate = 0.01

input_size = 784
hidden_size = 128
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/30], Step [200/234], Loss: 0.1772 | training accuracy: 90.68 %
Epoch [1/30], Step [400/234], Loss: 0.0578 | training accuracy: 92.93 %
Epoch [2/30], Step [200/234], Loss: 0.0585 | training accuracy: 96.70 %
Epoch [2/30], Step [400/234], Loss: 0.1038 | training accuracy: 96.61 %
Epoch [3/30], Step [200/234], Loss: 0.0422 | training accuracy: 97.44 %
Epoch [3/30], Step [400/234], Loss: 0.0669 | training accuracy: 97.31 %
Epoch [4/30], Step [200/234], Loss: 0.0510 | training accuracy: 97.70 %
Epoch [4/30], Step [400/234], Loss: 0.0247 | training accuracy: 97.64 %
Epoch [5/30], Step [200/234], Loss: 0.0315 | training accuracy: 98.09 %
Epoch [5/30], Step [400/234], Loss: 0.0476 | training accuracy: 97.95 %
Epoch [6/30], Step [200/234], Loss: 0.1142 | training accuracy: 98.34 %
Epoch [6/30], Step [400/234], Loss: 0.0270 | training accuracy: 98.22 %
Epoch [7/30], Step [200/234], Loss: 0.0911 | training accuracy: 98.46 %
Epoch [7/30], Step [400/234], Loss: 0.0501 | training accuracy: 

In [116]:
num_epochs = 50
batch_size = 128
learning_rate = 0.01

input_size = 784
hidden_size = 32
num_classes = 10

MLP = Net(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MLP.parameters(), lr=learning_rate)

outputs.append(train_test_evaluate(MLP, num_epochs, batch_size, learning_rate, criterion, optimizer))

Epoch [1/50], Step [200/468], Loss: 0.1348 | training accuracy: 89.01 %
Epoch [1/50], Step [400/468], Loss: 0.2325 | training accuracy: 91.39 %
Epoch [2/50], Step [200/468], Loss: 0.1344 | training accuracy: 95.62 %
Epoch [2/50], Step [400/468], Loss: 0.1058 | training accuracy: 95.63 %
Epoch [3/50], Step [200/468], Loss: 0.1663 | training accuracy: 96.23 %
Epoch [3/50], Step [400/468], Loss: 0.1599 | training accuracy: 96.23 %
Epoch [4/50], Step [200/468], Loss: 0.1822 | training accuracy: 96.78 %
Epoch [4/50], Step [400/468], Loss: 0.1175 | training accuracy: 96.66 %
Epoch [5/50], Step [200/468], Loss: 0.1007 | training accuracy: 97.08 %
Epoch [5/50], Step [400/468], Loss: 0.0266 | training accuracy: 97.09 %
Epoch [6/50], Step [200/468], Loss: 0.0629 | training accuracy: 97.48 %
Epoch [6/50], Step [400/468], Loss: 0.1572 | training accuracy: 97.29 %
Epoch [7/50], Step [200/468], Loss: 0.0246 | training accuracy: 97.59 %
Epoch [7/50], Step [400/468], Loss: 0.1262 | training accuracy: 

# Conclusão

In [117]:
output_frame = pd.DataFrame(outputs, columns=["hidden layer size", "epochs", "batch size", "learning rate", "loss function", "optimizer", "train accuracy", "test accuracy", "fit time"])

output_frame

Unnamed: 0,hidden layer size,epochs,batch size,learning rate,loss function,optimizer,train accuracy,test accuracy,fit time
0,500,5,128,0.001,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.sgd.SGD'>,74.3965 %,76.0200 %,38.74969s
1,500,5,128,0.001,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,98.7559 %,97.9100 %,49.57635s
2,500,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.sgd.SGD'>,89.1738 %,90.1400 %,38.82370s
3,500,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,97.9570 %,97.3500 %,48.72583s
4,256,5,128,0.001,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.sgd.SGD'>,73.7422 %,75.5300 %,35.68521s
5,256,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,97.9746 %,96.9900 %,42.90964s
6,256,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adagrad.Adagrad'>,97.1172 %,96.8400 %,37.27072s
7,128,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,97.9102 %,96.5700 %,35.83737s
8,64,5,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,97.6426 %,96.6700 %,33.79173s
9,64,10,128,0.01,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,<class 'torch.optim.adam.Adam'>,98.5664 %,97.1400 %,74.64287s


Os testes mostram que o otimizador Adam é inegavelmente a melhor opção para o cenário. Mostra resultados melhores quando comparado ao Adagrada e ao SGD. Além de que não demonstra aumento de complexidade temporal.  

Para avaliar o tamanho da camada escondida é preciso levar em consideração a complexidade espacial. Os resultados tendem a relacionar o aumento da camada escondida com o aumento da precisão no conjunto de teste.  

É interessante perceber que um aumento na epoch além de 5 mantendo uma quantidade elevada de camadas escondidas não parece trazer um aumento significativo para a precisão no caso da utilização do Adam.  

É possível fazer um tradeoff entre o tamanho da camada escondida, quantidade de epochs e tempo de fit. Diminuindo significativamente o tamanho da camada e aumentando a quantidade de epochs causa um aumento no tempo de fit porém gera uma melhora de precisão. Isso é interessante para casos onde a complexidade espacial é importante.