In [None]:
import torch
import torchvision.transforms as T
from torchvision.datasets import MNIST

# **MLP**

In [None]:
class MultiLayerPerceptron(torch.nn.Module):
  """Fully connected classifier with one sigmoid hidden layer.

  Args:
    input_dim: number of features per sample once the sample is flattened.
    hidden_dim: width of the hidden layer.
    output_dim: number of scores produced per sample.
  """

  def __init__(self, input_dim=28*28, hidden_dim=100, output_dim=10):
    super().__init__()
    self.mlp = torch.nn.Sequential(
        torch.nn.Linear(input_dim, hidden_dim),
        torch.nn.Sigmoid(),
        torch.nn.Linear(hidden_dim, output_dim))

  def forward(self, x):
    """Flatten each sample to a vector and return the raw output scores.

    Args:
      x: tensor whose first dimension indexes the samples; all remaining
        dimensions are collapsed and must multiply to `input_dim`.

    Returns:
      Tensor of shape `(x.shape[0], output_dim)`; no softmax is applied.
    """
    # flatten() falls back to a copy when a view is impossible, so it also
    # accepts non-contiguous inputs and empty batches, where view(N, -1) raises.
    x = torch.flatten(x, start_dim=1)
    return self.mlp(x)

# **Dataset and DataLoader**

In [None]:
# Seed the global RNG so the random train/validation split below is repeatable.
torch.manual_seed(42)

# Convert the dataset images to tensors.
transform = T.Compose([T.ToTensor()])

train_dataset = MNIST('./data', train=True, download=True, transform=transform)
test_dataset = MNIST('./data', train=False, download=True, transform=transform)

# Fraction of the official training set used for training; the rest is validation.
tr_split = 0.5

q_train = int(tr_split * len(train_dataset))
q_val = len(train_dataset) - q_train
# ':g' keeps the complementary fraction free of float noise (e.g. 0.30000000000000004).
print(f'Splitting:\n-Train: {q_train}({tr_split})\n-Val:   {q_val}({1-tr_split:g})\n-Test:  {len(test_dataset)}(1)')

train_data, val_data = torch.utils.data.random_split(train_dataset, [q_train, q_val])

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=256,
                                           shuffle=True,
                                           num_workers=2)
val_loader = torch.utils.data.DataLoader(val_data,
                                         batch_size=256,
                                         shuffle=False,
                                         num_workers=2)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=128,
                                          shuffle=False,
                                          num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Splitting:
-Train: 30000(0.5)
-Val:   30000(0.5))
-Test:  10000(1)


# **Define Train and Test**

In [None]:
def train_one_epoch(model, loader, optimizer, cost_function, device, e, kind, writer):
  """Run one optimisation pass over `loader` and log the epoch metrics.

  Args:
    model: module to optimise; it is switched to training mode.
    loader: iterable yielding `(x, y)` batches.
    optimizer: optimizer stepped once per batch.
    cost_function: callable `cost_function(out, y)` returning a scalar loss.
      A `reduction` attribute equal to 'sum' is honoured; anything else is
      treated as a per-batch mean.
    device: device every batch is moved to.
    e: step index forwarded to `log_values`.
    kind: metric prefix forwarded to `log_values`.
    writer: writer object forwarded to `log_values`.

  Returns:
    `(mean loss per sample, accuracy in percent)` accumulated over the epoch
    while the weights are being updated. Both are 0.0 if the loader yields
    no samples.
  """
  samples = 0
  cumulative_loss = 0.
  cumulative_correct = 0
  # A mean-reduced batch loss must be weighted by the batch size before it is
  # summed, otherwise dividing by the sample count shrinks it by the batch size.
  loss_is_summed = getattr(cost_function, 'reduction', 'mean') == 'sum'

  model.train()
  for x, y in loader:
    x = x.to(device)
    y = y.to(device)

    out = model(x)
    loss = cost_function(out, y)

    loss.backward()
    optimizer.step()
    # Clear the gradients so they do not accumulate into the next batch.
    optimizer.zero_grad()

    batch_samples = x.shape[0]
    samples += batch_samples
    cumulative_loss += loss.item() if loss_is_summed else loss.item() * batch_samples
    _, predicted = out.max(dim=1)
    cumulative_correct += predicted.eq(y).sum().item()

  # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
  mean_loss = cumulative_loss / max(samples, 1)
  accuracy = cumulative_correct / max(samples, 1) * 100

  # Log the same normalised values that are returned.
  log_values(writer, e, mean_loss, accuracy, kind)

  return mean_loss, accuracy

In [None]:
def test_one_epoch(model, loader, cost_function, device, e, kind, writer):
  samples = 0.
  cumulative_loss = 0.
  cumulative_accuracy = 0.

  model.eval()
  with torch.no_grad():
    for (x, y) in loader:
      x = x.to(device)
      y = y.to(device)

      out = model(x)

      loss = cost_function(out, y)

      samples += x.shape[0]
      cumulative_loss += loss.item()
      _, predicted = out.max(dim=1)
      cumulative_accuracy += predicted.eq(y).sum().item()
  
  log_values(writer, e, cumulative_loss, cumulative_accuracy/samples*100, kind)
  
  return cumulative_loss/samples, cumulative_accuracy/samples*100

# **Optimizer**

In [None]:
total_epochs = 20

# Use the first GPU when one is available; otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = MultiLayerPerceptron()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

# Alternative optimizer kept for comparison:
# optimizer = torch.optim.SGD(
#     model.parameters(),
#     lr=1e-2,
#     momentum=0.9,
#     weight_decay=1e-5)

cost_function = torch.nn.CrossEntropyLoss()

In [None]:
!rm -r runs

In [None]:
from torch.utils.tensorboard import SummaryWriter

def log_values(writer, step, loss, accuracy, prefix):
  """Write the loss and accuracy scalars for one step.

  Two scalars are recorded through `writer.add_scalar`, tagged
  `<prefix>/loss` and `<prefix>/accuracy`, in that order.

  Args:
    writer: object exposing `add_scalar(tag, value, step)`.
    step: step index the scalars are recorded at.
    loss: value stored under `<prefix>/loss`.
    accuracy: value stored under `<prefix>/accuracy`.
    prefix: tag prefix for both scalars.
  """
  for metric_name, metric_value in (('loss', loss), ('accuracy', accuracy)):
    writer.add_scalar(f'{prefix}/{metric_name}', metric_value, step)

writer = SummaryWriter(log_dir='runs/exp1')

torch.manual_seed(42)

# try/finally guarantees the writer is closed even when training raises or
# the cell is interrupted.
try:
  # Evaluate the untrained model; step -1 marks the point before epoch 1.
  print('Starting point:')
  train_loss, train_accuracy = test_one_epoch(model, train_loader, cost_function, device, -1, 'Train', writer)
  val_loss, val_accuracy = test_one_epoch(model, val_loader, cost_function, device, -1, 'Validation', writer)
  test_loss, test_accuracy = test_one_epoch(model, test_loader, cost_function, device, -1, 'Test', writer)
  print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
  print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
  print('-----------------------------------------------------\n\n')

  for e in range(1, total_epochs+1):
    train_loss, train_accuracy = train_one_epoch(model, train_loader, optimizer, cost_function, device, e, 'Train', writer)
    val_loss, val_accuracy = test_one_epoch(model, val_loader, cost_function, device, e, 'Validation', writer)

    print('Epoch: {:d}'.format(e))
    print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
    print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
    print('-----------------------------------------------------')

  # Compute final evaluation results.
  # The step is total_epochs rather than the loop variable: `e` is undefined
  # when total_epochs is 0. For Train and Validation this adds a second point
  # at the last epoch's step.
  print('Results:')
  train_loss, train_accuracy = test_one_epoch(model, train_loader, cost_function, device, total_epochs, 'Train', writer)
  val_loss, val_accuracy = test_one_epoch(model, val_loader, cost_function, device, total_epochs, 'Validation', writer)
  test_loss, test_accuracy = test_one_epoch(model, test_loader, cost_function, device, total_epochs, 'Test', writer)

  print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
  print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
  print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
  print('-----------------------------------------------------')
finally:
  # Close the logger
  writer.close()

Starting point:
	 Training loss 0.00926, Training accuracy 9.96
	 Validation loss 0.00927, Validation accuracy 9.90
-----------------------------------------------------


Epoch: 1
	 Training loss 0.00556, Training accuracy 70.67
	 Validation loss 0.00306, Validation accuracy 84.82
-----------------------------------------------------
Epoch: 2
	 Training loss 0.00224, Training accuracy 87.91
	 Validation loss 0.00179, Validation accuracy 88.98
-----------------------------------------------------
Epoch: 3
	 Training loss 0.00152, Training accuracy 90.23
	 Validation loss 0.00142, Validation accuracy 90.29
-----------------------------------------------------
Epoch: 4
	 Training loss 0.00125, Training accuracy 91.48
	 Validation loss 0.00125, Validation accuracy 91.14
-----------------------------------------------------
Epoch: 5
	 Training loss 0.00111, Training accuracy 92.27
	 Validation loss 0.00114, Validation accuracy 91.82
-----------------------------------------------------
Epo

In [None]:
%load_ext tensorboard
%tensorboard --logdir=runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


<IPython.core.display.Javascript object>