In [79]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as sk
import sklearn.model_selection
import sklearn.datasets
import torch

In [80]:
# Load scikit-learn's bundled handwritten-digits dataset (flattened images + labels).
dataset = sk.datasets.load_digits()
# Per the load_digits documentation pixel values are integers in 0..16, so
# dividing by 16 rescales every feature into [0, 1].
X = dataset.data / 16
y = dataset.target

# Fix the seed so the train/validation split is identical on every run, and
# stratify on the labels so both splits keep the same class proportions.
# The default 75% / 25% split ratio is kept.
X_train, X_val, y_train, y_val = sk.model_selection.train_test_split(
  X, y, random_state=0, stratify=y
)

In [81]:
class DigitsDataset(torch.utils.data.Dataset):
  """Map-style dataset pairing feature rows with integer class labels.

  Args:
    X: Indexable collection of feature vectors; `X[i]` must be convertible
      to a float32 tensor.
    y: Indexable collection of labels aligned with `X`; `y[i]` must be
      convertible to an int64 tensor.

  Raises:
    ValueError: If `X` and `y` do not have the same length.
  """

  def __init__(self, X, y):
    # Fail early: a mismatch would otherwise surface as an IndexError deep
    # inside a DataLoader, or silently drop trailing labels.
    if len(X) != len(y):
      raise ValueError(
        f'X and y must have the same length, got {len(X)} and {len(y)}'
      )
    self.X = X
    self.y = y

  def __len__(self):
    """Return the number of samples."""
    return len(self.X)

  def __getitem__(self, i):
    """Return sample `i` as a `(float32 features, int64 label)` tensor pair."""
    xi = self.X[i]
    yi = self.y[i]

    # cross_entropy expects floating-point inputs and integer (long) targets.
    return torch.tensor(xi, dtype=torch.float32), torch.tensor(yi, dtype=torch.long)

In [82]:
# Wrap each split in the tensor-yielding dataset class.
dataset_train, dataset_val = (
  DigitsDataset(features, labels)
  for features, labels in ((X_train, y_train), (X_val, y_val))
)

# Mini-batches of 16 samples; only the training loader shuffles its data.
dataloader_train = torch.utils.data.DataLoader(
  dataset_train,
  shuffle=True,
  batch_size=16,
)
dataloader_val = torch.utils.data.DataLoader(
  dataset_val,
  shuffle=False,
  batch_size=16,
)

In [83]:
class SimpleNeuralNetwork(torch.nn.Module):
  """Fully connected classifier with two ReLU hidden layers.

  With the default arguments the layer widths are 64 -> 256 -> 128 -> 10,
  identical to the previously hard-coded architecture.

  Args:
    in_features: Size of each input vector.
    hidden1: Width of the first hidden layer.
    hidden2: Width of the second hidden layer.
    num_classes: Number of output scores per sample.
  """

  def __init__(self, in_features=64, hidden1=256, hidden2=128, num_classes=10):
    super().__init__()
    # Attribute names are kept so existing state_dict keys stay valid.
    self.dense1 = torch.nn.Linear(in_features, hidden1)
    self.dense2 = torch.nn.Linear(hidden1, hidden2)
    self.dense3 = torch.nn.Linear(hidden2, num_classes)

  def forward(self, x):
    """Return raw (un-normalised) class scores for a batch `x`.

    No softmax is applied here; the output of the last linear layer is
    returned as-is.
    """
    x = torch.nn.functional.relu(self.dense1(x))
    x = torch.nn.functional.relu(self.dense2(x))
    return self.dense3(x)

In [84]:
# Seed PyTorch's global RNG before building the model so the initial weights
# (and any shuffling subsequently drawn from the default generator) repeat
# across fresh runs of the notebook.
torch.manual_seed(0)
model = SimpleNeuralNetwork()
# Adam over all model parameters with a learning rate of 3e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

In [None]:
num_epochs = 20
# Per-epoch average loss on each split; read by the plotting cell.
L_train_history = []
L_val_history = []

for ep in range(num_epochs):
  # ---- training pass ----
  model.train()
  L = 0.0
  for x_batch, y_batch in dataloader_train:
    outputs = model(x_batch)
    loss = torch.nn.functional.cross_entropy(outputs, y_batch)
    # Accumulate a plain Python float: summing the loss tensor itself would
    # keep every batch's autograd history referenced until the epoch ends.
    # cross_entropy averages over the batch, so weight by the batch size to
    # get a correct per-sample mean even when the last batch is smaller.
    L += loss.item() * len(x_batch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  L = L / len(dataset_train)
  L_train_history.append(L)
  print(f'Just finished epoch: {ep}')
  print(f'Cost function value (training data): {L}')

  # ---- validation pass (no gradient tracking, no parameter updates) ----
  model.eval()
  L = 0.0
  with torch.no_grad():
    for x_batch, y_batch in dataloader_val:
      outputs = model(x_batch)
      loss = torch.nn.functional.cross_entropy(outputs, y_batch)
      L += loss.item() * len(x_batch)

  L = L / len(dataset_val)
  L_val_history.append(L)
  print(f'Cost function value (validation data): {L}')


In [None]:
# Compare the per-epoch average loss on the training and validation splits.
fig, ax = plt.subplots(figsize=(3, 3))
ax.plot(L_train_history, label='training data')
ax.plot(L_val_history, label='validation data')
# Label the axes so the figure can be read without the surrounding code.
ax.set_xlabel('epoch')
ax.set_ylabel('average cross-entropy loss')
ax.set_title('Cost function value vs. epoch')
ax.legend()
# Render the figure explicitly instead of echoing the repr of the last call.
plt.show()