In [1]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torch.optim import SGD, Adam
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from torchvision import datasets
import torch
# Directory FMNIST is downloaded to; any directory can be used here.
data_folder = './content/'
# Training split and held-out split, both requested with download=True.
fmnist = datasets.FashionMNIST(root=data_folder, train=True, download=True)
val_fmnist = datasets.FashionMNIST(root=data_folder, train=False, download=True)
# Keep the image data and the targets of each split as notebook-level names;
# get_data() reads these four names.
tr_images = fmnist.data
tr_targets = fmnist.targets
val_images = val_fmnist.data
val_targets = val_fmnist.targets

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./content/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:21<00:00, 1229563.12it/s]


Extracting ./content/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./content/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 126710.75it/s]


Extracting ./content/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:03<00:00, 1467612.29it/s]


Extracting ./content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]





OSError: [Errno 50] Network is down

# FMNISTDataset and training helpers

In [3]:
# get dataset
class FMNISTDataset(Dataset):
  """Dataset of (image, label) pairs served from the notebook-level `device`.

  Args:
    x: image tensor; it is converted to float, divided by 255 and viewed
       as (-1, 1, 28, 28).
    y: label tensor indexed in lock-step with `x`.

  Both tensors are moved to `device` once, at construction time, so that
  `__getitem__` is a plain index operation instead of a per-sample
  host-to-device copy repeated for every sample of every epoch.
  """
  def __init__(self, x, y):
    x = x.float()/255
    x = x.view(-1,1,28,28)
    # Single bulk transfer; items returned later already live on `device`.
    self.x, self.y = x.to(device), y.to(device)
  def __getitem__(self, ix):
    """Return the (image, label) pair at index `ix`."""
    return self.x[ix], self.y[ix]
  def __len__(self):
    """Number of samples held by the dataset."""
    return len(self.x)

def train_batch(x, y, model, optimizer, loss_fn):
  """Run one optimisation step on a single batch and return its loss.

  Args:
    x, y: inputs and targets of the batch.
    model: module to update.
    optimizer: optimizer bound to `model`'s parameters.
    loss_fn: callable computing the loss from (prediction, y).

  Returns:
    The batch loss as a Python number (`batch_loss.item()`).
  """
  # accuracy() and val_loss() leave the model in eval mode; switch back
  # explicitly so dropout / batch-norm layers train no matter what the
  # caller ran before this step.
  model.train()
  prediction = model(x)
  batch_loss = loss_fn(prediction, y)
  batch_loss.backward()
  optimizer.step()
  # Reset gradients so they do not accumulate into the next batch.
  optimizer.zero_grad()
  return batch_loss.item()

def accuracy(x, y, model):
  """Return one boolean per sample: True where the top-scoring class equals `y`.

  The model is switched to eval mode (and left there) and the forward pass
  runs without gradient tracking. The result is a plain Python list.
  """
  model.eval()
  with torch.no_grad():
    scores = model(x)
  # Index of the largest score along the last axis = predicted class.
  predicted = torch.max(scores, dim=-1).indices
  hits = (predicted == y)
  return hits.cpu().numpy().tolist()
  
@torch.no_grad()
def val_loss(x, y, model, loss_fn):
  """Evaluate `loss_fn` on one batch in eval mode and return it as a Python number.

  Gradient tracking is disabled by the decorator; the model is left in
  eval mode afterwards.
  """
  model.eval()
  return loss_fn(model(x), y).item()

# DataLoader

In [None]:
# get dataloader
def get_data(batch_size=32):
  """Build the training and validation DataLoaders.

  Reads the notebook-level tensors `tr_images`, `tr_targets`,
  `val_images` and `val_targets` and wraps them in FMNISTDataset.

  Args:
    batch_size: training mini-batch size (default 32, the value that was
      previously hard-coded).

  Returns:
    (trn_dl, val_dl): a shuffled training loader and an unshuffled
    validation loader.
  """
  train_ds = FMNISTDataset(tr_images, tr_targets)
  trn_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
  val_ds = FMNISTDataset(val_images, val_targets)
  # The validation loader uses a single batch as large as the validation set.
  val_dl = DataLoader(val_ds, batch_size=len(val_images), shuffle=False)
  return trn_dl, val_dl

In [4]:
# make train, valid loop as function

def train(trn_dl, val_dl, model, loss_fn, optimizer, num_epochs=30):
  """Train `model` for `num_epochs` epochs and collect per-epoch metrics.

  Every epoch performs, in order:
    1. an optimisation pass over `trn_dl` (train_batch on each batch),
    2. a second pass over `trn_dl` measuring accuracy(),
    3. a pass over `val_dl` measuring val_loss() and accuracy().

  Returns:
    (train_losses, train_accuracies, val_losses, val_accuracies), four
    lists holding one mean value per epoch.
  """
  history = {'trn_loss': [], 'trn_acc': [], 'val_loss': [], 'val_acc': []}

  for _ in range(num_epochs):
    # 1. optimisation pass
    model.train()
    step_losses = [
      train_batch(x, y, model, optimizer, loss_fn) for x, y in trn_dl
    ]
    epoch_trn_loss = np.array(step_losses).mean()

    # 2. training accuracy, measured after the epoch's updates
    trn_hits = []
    for x, y in trn_dl:
      trn_hits.extend(accuracy(x, y, model))
    epoch_trn_acc = np.mean(trn_hits)

    # 3. validation loss and accuracy
    model.eval()
    val_step_losses, val_hits = [], []
    for x, y in val_dl:
      val_step_losses.append(val_loss(x, y, model, loss_fn))
      val_hits.extend(accuracy(x, y, model))
    epoch_val_loss = np.mean(val_step_losses)
    epoch_val_acc = np.mean(val_hits)

    # record this epoch
    history['trn_loss'].append(epoch_trn_loss)
    history['trn_acc'].append(epoch_trn_acc)
    history['val_loss'].append(epoch_val_loss)
    history['val_acc'].append(epoch_val_acc)

  return (history['trn_loss'], history['trn_acc'],
          history['val_loss'], history['val_acc'])

### L2 Regularization, Dropout, Batch Normalization, and All Combined

In [5]:
# Shared helpers: the four experiments below differ only in whether they use
# batch normalization, dropout and/or Adam weight decay.
def _build_mlp(use_batch_norm=False, dropout_p=0.0):
  """Build the 784 -> 1000 -> 1000 -> 10 MLP on the notebook-level `device`.

  Each hidden stage is Linear -> [BatchNorm1d] -> ReLU -> [Dropout]; the
  optional layers are inserted only when requested, so the layer order
  matches the hand-written models this helper replaces.
  """
  layers = [nn.Flatten()]
  in_features = 28*28
  for _ in range(2):
    layers.append(nn.Linear(in_features, 1000))
    if use_batch_norm:
      layers.append(nn.BatchNorm1d(1000))
    layers.append(nn.ReLU())
    if dropout_p > 0:
      layers.append(nn.Dropout(dropout_p))
    in_features = 1000
  layers.append(nn.Linear(1000, 10))
  return nn.Sequential(*layers).to(device)

def _run_experiment(num_epochs, use_batch_norm=False, dropout_p=0.0, weight_decay=0.0):
  """Create data loaders, model, loss and Adam optimizer, then call train()."""
  trn_dl, val_dl = get_data()
  model = _build_mlp(use_batch_norm=use_batch_norm, dropout_p=dropout_p)
  loss_fn = nn.CrossEntropyLoss()
  optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=weight_decay)
  return train(trn_dl, val_dl, model, loss_fn, optimizer, num_epochs)

# L2 Regularization
def train_l2(num_epochs=10):
  """Train the plain MLP with Adam weight_decay=1e-5; returns train()'s four lists."""
  return _run_experiment(num_epochs, weight_decay=1e-5)

# dropout
def train_dropout(num_epochs=10):
  """Train the MLP with Dropout(0.2) after each hidden ReLU; returns train()'s four lists."""
  return _run_experiment(num_epochs, dropout_p=0.2)

# batch normalization
def train_batch_norm(num_epochs=10):
  """Train the MLP with BatchNorm1d after each hidden Linear; returns train()'s four lists."""
  return _run_experiment(num_epochs, use_batch_norm=True)

# all
def train_all(num_epochs=10):
  """Train the MLP with batch norm, dropout and weight decay combined; returns train()'s four lists."""
  return _run_experiment(num_epochs, use_batch_norm=True, dropout_p=0.2, weight_decay=1e-5)


In [None]:
# trainer: run each regularisation variant for 30 epochs and keep the four
# per-epoch histories (train loss, train accuracy, val loss, val accuracy).
(l2_losses, l2_accuracies,
 l2_val_losses, l2_val_accuracies) = train_l2(30)
(dropout_losses, dropout_accuracies,
 dropout_val_losses, dropout_val_accuracies) = train_dropout(30)
(batch_norm_losses, batch_norm_accuracies,
 batch_norm_val_losses, batch_norm_val_accuracies) = train_batch_norm(30)
(all_losses, all_accuracies,
 all_val_losses, all_val_accuracies) = train_all(30)

# Plot Loss and Accuracy over Epochs

In [None]:
# get plot for loss and accuracy value over increasing epochs
# One entry per experiment:
# (label, colour, train loss, val loss, train accuracy, val accuracy)
curve_specs = [
  ('l2', 'blue', l2_losses, l2_val_losses, l2_accuracies, l2_val_accuracies),
  ('dropout', 'green', dropout_losses, dropout_val_losses, dropout_accuracies, dropout_val_accuracies),
  ('batch norm', 'red', batch_norm_losses, batch_norm_val_losses, batch_norm_accuracies, batch_norm_val_accuracies),
  ('all', 'black', all_losses, all_val_losses, all_accuracies, all_val_accuracies),
]

fig, (loss_ax, acc_ax) = plt.subplots(2, 1)

# Solid line = training curve, dashed line = validation curve.
for curve_label, curve_color, trn_loss_hist, val_loss_hist, trn_acc_hist, val_acc_hist in curve_specs:
  loss_ax.plot(trn_loss_hist, label=curve_label, color=curve_color)
  loss_ax.plot(val_loss_hist, label=f'{curve_label} val', color=curve_color, linestyle='--')
  # Accuracies are recorded as fractions in [0, 1]; scale by 100 so the
  # plotted values actually match the "(%)" unit on the axis label.
  acc_ax.plot(np.asarray(trn_acc_hist) * 100, label=curve_label, color=curve_color)
  acc_ax.plot(np.asarray(val_acc_hist) * 100, label=f'{curve_label} val', color=curve_color, linestyle='--')

# A single legend, placed outside the loss panel, serves both panels.
loss_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Loss value over increasing epochs')

acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy(%)')
acc_ax.set_title('Accuracy value over increasing epochs')

fig.tight_layout()
plt.show()

# Summary Table for L2, Dropout, Batch Norm, and All Combined

In [None]:
# get tables
# Each experiment is summarised by the mean of its last 5 epochs.
cases = ['L2', 'Dropout', 'BatchNorm', 'All']
train_losses_mean = [np.mean(hist[-5:]) for hist in (l2_losses, dropout_losses, batch_norm_losses, all_losses)]
val_losses_mean = [np.mean(hist[-5:]) for hist in (l2_val_losses, dropout_val_losses, batch_norm_val_losses, all_val_losses)]
train_accuracies_mean = [np.mean(hist[-5:]) for hist in (l2_accuracies, dropout_accuracies, batch_norm_accuracies, all_accuracies)]
val_accuracies_mean = [np.mean(hist[-5:]) for hist in (l2_val_accuracies, dropout_val_accuracies, batch_norm_val_accuracies, all_val_accuracies)]

# Absolute train/validation gap per experiment.
loss_gaps = [abs(trn - val) for trn, val in zip(train_losses_mean, val_losses_mean)]
accuracy_gaps = [abs(trn - val) for trn, val in zip(train_accuracies_mean, val_accuracies_mean)]

fig, ax = plt.subplots()

# "Traning" typo in the displayed row labels corrected to "Training".
row_labels = ['Train loss', 'Validation loss', 'Training/Validation loss gap', 'Train accuracy (%)', 'Validation accuracy (%)', 'Training/Validation accuracy gap (%)']
col_labels = ['L2 regular', 'Dropout', 'Batch norm', 'All']

# Losses are shown to 4 decimals; accuracies are fractions, so they are
# multiplied by 100 to be displayed as percentages with 2 decimals.
table_data = [
  [round(val, 4) for val in train_losses_mean],
  [round(val, 4) for val in val_losses_mean],
  [round(val, 4) for val in loss_gaps],
  [round(val*100, 2) for val in train_accuracies_mean],
  [round(val*100, 2) for val in val_accuracies_mean],
  [round(val*100, 2) for val in accuracy_gaps],
]

table = ax.table(cellText=table_data, rowLabels=row_labels, colLabels=col_labels, loc='center')
table.auto_set_font_size(False)
table.set_fontsize(14)
table.scale(1, 2)
# Only the table is of interest; hide the empty axes behind it.
ax.axis('off')

plt.show()