In [1]:
import torch
import numpy as np
import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
# from torchvision.utils import make_grid
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# download=True fetches MNIST into `root` when it is not already present, so the
# notebook also runs on a fresh machine; files that already exist are reused.
dataset = MNIST(root='data/', download=True, transform=ToTensor())

In [None]:
val_split = 0.2  # fraction of the dataset held out for validation

# Derive the training size as the remainder so the two sizes always sum to
# len(dataset). Truncating both products independently can lose a sample to
# float rounding, in which case random_split raises.
val_sz = int(len(dataset) * val_split)
train_sz = len(dataset) - val_sz

train_ds, val_ds = random_split(dataset, [train_sz, val_sz])
len(train_ds), len(val_ds)

In [None]:
batch_size = 128

# Training batches are reshuffled on every pass over the loader.
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
# Validation only aggregates metrics, so sample order is irrelevant; keeping it
# fixed avoids needless shuffling and makes the per-batch averages repeatable.
val_dl = DataLoader(val_ds, batch_size, shuffle=False)

# Moving Data to the GPU

In [2]:
# Quick check of whether PyTorch can see a CUDA device in this environment.
torch.cuda.is_available()

False

In [None]:
def get_default_device():
  """Return the CUDA device when CUDA is available, otherwise the CPU device."""
  device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
  return torch.device(device_name)

# Resolve the device once; the bare name on the last line displays it.
device = get_default_device()
device

In [None]:
def to_device(data, device):
  """Move `data` to `device`.

  A list or tuple is processed element by element (recursively) and comes
  back as a list; anything else is moved via its own `.to(...)` method.
  """
  if not isinstance(data, (list, tuple)):
    return data.to(device, non_blocking=True)

  moved = []
  for item in data:
    moved.append(to_device(item, device))
  return moved

In [None]:
class DeviceDataLoader():
  """Wrap a data loader so that each batch is moved to `device` when iterated."""

  def __init__(self, dl, device):
    self.dl, self.device = dl, device

  def __iter__(self):
    # Batches are transferred lazily, one at a time, as the wrapped loader
    # produces them.
    yield from (to_device(batch, self.device) for batch in self.dl)

  def __len__(self):
    # Same length as the wrapped loader.
    return len(self.dl)

# Wrap both loaders so every batch is moved to `device` as it is iterated.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
# The model cannot be moved here: no model has been created at this point, and
# to_device() also needs the target device as its second argument. Move it with
# to_device(model, device) once it has been instantiated.

# Model and Training

In [None]:
def accuracy(out, labels):
    """Return the fraction of correct predictions as a 0-dim float tensor.

    Args:
        out: score tensor whose dim 1 indexes the classes; the predicted class
            of each row is the index of its largest score.
        labels: tensor of target class indices, one per row of `out`.
    """
    preds = out.argmax(dim=1)
    # Averaging the boolean matches as floats avoids wrapping an existing
    # tensor in torch.tensor(...) (which warns) and does not depend on how
    # integer tensors are divided.
    return (preds == labels).float().mean()

In [None]:
class Model(nn.Module):
    """Two-layer fully connected classifier with a small built-in training loop.

    Each input sample is flattened to 784 features, passed through a 32-unit
    hidden layer with ReLU, and projected to 10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Model layers:
        self.linear1 = nn.Linear(784, 32)
        self.linear2 = nn.Linear(32, 10)

    def forward(self, x):
        """Flatten every sample in the batch and return the raw output scores."""
        x = x.view(x.size(0), -1)
        y = self.linear1(x)
        y = F.relu(y)
        y = self.linear2(y)
        return y

    @staticmethod
    def _metric_key(metrics):
        """Build the result-dict key / log label for a metric callable."""
        # Callables without __name__ (e.g. functools.partial) get a generic label.
        return f"val_{getattr(metrics, '__name__', 'metric')}"

    def training_step(self, batch, loss_fn):
        """Run the forward pass on one `(inputs, targets)` batch and return its loss."""
        x, y = batch
        y_pred = self(x)
        loss = loss_fn(y_pred, y)
        return loss

    def validation_step(self, batch, loss_fn, metrics=None):
        """Evaluate one `(inputs, targets)` batch.

        Returns:
            A dict with 'val_loss' and, only when `metrics` is given, an
            additional 'val_<metric name>' entry holding `metrics(y_pred, y)`.
        """
        x, y = batch
        y_pred = self(x)
        result = {'val_loss': loss_fn(y_pred, y)}
        # The metric name is only looked up when a metric was supplied, so the
        # default metrics=None no longer fails on `None.__name__`.
        if metrics is not None:
            result[self._metric_key(metrics)] = metrics(y_pred, y)
        return result

    def validation_epoch_end(self, val_dl, loss_fn, metrics=None):
        """Evaluate every batch of `val_dl`, print and return the epoch averages.

        The averages are unweighted means of the per-batch values.

        Returns:
            `(epoch_loss, epoch_metric)` as Python floats; `epoch_metric` is
            None when no `metrics` callable was given.
        """
        # Evaluate in eval mode without autograd so no graph is kept for the
        # collected batch results, then restore whichever mode was active.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                val_batches = [self.validation_step(batch, loss_fn, metrics) for batch in val_dl]
        finally:
            self.train(was_training)

        # Epoch loss
        epoch_loss = torch.stack([b['val_loss'] for b in val_batches]).mean().item()
        print('val_loss: ', epoch_loss)

        # Metrics
        epoch_metric = None
        if metrics is not None:
            key = self._metric_key(metrics)
            epoch_metric = torch.stack([b[key] for b in val_batches]).mean().item()
            print(f'{key}: ', epoch_metric)

        return (epoch_loss, epoch_metric)

    def epoch_end(self, epoch, val_dl, loss_fn, metrics=None):
        """Print the 1-based epoch header and return the validation results."""
        print(f'Epoch {epoch+1}:')
        return self.validation_epoch_end(val_dl, loss_fn, metrics)

    def fit(self, train_dl, val_dl, epochs, lr, loss_fn, opt_fn = torch.optim.SGD, metrics=None):
        """Train for `epochs` passes over `train_dl`, validating after each one.

        Args:
            train_dl: iterable of `(inputs, targets)` training batches.
            val_dl: iterable of `(inputs, targets)` validation batches.
            epochs: number of passes over `train_dl`.
            lr: learning rate handed to the optimizer.
            loss_fn: callable `loss_fn(predictions, targets)` returning the loss.
            opt_fn: optimizer factory called as `opt_fn(parameters, lr=lr)`.
            metrics: optional callable `metrics(predictions, targets)`.

        Returns:
            One `(val_loss, val_metric)` tuple per epoch, in order.
        """
        history = []
        opt = opt_fn(self.parameters(), lr=lr)
        for epoch in range(epochs):
            # Training phase
            self.train()
            for batch in train_dl:
                loss = self.training_step(batch, loss_fn)
                loss.backward()
                opt.step()
                opt.zero_grad()

            # Validation phase
            history.append(self.epoch_end(epoch, val_dl, loss_fn, metrics))
        return history
        

In [None]:
# Create the model and place it on the same device the wrapped data loaders
# move their batches to; otherwise inputs and weights can end up on different
# devices.
model = to_device(Model(), device)

In [None]:
# First run: 5 epochs at learning rate 0.5; the returned per-epoch history is
# displayed as the cell output.
model.fit(
    train_dl,
    val_dl,
    epochs=5,
    lr=0.5,
    loss_fn=F.cross_entropy,
    metrics=accuracy,
)

In [None]:
# Second run on the same model: 10 more epochs at the lower learning rate 0.1.
model.fit(
    train_dl,
    val_dl,
    epochs=10,
    lr=0.1,
    loss_fn=F.cross_entropy,
    metrics=accuracy,
)

In [None]:
# Show the model's repr; a bare last expression is displayed by the notebook.
model