In [1]:
%%capture
!pip install -Uqq pytorch-lightning torchmetrics fastai datasets
!apt install tree

In [2]:
# Imports
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

%load_ext tensorboard

In [3]:
# Import key objects from the datasets library
from datasets import load_dataset

In [4]:
# Load the 'mnist' dataset through `load_dataset`, then display the returned object.
mnist = load_dataset('mnist')
mnist

In [5]:
# Take the first training example and show the whole record,
# followed by its image and its label on their own.
one_item = mnist['train'][0]
for view in (one_item, one_item['image'], one_item['label']):
    display(view)

In [6]:
# Inspect the feature schema of the training split.
mnist['train'].features

In [7]:
def scale_and_flatten(item):
    """
    Add a scaled, one-dimensional copy of the item's image.

    The value under 'image' is converted to a numpy array, divided by 255
    and flattened; the result is stored under 'flattened_img'. The item is
    modified in place and the same object is returned.
    """
    pixels = np.array(item['image'])
    scaled = pixels / 255.
    item['flattened_img'] = scaled.ravel()
    return item

In [8]:
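# Your code here: apply `scale_and_flatten` to every example in `mnist`
# so that the cells below find a 'flattened_img' column.
# mnist = ...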

In [10]:
# Display the dataset again to check its current state.
mnist

In [11]:
# Drop the 'image' column and rebind `mnist` to the result.
# NOTE(review): re-running this cell presumably fails once the column is gone — confirm.
mnist = mnist.remove_columns(['image'])

In [12]:
# Switch the dataset's output format to 'torch' (presumably so indexing yields tensors — confirm).
mnist.set_format('torch')

In [13]:
# Index the training split with several row positions at once and display the result.
sample = mnist['train'][1,4,-1, 18]
display(sample)
# Shapes of the selected 'flattened_img' and 'label' entries.
display(sample['flattened_img'].shape)
display(sample['label'].shape)

In [14]:
# Training loader: shuffled batches of 64 from the 'train' split.
train_dl = DataLoader(mnist['train'], batch_size=64, shuffle=True)
# Validation loader: batches of 128 from the 'test' split, default (unshuffled) order.
valid_dl = DataLoader(mnist['test'], batch_size=128)

In [15]:
# Pull a single batch from the training loader and display it.
batch = next(iter(train_dl))
batch

In [16]:
# Both entries of the batch must have `train_dl.batch_size` rows.
for field in ('flattened_img', 'label'):
    assert batch[field].shape[0] == train_dl.batch_size

In [17]:
# Persist the processed dataset under the 'mnist_data' path.
mnist.save_to_disk('mnist_data')

In [18]:
# Show the layout of the saved dataset directory, two levels deep.
!tree -L 2 mnist_data

In [19]:
from datasets import load_from_disk
# Reload the dataset from 'mnist_data' and set the 'torch' format on the reloaded object.
mnist = load_from_disk('mnist_data')
mnist.set_format('torch')
# Display the reloaded dataset.
mnist

In [20]:
# Import pytorch-lightning and torchmetrics
import pytorch_lightning as pl
import torchmetrics

In [21]:
# Open TensorBoard inline, reading runs from the 'lightning_logs' directory.
%tensorboard --logdir lightning_logs

In [22]:
# Create a simple LightningDataModule
class MnistDataModule(pl.LightningDataModule):
    """LightningDataModule serving a dataset previously written with `save_to_disk`.

    Args:
        dataset_path: Path handed to `load_from_disk`. The loaded object is
            indexed with 'train' and 'test', so both splits must exist.
        batch_size: Batch size of the training loader. The validation loader
            uses twice this value.
    """
    def __init__(self, dataset_path, batch_size:int=32):
        super().__init__()
        self.dataset_path = dataset_path
        self.ds = load_from_disk(dataset_path)
        self.ds.set_format('torch')
        self.batch_size = batch_size

    def train_dataloader(self):
        """Return the training loader over the 'train' split."""
        # Reshuffle every epoch so batches are not served in the fixed
        # on-disk order.
        return DataLoader(self.ds['train'], batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader over the 'test' split (not shuffled)."""
        return DataLoader(self.ds['test'], batch_size=self.batch_size * 2)

In [23]:
# Build the data module from the saved dataset, keeping the default batch size.
datamodule = MnistDataModule('mnist_data')

In [24]:
def linear(in_features, out_features, dropout=0.2):
    """
    Build the basic repeating block used by the models in this notebook:
    Linear -> ReLU -> Dropout -> BatchNorm1d, wrapped in an `nn.Sequential`.
    """
    layers = [
        nn.Linear(in_features, out_features),
        nn.ReLU(),
        nn.Dropout(p=dropout),
        nn.BatchNorm1d(num_features=out_features),
    ]
    return nn.Sequential(*layers)

In [25]:
class MnistMLPModel(pl.LightningModule):
    """Fully connected classifier trained with PyTorch Lightning.

    The network is one input `linear` block, `n_hidden_layers` hidden
    `linear` blocks and a final `nn.Linear` producing one logit per class.

    Args:
        n_inputs: Number of input features per example.
        n_outputs: Number of classes (size of the logit vector).
        n_hidden_layers: Number of hidden blocks after the input block.
        hidden_dim: Width of the input and hidden blocks.
        dropout: Dropout probability passed to every `linear` block.
        lr: Learning rate of the Adam optimizer.
    """
    def __init__(self, n_inputs=28*28, n_outputs=10, n_hidden_layers=3, hidden_dim=128, dropout=0.2, lr:float=1e-3):
        super().__init__()
        # Save hyperparameters as class attributes
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_hidden_layers = n_hidden_layers
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.lr = lr

        # Log the hyperparameters
        self.save_hyperparameters()

        # Set up the components of our model
        self.input = linear(self.n_inputs, self.hidden_dim, self.dropout)
        self.hidden = nn.Sequential(
            *(linear(self.hidden_dim, self.hidden_dim, self.dropout) for _ in range(self.n_hidden_layers))
        )
        self.out = nn.Linear(self.hidden_dim, self.n_outputs)

        # Set up our loss function and metrics
        self.loss = nn.CrossEntropyLoss()
        self.train_acc = torchmetrics.Accuracy(num_classes=self.n_outputs, task="multiclass")
        self.valid_acc = torchmetrics.Accuracy(num_classes=self.n_outputs, task="multiclass")

    def forward(self, X):
        """Return the class logits for a batch of flattened inputs."""
        return self.out(self.hidden(self.input(X)))

    def _shared_step(self, batch, metric):
        """Compute the batch loss and feed the batch into `metric`.

        `batch` is indexed with 'flattened_img' (inputs) and 'label' (targets).
        Returns the cross-entropy loss of the batch.
        """
        X, y = batch['flattened_img'], batch['label']
        logits = self(X)
        # The accuracy metric takes the arg-max over the class dimension, so
        # raw logits rank the classes exactly like softmax probabilities.
        metric(logits, y)
        return self.loss(logits, y)

    def training_step(self, batch, batch_idx):
        """Return the training loss of one batch and log it per step and per epoch."""
        # No explicit self.train() here: the Trainer puts the module in the
        # right mode before each loop.
        loss = self._shared_step(batch, self.train_acc)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def on_train_epoch_end(self):
        """Log the accuracy accumulated over the training epoch."""
        self.log('train_acc', self.train_acc, on_step=False, on_epoch=True, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        """Return the validation loss of one batch and log it per epoch only."""
        # No explicit self.eval() here: the Trainer runs validation in eval mode.
        loss = self._shared_step(batch, self.valid_acc)
        self.log('valid_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def on_validation_epoch_end(self):
        """Log the accuracy accumulated over the validation epoch."""
        self.log('valid_acc', self.valid_acc, on_step=False, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate `self.lr`."""
        return optim.Adam(self.parameters(), lr=self.lr)

In [26]:
# Instantiate the MLP: 3 hidden blocks of width 128, dropout 0.2, default learning rate.
model = MnistMLPModel(n_hidden_layers=3, hidden_dim=128, dropout=0.2)

In [27]:
# Instantiate some callbacks.
# Both callbacks watch the logged 'valid_loss' (lower is better).
callbacks = [
    # Stop training once 'valid_loss' has not improved for 3 validation runs.
    pl.callbacks.EarlyStopping(monitor='valid_loss', mode='min', verbose=True, patience=3),
    # Without `monitor`, the checkpoint callback only tracks the most recent
    # checkpoint; monitoring 'valid_loss' makes `best_model_path` point to the
    # checkpoint with the lowest validation loss.
    pl.callbacks.ModelCheckpoint(dirpath='lightning_checkpoints', monitor='valid_loss', mode='min', verbose=True),
]

In [28]:
# TensorBoard logger saving under 'lightning_logs' with run name 'lightning_mlp'.
# NOTE(review): log_graph=True presumably needs the model to define `example_input_array` — confirm.
logger = pl.loggers.TensorBoardLogger(save_dir='lightning_logs', log_graph=True, name='lightning_mlp')

In [29]:
trainer = pl.Trainer(
    # Let Lightning pick the hardware: a GPU when one is available, otherwise
    # the CPU. Hard-coding accelerator='gpu' with devices=torch.cuda.device_count()
    # fails on machines without a GPU (device count 0).
    accelerator='auto', devices='auto',
    max_epochs=30, # Set the number of epochs
    callbacks=callbacks, # Pass the callbacks to the trainer
    logger=logger, # Pass the logger to the trainer
    log_every_n_steps=1, # Determine how often you want to log metrics. If computing metrics is slow, increasing this number could improve training time.
)

In [30]:
# Train `model` using the loaders provided by `datamodule`.
trainer.fit(model=model, datamodule=datamodule)

In [31]:
# Notes:


In [32]:
# Save the final version
# (the trainer's current state is written to 'lightning_final_model.ckpt').
trainer.save_checkpoint('lightning_final_model.ckpt')

In [33]:
# Load from checkpoint
# (rebinds `model` to a new instance restored from the file saved above).
model = MnistMLPModel.load_from_checkpoint('lightning_final_model.ckpt')

In [34]:
# Run a validation pass of the reloaded model on the data module's validation loader.
trainer.validate(model, datamodule.val_dataloader())

In [35]:
# Find the model checkpoint callback
# (display every callback attached to the trainer).
trainer.callbacks

In [36]:
# Find the best model path
# `trainer.checkpoint_callback` returns the trainer's ModelCheckpoint callback
# directly, so this does not depend on where it sits in `trainer.callbacks`.
best_model_path = trainer.checkpoint_callback.best_model_path
best_model_path

In [37]:
# Load from the best model path
# (rebinds `model` to a new instance restored from `best_model_path`).
model = MnistMLPModel.load_from_checkpoint(best_model_path)

In [38]:
# Run a validation pass of the model loaded from `best_model_path`.
trainer.validate(model, datamodule.val_dataloader())

In [39]:
# Test inferences on a batch of data with the loaded model
batch = next(iter(datamodule.val_dataloader()))

# Put the model in eval mode so Dropout is disabled and BatchNorm uses its
# running statistics, and skip gradient tracking since nothing is trained here.
model.eval()
with torch.no_grad():
    # Move the inputs to wherever the model's parameters live.
    logits = model(batch['flattened_img'].to(model.device))

# The arg-max of the logits equals the arg-max of their softmax.
inferences = logits.argmax(dim=1).cpu()

# Print the first 21 predictions next to their labels.
for i, l in zip(inferences.numpy()[:21], batch['label'].numpy()[:21]):
    print(f'Inference: {i}, Label: {l}, Correct: {i == l}')

In [40]:
# For fastai, they generally recommend importing *
from fastai import *
from fastai.data.all import *
from fastai.tabular.all import *
from fastai.callback.tensorboard import *

In [41]:
class MnistDataset(torch.utils.data.Dataset):
    """Map-style dataset over a CSV whose first column is the label and whose
    remaining columns are pixel values.

    Args:
        path: Location of the CSV file.
        header: Forwarded to `pd.read_csv`. Defaults to `None` (no header
            row), so the first line of the file is kept as a sample instead
            of being consumed as column names. Pass `'infer'` or a row number
            for files that do have a header.

    Each item is a `(features, label)` pair: `features` is a float32 tensor of
    the pixel columns divided by 255, `label` is a tensor holding the value of
    the first column.
    """
    def __init__(self, path, header=None):
        super().__init__()
        self.path = path
        self.df = pd.read_csv(path, header=header)
        self.X = self.df.iloc[:, 1:].values / 255
        self.y = self.df.iloc[:, 0]

    def __len__(self):
        """Number of rows (samples) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the `(features, label)` pair at position `idx`."""
        features = torch.tensor(self.X[idx]).float()
        # Positional lookup, matching how `self.X` is indexed; plain
        # `self.y[idx]` would be a label-based lookup on the Series index.
        label = torch.tensor(self.y.iloc[idx])
        return features, label

In [42]:
# Wrap the two CSV-backed datasets (train first, then test) into fastai DataLoaders.
dls = DataLoaders.from_dsets(MnistDataset('sample_data/mnist_train_small.csv'), MnistDataset('sample_data/mnist_test.csv'))

In [43]:
# Grab one batch and display the shapes of its inputs and targets.
x_b, y_b = dls.one_batch()
x_b.shape, y_b.shape

In [44]:
# Your code here

In [46]:
# Read both CSVs with header=None, i.e. without treating any row as column names.
train_df = pd.read_csv('sample_data/mnist_train_small.csv', header=None)
valid_df = pd.read_csv('sample_data/mnist_test.csv', header=None)

In [47]:
# Tag every row with its split: 0 for training rows, 1 for validation rows.
train_df['is_valid'] = 0
valid_df['is_valid'] = 1

# Stack the two frames, renumber the rows and name column 0 'label'.
mnist_df = pd.concat([train_df, valid_df], axis=0, ignore_index=True)
mnist_df = mnist_df.reset_index(drop=True)
mnist_df = mnist_df.rename(columns={0:'label'})

In [48]:
# Note: Just this block took TONS of reading documentation,
# diving into source code, trial and error and it still doesn't
# do exactly what I want. But, here's an example of creating
# dataloaders with data blocks from scratch.
# We'll see it's much more powerful with data like images.

# Pixel columns: every column except the first and the last one of `mnist_df`.
pixel_cols = mnist_df.columns[1:-1].tolist()

# NOTE(review): the lambdas below may prevent pickling/exporting objects built
# from this DataBlock — confirm if export is needed.
block = DataBlock(
    blocks = [TransformBlock, CategoryBlock], # What types of blocks is it?
    splitter = ColSplitter(col='is_valid'), # How do we split the train and valid datasets?
    get_x = lambda x: torch.FloatTensor(x[pixel_cols].values / 255.), # How do we get each X item?
    get_y = lambda x: x['label'], # How do we get each y item?
)

In [49]:
# Build datasets from the combined frame, then check the result's type and
# whether it exposes `train` and `valid` attributes.
dsets = block.datasets(mnist_df)
type(dsets), hasattr(dsets, 'train'), hasattr(dsets, 'valid')

In [50]:
# Turn the datasets into dataloaders (rebinds `dls` from the earlier cell).
dls = dsets.dataloaders()

In [51]:
# Grab one batch from the new dataloaders and display the input/target shapes.
x_b, y_b = dls.one_batch()
x_b.shape, y_b.shape

In [52]:
def mnistmodel(n_inputs=28*28, n_outputs=10, n_hidden_layers=3, hidden_dim=128):
    """
    Assemble a plain `nn.Sequential` MLP: one input `linear` block,
    `n_hidden_layers` hidden `linear` blocks grouped in their own
    `nn.Sequential`, and a final `nn.Linear` with `n_outputs` outputs.
    """
    stem = linear(n_inputs, hidden_dim)
    hidden_blocks = [linear(hidden_dim, hidden_dim) for _ in range(n_hidden_layers)]
    body = nn.Sequential(*hidden_blocks)
    head = nn.Linear(hidden_dim, n_outputs)

    return nn.Sequential(stem, body, head)

In [53]:
# Build the fastai Learner: the dataloaders, a fresh MLP with hidden width 128,
# a cross-entropy loss, early-stopping and model-saving callbacks, and
# error-rate and accuracy metrics.
learn = Learner(
    dls,
    mnistmodel(hidden_dim=128),
    loss_func=CrossEntropyLossFlat(),
    cbs=[
         EarlyStoppingCallback(patience=3),
         SaveModelCallback(fname='fastaimodel'),
    ],
    metrics=[error_rate, accuracy]
)

In [54]:
# Run fastai's learning-rate finder on the learner.
learn.lr_find()

In [55]:
# Train for up to 30 epochs with a one-cycle schedule and a maximum learning rate of 1e-3.
learn.fit_one_cycle(30, lr_max=1e-3)

In [56]:
# Save the learner's model together with the optimizer state.
# NOTE(review): fastai presumably appends '.pth' itself, so this may produce 'model.pth.pth' — confirm.
learn.save('model.pth', with_opt=True)

In [57]:
# Load the saved weights back, using the same name that was passed to `save`.
# NOTE(review): `load` presumably returns the same Learner, making `learn2` an alias of `learn` — confirm.
learn2 = learn.load('model.pth')

In [58]:
# Grab a fresh batch to test predictions on.
x_b, y_b = dls.one_batch()

In [59]:
# Run a single-item prediction on the first example of the batch.
learn2.predict(x_b[0])