### Install PyTorch Lightning and Lightning Bolts (required)

In [1]:
!pip3 install lightning-bolts --quiet
!pip3 install pytorch-lightning --quiet

In [9]:
from torchvision import transforms
import pytorch_lightning as pl
#from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
from torchvision.models.resnet import resnet18
from pytorch_lightning import Trainer, LightningModule
import torch.nn as nn
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10

In [3]:
import torchmetrics
from torchmetrics.functional import accuracy
from torchmetrics import Accuracy
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [28]:
# --- Training schedule ---
EPOCHS = 5               # upper bound on epochs given to the Trainer
PRINT_FREQ = 50          # print a progress line every PRINT_FREQ batches

# --- SGD optimizer settings ---
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005

# --- Loader batch sizes (training and validation use the same value) ---
TRAIN_BATCH = VAL_BATCH = 128

In [5]:
# Index of the CUDA device to use (0 = first GPU).
GPU = 0

### Fill in the transform statements below

In [21]:
class CIFAR10DataModule(pl.LightningDataModule):
    """LightningDataModule that downloads CIFAR-10 and serves train/val/test loaders.

    Args:
        train_batch_size: Batch size of the training loader.
        val_batch_size: Batch size of the validation and test loaders.
        data_dir: Directory CIFAR-10 is downloaded to and read from.
        num_workers: Number of worker processes used by every DataLoader.
    """

    def __init__(self, train_batch_size, val_batch_size, data_dir: str = './', num_workers: int = 2):
        super().__init__()
        self.data_dir = data_dir
        self.train_batch_size = train_batch_size
        self.val_batch_size = val_batch_size
        self.num_workers = num_workers

        # Training images get a random horizontal flip; evaluation images are only
        # converted to tensors.
        # NOTE(review): no normalization or crop augmentation is applied here —
        # confirm whether the exercise expects them to be added.
        self.transform_train = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ])
        self.transform_val = transforms.Compose([
            transforms.ToTensor()
        ])

        self.dims = (3, 32, 32)
        self.num_classes = 10

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` (all of them when ``stage`` is None)."""
        # 'validate' is included so the validation set also exists when only
        # validation is requested.
        if stage in ('fit', 'validate', None):
            self.cifar_train = CIFAR10(self.data_dir, train=True, transform=self.transform_train)
            # NOTE: validation reads the train=False split, i.e. the same images
            # that test_dataloader serves; the two are not independent.
            self.cifar_val = CIFAR10(self.data_dir, train=False, transform=self.transform_val)

        if stage == 'test' or stage is None:
            self.cifar_test = CIFAR10(self.data_dir, train=False, transform=self.transform_val)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.cifar_train,
            batch_size=self.train_batch_size,
            num_workers=self.num_workers,
            shuffle=True,
        )

    def val_dataloader(self):
        """Return the validation loader (no shuffling)."""
        return DataLoader(self.cifar_val, batch_size=self.val_batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        """Return the test loader (no shuffling)."""
        # The module has no `batch_size` attribute; evaluation loaders share
        # `val_batch_size`.
        return DataLoader(self.cifar_test, batch_size=self.val_batch_size, num_workers=self.num_workers)

In [11]:
# Build the data module, download CIFAR-10 if it is not present yet, then
# create the datasets (no stage argument is given to setup()).
dm = CIFAR10DataModule(train_batch_size=TRAIN_BATCH, val_batch_size=VAL_BATCH)
dm.prepare_data()
dm.setup()


Files already downloaded and verified
Files already downloaded and verified


In [12]:
# Directory that receives the checkpoint files, and the filename template used
# inside it. They are kept separate so that the directory goes to `dirpath` and
# the template to `filename`.
MODEL_CKPT_PATH = 'model/'
MODEL_CKPT = 'model-{epoch:02d}-{val_loss:.2f}'

# Keep the three checkpoints with the lowest monitored `val_loss`.
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath=MODEL_CKPT_PATH,
    filename=MODEL_CKPT,
    save_top_k=3,
    mode='min',
)

In [13]:
# Pull one batch from the validation loader and display its tensor shapes.
# These samples are meant for a custom ImagePredictionLogger callback.
# NOTE(review): no such callback is defined in the cells visible here.
val_samples = next(iter(dm.val_dataloader()))
val_imgs, val_labels = val_samples[:2]
val_imgs.shape, val_labels.shape

(torch.Size([128, 3, 32, 32]), torch.Size([128]))

In [14]:
# Early stopping on the validation loss (lower is better) with a patience of 3.
early_stop_callback = EarlyStopping(monitor='val_loss', mode='min', patience=3, verbose=False)

### Complete the training, validation, and optimizer methods below

In [24]:
class LitResnet18(LightningModule):
    """ResNet-18 image classifier wrapped as a LightningModule and trained with SGD.

    Args:
        learning_rate: Learning rate given to the SGD optimizer.
        momentum: Momentum given to the SGD optimizer.
        weight_decay: Weight decay given to the SGD optimizer.
        num_classes: Output size of the replaced final layer, also used by the
            accuracy metric.
    """

    def __init__(self, learning_rate, momentum, weight_decay, num_classes=10):
        super().__init__()
        self.nn = resnet18(pretrained=False, progress=True)
        # Swap the stock classification head for one with `num_classes` outputs.
        self.nn.fc = nn.Linear(self.nn.fc.in_features, num_classes)
        self.lr = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.num_classes = num_classes
        # No explicit .cuda(): the loss holds no parameters, and device
        # placement of the module is left to the Trainer.
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.nn(x)

    def _shared_step(self, batch):
        """Compute the loss and the batch accuracy for one (inputs, targets) batch."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        # Functional metric: stateless, and it runs on whatever device the
        # tensors already live on.
        acc = accuracy(preds, y, task="multiclass", num_classes=self.num_classes)
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Run one training batch, log loss/accuracy, and return the loss."""
        loss, acc = self._shared_step(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=False)
        self.log('train_acc', acc, on_step=True, on_epoch=True, logger=False)
        if batch_idx % PRINT_FREQ == 0:
            print("train step! " + str(batch_idx) + " train loss: " + str(loss.item()) + " train acc " + str(acc.item()))
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch, log loss/accuracy, and return the loss."""
        loss, acc = self._shared_step(batch)
        # `val_loss` is the quantity the checkpoint and early-stopping callbacks monitor.
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        if batch_idx % PRINT_FREQ == 0:
            print("val step! " + str(batch_idx) + " val loss: " + str(loss.item()) + " val acc " + str(acc.item()))
        return loss

    def configure_optimizers(self):
        """Return an SGD optimizer over this module's own parameters."""
        # Use self.parameters(), not a notebook-level `model` variable, so the
        # optimizer always targets the instance being trained.
        return torch.optim.SGD(
            self.parameters(),
            lr=self.lr,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
        )

In [25]:
# Instantiate the Lightning-wrapped ResNet-18 with the optimizer settings
# defined in the configuration cell.
model = LitResnet18(learning_rate=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)




In [26]:
# Build the trainer: at most EPOCHS epochs, with the early-stopping and
# checkpoint callbacks attached. No GPU argument is passed.
trainer = pl.Trainer(
    max_epochs=EPOCHS,
    logger=None,
    callbacks=[early_stop_callback, checkpoint_callback],
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [27]:
# Train the model; the data module supplies the training and validation loaders.
trainer.fit(model, datamodule=dm)

Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | nn        | ResNet           | 11.2 M
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

val step! 0 val loss: 2.3204562664031982 val acc 0.0859375


Training: 0it [00:00, ?it/s]

train step! 0 train loss: 2.6171748638153076 train acc 0.09375
train step! 50 train loss: 2.021372079849243 train acc 0.2578125
train step! 100 train loss: 1.8373616933822632 train acc 0.296875
train step! 150 train loss: 2.6018128395080566 train acc 0.34375
train step! 200 train loss: 1.6149816513061523 train acc 0.3984375
train step! 250 train loss: 1.6812665462493896 train acc 0.3515625
train step! 300 train loss: 1.5283910036087036 train acc 0.46875
train step! 350 train loss: 1.5790196657180786 train acc 0.375


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 1.3479198217391968 val acc 0.515625
val step! 50 val loss: 1.4026861190795898 val acc 0.5
train step! 0 train loss: 1.3880482912063599 train acc 0.5234375
train step! 50 train loss: 1.2271296977996826 train acc 0.5390625
train step! 100 train loss: 1.3553073406219482 train acc 0.4765625
train step! 150 train loss: 1.2032804489135742 train acc 0.5703125
train step! 200 train loss: 1.206375002861023 train acc 0.5390625
train step! 250 train loss: 1.45773184299469 train acc 0.46875
train step! 300 train loss: 1.232248306274414 train acc 0.5703125
train step! 350 train loss: 1.190900444984436 train acc 0.5625


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 1.2521189451217651 val acc 0.578125
val step! 50 val loss: 1.3666176795959473 val acc 0.5390625
train step! 0 train loss: 1.2093464136123657 train acc 0.5859375
train step! 50 train loss: 1.184073567390442 train acc 0.578125
train step! 100 train loss: 1.2095469236373901 train acc 0.546875
train step! 150 train loss: 0.9577568769454956 train acc 0.671875
train step! 200 train loss: 1.0198103189468384 train acc 0.609375
train step! 250 train loss: 1.124049425125122 train acc 0.6328125
train step! 300 train loss: 1.2049161195755005 train acc 0.6015625
train step! 350 train loss: 1.1222656965255737 train acc 0.6328125


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 1.1509207487106323 val acc 0.6171875
val step! 50 val loss: 1.3624839782714844 val acc 0.53125
train step! 0 train loss: 0.9523325562477112 train acc 0.6875
train step! 50 train loss: 0.7518081068992615 train acc 0.734375
train step! 100 train loss: 0.7865822911262512 train acc 0.71875
train step! 150 train loss: 0.9375712275505066 train acc 0.6484375
train step! 200 train loss: 0.9453731775283813 train acc 0.6484375
train step! 250 train loss: 0.8611190319061279 train acc 0.6796875
train step! 300 train loss: 0.9593604803085327 train acc 0.6953125
train step! 350 train loss: 0.8411887884140015 train acc 0.6796875


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 0.9532586336135864 val acc 0.6484375
val step! 50 val loss: 1.1406605243682861 val acc 0.578125
train step! 0 train loss: 0.909386932849884 train acc 0.65625
train step! 50 train loss: 0.8923816680908203 train acc 0.7265625
train step! 100 train loss: 0.8190409541130066 train acc 0.6640625
train step! 150 train loss: 0.8114896416664124 train acc 0.6796875
train step! 200 train loss: 0.7733449935913086 train acc 0.765625
train step! 250 train loss: 0.8610629439353943 train acc 0.6953125
train step! 300 train loss: 0.7928433418273926 train acc 0.7109375
train step! 350 train loss: 0.9846262335777283 train acc 0.6640625


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 0.9314310550689697 val acc 0.625
val step! 50 val loss: 0.9926841855049133 val acc 0.6640625
train step! 0 train loss: 0.713104784488678 train acc 0.8046875
train step! 50 train loss: 0.9747906923294067 train acc 0.65625
train step! 100 train loss: 0.8782041072845459 train acc 0.6953125
train step! 150 train loss: 0.7832355499267578 train acc 0.765625
train step! 200 train loss: 0.7506006956100464 train acc 0.7109375
train step! 250 train loss: 0.8115875124931335 train acc 0.7421875
train step! 300 train loss: 0.8085607886314392 train acc 0.7109375
train step! 350 train loss: 0.9328473806381226 train acc 0.65625


Validation: 0it [00:00, ?it/s]

val step! 0 val loss: 0.941392719745636 val acc 0.6875
val step! 50 val loss: 0.7983275651931763 val acc 0.7734375
train step! 0 train loss: 0.8326703310012817 train acc 0.703125


  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
