In [1]:
import torchvision
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet50
from tqdm import tqdm
import pytorch_lightning as pl
import torchmetrics

In [2]:
import torch.nn as nn
class Flatten(nn.Module):
    """Reshape a tensor to ``(batch, channels)``.

    The target shape is built from the first two dimensions only, so every
    remaining dimension must have size 1; otherwise ``view`` raises.
    """

    def forward(self, x):
        batch = x.size(0)
        channels = x.size(1)
        return x.view(batch, channels)

def make_cnn(c=64, num_classes=10):
    ''' Build the 5-layer CNN whose channel widths scale with c.

    Four Conv(3x3) -> BatchNorm -> ReLU stages with widths c, 2c, 4c, 8c
    (the last three each followed by a 2x2 max-pool), then a 4x4 max-pool,
    Flatten and a linear classifier with num_classes outputs.
    The modules are kept in one flat nn.Sequential.
    '''
    # (in_channels, out_channels, followed_by_2x2_pool) for layers 0..3
    conv_stages = [
        (3, c, False),
        (c, c * 2, True),
        (c * 2, c * 4, True),
        (c * 4, c * 8, True),
    ]

    modules = []
    for in_ch, out_ch, pooled in conv_stages:
        modules.append(nn.Conv2d(in_ch, out_ch, kernel_size=3,
                                 stride=1, padding=1, bias=True))
        modules.append(nn.BatchNorm2d(out_ch))
        modules.append(nn.ReLU())
        if pooled:
            modules.append(nn.MaxPool2d(2))

    # Layer 4: final pooling, flatten to (batch, 8c), linear classifier
    modules.append(nn.MaxPool2d(4))
    modules.append(Flatten())
    modules.append(nn.Linear(c * 8, num_classes, bias=True))

    return nn.Sequential(*modules)

In [3]:
# define the LightningModule
class LitCNN(pl.LightningModule):
    """LightningModule that trains a classifier with cross-entropy and Adam.

    Args:
        cnn: Network called on each input batch; its output is treated as
            per-class logits.
        lr: Learning rate passed to ``torch.optim.Adam``.
    """

    def __init__(self, cnn, lr=1e-4):
        super().__init__()
        self.cnn = cnn
        self.lr = lr

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        return self.cnn(x)

    def _loss_and_accuracy(self, batch):
        """Return ``(cross-entropy loss, top-1 accuracy)`` for one batch.

        Args:
            batch: An ``(inputs, labels)`` pair as yielded by the dataloader.
        """
        inputs, labels = batch
        # Deliberately no ``.cuda()`` calls: the Trainer already places each
        # batch on the module's device, and hard-coding CUDA makes the module
        # crash on CPU-only machines and on other accelerators.
        outputs = self(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, labels)

        # Accuracy is only logged, so compute it outside the autograd graph.
        # It is computed directly instead of through
        # ``torchmetrics.functional.accuracy(preds, labels)`` because newer
        # torchmetrics releases require an extra ``task`` argument for that
        # call; the plain comparison works with any version.
        preds = outputs.detach().argmax(dim=1)
        acc = (preds == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss/accuracy; return the loss."""
        loss, acc = self._loss_and_accuracy(batch)

        # Sent to whichever logger the Trainer was given
        self.log("train_loss", loss)
        self.log("train_acc", acc)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the loss/accuracy on a validation batch; return the loss."""
        loss, acc = self._loss_and_accuracy(batch)

        # The keys say "test_" because this notebook passes the CIFAR-10
        # test loader to ``trainer.fit`` as the validation dataloader.
        self.log("test_loss", loss)
        self.log("test_acc", acc)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate ``self.lr``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

In [4]:
# Experiment configuration.
seed = 0           # global RNG seed for this run
width = 64         # width parameter `c` passed to make_cnn
n_cls = 10         # number of output classes
lr = 0.0001        # learning rate handed to LitCNN
batch_size = 256   # batch size used when building the DataLoaders

# Seed the Python, NumPy and PyTorch RNGs before the model is created so that
# weight initialisation (and later shuffling) is repeatable after a kernel
# restart.
pl.seed_everything(seed)

mod = make_cnn(width, n_cls)
litmod = LitCNN(mod, lr=lr)

In [5]:
# Preprocessing handed to the test set: convert to a tensor, then normalise
# every channel with mean 0.5 and std 0.5.
# To train with data augmentation instead, put
# transforms.RandomCrop(32, padding=4) and transforms.RandomHorizontalFlip()
# in front of ToTensor().
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Options shared by the train and test loaders.
loader_opts = {"batch_size": batch_size, "num_workers": 4}

# Training set: the noisy version, loaded from a file saved on disk
# (presumably 20% label noise and no augmentation, going by the file name —
# TODO confirm). For the clean labels use
# torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
# transform=transform) instead.
# NOTE(review): `transform` is not applied to this dataset in this cell;
# confirm the saved tensors were preprocessed the same way as the test set.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load
# files from a trusted source.
trainset = torch.load('./cifar10_noisyp0.2_noDA.pt')
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_opts)

# Test split of CIFAR-10, downloaded to ./data if it is not there yet.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_opts)

Files already downloaded and verified


In [6]:
# Metrics are written as CSV under logs/cnn_width<width>/version_<n>/.
logger = pl.loggers.CSVLogger("logs", name=f"cnn_width{width}")

# `devices=1` replaces the former `gpus=1`: the `gpus` flag is deprecated and
# no longer exists in Lightning 2.x, and it forced a GPU even though
# accelerator="auto" is meant to pick whatever hardware is available.
trainer = pl.Trainer(
    max_epochs=5,
    accelerator="auto",
    devices=1,
    logger=logger)

# The test loader is passed as the validation dataloader, so the metrics
# logged in validation_step are measured on the CIFAR-10 test split.
trainer.fit(litmod, trainloader, testloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | cnn  | Sequential | 1.6 M 
------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.232     Total estimated model params size (MB)


Epoch 4: 100%|██████████| 236/236 [00:06<00:00, 38.00it/s, loss=1.23, v_num=6]


In [9]:
# ## To resume training from a checkpoint, uncomment the code below.

# width = 64
# n_cls = 10
# lr = 0.0001
# batch_size = 256
# mod = make_cnn(width, n_cls)
# litmod = LitCNN(mod, lr=lr)
# run_num_epoch = 10
# prev_ckpt_path = "logs/cnn_width64/version_6/checkpoints/epoch=4-step=979.ckpt"

# trainer = pl.Trainer(
#     max_epochs=run_num_epoch,
#     accelerator="auto",
#     logger=logger,
#     gpus=1)
# # automatically restores model, epoch, step, LR schedulers, apex, etc...
# trainer.fit(litmod, trainloader, testloader, ckpt_path=prev_ckpt_path)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at logs/cnn_width64/version_6/checkpoints/epoch=4-step=979.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Restored all states from the checkpoint file at logs/cnn_width64/version_6/checkpoints/epoch=4-step=979.ckpt

  | Name | Type       | Params
------------------------------------
0 | cnn  | Sequential | 1.6 M 
------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.232     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 236/236 [00:06<00:00, 38.96it/s, loss=0.795, v_num=6]
