In [1]:
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Dataset layout: one sub-directory per split beneath the dataset root.
DATASET_PATH = Path("datasets")
TRAIN_PATH = DATASET_PATH / "train"
TEST_PATH = DATASET_PATH / "test"
VAL_PATH = DATASET_PATH / "val"

# Side length in pixels; every image is resized to img_size x img_size.
img_size = 64

# Training split. Augmentations tried earlier and currently switched off:
# RandomHorizontalFlip(), RandomRotation(15), RandomAffine(15),
# GaussianBlur(5, (0.1, 0.5)), ColorJitter(0.2, 0.2, 0.1, 0.01),
# CenterCrop(img_size).
train_ds = ImageFolder(
    root=TRAIN_PATH.as_posix(),
    transform=transforms.Compose(
        [
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
        ]
    ),
)
# Default DataLoader settings (no batch_size / shuffle arguments given).
train_dl = DataLoader(dataset=train_ds)

# Validation split: same resize + tensor conversion as the training split
# (a CenterCrop(img_size) step was tried here too and is switched off).
val_ds = ImageFolder(
    root=VAL_PATH.as_posix(),
    transform=transforms.Compose(
        [
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
        ]
    ),
)
val_dl = DataLoader(dataset=val_ds)

In [2]:
# Create a CNN Module
import torch.nn as nn
import torch.nn.functional as F


class CNNBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> 2x2 MaxPool -> Dropout -> ReLU building block.

    The convolution runs with stride 1 and a padding of ``kernel // 2`` so
    that, for odd kernel sizes, it leaves height and width unchanged; the
    max-pool (kernel 2, stride 2) then halves both.

    Args:
        c_in: number of input channels.
        c_out: number of output channels.
        kernel: convolution kernel size, either an int (square kernel) or a
            per-dimension sequence of ints.
        dropout: drop probability handed to ``nn.Dropout``.
    """

    def __init__(self, c_in, c_out, kernel=3, dropout=0.1):
        super(CNNBlock, self).__init__()

        # Padding used to be hard-coded to 1, which only preserves the spatial
        # size for kernel=3. Deriving it from the kernel keeps the
        # "conv preserves size, pool halves it" contract for other odd kernels.
        if isinstance(kernel, int):
            padding = kernel // 2
        else:
            padding = tuple(k // 2 for k in kernel)

        self.net = nn.Sequential(
            nn.Conv2d(c_in, c_out, kernel, stride=1, padding=padding),
            nn.BatchNorm2d(c_out),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.net(x)


class CNN(nn.Module):
    """Two-class image classifier: six CNNBlocks, then a linear head.

    Channel widths grow 3 -> 32 -> 64 -> 128 -> 256 -> 512 -> 1024. Each
    CNNBlock halves height and width via its 2x2 max-pool, so the linear
    layer's 1024 input features correspond to a 1x1 map after six blocks
    (i.e. 64x64 inputs, the notebook's ``img_size``).

    The final layer is a Softmax over dim 1, so ``forward`` returns class
    probabilities rather than raw logits. A loss that expects raw logits
    (such as ``F.cross_entropy``) would normalise them a second time.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs of this list are the (c_in, c_out) of each block.
        # A seventh stage (1024 -> 2048) was tried earlier and is disabled.
        widths = [3, 32, 64, 128, 256, 512, 1024]
        stages = [
            CNNBlock(n_in, n_out)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]

        self.net = nn.Sequential(
            *stages,
            nn.Flatten(),
            nn.Linear(widths[-1], 2),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return per-class probabilities for the image batch ``x``."""
        return self.net(x)


# Shape smoke test: a batch of 5 all-ones RGB images should come out as (5, 2).
CNN()(torch.ones((5, 3, img_size, img_size))).shape

torch.Size([5, 2])

In [3]:
# Create a pytorch lightning module
import pytorch_lightning as pl


class LitModel(pl.LightningModule):
    """PyTorch Lightning wrapper that trains and validates the ``CNN`` classifier.

    Logged metrics:
        train_acc: accuracy of each training batch.
        acc: accuracy of each validation batch.
    """

    def __init__(self):
        super().__init__()
        self.model = CNN()

    def forward(self, x):
        """Return the wrapped model's output (class probabilities) for ``x``."""
        return self.model(x)

    @staticmethod
    def _accuracy(probs, targets):
        """Fraction of rows in ``probs`` whose arg-max equals ``targets``."""
        return (torch.argmax(probs, dim=1) == targets).float().mean()

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one ``(inputs, labels)`` batch.

        Returns:
            The negative log-likelihood of the true labels.
        """
        x, y = batch
        probs = self(x)
        # CNN already ends in Softmax, so its output is probabilities.
        # F.cross_entropy applies log_softmax itself, which would squash the
        # probabilities a second time and flatten the gradients. Take the log
        # of the probabilities and use NLL instead; the clamp avoids log(0).
        floor = torch.finfo(probs.dtype).tiny
        loss = F.nll_loss(torch.log(probs.clamp_min(floor)), y)
        self.log("train_acc", self._accuracy(probs, y))
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the accuracy of one validation batch under the key ``"acc"``."""
        x, y = batch
        probs = self(x)
        self.log("acc", self._accuracy(probs, y))

    def configure_optimizers(self):
        """Adam over all parameters with lr=5e-4 and weight decay 1e-5."""
        return torch.optim.Adam(self.parameters(), lr=0.0005, weight_decay=0.00001)


In [4]:
# Fresh LitModel instance; this is the object later passed to trainer.fit.
model = LitModel()

In [None]:
from pytorch_lightning.callbacks import EarlyStopping

# Stop early when the validation accuracy logged as "acc" stops improving
# (mode="max") for `patience` validation checks.
early_stop_callback = EarlyStopping(monitor="acc", min_delta=0.00, patience=4, verbose=False, mode="max")

# accelerator="auto" uses the GPU when one is available and falls back to CPU
# otherwise, so the cell also runs on machines without CUDA (the previous
# hard-coded 'gpu' raised there).
trainer = pl.Trainer(
    max_epochs=40,
    accelerator="auto",
    callbacks=[early_stop_callback],
)

# Dedicated loaders for fitting: small shuffled training batches, large
# unshuffled validation batches.
trainer.fit(
    model,
    train_dataloaders=DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=8),
    val_dataloaders=DataLoader(val_ds, batch_size=128, shuffle=False, num_workers=2),
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | CNN  | 6.3 M 
-------------------------------
6.3 M     Trainable params
0         Non-trainable params
6.3 M     Total params
25.177    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

# Notes
- Batch Size greatly speeds up convergence
- Learning Rate has *some* effect on reducing jitter during training
- Adding more 3x3 Conv2d layers seems to consistently improve the network
- Adding weight decay regularization for Adam reduces training overfitting
- Adding BatchNorm significantly improves performance
- ReLU performs significantly better than Sigmoid
- Having a tiny Fully Connected Layer at the end improves training stability

# Changes
- v11 uses a tiny FCN at the end
- v11 -> v12 uses a 2048 -> Dropout -> 1024 -> Dropout -> 2 head instead of 2048 -> 2
- v11 -> v13 uses an AvgPool at the end (mimicking ResNet)
- v11 -> v14 uses LeakyReLU (Not much difference)
- v11 -> v15 increases the image size slightly, then crops out the middle
- v15 -> v16 moves the crop to the end of the transform pipeline