In this notebook I set up proper cross-validation.

In [8]:
!mkdir ../models/ > /dev/null 2>&1

In [19]:
%%writefile ../models/model_3.py

NUM_EPOCHS = 20
NUM_FOLDS = 5


import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from torchvision import transforms


class BobRossSegmentedImagesDataset(Dataset):
    def __init__(self, dataroot):
        super().__init__()
        self.dataroot = dataroot
        self.imgs = list((self.dataroot / 'train' / 'images').rglob('*.png'))
        self.segs = list((self.dataroot / 'train' / 'labels').rglob('*.png'))
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)), transforms.ToTensor()
        ])
        self.color_key = {
            3 : 0,
            5: 1,
            10: 2,
            14: 3,
            17: 4,
            18: 5,
            22: 6,
            27: 7,
            61: 8
        }
        assert len(self.imgs) == len(self.segs)
        # TODO: remean images to N(0, 1)?
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, i):
        def translate(x):
            return self.color_key[x]
        translate = np.vectorize(translate)
        
        img = Image.open(self.imgs[i])
        img = self.transform(img)
        
        seg = Image.open(self.segs[i])
        seg = seg.resize((256, 256))
        
        # Labels are in the ADE20K ontology and are not consequetive,
        # we have to apply a remap operation over the labels in a just-in-time
        # manner. This slows things down, but it's fine, this is just a demo
        # anyway.
        seg = translate(np.array(seg)).astype('int64')
        
        # One-hot encode the segmentation mask.
        # def ohe_mat(segmap):
        #     return np.array(
        #         list(
        #             np.array(segmap) == i for i in range(9)
        #         )
        #     ).astype(int).reshape(9, 256, 256)
        # seg = ohe_mat(seg)
        
        # Additionally, the original UNet implementation outputs a segmentation map
        # for a subset of the overall image, not the image as a whole! With this input
        # size the segmentation map targeted is a (164, 164) center crop.
        seg = seg[46:210, 46:210]
        
        return img, seg
    
    
from torch import nn

class UNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1_1 = nn.Conv2d(3, 64, 3)
        self.relu_1_2 = nn.ReLU()
        self.conv_1_3 = nn.Conv2d(64, 64, 3)
        self.relu_1_4 = nn.ReLU()
        self.pool_1_5 = nn.MaxPool2d(2)
        
        self.conv_2_1 = nn.Conv2d(64, 128, 3)
        self.relu_2_2 = nn.ReLU()
        self.conv_2_3 = nn.Conv2d(128, 128, 3)
        self.relu_2_4 = nn.ReLU()        
        self.pool_2_5 = nn.MaxPool2d(2)
        
        self.conv_3_1 = nn.Conv2d(128, 256, 3)
        self.relu_3_2 = nn.ReLU()
        self.conv_3_3 = nn.Conv2d(256, 256, 3)
        self.relu_3_4 = nn.ReLU()
        self.pool_3_5 = nn.MaxPool2d(2)
        
        self.conv_4_1 = nn.Conv2d(256, 512, 3)
        self.relu_4_2 = nn.ReLU()
        self.conv_4_3 = nn.Conv2d(512, 512, 3)
        self.relu_4_4 = nn.ReLU()
        
        # deconv is the '2D transposed convolution operator'
        self.deconv_5_1 = nn.ConvTranspose2d(512, 256, (2, 2), 2)
        # 61x61 -> 48x48 crop
        self.c_crop_5_2 = lambda x: x[:, :, 6:54, 6:54]
        self.concat_5_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_5_4 = nn.Conv2d(512, 256, 3)
        self.relu_5_5 = nn.ReLU()
        self.conv_5_6 = nn.Conv2d(256, 256, 3)
        self.relu_5_7 = nn.ReLU()
        
        self.deconv_6_1 = nn.ConvTranspose2d(256, 128, (2, 2), 2)
        # 121x121 -> 88x88 crop
        self.c_crop_6_2 = lambda x: x[:, :, 17:105, 17:105]
        self.concat_6_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_6_4 = nn.Conv2d(256, 128, 3)
        self.relu_6_5 = nn.ReLU()
        self.conv_6_6 = nn.Conv2d(128, 128, 3)
        self.relu_6_7 = nn.ReLU()
        
        self.deconv_7_1 = nn.ConvTranspose2d(128, 64, (2, 2), 2)
        # 252x252 -> 168x168 crop
        self.c_crop_7_2 = lambda x: x[:, :, 44:212, 44:212]
        self.concat_7_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_7_4 = nn.Conv2d(128, 64, 3)
        self.relu_7_5 = nn.ReLU()
        self.conv_7_6 = nn.Conv2d(64, 64, 3)
        self.relu_7_7 = nn.ReLU()
        
        # 1x1 conv ~= fc; n_classes = 9
        self.conv_8_1 = nn.Conv2d(64, 9, 1)

    def forward(self, x):
        x = self.conv_1_1(x)
        x = self.relu_1_2(x)
        x = self.conv_1_3(x)
        x_residual_1 = self.relu_1_4(x)
        x = self.pool_1_5(x_residual_1)
        
        x = self.conv_2_1(x)
        x = self.relu_2_2(x)        
        x = self.conv_2_3(x)
        x_residual_2 = self.relu_2_4(x)        
        x = self.pool_2_5(x_residual_2)
        
        x = self.conv_3_1(x)
        x = self.relu_3_2(x)        
        x = self.conv_3_3(x)
        x_residual_3 = self.relu_3_4(x)
        x = self.pool_3_5(x_residual_3)
        
        x = self.conv_4_1(x)
        x = self.relu_4_2(x)
        x = self.conv_4_3(x)
        x = self.relu_4_4(x)
        
        x = self.deconv_5_1(x)
        x = self.concat_5_3(self.c_crop_5_2(x_residual_3), x)
        x = self.conv_5_4(x)
        x = self.relu_5_5(x)
        x = self.conv_5_6(x)
        x = self.relu_5_7(x)
        
        x = self.deconv_6_1(x)
        x = self.concat_6_3(self.c_crop_6_2(x_residual_2), x)
        x = self.conv_6_4(x)
        x = self.relu_6_5(x)
        x = self.conv_6_6(x)
        x = self.relu_6_7(x)
        
        x = self.deconv_7_1(x)
        x = self.concat_7_3(self.c_crop_7_2(x_residual_1), x)
        x = self.conv_7_4(x)
        x = self.relu_7_5(x)
        x = self.conv_7_6(x)
        x = self.relu_7_7(x)
        
        x = self.conv_8_1(x)
        return x

from pathlib import Path
dataroot = Path('/spell/bob-ross-kaggle-dataset/')
dataset = BobRossSegmentedImagesDataset(dataroot)
dataloader = DataLoader(dataset, shuffle=True)

import numpy as np
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

from sklearn.model_selection import KFold
from torch.utils.tensorboard import SummaryWriter

kf = KFold(n_splits=NUM_FOLDS, shuffle=True)
idxs = list(range(len(dataset)))
for fold, (train_idxs, test_idxs) in enumerate(kf.split(idxs)):
    writer = SummaryWriter(f'/spell/tensorboards/experiment_3_fold_{fold}')
    model = UNet()
    model.cuda()
    
    for epoch in range(NUM_EPOCHS):
        losses = []

        for i, train_idx in enumerate(train_idxs):
            batch, segmap = dataset[i]

            batch = batch[None].cuda()
            segmap = torch.tensor(segmap[None]).cuda()

            optimizer.zero_grad()

            output = model(batch)
            loss = criterion(output, segmap)
            loss.backward()
            optimizer.step()

            curr_loss = loss.item()
            if i % 50 == 0:
                print(
                    f'Finished fold {fold}, epoch {epoch}, batch {i}. Loss: {curr_loss:.3f}.'
                )

            writer.add_scalar(
                'training loss', curr_loss, epoch * len(dataloader) + i
            )
            losses.append(curr_loss)

        print(
            f'Finished epoch {epoch}. '
            f'avg loss: {np.mean(losses)}; median loss: {np.min(losses)}'
        )

Overwriting ../models/model_3.py


The `pytorch` dataset and dataloder interface do not support cross-validation train-test splits out-of-the-box.

There's a couple of ways to get around this. The lightweight way is to implement it ourselves, as here. Recall that `KFold` just draws indices, so we can pass a range of integers, get the indices, and then load the dataset along these indices. This is what I've done here. Notes on this approach:

* Lightweight, minimizes code modification.
* Have to fall back from using the `DataLoader` class to implementing data loading ourselves, drawing directly from the `Dataset` class instead. This requires many annoying small changes, and we lose all of the nice features `DataLoader` provides, e.g. concurrent dataset loading, automatic data batching, etcetera. For more complex datasets this approach is not going to work.

One alternative is using `skorch`. `skorch` is a `sklearn` API wrapper on PyTorch, which brings PyTorch in line with the `scikit-learn` API and allows the models to be used with `sklearn` core. Of course we know from experience with `keras` that this does *not* mean compatibility with the broader `sklearn` ecosystem, unfortunately, but it should mean compatibility with the `sklearn` library itself. Of course `skorch` has the obvious downsides:

* It's another layer of abstraction, which can leak. Without more experience with it I can't assess how much of a risk there is of this.
* It's a higher-level model wrapper, so may require workarounds to be compatible with other tools that rely on the direct PyTorch interface.

Let's try the `skorch` approach.

In [15]:
# %%writefile ../models/model_3.py

NUM_EPOCHS = 2
NUM_FOLDS = 3


import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from torchvision import transforms


class BobRossSegmentedImagesDataset(Dataset):
    def __init__(self, dataroot):
        super().__init__()
        self.dataroot = dataroot
        self.imgs = list((self.dataroot / 'train' / 'images').rglob('*.png'))
        self.segs = list((self.dataroot / 'train' / 'labels').rglob('*.png'))
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)), transforms.ToTensor()
        ])
        self.color_key = {
            3 : 0,
            5: 1,
            10: 2,
            14: 3,
            17: 4,
            18: 5,
            22: 6,
            27: 7,
            61: 8
        }
        assert len(self.imgs) == len(self.segs)
        # TODO: remean images to N(0, 1)?
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, i):
        def translate(x):
            return self.color_key[x]
        translate = np.vectorize(translate)
        
        img = Image.open(self.imgs[i])
        img = self.transform(img)
        
        seg = Image.open(self.segs[i])
        seg = seg.resize((256, 256))
        
        # Labels are in the ADE20K ontology and are not consequetive,
        # we have to apply a remap operation over the labels in a just-in-time
        # manner. This slows things down, but it's fine, this is just a demo
        # anyway.
        seg = translate(np.array(seg)).astype('int64')
        
        # One-hot encode the segmentation mask.
        # def ohe_mat(segmap):
        #     return np.array(
        #         list(
        #             np.array(segmap) == i for i in range(9)
        #         )
        #     ).astype(int).reshape(9, 256, 256)
        # seg = ohe_mat(seg)
        
        # Additionally, the original UNet implementation outputs a segmentation map
        # for a subset of the overall image, not the image as a whole! With this input
        # size the segmentation map targeted is a (164, 164) center crop.
        seg = seg[46:210, 46:210]
        
        return img, seg
    
    
from torch import nn

class UNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1_1 = nn.Conv2d(3, 64, 3)
        self.relu_1_2 = nn.ReLU()
        self.conv_1_3 = nn.Conv2d(64, 64, 3)
        self.relu_1_4 = nn.ReLU()
        self.pool_1_5 = nn.MaxPool2d(2)
        
        self.conv_2_1 = nn.Conv2d(64, 128, 3)
        self.relu_2_2 = nn.ReLU()
        self.conv_2_3 = nn.Conv2d(128, 128, 3)
        self.relu_2_4 = nn.ReLU()        
        self.pool_2_5 = nn.MaxPool2d(2)
        
        self.conv_3_1 = nn.Conv2d(128, 256, 3)
        self.relu_3_2 = nn.ReLU()
        self.conv_3_3 = nn.Conv2d(256, 256, 3)
        self.relu_3_4 = nn.ReLU()
        self.pool_3_5 = nn.MaxPool2d(2)
        
        self.conv_4_1 = nn.Conv2d(256, 512, 3)
        self.relu_4_2 = nn.ReLU()
        self.conv_4_3 = nn.Conv2d(512, 512, 3)
        self.relu_4_4 = nn.ReLU()
        
        # deconv is the '2D transposed convolution operator'
        self.deconv_5_1 = nn.ConvTranspose2d(512, 256, (2, 2), 2)
        # 61x61 -> 48x48 crop
        self.c_crop_5_2 = lambda x: x[:, :, 6:54, 6:54]
        self.concat_5_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_5_4 = nn.Conv2d(512, 256, 3)
        self.relu_5_5 = nn.ReLU()
        self.conv_5_6 = nn.Conv2d(256, 256, 3)
        self.relu_5_7 = nn.ReLU()
        
        self.deconv_6_1 = nn.ConvTranspose2d(256, 128, (2, 2), 2)
        # 121x121 -> 88x88 crop
        self.c_crop_6_2 = lambda x: x[:, :, 17:105, 17:105]
        self.concat_6_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_6_4 = nn.Conv2d(256, 128, 3)
        self.relu_6_5 = nn.ReLU()
        self.conv_6_6 = nn.Conv2d(128, 128, 3)
        self.relu_6_7 = nn.ReLU()
        
        self.deconv_7_1 = nn.ConvTranspose2d(128, 64, (2, 2), 2)
        # 252x252 -> 168x168 crop
        self.c_crop_7_2 = lambda x: x[:, :, 44:212, 44:212]
        self.concat_7_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_7_4 = nn.Conv2d(128, 64, 3)
        self.relu_7_5 = nn.ReLU()
        self.conv_7_6 = nn.Conv2d(64, 64, 3)
        self.relu_7_7 = nn.ReLU()
        
        # 1x1 conv ~= fc; n_classes = 9
        self.conv_8_1 = nn.Conv2d(64, 9, 1)

    def forward(self, x):
        x = self.conv_1_1(x)
        x = self.relu_1_2(x)
        x = self.conv_1_3(x)
        x_residual_1 = self.relu_1_4(x)
        x = self.pool_1_5(x_residual_1)
        
        x = self.conv_2_1(x)
        x = self.relu_2_2(x)        
        x = self.conv_2_3(x)
        x_residual_2 = self.relu_2_4(x)        
        x = self.pool_2_5(x_residual_2)
        
        x = self.conv_3_1(x)
        x = self.relu_3_2(x)        
        x = self.conv_3_3(x)
        x_residual_3 = self.relu_3_4(x)
        x = self.pool_3_5(x_residual_3)
        
        x = self.conv_4_1(x)
        x = self.relu_4_2(x)
        x = self.conv_4_3(x)
        x = self.relu_4_4(x)
        
        x = self.deconv_5_1(x)
        x = self.concat_5_3(self.c_crop_5_2(x_residual_3), x)
        x = self.conv_5_4(x)
        x = self.relu_5_5(x)
        x = self.conv_5_6(x)
        x = self.relu_5_7(x)
        
        x = self.deconv_6_1(x)
        x = self.concat_6_3(self.c_crop_6_2(x_residual_2), x)
        x = self.conv_6_4(x)
        x = self.relu_6_5(x)
        x = self.conv_6_6(x)
        x = self.relu_6_7(x)
        
        x = self.deconv_7_1(x)
        x = self.concat_7_3(self.c_crop_7_2(x_residual_1), x)
        x = self.conv_7_4(x)
        x = self.relu_7_5(x)
        x = self.conv_7_6(x)
        x = self.relu_7_7(x)
        
        x = self.conv_8_1(x)
        return x

from pathlib import Path
dataroot = Path('/spell/bob-ross-kaggle-dataset/')
dataset = BobRossSegmentedImagesDataset(dataroot)
dataloader = DataLoader(dataset, shuffle=True)

model = UNet()
model.cuda()


import numpy as np
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [23]:
from skorch.classifier import NeuralNetClassifier
model = NeuralNetClassifier(
    model,
    criterion=criterion,
    optimizer=optimizer,
    max_epochs=NUM_EPOCHS,
    batch_size=1,
    # passing an initialized dataset object means fit is unparameterized
    dataset=dataset,
    device=0
    # CV is controlled by the train_test parameter, default is 5-fold CV?
)

In [25]:
model.fit(dataset, None)

TypeError: forward() missing 2 required positional arguments: 'input' and 'target'

`skorch` is sufficiently complex that I can't get it working without going through the tutorial materials first. I'm not too keen on doing this right right now, so I'll stop here with this approach.

Review of this part of `skorch`. `skorch` has its own `Dataset` implementation which has wider input compatibility than the PyTorch `Dataset` implementation, but acts as a thin wrapper, and you can use a PyTorch `Dataset` directly. `skorch` reuses the PyTorch `DataLoader` without modification.

By default `skorch` uses `train_test` with five-fold `skorch.CVSplit`. `skorch` *only makes a single model split* by default; so I guess it's really just a `train_test_split`?

Let's return back to our bootstrapped approach and run that model.

In [3]:
!spell login --identity #### --password ####

[0mHello, Aleksey Bilogur!
[0m[0m

In [16]:
# rsync not installed error, have to put rsync in the apt-get field of the workspace
# 'pytorch' framework does not support tensorboard!?!
# Use default, which does.
# --framework 'pytorch'

!spell run 'ls .; ls models; python models/model_3.py'\
    --machine-type 'K80'\
    --mount 'uploads/bob-ross-kaggle-dataset':'/spell/bob-ross-kaggle-dataset'\
    --tensorboard-dir '/spell/tensorboards/'

Everything up-to-date
[0m💫 Casting spell #156…
[0m✨ Stop viewing logs with ^C
[0m[K[0m[?25h[0m✨ Machine_Requested… done
[0m[K[0m[?25h[0m✨ Building… done tagged registry-1.spell:80/residentmario/2dc6b76f8dc6……[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m
[0m[K[0m[?25h[0m✨ Mounting… doneting[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m
[0m✨ [0mRun is running
[0m0_initial_model.ipynb
[0m1_initial_spell_model.ipynb
[0m2_with_tensorboard.ipynb
[0m3_cross_validated.ipynb
[0mls: cannot access 'models': No such file or directory
[0mpython: can't open file 'models/model_3.py': [Errno 2] No such file or directory
[0m[K[0m[?25h[0m✨ Saving… doner modified or new files from the run[0m
[0m[K[0m[?25h[0m✨ Pushing… donengg[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m
[0m🎉 [0mTotal run time: 16.440464s
[0m🎉 [0mRun 156 compl

I expect the `spell run` command to mount the root of the current Git project, e.g. the folder nearest to the current working directory in the file hierarchy containing the `.git` repository.

In [17]:
!spell run 'ls .; ls models; python models/model_3.py'\
    --machine-type 'K80'\
    --mount 'uploads/bob-ross-kaggle-dataset':'/spell/bob-ross-kaggle-dataset'\
    --github-url 'https://github.com/ResidentMario/unet-pytorch.git'\
    --tensorboard-dir '/spell/tensorboards/'

[0m💫 Casting spell #157…
[0m✨ Stop viewing logs with ^C
[0m[K[0m[?25h[0m✨ Building… doneuired -- commencing run[0m[0m[0m
[0m[K[0m[?25h[0m✨ Machine_Requested… done tagged registry-1.spell:80/remote_content_15……[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m
[0m[K[0m[?25h[0m✨ Mounting… doneting[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m
[0m✨ [0mRun is running
[0mbob-ross-kaggle-dataset
[0mmodels
[0mnotebooks
[0mREADME.md
[0mtensorboards
[0mmodel_3.py
[0mFinished fold 0, epoch 0, batch 0. Loss: 2.195.
[0m^C

[0m✨ Your run is still running remotely.
[0m✨ Use 'spell kill 157' to terminate your run
[0m✨ Use 'spell logs 157' to view logs again
[0m[K[0m[?25h[0m[0m