In this notebook we hook up the basic UNet model we've trained to Tensorboard.

In [1]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from torchvision import transforms


class BobRossSegmentedImagesDataset(Dataset):
    def __init__(self, dataroot):
        super().__init__()
        self.dataroot = dataroot
        self.imgs = list((self.dataroot / 'train' / 'images').rglob('*.png'))
        self.segs = list((self.dataroot / 'train' / 'labels').rglob('*.png'))
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)), transforms.ToTensor()
        ])
        self.color_key = {
            3 : 0,
            5: 1,
            10: 2,
            14: 3,
            17: 4,
            18: 5,
            22: 6,
            27: 7,
            61: 8
        }
        assert len(self.imgs) == len(self.segs)
        # TODO: remean images to N(0, 1)?
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, i):
        def translate(x):
            return self.color_key[x]
        translate = np.vectorize(translate)
        
        img = Image.open(self.imgs[i])
        img = self.transform(img)
        
        seg = Image.open(self.segs[i])
        seg = seg.resize((256, 256))
        
        # Labels are in the ADE20K ontology and are not consequetive,
        # we have to apply a remap operation over the labels in a just-in-time
        # manner. This slows things down, but it's fine, this is just a demo
        # anyway.
        seg = translate(np.array(seg)).astype('int64')
        
        # One-hot encode the segmentation mask.
        # def ohe_mat(segmap):
        #     return np.array(
        #         list(
        #             np.array(segmap) == i for i in range(9)
        #         )
        #     ).astype(int).reshape(9, 256, 256)
        # seg = ohe_mat(seg)
        
        # Additionally, the original UNet implementation outputs a segmentation map
        # for a subset of the overall image, not the image as a whole! With this input
        # size the segmentation map targeted is a (164, 164) center crop.
        seg = seg[46:210, 46:210]
        
        return img, seg

    
from torch import nn

class UNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1_1 = nn.Conv2d(3, 64, 3)
        self.relu_1_2 = nn.ReLU()
        self.conv_1_3 = nn.Conv2d(64, 64, 3)
        self.relu_1_4 = nn.ReLU()
        self.pool_1_5 = nn.MaxPool2d(2)
        
        self.conv_2_1 = nn.Conv2d(64, 128, 3)
        self.relu_2_2 = nn.ReLU()
        self.conv_2_3 = nn.Conv2d(128, 128, 3)
        self.relu_2_4 = nn.ReLU()        
        self.pool_2_5 = nn.MaxPool2d(2)
        
        self.conv_3_1 = nn.Conv2d(128, 256, 3)
        self.relu_3_2 = nn.ReLU()
        self.conv_3_3 = nn.Conv2d(256, 256, 3)
        self.relu_3_4 = nn.ReLU()
        self.pool_3_5 = nn.MaxPool2d(2)
        
        self.conv_4_1 = nn.Conv2d(256, 512, 3)
        self.relu_4_2 = nn.ReLU()
        self.conv_4_3 = nn.Conv2d(512, 512, 3)
        self.relu_4_4 = nn.ReLU()
        
        # deconv is the '2D transposed convolution operator'
        self.deconv_5_1 = nn.ConvTranspose2d(512, 256, (2, 2), 2)
        # 61x61 -> 48x48 crop
        self.c_crop_5_2 = lambda x: x[:, :, 6:54, 6:54]
        self.concat_5_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_5_4 = nn.Conv2d(512, 256, 3)
        self.relu_5_5 = nn.ReLU()
        self.conv_5_6 = nn.Conv2d(256, 256, 3)
        self.relu_5_7 = nn.ReLU()
        
        self.deconv_6_1 = nn.ConvTranspose2d(256, 128, (2, 2), 2)
        # 121x121 -> 88x88 crop
        self.c_crop_6_2 = lambda x: x[:, :, 17:105, 17:105]
        self.concat_6_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_6_4 = nn.Conv2d(256, 128, 3)
        self.relu_6_5 = nn.ReLU()
        self.conv_6_6 = nn.Conv2d(128, 128, 3)
        self.relu_6_7 = nn.ReLU()
        
        self.deconv_7_1 = nn.ConvTranspose2d(128, 64, (2, 2), 2)
        # 252x252 -> 168x168 crop
        self.c_crop_7_2 = lambda x: x[:, :, 44:212, 44:212]
        self.concat_7_3 = lambda x, y: torch.cat((x, y), dim=1)
        self.conv_7_4 = nn.Conv2d(128, 64, 3)
        self.relu_7_5 = nn.ReLU()
        self.conv_7_6 = nn.Conv2d(64, 64, 3)
        self.relu_7_7 = nn.ReLU()
        
        # 1x1 conv ~= fc; n_classes = 9
        self.conv_8_1 = nn.Conv2d(64, 9, 1)

    def forward(self, x):
        x = self.conv_1_1(x)
        x = self.relu_1_2(x)
        x = self.conv_1_3(x)
        x_residual_1 = self.relu_1_4(x)
        x = self.pool_1_5(x_residual_1)
        
        x = self.conv_2_1(x)
        x = self.relu_2_2(x)        
        x = self.conv_2_3(x)
        x_residual_2 = self.relu_2_4(x)        
        x = self.pool_2_5(x_residual_2)
        
        x = self.conv_3_1(x)
        x = self.relu_3_2(x)        
        x = self.conv_3_3(x)
        x_residual_3 = self.relu_3_4(x)
        x = self.pool_3_5(x_residual_3)
        
        x = self.conv_4_1(x)
        x = self.relu_4_2(x)
        x = self.conv_4_3(x)
        x = self.relu_4_4(x)
        
        x = self.deconv_5_1(x)
        x = self.concat_5_3(self.c_crop_5_2(x_residual_3), x)
        x = self.conv_5_4(x)
        x = self.relu_5_5(x)
        x = self.conv_5_6(x)
        x = self.relu_5_7(x)
        
        x = self.deconv_6_1(x)
        x = self.concat_6_3(self.c_crop_6_2(x_residual_2), x)
        x = self.conv_6_4(x)
        x = self.relu_6_5(x)
        x = self.conv_6_6(x)
        x = self.relu_6_7(x)
        
        x = self.deconv_7_1(x)
        x = self.concat_7_3(self.c_crop_7_2(x_residual_1), x)
        x = self.conv_7_4(x)
        x = self.relu_7_5(x)
        x = self.conv_7_6(x)
        x = self.relu_7_7(x)
        
        x = self.conv_8_1(x)
        return x

In [2]:
from pathlib import Path
dataroot = Path('/spell/bob-ross-kaggle-dataset/')
dataset = BobRossSegmentedImagesDataset(dataroot)
dataloader = DataLoader(dataset, shuffle=True)

In [3]:
model = UNet()
model.cuda()

UNet(
  (conv_1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu_1_2): ReLU()
  (conv_1_3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu_1_4): ReLU()
  (pool_1_5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (relu_2_2): ReLU()
  (conv_2_3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (relu_2_4): ReLU()
  (pool_2_5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (relu_3_2): ReLU()
  (conv_3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
  (relu_3_4): ReLU()
  (pool_3_5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_4_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (relu_4_2): ReLU()
  (conv_4_3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1))
  (relu_4_4): ReLU()
  (deconv_5_1): ConvTranspose2d(512, 

In [15]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('/spell/tensorboards/experiment_2')

In [16]:
import numpy as np
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

for epoch in range(10):
    losses = []
    
    for i, (batch, segmap) in enumerate(dataloader):
        
        batch = batch.cuda()
        segmap = segmap.cuda()
        
        optimizer.zero_grad()

        output = model(batch)
        loss = criterion(output, segmap)
        loss.backward()
        optimizer.step()
        
        curr_loss = loss.item()
        if i % 50 == 0:
            print(f'Finished epoch {epoch}, batch {i}. Loss: {curr_loss:.3f}.')
        
        writer.add_scalar(
            'training loss', curr_loss, epoch * len(dataloader) + i
        )
        losses.append(curr_loss)
    
    print(
        f'Finished epoch {epoch}. '
        f'avg loss: {np.mean(losses)}; median loss: {np.min(losses)}'
    )

Finished epoch 0, batch 0. Loss: 1.272.
Finished epoch 0, batch 50. Loss: 1.159.
Finished epoch 0, batch 100. Loss: 1.062.
Finished epoch 0, batch 150. Loss: 1.410.
Finished epoch 0, batch 200. Loss: 3.235.
Finished epoch 0, batch 250. Loss: 1.391.
Finished epoch 0. avg loss: 1.5243689935520826; median loss: 0.5609884262084961
Finished epoch 1, batch 0. Loss: 2.595.
Finished epoch 1, batch 50. Loss: 1.595.
Finished epoch 1, batch 100. Loss: 1.654.
Finished epoch 1, batch 150. Loss: 0.997.
Finished epoch 1, batch 200. Loss: 1.600.
Finished epoch 1, batch 250. Loss: 1.320.
Finished epoch 1. avg loss: 1.4904698867246924; median loss: 0.6060738563537598
Finished epoch 2, batch 0. Loss: 1.471.
Finished epoch 2, batch 50. Loss: 1.290.
Finished epoch 2, batch 100. Loss: 1.505.
Finished epoch 2, batch 150. Loss: 1.191.
Finished epoch 2, batch 200. Loss: 1.464.
Finished epoch 2, batch 250. Loss: 1.682.
Finished epoch 2. avg loss: 1.488304772462503; median loss: 0.4966057538986206
Finished epoch

There's some interesting product weirdness around the [JupyterLab Tensorboard integration](https://github.com/chaoleili/jupyterlab_tensorboard).

There are two ways to open a Tensorboard window in JupyterLab:

* By clicking on a new "Tensorboard" tile in the create pane. This is how most people would interact with this feature.
* By searching "Tensorboard" in the command menu and clicking on "Create a new tensorboard."

An important aspect of how Tensorboard works is that all of the information is logged to a file, and that file can be placed anywhere on your machine. When using Tensorboard the old-fashioned way, you specify the path to that file as part of the input to the `tensorboard` CLI command.

When launching a Tensorboard via the create tile, Tensorboard is initialized with the default `logdir` argument. The directory chosen is not a modifiable part of the user flow, nor can it be modified in the "Tensorboards" pane after-the-fact!

The default `logdir` chosen is the root of the JupyterLab directory. This is not at all user discoverable, you have to visit the repo `README.md` to learn this! On Spell this is `/spell/`. This means that the only place you can create Tensorboards visible to Tensorboard instances launched this way is to write them out to the `/spell/` directory, which is poor form.

What about launching Tensorboard via the command palette? In this case you can customize the Tensorboard directory. However, likely due to security limitations in place in JupyterLab, it is only possible to specify subpaths relative to the Jupyter Lab root directory&mdash;which is, again, `/spell/`.

A good idea: making a PR against the Tensorboard integrations to allow inline setting of the logdir path when clicking on the pane; and allow updating the `logdir` location later by right-clicking on the Tensorboard in the `Tensorboards` menu later.

There's also some funkiness in the open tabs menu with Tensorboard instances appearing out-of-line with the rest of the tabs listed there that can be fixed.

Dumping the Tensorboard runs in the top-level Spell directory is a bad idea. Ultimately the correct flow *at this time* (when running from a Jupyter instance) is to use a path like `/spell/tensorboard/experiment_2` and launch there using the command palette:

![](https://i.imgur.com/TKawv5j.png)