# Segmentation of fetal head in ultrasound data
---
The ultrasound (US) dataset used for this task is available from the [HC-18 challenge](https://hc18.grand-challenge.org/).

## Obtaining data & setting up requirements

In [None]:
# Connect data: mount Google Drive so the dataset archive can be read
# from /content/gdrive (Colab-only; requires interactive authorisation)
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Unpack the dataset archive from Drive into ./us_dataset
! pwd
# -p: do not fail when the cell is re-run and the directory already exists
! mkdir -p us_dataset
# No -v: avoids printing one line per extracted file into the notebook
! tar -xzf /content/gdrive/MyDrive/us_data.tar.gz -C us_dataset

In [None]:
# Sanity check: list the top-level folders of the extracted dataset
! ls us_dataset

all_images  all_masks


In [None]:
# Obtain U-Net code by Meet Shah
# NOTE(review): this clones the default branch at whatever its current HEAD
# is; consider checking out a fixed commit so the notebook stays reproducible.
! git clone https://github.com/meetps/pytorch-semseg

Cloning into 'pytorch-semseg'...
remote: Enumerating objects: 1088, done.[K
remote: Total 1088 (delta 0), reused 0 (delta 0), pack-reused 1088[K
Receiving objects: 100% (1088/1088), 277.47 KiB | 2.69 MiB/s, done.
Resolving deltas: 100% (738/738), done.


In [None]:
# rearrange files for easy access: move the `ptsemseg` package out of the
# cloned repository into the working directory so that
# `from ptsemseg.models.unet import unet` can be imported directly
! mv pytorch-semseg/ptsemseg .

In [None]:
# Install kornia, a useful library that does a lot of operations on PyTorch tensors.
# Pinned to the version this notebook was run with, and installed through
# %pip so that it targets the running kernel's environment.
%pip install -q kornia==0.5.8

Collecting kornia
  Downloading kornia-0.5.8-py2.py3-none-any.whl (303 kB)
[?25l[K     |█                               | 10 kB 19.7 MB/s eta 0:00:01[K     |██▏                             | 20 kB 25.6 MB/s eta 0:00:01[K     |███▎                            | 30 kB 13.2 MB/s eta 0:00:01[K     |████▎                           | 40 kB 9.9 MB/s eta 0:00:01[K     |█████▍                          | 51 kB 5.2 MB/s eta 0:00:01[K     |██████▌                         | 61 kB 5.6 MB/s eta 0:00:01[K     |███████▌                        | 71 kB 6.0 MB/s eta 0:00:01[K     |████████▋                       | 81 kB 6.7 MB/s eta 0:00:01[K     |█████████▊                      | 92 kB 6.9 MB/s eta 0:00:01[K     |██████████▉                     | 102 kB 5.4 MB/s eta 0:00:01[K     |███████████▉                    | 112 kB 5.4 MB/s eta 0:00:01[K     |█████████████                   | 122 kB 5.4 MB/s eta 0:00:01[K     |██████████████                  | 133 kB 5.4 MB/s eta 0:00:01

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from tqdm import trange
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from kornia.losses import dice_loss
from kornia.utils import one_hot
from ptsemseg.models.unet import unet

## Dataloader

In [None]:
class ImageLoader(Dataset):
  """
  Dataset of ultrasound images and their segmentation masks.

  Images are read from `<path>/all_images/<name>` and masks from
  `<path>/all_masks/<stem>_mask.png`, where `<stem>` is the image file name
  up to its first '.'.
  """
  def __init__(self, path, file_list, aug_list=None, aug_prob=None):
    """
    Args:
      path (str): path where images stored
      file_list (List[str]): list of images in current split
      aug_list (List[callable]): torchvision transforms; at most one of them,
        picked uniformly at random, is applied to a sample
      aug_prob (float): Probability of applying random aug (if aug_list != None)
    """
    self.path = path
    self.file_list = file_list
    self.aug_list = aug_list
    self.aug_prob = aug_prob

  def __len__(self):
    """ Number of images in this split """
    return len(self.file_list)

  @staticmethod
  def _standardize(img, eps=1e-8):
    """
    Shift and scale a tensor to zero mean and unit standard deviation.
    The standard deviation is floored at `eps` so that a constant image
    yields zeros instead of NaN/inf.
    """
    return (img - torch.mean(img)) / torch.clamp(torch.std(img), min=eps)

  def __getitem__(self, idx):
    """
    Preprocess and return a single sample & label

    Returns:
      img (torch.Tensor): float tensor of shape (3, 572, 572), standardized
        over the whole image
      mask (torch.Tensor): float tensor holding the 388x388 resized mask with
        a leading dimension of size 1; pixel value 255 is mapped to 1
    """
    img_name = os.path.join(self.path, 'all_images', self.file_list[idx])
    mask_fname = self.file_list[idx].split('.')[0] + '_mask.png'
    mask_name = os.path.join(self.path, 'all_masks', mask_fname)

    # Resize to dimensions supported by Vanilla UNet. The context managers
    # close the underlying files as soon as the resized copies exist.
    with Image.open(img_name) as raw_img:
      img = raw_img.resize((572, 572), Image.LANCZOS)
    with Image.open(mask_name) as raw_mask:
      # NEAREST keeps the mask values discrete
      mask = raw_mask.resize((388, 388), Image.NEAREST)

    img = np.array(img)
    mask = np.array(mask)
    mask[mask == 255] = 1

    # Convert the array directly; wrapping it in a Python list first makes
    # torch take its slow list-of-ndarrays construction path.
    mask = torch.as_tensor(mask, dtype=torch.float32).unsqueeze(0)

    # Replicate the single grey channel into three channels
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    img = torch.as_tensor(img, dtype=torch.float32)
    img = img.permute(2, 0, 1)  # HWC -> CHW

    # select and apply random augmentation (if passed)
    # The random decisions use torch's RNG rather than numpy's: DataLoader
    # workers get distinct torch seeds, whereas numpy's global state can be
    # shared by the workers, which would repeat the same choices.
    # NOTE(review): the transform is applied to the image only. Any geometric
    # transform in aug_list therefore moves the image but not its mask -
    # confirm whether the mask should be transformed identically.
    if self.aug_list:
      if torch.rand(1).item() < self.aug_prob:
        aug_idx = torch.randint(len(self.aug_list), (1,)).item()
        img = self.aug_list[aug_idx](img)

    img = self._standardize(img)
    return img, mask

In [None]:
def get_data_loaders(categories, path, file_lists,
                     augment, aug_prob, batch_size, num_workers=1):
  """
  Wrapper function to return dataloader(s)
  Args:
    categories (List[str]): names of processes for which dataloader needed
    path (str): path where images stored
    file_lists (List[List[str]]): list of file lists, one per category
    augment (boolean): whether to apply augmentation (only used for 'train')
    aug_prob (float): Probability of applying random aug
    batch_size (int): batch size
    num_workers (int): number of worker processes per loader
  Returns:
    List[torch.utils.data.DataLoader]: one loader per entry of `categories`,
    in the same order
  """
  loaders = []
  for i, category in enumerate(categories):
    is_train = category == 'train'
    if is_train and augment:
      aug_list = [
          transforms.RandomAffine(0, translate=(0.2, 0.2)),
          # p=1: the flip always happens once this transform is selected
          transforms.RandomHorizontalFlip(p=1),
          transforms.RandomRotation(degrees=(-10, 10), fill=(0,)),
          transforms.GaussianBlur((17, 17), (11, 11))
      ]
    else:
      aug_list = None
    loader = DataLoader(
        ImageLoader(path, file_lists[i], aug_list, aug_prob),
        batch_size=batch_size,
        # Reshuffle the training samples every epoch; keep the evaluation
        # splits in a fixed order.
        shuffle=is_train,
        num_workers=num_workers
        )
    loaders.append(loader)
  return loaders

## Train/val/test loop

In [None]:
# dice score
def integral_dice(pred, gt, k):
    '''
    Dice coefficient for multiclass hard thresholded prediction consisting of integers instead of binary
    values. k = integer for class for which Dice is being calculated.

    Args:
      pred (torch.Tensor): integer class label per element
      gt (torch.Tensor): ground-truth labels, same shape as `pred`
      k (int): class for which Dice is computed
    Returns:
      0-dim float tensor 2*|pred==k & gt==k| / (|pred==k| + |gt==k|).
      When class k occurs in neither tensor the score is defined as 1.0
      (both agree that the class is absent) instead of the NaN that 0/0
      would give.
    Raises:
      ValueError: if `pred` and `gt` have different shapes
    '''
    if pred.shape != gt.shape:
        raise ValueError(
            f'pred and gt must have the same shape, got '
            f'{tuple(pred.shape)} and {tuple(gt.shape)}')
    pred_is_k = pred == k
    # The two tensors may live on different devices; compare on pred's device
    gt_is_k = (gt == k).to(pred_is_k.device)
    overlap = torch.sum(pred_is_k & gt_is_k)
    total = torch.sum(pred_is_k) + torch.sum(gt_is_k)
    if total.item() == 0:
        return torch.tensor(1.0, device=pred_is_k.device)
    return overlap * 2.0 / total.float()

In [None]:
def learn(model, loader, optimizer, process):
  """
  main function for single epoch of train, val or test

  Args:
    model (torch.nn.Module): network producing 2-channel per-pixel scores
    loader (DataLoader): yields (image batch, mask batch) pairs
    optimizer (torch.optim.Optimizer): only stepped when process == 'train'
    process (str): 'train' updates the weights; any other value runs the
      model in eval mode without gradients
  Returns:
    (mean_dice, final_loss): mean over batches of the foreground Dice score
    and of the binary cross-entropy loss. Both are NaN for an empty loader.
  """
  is_train = process == 'train'
  # Run on whichever device the model already lives on
  device = next(model.parameters()).device
  if is_train:
    model.train()
  else:
    model.eval()

  dice_list = []
  # Plain Python float: accumulating the loss tensor itself would keep every
  # batch's autograd history referenced for the whole epoch.
  running_loss = 0.0
  num_batches = len(loader)
  with trange(num_batches, desc=process, ncols=100) as t:
    for batch_num, sample in enumerate(loader):
      img_batch, masks = sample
      # Drop the singleton dim and keep the first mask channel -> (B, H, W)
      masks = masks[:, 0, :, :, 0].long()
      # one hot encoding labels
      masks_oh = one_hot(masks, num_classes=2, device='cpu', dtype=masks.dtype)
      # binary_cross_entropy expects floating-point targets
      targets = masks_oh.to(device).float()

      with torch.set_grad_enabled(is_train):
        preds = F.softmax(model(img_batch.to(device)), 1)
        loss = F.binary_cross_entropy(preds, targets)
      if is_train:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      hard_preds = torch.argmax(preds, 1)
      # Compare on the CPU, where the ground-truth masks live
      dice = integral_dice(hard_preds.cpu(), masks, 1)
      dice_list.append(dice.item())
      running_loss += loss.item()
      # The loss is already averaged over the batch (default 'mean'
      # reduction), so only divide by the number of batches seen so far.
      t.set_postfix(loss=running_loss / float(batch_num + 1))
      t.update()

  if num_batches == 0:
    return float('nan'), float('nan')
  mean_dice = np.mean(np.array(dice_list))
  final_loss = running_loss / num_batches
  return mean_dice, final_loss

In [None]:
def get_splits(all_names, train_size, val_size, test_size):
  """
  Split file names into train / validation / test subsets.

  The names are first divided into a training part and a held-out part of
  relative size `val_size + test_size`; the held-out part is then divided
  into validation and test. Both splits use random_state=0.

  Args:
    all_names (List[str]): names to split
    train_size (float): accepted but not used; the training part is whatever
      remains after the held-out part is removed
    val_size (float): fraction of all names used for validation
    test_size (float): fraction of all names used for testing
  Returns:
    (train names, validation names, test names)
  """
  holdout_fraction = val_size + test_size
  # Share of the held-out part that goes to the test subset
  test_share = test_size / holdout_fraction
  train_part, holdout_part = train_test_split(
      all_names, test_size=holdout_fraction, random_state=0)
  val_part, test_part = train_test_split(
      holdout_part, test_size=test_share, random_state=0)
  return train_part, val_part, test_part

In [None]:
def perform_learning(model, optimizer, path, all_names, batch_size,
                     splits, num_epochs):
  """
  Wrapper function to run train, val, test loops

  Splits the file names, builds one loader per split (augmentation enabled
  with probability 0.5 for the training loader), runs `num_epochs` epochs of
  training followed by validation, then evaluates once on the test split.
  Loss and Dice of every pass are printed; nothing is returned.

  Args:
    model: network to optimise
    optimizer: optimizer attached to the model's parameters
    path (str): dataset root directory
    all_names (List[str]): image file names to split
    batch_size (int): batch size for all three loaders
    splits (sequence of 3 floats): train, validation and test fractions
    num_epochs (int): number of train/validation epochs
  """
  train_size, val_size, test_size = splits
  name_lists = list(get_splits(all_names, train_size, val_size, test_size))
  loaders = get_data_loaders(
      ['train', 'val', 'test'], path, name_lists,
      augment=True, aug_prob=0.5, batch_size=batch_size)
  train_loader, val_loader, test_loader = loaders

  for epoch_num in range(num_epochs):
    # One optimisation pass over the training split ...
    train_dice, train_loss = learn(model, train_loader, optimizer, 'train')
    print(f'Training Epoch {epoch_num} - Loss: {train_loss} ; Dice : {train_dice}')
    # ... followed by a pass over the validation split
    val_dice, val_loss = learn(model, val_loader, optimizer, 'val')
    print(f'Validation Epoch {epoch_num} - Loss: {val_loss} ; Dice : {val_dice}')

  # Final evaluation on the held-out test split
  tst_dice, tst_loss = learn(model, test_loader, optimizer, 'test')
  print(f'Test - Loss: {tst_loss} ; Dice : {tst_dice}')

## Let's run!

In [None]:
# Dataset location
path = '/content/us_dataset'
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# train/val/test split identical on every run.
all_names = sorted(os.listdir(os.path.join(path, 'all_images')))

# Hyper-parameters
lr = 1e-4
wt_dec = 1e-4
num_epochs = 5
batch_size = 2
splits = [0.8, 0.1, 0.1]  # train / val / test fractions

# Seed the random number generators so that weight initialisation and
# random augmentation are repeatable
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

# Two output classes: background and foreground
model = unet(n_classes=2)
model = model.cuda()

optimizer = Adam(model.parameters(), lr=lr, weight_decay=wt_dec)

perform_learning(model, optimizer, path, all_names, batch_size,
                 splits, num_epochs)

train:   0%|                                                                | 0/400 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f77203b0050>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
train: 100%|██████████████████████████████████████████| 400/400 [04:32<00:00,  1.47it/s, loss=0.248]


Training Epoch 0 - Loss: 0.2480936813354492 ; Dice : 0.3468497826378143


val: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.64it/s, loss=0.208]


Validation Epoch 0 - Loss: 0.20792417526245116 ; Dice : 0.663888944387436


train: 100%|██████████████████████████████████████████| 400/400 [04:42<00:00,  1.42it/s, loss=0.187]


Training Epoch 1 - Loss: 0.1873435401916504 ; Dice : 0.7037050391174853


val: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.62it/s, loss=0.144]


Validation Epoch 1 - Loss: 0.14365605354309083 ; Dice : 0.7724949312210083


train: 100%|██████████████████████████████████████████| 400/400 [04:32<00:00,  1.47it/s, loss=0.165]


Training Epoch 2 - Loss: 0.1648356246948242 ; Dice : 0.7402136340644211


val: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.64it/s, loss=0.134]


Validation Epoch 2 - Loss: 0.13352821350097657 ; Dice : 0.7840890741348266


train: 100%|██████████████████████████████████████████| 400/400 [04:31<00:00,  1.47it/s, loss=0.155]


Training Epoch 3 - Loss: 0.1545381736755371 ; Dice : 0.7611129674501718


val: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.66it/s, loss=0.119]


Validation Epoch 3 - Loss: 0.11851264953613282 ; Dice : 0.8111391639709473


train: 100%|██████████████████████████████████████████| 400/400 [04:29<00:00,  1.48it/s, loss=0.147]


Training Epoch 4 - Loss: 0.14718589782714844 ; Dice : 0.7784149835258722


val: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.65it/s, loss=0.126]


Validation Epoch 4 - Loss: 0.1259153175354004 ; Dice : 0.7942298424243927


test: 100%|██████████████████████████████████████████████| 50/50 [00:30<00:00,  1.66it/s, loss=0.13]

Test - Loss: 0.1300245475769043 ; Dice : 0.7992158937454223



