# Image segmentation for JetCar project

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/StefanW0815/PublicTest/blob/main/JetCar_Segmentation_MobileNetV2.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" />
    Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/StefanW0815/PublicTest/blob/main/JetCar_Segmentation_MobileNetV2.ipynb">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />
    View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/github.com/StefanW0815/PublicTest/blob/main/JetCar_Segmentation_MobileNetV2.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

# 1. Introduction

This notebook uses Semantic Segmentation to train a U-Net model. It utilizes the packages "segmentation-models-pytorch" and "catalyst". 

Each training and validation data pair consists of

    a jpg image with 3 channels (RGB)
    a png mask with 1 channel containing the class values for each pixel
    
In addition there is a set of images in a third set to create predicted masks as feedback for segmentation mask adjustments.

The code is based on the following examples:
* https://github.com/qubvel/segmentation_models.pytorch
* https://www.tensorflow.org/tutorials/images/segmentation
* https://github.com/usuyama/pytorch-unet
* https://yann-leguilly.gitlab.io/post/2019-12-14-tensorflow-tfdata-segmentation


# 2. Preparing the Environment
## 2.1. List all Imports

In [None]:
from glob import glob
import shutil
import argparse
import zipfile
import hashlib
import requests
from tqdm import tqdm
import IPython.display as display
import matplotlib.pyplot as plt
import numpy as np
import datetime,os
from IPython.display import clear_output
from urllib.parse import urlparse
import zipfile
import helper
import PIL.Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import random
import cv2


## 2.2. Define Global Constants

Defining all constants at the top allows finding them quickly and changing them easily.

In [None]:
# Edge length in pixels; input images are resized to IMG_SIZE x IMG_SIZE
IMG_SIZE = 224 
# Our images are RGB (3 channels)
N_CHANNELS = 3
# Number of segmentation classes (one-hot depth and model output channels),
# including 'nothing' = 0.
# NOTE(review): an earlier version of this comment said 22 classes while the
# value is 17 - confirm the value against the class ids used in the mask files.
N_CLASSES = 17

# A batch size of 100-200 works with Colab Pro and High-Ram setting, reduce when getting out-of-memory errors
BATCH_SIZE = 200
# Upper bound for the number of epochs; training normally ends well before 100 because of early stopping
EPOCHS = 500
# 1e-3 is a good starting point, you can try to repeat the same with 1e-4 or smaller
LEARNING_RATE = 1e-3
# Learning rate for the encoder parameters: half of the base learning rate
ENCODER_LEARNING_RATE = LEARNING_RATE/2
# Number of epochs without improvement to change learning rate
SCHEDULER_PATIENCE = 5
# Number of epochs without improvement to stop training completely
EARLY_STOP_PATIENCE = 10

# Mixed-precision optimization level handed to the training run.
# O0: no optimization, O1: some, O2: more, O3: all FP16
FP16_OPT_LEVEL = "O3"

## 2.3. Mount Google Drive
The best way to work with large self-made datasets is to place them on Google Drive and access them from there.

In [None]:
from google.colab import drive

# Where the Google Drive gets mounted inside the Colab VM, and the project
# folder on that drive that holds the dataset zip, weights and results.
GDRIVE_MOUNT = "/content/gdrive/"
GDRIVE_PATH = GDRIVE_MOUNT + 'My Drive/JetCar/'

# force_remount=True re-mounts even when the drive is already mounted.
drive.mount(GDRIVE_MOUNT, force_remount=True)
!ls 

In [None]:
!ls

In [None]:
!ls "gdrive/My Drive/JetCar/"

In [None]:
!ls "/content/JetCar/"

In [None]:
# Sources of the dataset archives. Entries may be http(s) URLs or plain file
# paths; here the single archive is read from the mounted Google Drive.
DOWNLOAD_URLS = [f"{GDRIVE_PATH}JetCar_DataSet.zip"]

In [None]:
# All path and file constants in one place.

# Sub-directory names used inside the dataset tree
IMAGE_SUBDIR = "Img/"
MASK_SUBDIR = "Mask/"
TRAINING_SUBDIR = "Train/"
VALIDATION_SUBDIR = "Val/"

# Local working directories on the Colab VM
DATA_PATH = "/content/JetCar/"
DATASET_PATH = f"{DATA_PATH}DataSet/"
RECORDING_PATH = f"{DATASET_PATH}{IMAGE_SUBDIR}Test/"
PREDICTION_PATH = f"{DATA_PATH}Prediction/"

# Names of the files that are written during the run
MODEL_WEIGHT_FILE_NAME = 'JetCar_Best_MobileNetV2_Model_Weights_Catalyst.pth'
PREDICTION_ZIP_FILE_NAME = 'Prediction_MobileNetV2_Catalyst.zip'
LOG_DIR = "./logs/segmentation"
LOG_ZIP_FILE_NAME = "JetCar_Logs_Catalyst.zip"

## 2.4. Downloading the Dataset

Define the download functions for the datasets.

In [None]:
def download(source_url, destination_path=None, overwrite=False):
    """Fetch a file from an http(s) URL or copy it from a local path.

    Args:
        source_url: An ``http``/``https`` URL or a file system path (for
            example a file on the mounted Google Drive). Any source whose URL
            scheme is not http/https is treated as a local path and copied.
        destination_path: Target file name, or an existing directory in which
            the file is stored under the last path component of
            ``source_url``. With ``None`` the file is stored under that name
            relative to the current working directory.
        overwrite: Fetch the file again even if the target already exists.

    Returns:
        The path of the local file.

    Raises:
        RuntimeError: If the HTTP request is not answered with status 200.
    """
    print("download(%s,%s,%d)\n"%(source_url, destination_path, overwrite))

    base_name = source_url.split('/')[-1]
    if destination_path is None:
        fname = base_name
    else:
        destination_path = os.path.expanduser(destination_path)
        if os.path.isdir(destination_path):
            fname = os.path.join(destination_path, base_name)
        else:
            fname = destination_path

    # Nothing to do when the file is already there and may be kept.
    if not overwrite and os.path.exists(fname):
        return fname

    dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
    os.makedirs(dirname, exist_ok=True)

    scheme = urlparse(source_url).scheme
    if scheme in ('http', 'https'):
        print('Downloading %s from %s...'%(fname, source_url))
        # The context manager releases the streamed connection in every case.
        with requests.get(source_url, stream=True) as r:
            if r.status_code != 200:
                # The message previously referenced an undefined name and
                # raised NameError instead of this RuntimeError.
                raise RuntimeError("Failed downloading url %s"%source_url)
            total_length = r.headers.get('content-length')

            chunks = r.iter_content(chunk_size=1024)
            if total_length is not None:
                # Content length is known: show a progress bar in KB.
                chunks = tqdm(chunks,
                              total=int(int(total_length) / 1024. + 0.5),
                              unit='KB', unit_scale=False, dynamic_ncols=True)

            with open(fname, 'wb') as f:
                for chunk in chunks:
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
    else:
        print('Copying %s from %s...'%(os.path.normpath(fname), os.path.normpath(source_url)))
        shutil.copy(os.path.normpath(source_url), os.path.normpath(fname))

    return fname

def download_dataset(source_urls, destination_path, overwrite=False):
    """Fetch every archive in ``source_urls`` and unpack it.

    The archives are stored in ``<destination_path>/downloads`` and extracted
    into ``destination_path``.

    Args:
        source_urls: Iterable of http(s) URLs or local paths of zip archives.
        destination_path: Directory that receives the extracted files. It is
            created, including missing parent directories, when necessary.
        overwrite: Passed on to ``download``; fetch archives again even if
            they are already in the downloads directory.
    """
    download_dir = os.path.join(destination_path, 'downloads')
    # makedirs also creates destination_path itself and any missing parents,
    # and does not fail when the directories already exist.
    os.makedirs(download_dir, exist_ok=True)

    for url in source_urls:
        filename = download(source_url=url, destination_path=download_dir, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path=destination_path)

In [None]:
# Create local directory for data and fetch/unpack the dataset archives into it.
# NOTE: the download only runs when DATA_PATH does not exist yet. If the
# directory is already there (e.g. after an interrupted earlier run), nothing
# is downloaded or extracted by this cell.
if not os.path.exists(DATA_PATH):
  os.makedirs(DATA_PATH)
  download_dataset(DOWNLOAD_URLS, DATA_PATH, overwrite=False)

In [None]:
!ls "/content/JetCar/downloads/"

In [None]:
# Count the jpg images of the three sets: training, validation and the
# recordings that only get predicted masks.
TRAINSET_SIZE = len(glob(f"{DATASET_PATH}{IMAGE_SUBDIR}{TRAINING_SUBDIR}*.jpg"))
VALSET_SIZE = len(glob(f"{DATASET_PATH}{IMAGE_SUBDIR}{VALIDATION_SUBDIR}*.jpg"))
PREDSET_SIZE = len(glob(f"{RECORDING_PATH}*.jpg"))

print(f"The Training Dataset contains {TRAINSET_SIZE} images.")
print(f"The Validation Dataset contains {VALSET_SIZE} images.")
print(f"The Prediction Dataset contains {PREDSET_SIZE} images.")

# Install additional packages

In [None]:
# Catalyst
!pip install catalyst==20.12

In [None]:
# for pretrained segmentation models for PyTorch
!pip install segmentation-models-pytorch

In [None]:
# for tensorboard
!pip install tensorflow

In [None]:
# Colab supports FP16
!git clone https://github.com/NVIDIA/apex
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
is_fp16_used = True

## Enabling GPU on Colab


In [None]:
# Abort early when the runtime has no CUDA device.
if not torch.cuda.is_available():
  raise Exception("GPU not available. CPU training will be too slow.")

# catalyst is imported here rather than in the import cell because it is
# installed by a pip cell further up in the notebook.
import catalyst
from catalyst import utils

# Device that the runner and the prediction loop use later on.
device = utils.get_device()
print(f"device: {device}")

print("device name", torch.cuda.get_device_name(0))

In [None]:
# Fixed seed and deterministic cuDNN settings, requested through the catalyst
# helpers, so that runs are repeatable.
SEED = 42
utils.set_global_seed(SEED)
utils.prepare_cudnn(deterministic=True)

# Record the library versions used for this run.
print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

## Prepare Dataset and DataLoader

In [None]:
def create_one_hot(code_mask):
  """Turn a mask of class indices into a float one-hot tensor.

  ``code_mask`` is an integer tensor of class ids (below N_CLASSES). The
  one-hot axis is appended as the last dimension by ``one_hot``; the result
  is then passed through torchvision's ``to_tensor`` as a numpy array,
  which presumably returns it channels-first (confirm against the
  torchvision documentation).
  """
  channels_last = torch.nn.functional.one_hot(code_mask, N_CLASSES).float().numpy()
  return transforms.functional.to_tensor(channels_last)

In [None]:
def create_mask(one_hot_mask):
  """Collapse per-class scores into a mask of class indices.

  The class axis is the third dimension counted from the end, so the same
  call handles a single (classes, H, W) tensor and a batch
  (N, classes, H, W). Returns the arg-max index along that axis.
  """
  class_axis = one_hot_mask.dim() - 3
  return one_hot_mask.argmax(dim=class_axis)

In [None]:
class JetCarDatasetClass(Dataset):
  """Image/mask pairs for training and validation.

  Every ``*.jpg`` file in ``image_dir`` is one sample. Its mask is looked up
  in ``mask_dir`` under the image's file name with "Img" replaced by "Mask"
  and ".jpg" replaced by ".png".
  """

  def __init__(self, image_dir, mask_dir):
    self.image_dir = image_dir
    self.mask_dir = mask_dir
    # glob() yields files in arbitrary order; sorting keeps the sample indices
    # stable between runs so that seeded shuffling is reproducible.
    self.images = sorted(glob(os.path.join(image_dir, '*.jpg')))

  def __len__(self):
    """Return the number of image files found in ``image_dir``."""
    return len(self.images)

  def __getitem__(self, idx):
    """Return ``[image, mask]`` for sample ``idx``.

    ``image`` is the picture resized to IMG_SIZE x IMG_SIZE, converted to BGR
    channel order and normalized. ``mask`` is the one-hot encoding of the
    class ids stored in the mask PNG.
    """
    image_fname = self.images[idx]
    img_base_name = os.path.basename(image_fname)
    mask_base_name = img_base_name.replace("Img", "Mask").replace(".jpg", ".png")
    mask_fname = os.path.join(self.mask_dir, mask_base_name)

    image = PIL.Image.open(image_fname).convert('RGB')
    image.load()
    image = transforms.functional.resize(image, (IMG_SIZE, IMG_SIZE))
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    image = transforms.functional.to_tensor(image)
    # The channels are in BGR order, so the mean/std triples are reversed too.
    image = transforms.functional.normalize(
        image, [0.485, 0.456, 0.406][::-1], [0.229, 0.224, 0.225][::-1])

    # NOTE(review): the mask is used at its stored size, it is not resized
    # like the image - presumably the mask files are already
    # IMG_SIZE x IMG_SIZE; confirm.
    mask = PIL.Image.open(mask_fname)
    mask.load()
    mask = transforms.functional.pil_to_tensor(mask).squeeze().long()
    mask = create_one_hot(mask)
    return [image, mask]


# One dataset per split: images and masks live in parallel directory trees.
train_set = JetCarDatasetClass(
  image_dir=DATASET_PATH + IMAGE_SUBDIR + TRAINING_SUBDIR,
  mask_dir=DATASET_PATH + MASK_SUBDIR + TRAINING_SUBDIR,
)
valid_set = JetCarDatasetClass(
  image_dir=DATASET_PATH + IMAGE_SUBDIR + VALIDATION_SUBDIR,
  mask_dir=DATASET_PATH + MASK_SUBDIR + VALIDATION_SUBDIR,
)

image_datasets = dict(train=train_set, valid=valid_set)

# Both loaders use the same settings: shuffled batches, loaded in the main process.
dataloaders = {
  split: DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
  for split, dataset in image_datasets.items()
}

In [None]:
class JetCarPredictionClass(Dataset):
  """Images without masks, used to produce predicted masks.

  Every ``*.jpg`` file in ``image_dir`` is one sample.
  """

  def __init__(self, image_dir):
    # Sorted so that the samples are processed in a stable, repeatable order.
    self.images = sorted(glob(os.path.join(image_dir, '*.jpg')))

  def __len__(self):
    """Return the number of image files found in ``image_dir``."""
    return len(self.images)

  def __getitem__(self, idx):
    """Return ``[image, file_name]`` for sample ``idx``.

    ``image`` is preprocessed exactly like in JetCarDatasetClass; ``file_name``
    is the base name of the image file.
    """
    image_fname = self.images[idx]
    img_base_name = os.path.basename(image_fname)

    image = PIL.Image.open(image_fname).convert('RGB')
    image.load()
    image = transforms.functional.resize(image, (IMG_SIZE, IMG_SIZE))
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    image = transforms.functional.to_tensor(image)
    # The channels are in BGR order, so the mean/std triples have to be
    # reversed as well. This matches the normalization in JetCarDatasetClass;
    # the un-reversed triples applied the red statistics to the blue channel
    # and vice versa, so prediction inputs differed from training inputs.
    image = transforms.functional.normalize(
        image, [0.485, 0.456, 0.406][::-1], [0.229, 0.224, 0.225][::-1])
    return [image, img_base_name]

# Dataset and loader for the recordings that only get predicted masks.
# shuffle=False: the batches follow the order of the dataset's file list.
pred_set = JetCarPredictionClass(RECORDING_PATH)
predloader = DataLoader(pred_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)


## Check the outputs from DataLoader

In [None]:
def reverse_image(inp):
  """Convert a normalized BGR image tensor back into a displayable RGB array.

  Undoes the normalization applied by the datasets (mean/std in BGR order),
  scales to 0..255 as uint8 and swaps the channels from BGR to RGB.
  """
  bgr_mean = np.array([0.406, 0.456, 0.485])
  bgr_std = np.array([0.225, 0.224, 0.229])

  # channels-first tensor -> channels-last array
  channels_last = np.transpose(inp.numpy(), (1, 2, 0))
  restored = np.clip(bgr_std * channels_last + bgr_mean, 0, 1)
  as_bytes = (restored * 255).astype(np.uint8)
  return cv2.cvtColor(as_bytes, cv2.COLOR_BGR2RGB)

def reverse_mask(inp):
  """Convert a one-hot (or per-class score) mask tensor into a uint8 array of class ids."""
  return create_mask(inp).numpy().astype(np.uint8)

def display(display_list):
  """Show up to three tensors side by side: input image, true mask, predicted mask.

  A tensor whose first dimension has size 3 is treated as an image, anything
  else as a mask. The panels are titled by position in ``display_list``.

  NOTE: defining this function rebinds the name ``display`` that the import
  cell bound to ``IPython.display``.
  """
  titles = ('Input Image', 'True Mask', 'Predicted Mask')
  panel_count = len(display_list)

  plt.figure(figsize=(15, 15))
  for position, tensor in enumerate(display_list):
    plt.subplot(1, panel_count, position + 1)
    plt.title(titles[position])
    picture = reverse_image(tensor) if tensor.size(0) == 3 else reverse_mask(tensor)
    plt.imshow(picture)
    plt.axis('off')
  plt.show()


In [None]:
# Show a randomly chosen training sample together with its true mask
display_idx = random.randrange(TRAINSET_SIZE)
print(f"Index:{display_idx}")

img, mask = train_set[display_idx]
display([img, mask])

# Instantiate the UNet model

- Move the model to GPU


In [None]:
import segmentation_models_pytorch as smp

# U-Net with a MobileNetV2 encoder and one output channel per class.
# No encoder weights are specified, so the library default applies
# (presumably ImageNet pre-trained weights - confirm in the smp documentation).
model = smp.Unet(encoder_name="mobilenet_v2", classes=N_CLASSES)

In [None]:
# Uncomment below to show the model
#model

In [None]:
# Uncomment below to show the model
#from torchsummary import summary
#summary(model.cpu(), input_size=(3, 224, 224))

In [None]:
# Continue from the best weights of an earlier run when they exist on the
# Google Drive; otherwise the model keeps its initial weights.
if os.path.exists(GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME): 
    model.load_state_dict(torch.load(GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME))
    print('Last best model weights loaded!')

# Model training

We will optimize loss as the sum of IoU, Dice and BCE, specifically this function: IoU+Dice+0.8∗BCE.

In [None]:
from catalyst.contrib.nn import DiceLoss, IoULoss

# Three loss terms, addressed by key from the criterion callbacks
criterion = dict(
    dice=DiceLoss(),
    iou=IoULoss(),
    bce=nn.BCEWithLogitsLoss(),
)

In [None]:
from torch import optim
from catalyst.contrib.nn import RAdam, Lookahead

learning_rate = LEARNING_RATE
encoder_learning_rate = ENCODER_LEARNING_RATE

# Since we use a pre-trained encoder, we will reduce the learning rate on it.
# The "encoder*" pattern selects the parameters these settings apply to.
layerwise_params = {"encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)}

# This function removes weight_decay for biases and applies our layerwise_params
model_params = utils.process_model_params(model, layerwise_params=layerwise_params)

# RAdam as the base optimizer, wrapped in Lookahead (both shipped with catalyst)
base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)

# Multiply the learning rate by 0.25 after SCHEDULER_PATIENCE epochs without improvement
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=SCHEDULER_PATIENCE)

In [None]:
from catalyst.dl import SupervisedRunner

# by default SupervisedRunner uses "features" and "targets" as batch keys;
# here the keys are named "image" and "mask" instead.
# NOTE(review): the datasets' __getitem__ returns an [image, mask] list, not a
# dict - presumably the runner assigns the two elements to these keys by
# position; confirm against the catalyst documentation.
runner = SupervisedRunner(device=device, input_key="image", input_target_key="mask")

In [None]:
# Mixed-precision settings for the training run; None when FP16 is not used
fp16_params = dict(opt_level=FP16_OPT_LEVEL) if is_fp16_used else None

print(f"FP16 params: {fp16_params}")

In [None]:
%load_ext tensorboard
%tensorboard --logdir {LOG_DIR}

## Training

In [None]:
from catalyst.dl import EarlyStoppingCallback

# Ends the training when the "loss" metric (lower is better) has not improved
# for EARLY_STOP_PATIENCE epochs.
early_stop_callback = EarlyStoppingCallback(patience=EARLY_STOP_PATIENCE, metric="loss", minimize=True)

In [None]:
from catalyst.dl import Callback, CallbackOrder, CallbackNode, IRunner

class SaveBestModelCallback(Callback):
    """Save the model weights whenever the monitored validation metric improves.

    Args:
        metric: Key into ``runner.valid_metrics`` that is monitored.
        minimize: ``True`` if a lower score is better, ``False`` if higher.
        min_delta: Minimum change of the score that counts as an improvement.
        filename: File the weights (``state_dict``) are written to.
    """

    def __init__(
        self,
        metric: str = "loss",
        minimize: bool = True,
        min_delta: float = 1e-6,
        filename: str = "best_model.pth"
    ):
        super().__init__(order=CallbackOrder.external, node=CallbackNode.all)
        self.best_score = None
        self.metric = metric
        # The target file was previously ignored in favour of a path built
        # from notebook globals; it is now honoured.
        self.filename = filename
        self.num_bad_epochs = 0
        self.is_better = None

        if minimize:
            self.is_better = lambda score, best: score <= (best - min_delta)
        else:
            self.is_better = lambda score, best: score >= (best + min_delta)

    def on_epoch_end(self, runner: "IRunner") -> None:
        """Compare this epoch's score with the best one so far and save on improvement."""
        # Nothing to save during inference stages.
        if runner.stage.startswith("infer"):
            return

        score = runner.valid_metrics[self.metric]
        if self.best_score is None or self.is_better(score, self.best_score):
            print(f'Loss improved from {self.best_score} to {score}. ->Saving model weights!')
            # NOTE(review): saves the notebook-global `model`; presumably the
            # same object the runner trains - confirm before reusing this
            # callback outside the notebook.
            torch.save(model.state_dict(), self.filename)
            self.num_bad_epochs = 0
            self.best_score = score
        else:
            self.num_bad_epochs += 1
            print(f'Loss did not improve from {self.best_score} for {self.num_bad_epochs} epoch(s)')
 

# Callback instance for the training run; the best weights go to
# GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME on the Google Drive.
save_best_model_callback = SaveBestModelCallback(filename=GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME)

In [None]:
from catalyst.dl import DiceCallback, IouCallback, \
  CriterionCallback, MetricAggregationCallback

# Callbacks for the training run: three separate loss terms, their weighted
# sum as the total "loss", two metrics, early stopping and weight saving.
callbacks = [
    # Each criterion is calculated separately; criterion_key selects the
    # entry of the `criterion` dict, prefix names the resulting value.
    CriterionCallback(
        input_key="mask",
        prefix="loss_dice",
        criterion_key="dice"
    ),
    CriterionCallback(
        input_key="mask",
        prefix="loss_iou",
        criterion_key="iou"
    ),
    CriterionCallback(
        input_key="mask",
        prefix="loss_bce",
        criterion_key="bce"
    ),

    # And only then we aggregate everything into one loss:
    # loss = 1.0 * loss_dice + 1.0 * loss_iou + 0.8 * loss_bce
    MetricAggregationCallback(
        prefix="loss",
        mode="weighted_sum", # can be "sum", "weighted_sum" or "mean"
        # because we want weighted sum, we need to add scale for each loss
        metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
    ),

    # metrics
    DiceCallback(input_key="mask"),
    IouCallback(input_key="mask"),
    
    # both of these monitor the aggregated "loss"
    early_stop_callback,
    save_best_model_callback
]

# Run the training with everything prepared in the cells above.
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    # our dataloaders ('train' and 'valid')
    loaders=dataloaders,
    # We can specify the callbacks list for the experiment;
    callbacks=callbacks,
    # path to save logs
    logdir=LOG_DIR,
    num_epochs=EPOCHS,
    # save our best checkpoint by IoU metric
    main_metric="iou",
    # IoU needs to be maximized.
    minimize_metric=False,
    # for FP16. fp16_params is defined in an earlier cell (None when FP16 is not used)
    fp16=fp16_params,
    # verbose=False: no per-batch progress output
    verbose=False,
)

In [None]:
print('Done!')

At this point the training is done, very likely terminated via early stopping. Now, the logs can be copied to the google drive.

In [None]:
#shutil.copy(os.path.normpath(checkpoint_path), os.path.normpath(GDRIVE_PATH+checkpoint_path))
#torch.save(model.state_dict(), GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME)

In [None]:
# Function to zip up a complete directory and add all files to the zip file.
def zipdir(dir_to_archive, archive_filename):
    """Zip up a complete directory tree.

    Every file below ``dir_to_archive`` is added under the path it has when
    joined with ``dir_to_archive``. The archive itself is skipped, so it may
    be located inside the directory that is being archived.

    Args:
        dir_to_archive: Root directory whose files are added.
        archive_filename: Path of the zip file to create (overwritten if it
            already exists).
    """
    # Compare resolved paths: comparing the bare file name with the archive
    # path, as before, never matched once the archive path had a directory.
    archive_realpath = os.path.realpath(archive_filename)

    # The context manager closes the archive even if adding a file fails.
    with zipfile.ZipFile(archive_filename, 'w', zipfile.ZIP_DEFLATED) as ziph:
        for root, _dirs, files in os.walk(dir_to_archive):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.realpath(file_path) != archive_realpath:
                    ziph.write(file_path)

In [None]:
# Zip log directory and copy to google drive.
# The archive is first written to the local data directory and then copied;
# nothing happens when no log directory exists.
if os.path.exists(LOG_DIR): 
    print("Compressing logs")
    zipdir(LOG_DIR, DATA_PATH + LOG_ZIP_FILE_NAME)
    print('Copying %s to %s...'%(LOG_ZIP_FILE_NAME, GDRIVE_PATH))
    shutil.copy(os.path.normpath(DATA_PATH + LOG_ZIP_FILE_NAME), 
                os.path.normpath(GDRIVE_PATH + LOG_ZIP_FILE_NAME))

## Predict new images using the trained model
Besides training and validation sets, there is a third set to test the model. This set just contains JPG images and below the model is invoked to create mask PNG files for that set. This allows visually inspecting the output of the model.


In [None]:
# Predict with the best weights saved on the Google Drive rather than with the
# weights of the last epoch; skipped when no weight file exists.
if os.path.exists(GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME): 
    model.load_state_dict(torch.load(GDRIVE_PATH + MODEL_WEIGHT_FILE_NAME))
    print('Last best model weights re-loaded!')

In [None]:
# Make sure the output directory for the predicted masks exists
if not os.path.exists(PREDICTION_PATH):
    os.makedirs(PREDICTION_PATH)

In [None]:
# Remove the prediction archive of an earlier run before a new one is created
if os.path.exists(PREDICTION_PATH + PREDICTION_ZIP_FILE_NAME):
    os.remove(PREDICTION_PATH + PREDICTION_ZIP_FILE_NAME)

In [None]:
# Archive that collects the predicted masks. It stays open across the next
# cells and is closed explicitly after the prediction loop.
zipObj = zipfile.ZipFile(PREDICTION_PATH+PREDICTION_ZIP_FILE_NAME, 'w')

In [None]:
# Predict a mask for every recorded image, store it as PNG and add it to the archive.
model.eval()   # Set model to the evaluation mode
n = 0
with torch.no_grad():
  for image_batch, filename_batch in predloader:
    mask_batch = create_mask(model(image_batch.to(device))).cpu()
    for predicted_mask, image_name in zip(mask_batch, filename_batch):
      stem = os.path.splitext(os.path.basename(image_name))[0]
      pred_filename = stem.replace("Img","Pred")+".png"
      n += 1
      print(f"Saving file #{n}: " + pred_filename)
      target = PREDICTION_PATH + pred_filename
      # Class ids are divided by 255 before saving - presumably so that
      # save_image's scaling to 0..255 writes the class id as pixel value;
      # confirm against the torchvision documentation.
      torchvision.utils.save_image(predicted_mask/255.0, target, normalize=False, scale_each=False)
      zipObj.write(target)


In [None]:
zipObj.close()

In [None]:
# Copy the archive with the predicted masks to the Google Drive
print('Copying %s to %s...'%(PREDICTION_ZIP_FILE_NAME, GDRIVE_PATH))
shutil.copy(os.path.normpath(PREDICTION_PATH+PREDICTION_ZIP_FILE_NAME), 
                 os.path.normpath(GDRIVE_PATH+PREDICTION_ZIP_FILE_NAME))

In [None]:
print("All Done!")