<a href="https://colab.research.google.com/github/JorgeBennasar/incubit_challenge/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Incubit Inc. Challenge Code, by Jorge J. Bennasar Vazquez

In [None]:
# Import data from Google Drive
# Colab-only step: relies on the `google.colab` package and mounts the Drive
# at '/content/drive'.
from google.colab import drive
drive.mount('/content/drive')

In [10]:
import torch
import torchvision.transforms as transforms
import json
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np 
import copy
import random

from PIL import Image, ImageEnhance
from torch.utils.data import DataLoader

# Dataset locations (annotation JSON files and raw PNG images).
# NOTE(review): these are relative paths, so they resolve against the current
# working directory; presumably that is '/content' on Colab — confirm.
annotations_path = 'drive/MyDrive/challenge/annotations'
images_path = 'drive/MyDrive/challenge/raw'

# Dataset class creation
class my_dataset(torch.utils.data.Dataset):
  """Image dataset with polygon annotations, loaded fully into memory.

  Images named ``{i}_{j}.png`` (``j`` in 0..8) are read from ``images_path``
  and resized by ``resize``. For modes other than ``'tt'`` only images that
  have a ``{i}_{j}.png-annotated.json`` file in ``annotations_path`` are kept,
  together with their resized polygons and labels. In ``'tt'`` mode only
  images *without* such a file are kept.

  ``__getitem__`` returns ``(image_tensor, target_dict)`` for modes other than
  ``'tt'`` and only ``image_tensor`` for ``'tt'``. Augmentation (flips, crop,
  brightness) is applied in ``'tr'`` mode only.

  Polygons are flat integer arrays ``[x0, y0, x1, y1, ...]``; boxes are
  ``[x_min, y_min, x_max, y_max]``.
  """

  # Upper bound on the number of random windows ``crop`` tries before giving
  # up and returning the uncropped sample. Without a bound the search never
  # terminates when no polygon can lie strictly inside any window.
  MAX_CROP_ATTEMPTS = 100

  def __init__(
      self, 
      annotations_path,     # annotations path
      images_path,          # images path
      mode,                 # 'tr' for training, 'cv' for validation, 'tt' for testing
      class_sel,            # 1 for 'Buildings', 2 for 'Houses' and 'Sheds/Garages', 3 for all classes
      h_p = 0.5,            # probability of horizontal flip in data augmentation
      v_p = 0.5,            # probability of vertical flip in data augmentation
      c_p = 0.5,            # probability of cropping in data augmentation
      window = [0.5, 1],    # window of lengths (in percentage) for cropping in data augmentation
      factor = [0.9, 1.1],  # window of brightness factors in data augmentation
      resize = 0.4,         # parameter to resize images (in percentage)
      ):
    """Load every selected image (and its annotations) into memory.

    ``class_sel`` controls which polygons feed ``self.polygons`` /
    ``self.labels``: 1 keeps 'Buildings' (label 1), 2 keeps everything except
    'Buildings' with labels shifted down by one ('Houses' -> 1,
    'Sheds/Garages' -> 2), 3 keeps all classes with their original index.
    ``self.polygons_all`` / ``self.labels_all`` always hold every polygon.
    """
    self.mode = mode
    self.h_p = h_p
    self.v_p = v_p
    self.c_p = c_p
    self.window = window
    self.factor = factor
    self.class_sel = class_sel
    classes = ['Background', 'Buildings', 'Houses', 'Sheds/Garages']
    self.classes = classes
    self.resize = resize
    EPS = 1-1e-8  # to assure resized polygons are within boundaries

    # Select images for each set (first index of the file name)
    if mode == 'tr':
      i_sel = [0, 1, 2, 3, 4, 5, 6, 7]
    elif mode == 'cv':
      i_sel = [8]
    else:
      i_sel = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    # Get images, polygons and labels (only images for test set)
    images = []
    polygons = []
    polygons_all = []
    labels = []
    labels_all = []

    for i in i_sel:
      for j in range(9):
        stem = f'{i}_{j}.png'
        # The context manager closes the underlying file once the pixels have
        # been converted into a new RGB image.
        with Image.open(os.path.join(images_path, stem)) as raw:
          image_ = raw.convert('RGB')
        width, height = image_.size
        image_ = np.array(image_.resize((int(width*resize), int(height*resize))))  # image resizing

        annotation_file = os.path.join(annotations_path, f'{stem}-annotated.json')
        if os.path.isfile(annotation_file):
          if mode != 'tt':
            polygons_ = []
            polygons_all_ = []
            labels_ = []
            labels_all_ = []
            with open(annotation_file, 'r') as f:
              data = json.load(f)
            for entry in data['labels']:
              label = entry['name']
              for annotation in entry['annotations']:
                # polygon resizing; the int cast truncates after subtracting EPS
                polygon = np.array(np.array(annotation['segmentation']) * resize - EPS, dtype=int)
                if class_sel == 1 and label == 'Buildings':
                  polygons_.append(polygon)
                  labels_.append(classes.index(label))
                elif class_sel == 2 and label != 'Buildings':
                  polygons_.append(polygon)
                  labels_.append(classes.index(label) - 1)
                elif class_sel == 3:
                  polygons_.append(polygon)
                  labels_.append(classes.index(label))
                polygons_all_.append(polygon)
                labels_all_.append(classes.index(label))
            images.append(image_)
            polygons.append(polygons_)
            polygons_all.append(polygons_all_)
            labels.append(labels_)
            labels_all.append(labels_all_)
        elif mode == 'tt':
          images.append(image_)

    self.images = images
    self.polygons = polygons
    self.polygons_all = polygons_all
    self.labels = labels
    self.labels_all = labels_all

  def __getitem__(self, idx):
    """Return the sample at ``idx``.

    For modes other than ``'tt'`` returns ``(image, target)`` where ``target``
    has keys 'boxes' (N, 4), 'labels' (N,), 'masks' (N, H, W), 'image_id',
    'area' and 'iscrowd'. An image without any selected polygon yields
    correctly shaped empty tensors (N = 0). For ``'tt'`` returns only the
    normalized image tensor.
    """
    # Transform module creation
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert to tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # normalize
    ])

    if self.mode == 'tt':
      return transform(Image.fromarray(self.images[idx]))

    # Calculate boxes from polygons; the reshape keeps an empty list as (0, 4)
    boxes = np.array([self.get_box(polygon) for polygon in self.polygons[idx]]).reshape(-1, 4)

    # Data loading and (if training) augmentation
    if self.mode == 'tr':
      image_, boxes_, polygons_, labels_ = self.augment(idx, boxes)
    else:
      image_ = self.images[idx]
      boxes_ = boxes
      polygons_ = self.polygons[idx]
      labels_ = self.labels[idx]

    # Calculate masks, stacked once instead of converting a list of arrays
    if len(polygons_) > 0:
      masks_ = np.stack([self.get_mask(image_, polygon) for polygon in polygons_])
    else:
      masks_ = np.zeros((0, image_.shape[0], image_.shape[1]), dtype=np.uint8)

    # To tensors
    boxes_t = torch.as_tensor(np.asarray(boxes_, dtype=np.float32).reshape(-1, 4))
    labels_t = torch.tensor(labels_, dtype=torch.int64)
    masks_t = torch.as_tensor(masks_, dtype=torch.uint8)
    image_id_t = torch.tensor([idx])
    area_t = (boxes_t[:, 3] - boxes_t[:, 1])*(boxes_t[:, 2] - boxes_t[:, 0])
    iscrowd_t = torch.zeros((len(polygons_),), dtype=torch.int64)

    target_ = {
        'boxes': boxes_t,
        'labels': labels_t,
        'masks': masks_t,
        'image_id': image_id_t,
        'area': area_t,
        'iscrowd': iscrowd_t,
        }
    return transform(Image.fromarray(image_)), target_

  def __len__(self):
    """Return the number of images kept by ``__init__``."""
    return len(self.images)

  @staticmethod
  def _polygon_points(polygon):
    """Return the flat polygon as an (N, 2) int32 array of (x, y) points."""
    return np.asarray(polygon).reshape((-1, 2)).astype(np.int32)

  @staticmethod
  def _label_color(label):
    """Return the plot color name used for a class label."""
    if label == 1:
      return 'lightgreen'
    if label == 2:
      return 'blue'
    return 'orangered'

  def get_box(self, polygon):
    """Return ``[x_min, y_min, x_max, y_max]`` of a flat polygon."""
    xy_vec = np.asarray(polygon).reshape((-1, 2))
    x_points = xy_vec[:, 0]
    y_points = xy_vec[:, 1]
    return [min(x_points), min(y_points), max(x_points), max(y_points)]

  def get_mask(self, image, polygon):
    """Return a uint8 (H, W) mask of ``image``'s size, 1 inside the polygon."""
    mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
    cv2.fillPoly(mask, [self._polygon_points(polygon)], color=(1))
    return mask

  def _flip(self, image, boxes, polygons, p, horizontal):
    """Shared implementation of the horizontal and vertical flips.

    Copies the inputs, then with probability ``p`` mirrors the image and maps
    each coordinate ``c`` on the flipped axis to ``size - c``. Boxes are
    expected as ``[min, min, max, max]``; an empty box array is returned with
    shape (0, 4) instead of raising.
    """
    image_ = copy.copy(image)
    boxes_ = np.array(copy.copy(boxes), dtype=float).reshape(-1, 4)
    polygons_ = copy.deepcopy(polygons)

    if random.random() < p:
      if horizontal:
        image_ = image_[:, ::-1, :]
        size = image_.shape[1]
        low, high, start = 0, 2, 0
      else:
        image_ = image_[::-1, :, :]
        size = image_.shape[0]
        low, high, start = 1, 3, 1

      # Mirroring swaps the roles of min and max: new_min = size - old_max.
      boxes_[:, [low, high]] = size - boxes_[:, [high, low]]
      boxes_ = np.array(boxes_, dtype=int)

      for polygon in polygons_:
        polygon[start::2] = size - polygon[start::2]

    return image_, boxes_, polygons_

  def horizontal_flip(self, image, boxes, polygons, p):
    """Apply a horizontal flip with probability ``p``.

    Returns copies ``(image, boxes, polygons)``; boxes are an int array when
    the flip happened and a float array otherwise.
    """
    return self._flip(image, boxes, polygons, p, horizontal=True)

  def vertical_flip(self, image, boxes, polygons, p):
    """Apply a vertical flip with probability ``p``.

    Returns copies ``(image, boxes, polygons)``; boxes are an int array when
    the flip happened and a float array otherwise.
    """
    return self._flip(image, boxes, polygons, p, horizontal=False)

  def crop(self, image, boxes, polygons, labels, p, window):
    """Crop to a random window with probability ``p``.

    The window side lengths are drawn between ``window[0]`` and ``window[1]``
    times the image size. Only polygons lying strictly inside the window are
    kept (shifted to window coordinates). A window is accepted as soon as it
    keeps at least one polygon. If none is found within
    ``MAX_CROP_ATTEMPTS`` tries, or there are no polygons at all, the inputs
    are returned unchanged.

    Returns ``(image, boxes, polygons, labels)``.
    """
    if random.random() >= p or len(polygons) == 0:
      return image, boxes, polygons, labels

    height, width = image.shape[0], image.shape[1]
    for _ in range(self.MAX_CROP_ATTEMPTS):
      len_x = random.randint(int(width*window[0]), int(width*window[1]))
      len_y = random.randint(int(height*window[0]), int(height*window[1]))
      x_1 = random.randint(0, int(width-len_x))
      x_2 = int(x_1+len_x)
      y_1 = random.randint(0, int(height-len_y))
      y_2 = int(y_1+len_y)

      boxes_crop = []
      polygons_crop = []
      labels_crop = []
      for i, polygon in enumerate(polygons):
        x_points = polygon[::2]
        y_points = polygon[1::2]
        inside = (max(x_points) < x_2 and min(x_points) > x_1 and
                  max(y_points) < y_2 and min(y_points) > y_1)
        if not inside:
          continue
        shifted = copy.deepcopy(polygon)  # never modify the caller's polygon
        shifted[::2] -= x_1
        shifted[1::2] -= y_1
        polygons_crop.append(shifted)
        boxes_crop.append(self.get_box(shifted))
        labels_crop.append(labels[i])

      if len(boxes_crop) > 0:
        image_crop = copy.copy(image)[y_1:y_2, x_1:x_2, :]
        return image_crop, np.array(boxes_crop), polygons_crop, labels_crop

    # No usable window found: keep the sample uncropped.
    return image, boxes, polygons, labels

  def brightness_change(self, image, factor):
    """Return the image with brightness scaled by a random factor.

    The factor is drawn uniformly between ``factor[0]`` and ``factor[1]``.
    """
    image_ = Image.fromarray(copy.copy(image)).convert('RGB')
    enhancer = ImageEnhance.Brightness(image_)
    f = factor[0] + (factor[1]-factor[0])*np.random.rand()
    return np.array(enhancer.enhance(f))

  def augment(self, idx, boxes):
    """Augment sample ``idx``: vertical flip, horizontal flip, crop, brightness.

    Returns ``(image, boxes, polygons, labels)``.
    """
    image_v, boxes_v, polygons_v = self.vertical_flip(
        self.images[idx], boxes, self.polygons[idx], self.v_p)
    image_vh, boxes_vh, polygons_vh = self.horizontal_flip(
        image_v, boxes_v, polygons_v, self.h_p)
    image_vhc, boxes_vhc, polygons_vhc, labels_vhc = self.crop(
        image_vh, boxes_vh, polygons_vh, self.labels[idx], 
        self.c_p, self.window)
    image_vhcb = self.brightness_change(image_vhc, self.factor)

    return image_vhcb, boxes_vhc, polygons_vhc, labels_vhc

  def plot_mask(self, idx):
    """Show a color-coded mask of all polygons of image ``idx``.

    The background is gray (0.75); label 1 is drawn in channel 1, label 2 in
    channel 2 and label 3 in channel 0.
    """
    image = self.images[idx]
    mask = np.ones((image.shape[0], image.shape[1], 3), dtype=np.float32) * 0.75
    dim = [1, 2, 0]
    for polygon, label in zip(self.polygons_all[idx], self.labels_all[idx]):
      mask_ = np.squeeze(self.get_mask(image, polygon))
      for j in range(3):
        mask[:, :, j] -= mask_ * 0.75 
      mask[:, :, dim[label-1]] += mask_ 
    plt.imshow(mask)

  def plot_boxes(self, idx):
    """Show image ``idx`` with the bounding box of every polygon."""
    plt.imshow(self.images[idx])
    for polygon, label in zip(self.polygons_all[idx], self.labels_all[idx]):
      x_min, y_min, x_max, y_max = self.get_box(polygon)
      # Closed rectangle outline
      x_vec = np.array([x_min, x_min, x_max, x_max, x_min])
      y_vec = np.array([y_min, y_max, y_max, y_min, y_min])
      plt.plot(x_vec, y_vec, color=self._label_color(label))
        
  def plot_polygons(self, idx):
    """Show image ``idx`` with the outline of every polygon."""
    plt.imshow(self.images[idx])
    for polygon, label in zip(self.polygons_all[idx], self.labels_all[idx]):
      xy_vec = np.array(polygon).reshape((-1, 2))
      # Append the first vertex again to close the outline
      x_vec = np.concatenate((xy_vec[:, 0], xy_vec[0, 0]), axis=None)
      y_vec = np.concatenate((xy_vec[:, 1], xy_vec[0, 1]), axis=None)
      plt.plot(x_vec, y_vec, color=self._label_color(label))

  def get_data(self, idx):
    """Return a dict describing image ``idx``.

    For ``'tt'`` mode the dict only has 'image'. Otherwise it also has
    'polygons', 'boxes', 'labels' (all classes, regardless of ``class_sel``)
    and 'mask', an (H, W, 3) uint8 image with every polygon filled in its
    class color.
    """
    image = self.images[idx]
    if self.mode == 'tt':
      return {
          'image': image
      }

    polygons = self.polygons_all[idx]
    labels = self.labels_all[idx]

    # Calculate boxes
    boxes = np.array([self.get_box(polygon) for polygon in polygons])

    # Calculate mask
    mask = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    for polygon, label in zip(polygons, labels):
      if label == 1:
        color = (0, 255, 0)
      elif label == 2:
        color = (0, 0, 255)
      else:
        color = (255, 0, 0)
      cv2.fillPoly(mask, [self._polygon_points(polygon)], color=color)

    return {
        'image': image,
        'polygons': polygons,
        'boxes': boxes,
        'labels': labels,
        'mask': mask,
    }

  def get_areas(self):
    """Return the mask area (in pixels) of every polygon, grouped by class name."""
    areas = {
        'Buildings': [],
        'Houses': [],
        'Sheds/Garages': [],
    }
    for image, polygons, labels in zip(self.images, self.polygons_all, self.labels_all):
      for polygon, label in zip(polygons, labels):
        areas[self.classes[label]].append(np.sum(np.array(self.get_mask(image, polygon))))

    return areas

In [16]:
# Datasets for model 1 (class_sel=1: Buildings only)
train_dataset_1 = my_dataset(annotations_path, images_path, mode='tr', class_sel=1)
val_dataset_1 = my_dataset(annotations_path, images_path, mode='cv', class_sel=1)

# Datasets for model 2 (class_sel=2: Houses and Sheds/Garages)
train_dataset_2 = my_dataset(annotations_path, images_path, mode='tr', class_sel=2)
val_dataset_2 = my_dataset(annotations_path, images_path, mode='cv', class_sel=2)

In [None]:
# Get statistics for training data (per-class polygon areas in pixels)
areas = train_dataset_1.get_areas()

# Convert each class's list to an array once and reuse it for every statistic.
_areas_buildings = np.array(areas['Buildings'])
_areas_houses = np.array(areas['Houses'])
_areas_sheds_and_garages = np.array(areas['Sheds/Garages'])

mean_area_buildings = np.mean(_areas_buildings)
mean_area_houses = np.mean(_areas_houses)
mean_area_sheds_and_garages = np.mean(_areas_sheds_and_garages)

std_area_buildings = np.std(_areas_buildings)
std_area_houses = np.std(_areas_houses)
std_area_sheds_and_garages = np.std(_areas_sheds_and_garages)

num_samples_buildings = len(areas['Buildings'])
num_samples_houses = len(areas['Houses'])
num_samples_sheds_and_garages = len(areas['Sheds/Garages'])

# print() applies str() to each argument itself, so the values are passed directly.
print('Mean of areas:  Buildings:', mean_area_buildings,
      ' Houses:', mean_area_houses,
      ' Sheds/Garages:', mean_area_sheds_and_garages)
print('Standard deviation of areas:  Buildings:', std_area_buildings,
      ' Houses:', std_area_houses,
      ' Sheds/Garages:', std_area_sheds_and_garages)
print('Number of samples:  Buildings:', num_samples_buildings,
      ' Houses:', num_samples_houses,
      ' Sheds/Garages:', num_samples_sheds_and_garages)

In [None]:
# Plot histogram
import seaborn

# Function to remove outliers (for class 'Buildings' only, to improve visualization)
def remove_outliers(data, q1_pos, q2_pos):
  """Keep the values of ``data`` lying between two of its quantiles.

  ``q1_pos`` and ``q2_pos`` are the quantile positions (0..1) of the lower
  and upper bounds; both bounds are inclusive. Returns a list in the
  original order.
  """
  lower = np.quantile(data, q1_pos)
  upper = np.quantile(data, q2_pos)
  return [value for value in data if lower <= value <= upper]

# 'Buildings' areas are trimmed to their lower 80% so the long tail does not
# stretch the x axis; the other classes are plotted untrimmed.
_buildings_trimmed = remove_outliers(areas['Buildings'], 0, 0.8)

plt.figure()
for _values, _color in (
    (_buildings_trimmed, 'lightgreen'),
    (areas['Houses'], 'blue'),
    (areas['Sheds/Garages'], 'orangered'),
):
  seaborn.histplot(data=np.array(_values), stat='count', kde=True, binwidth=10, color=_color, fill=False)
plt.xlim([0, max(_buildings_trimmed)])

In [None]:
# Create train and validation dataloaders
# Batch sizes and worker count shared by the loaders built below.
batch_size_tr = 4
# NOTE(review): the validation loaders use drop_last=True, so this presumably
# has to match the number of validation images — confirm.
batch_size_cv = 6  # Do not change, important to store all validation loss metrics
num_workers = 1

def collate_fn(batch):
  """Transpose a batch of samples into one tuple per sample field.

  A batch ``[(image_a, target_a), (image_b, target_b)]`` becomes
  ``((image_a, image_b), (target_a, target_b))``.
  """
  per_field = zip(*batch)
  return tuple(per_field)

# Options common to all four loaders; only the dataset and batch size differ.
_loader_options = dict(
    shuffle=True,
    drop_last=True,
    num_workers=num_workers,
    collate_fn=collate_fn,
)

train_loader_1 = torch.utils.data.DataLoader(
    train_dataset_1, batch_size=batch_size_tr, **_loader_options)

val_loader_1 = torch.utils.data.DataLoader(
    val_dataset_1, batch_size=batch_size_cv, **_loader_options)

train_loader_2 = torch.utils.data.DataLoader(
    train_dataset_2, batch_size=batch_size_tr, **_loader_options)

val_loader_2 = torch.utils.data.DataLoader(
    val_dataset_2, batch_size=batch_size_cv, **_loader_options)

In [None]:
# Clone PyTorch Vision
# The clone provides `vision.references.detection.utils`, imported by the
# training engine cell.
# NOTE(review): the clone is not pinned to a tag or commit; the engine relies
# on `utils.warmup_lr_scheduler` being present in the cloned revision — confirm.
! git clone 'https://github.com/pytorch/vision'

In [None]:
# Engine for training (based on: 'https://github.com/pytorch/vision/blob/master/references/detection/engine.py')
import math
import sys
import time
import vision.references.detection.utils as utils

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
  """Train ``model`` for one pass over ``data_loader``.

  Batches are moved to ``device``, the summed loss dict returned by the model
  is back-propagated, and the individual losses plus the learning rate are
  recorded in a ``utils.MetricLogger`` which is returned. During epoch 0 a
  warm-up scheduler is stepped after every batch. The process is terminated
  with ``sys.exit(1)`` if the loss becomes non-finite.
  """
  model.train()
  metric_logger = utils.MetricLogger(delimiter="  ")
  metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
  header = 'Epoch: [{}]'.format(epoch)

  # Warm-up is only used in the very first epoch.
  lr_scheduler = None
  if epoch == 0:
    lr_scheduler = utils.warmup_lr_scheduler(
        optimizer, min(1000, len(data_loader) - 1), 1. / 1000)

  for images, targets in metric_logger.log_every(data_loader, print_freq, header):
    images = [image.to(device) for image in images]
    targets = [
        {key: value.to(device) for key, value in target.items()}
        for target in targets
    ]

    loss_dict = model(images, targets)
    losses = sum(loss_dict.values())

    # Reduced copies are used for logging and for the finiteness check.
    loss_dict_reduced = utils.reduce_dict(loss_dict)
    losses_reduced = sum(loss_dict_reduced.values())
    loss_value = losses_reduced.item()

    if not math.isfinite(loss_value):
      print("Loss is {}, stopping training".format(loss_value))
      print(loss_dict_reduced)
      sys.exit(1)

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()
    if lr_scheduler is not None:
      lr_scheduler.step()

    metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
    metric_logger.update(lr=optimizer.param_groups[0]["lr"])

  return metric_logger

In [None]:
# Model creation and hyper-parameter selection
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Create models
# Both models start from torchvision's Mask R-CNN (ResNet-50 FPN) built with
# pretrained=True; the box and mask predictors are then replaced so that they
# output `num_classes_*` classes.

# Model 1: 2 classes (the datasets built with class_sel=1 use label 1 only;
# presumably index 0 is the background — confirm).
num_classes_1 = 2
hidden_layer_1 = 512
model_1 = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
in_features_1 = model_1.roi_heads.box_predictor.cls_score.in_features
model_1.roi_heads.box_predictor = FastRCNNPredictor(in_features_1, num_classes_1)
in_features_mask_1 = model_1.roi_heads.mask_predictor.conv5_mask.in_channels
model_1.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask_1, hidden_layer_1, num_classes_1)

# Model 2: 3 classes (the datasets built with class_sel=2 use labels 1 and 2).
num_classes_2 = 3
hidden_layer_2 = 512
model_2 = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
in_features_2 = model_2.roi_heads.box_predictor.cls_score.in_features
model_2.roi_heads.box_predictor = FastRCNNPredictor(in_features_2, num_classes_2)
in_features_mask_2 = model_2.roi_heads.mask_predictor.conv5_mask.in_channels
model_2.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask_2, hidden_layer_2, num_classes_2)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Define optimizers
# One Adam optimizer per model, over the parameters that require gradients.
lr_1 = 1e-03
weight_decay_1 = 0.0005
betas_1 = (0.9, 0.999)
eps_1 = 1e-08
params_1 = [p for p in model_1.parameters() if p.requires_grad]
optimizer_1 = torch.optim.Adam(params_1, lr=lr_1, betas=betas_1, eps=eps_1, weight_decay=weight_decay_1, amsgrad=False)

lr_2 = 1e-03
weight_decay_2 = 0.0005
betas_2 = (0.9, 0.999)
eps_2 = 1e-08
params_2 = [p for p in model_2.parameters() if p.requires_grad]
optimizer_2 = torch.optim.Adam(params_2, lr=lr_2, betas=betas_2, eps=eps_2, weight_decay=weight_decay_2, amsgrad=False)

# Define learning rate schedulers
# StepLR with step_size=2 and gamma=0.9 for each optimizer.
step_size_1 = 2
gamma_1 = 0.9
lr_scheduler_1 = torch.optim.lr_scheduler.StepLR(optimizer_1, step_size=step_size_1, gamma=gamma_1)

step_size_2 = 2
gamma_2 = 0.9
lr_scheduler_2 = torch.optim.lr_scheduler.StepLR(optimizer_2, step_size=step_size_2, gamma=gamma_2)

In [None]:
# Training
# Number of epochs each model is trained for in the loops below.
num_epochs_1 = 40
num_epochs_2 = 40

# Function to save loss metrics
def append_losses(loss_dict, logger):
  """Append the current average of every loss meter of ``logger``.

  For each loss name, the arithmetic mean of the values currently held in
  that meter's ``deque`` is appended to ``loss_dict[name]``. The meters are
  read from the ``logger`` argument (not from a module-level variable), so
  the function works with whichever logger the caller passes in.

  Args:
    loss_dict: dict mapping each loss name to a list of per-epoch values.
    logger: object exposing one attribute per loss name, each with a
      ``deque`` of numbers (e.g. the logger returned by ``train_one_epoch``).

  Returns:
    The same ``loss_dict``, updated in place.
  """
  loss_names = (
      'loss',
      'loss_classifier',
      'loss_box_reg',
      'loss_mask',
      'loss_objectness',
      'loss_rpn_box_reg',
  )
  for name in loss_names:
    values = getattr(logger, name).deque
    loss_dict[name].append(sum(values)/len(values))

  return loss_dict

# Per-epoch loss histories: one independent dict of empty lists for each
# model (1, 2) and each phase (tr = training, cv = validation).
_loss_history_keys = (
    'loss',
    'loss_classifier',
    'loss_box_reg',
    'loss_mask',
    'loss_objectness',
    'loss_rpn_box_reg',
)

loss_tr_1 = {key: [] for key in _loss_history_keys}
loss_cv_1 = {key: [] for key in _loss_history_keys}
loss_tr_2 = {key: [] for key in _loss_history_keys}
loss_cv_2 = {key: [] for key in _loss_history_keys}

# Banner announcing the training of model 1
for _banner_line in (
    '',
    '==================================================',
    '',
    '------------------ M O D E L - 1 -----------------',
    '',
    '==================================================',
    '',
):
  print(_banner_line)

model_1.train()
for epoch in range(num_epochs_1):

  # Training (on `device`)
  model_1.to(device)
  # `train_logger` is deliberately kept as a module-level name.
  train_logger = train_one_epoch(model_1, optimizer_1, train_loader_1, device, epoch, print_freq=4)
  lr_scheduler_1.step()

  # Saving training loss metrics
  loss_tr_1 = append_losses(loss_tr_1, train_logger)

  # Validation (on the CPU, without gradients); the loss dict of the last
  # validation batch is what gets recorded below.
  with torch.no_grad():
    cpu_device = torch.device("cpu")
    model_1.to(cpu_device)
    for batch_idx, (images, targets) in enumerate(val_loader_1):
      val_logger = model_1(images, targets)

  # Saving validation loss metrics; the total is the sum of the components
  _components = ('loss_classifier', 'loss_box_reg', 'loss_mask',
                 'loss_objectness', 'loss_rpn_box_reg')
  for _name in _components:
    loss_cv_1[_name].append(float(val_logger[_name].cpu().detach().numpy()))
  loss_cv_1['loss'].append(float(sum(loss_cv_1[_name][-1] for _name in _components)))

  # Report: separator, training line, separator, validation line, separator
  _separator = ('', '==================================================', '')
  for _line in _separator:
    print(_line)
  for _heading, _history in (('Training results:', loss_tr_1),
                             ('Validation results:', loss_cv_1)):
    _fields = [_heading + '  loss:', str(_history['loss'][-1])]
    for _name in _components:
      _fields.extend([' ' + _name + ':', str(_history[_name][-1])])
    print(*_fields)
    for _line in _separator:
      print(_line)

# Banner announcing the training of model 2
for _banner_line in (
    '',
    '==================================================',
    '',
    '------------------ M O D E L - 2 -----------------',
    '',
    '==================================================',
    '',
):
  print(_banner_line)

model_2.train()
for epoch in range(num_epochs_2):

  # Training (on `device`)
  model_2.to(device)
  # `train_logger` is deliberately kept as a module-level name.
  train_logger = train_one_epoch(model_2, optimizer_2, train_loader_2, device, epoch, print_freq=2)
  lr_scheduler_2.step()

  # Saving training loss metrics
  loss_tr_2 = append_losses(loss_tr_2, train_logger)

  # Validation (on the CPU, without gradients); the loss dict of the last
  # validation batch is what gets recorded below.
  with torch.no_grad():
    cpu_device = torch.device("cpu")
    model_2.to(cpu_device)
    for batch_idx, (images, targets) in enumerate(val_loader_2):
      val_logger = model_2(images, targets)

  # Saving validation loss metrics; the total is the sum of the components
  _components = ('loss_classifier', 'loss_box_reg', 'loss_mask',
                 'loss_objectness', 'loss_rpn_box_reg')
  for _name in _components:
    loss_cv_2[_name].append(float(val_logger[_name].cpu().detach().numpy()))
  loss_cv_2['loss'].append(float(sum(loss_cv_2[_name][-1] for _name in _components)))

  # Report: separator, training line, separator, validation line, separator
  _separator = ('', '==================================================', '')
  for _line in _separator:
    print(_line)
  for _heading, _history in (('Training results:', loss_tr_2),
                             ('Validation results:', loss_cv_2)):
    _fields = [_heading + '  loss:', str(_history['loss'][-1])]
    for _name in _components:
      _fields.extend([' ' + _name + ':', str(_history[_name][-1])])
    print(*_fields)
    for _line in _separator:
      print(_line)

print("C'est fini!")

# Saving models: for each model, the weights go to '<name>.pth' and the
# training / validation loss histories to '<name>_loss_tr.json' /
# '<name>_loss_cv.json', all in the same Drive folder.
_output_dir = '/content/drive/MyDrive/Colab Notebooks/'

model_1_name = 'model_buildings'
save_path_1 = _output_dir + model_1_name + '.pth'
torch.save(model_1.state_dict(), save_path_1)
for _suffix, _history in (('_loss_tr.json', loss_tr_1), ('_loss_cv.json', loss_cv_1)):
  with open(_output_dir + model_1_name + _suffix, 'w') as f:
    json.dump(_history, f)

model_2_name = 'model_houses_sheds_and_garages'
save_path_2 = _output_dir + model_2_name + '.pth'
torch.save(model_2.state_dict(), save_path_2)
for _suffix, _history in (('_loss_tr.json', loss_tr_2), ('_loss_cv.json', loss_cv_2)):
  with open(_output_dir + model_2_name + _suffix, 'w') as f:
    json.dump(_history, f)

In [4]:
# Loading models: restores the weights and the loss histories written by the
# saving step, from the same Drive folder and file names.
model_1_name = 'model_buildings'
model_2_name = 'model_houses_sheds_and_garages'

_prefix_1 = '/content/drive/MyDrive/Colab Notebooks/' + model_1_name
_prefix_2 = '/content/drive/MyDrive/Colab Notebooks/' + model_2_name

model_1.load_state_dict(torch.load(_prefix_1 + '.pth'))
with open(_prefix_1 + '_loss_tr.json', 'r') as f:
  loss_tr_1 = json.load(f)
with open(_prefix_1 + '_loss_cv.json', 'r') as f:
  loss_cv_1 = json.load(f)

model_2.load_state_dict(torch.load(_prefix_2 + '.pth'))
with open(_prefix_2 + '_loss_tr.json', 'r') as f:
  loss_tr_2 = json.load(f)
with open(_prefix_2 + '_loss_cv.json', 'r') as f:
  loss_cv_2 = json.load(f)

In [None]:
# Loss plots for training and validation
# Epoch indices for the x axes
x_tr_1 = list(range(len(loss_tr_1['loss'])))
x_cv_1 = list(range(len(loss_cv_1['loss'])))
x_tr_2 = list(range(len(loss_tr_2['loss'])))
x_cv_2 = list(range(len(loss_cv_2['loss'])))

plt.figure(figsize=(30, 10))

# One column per loss: (history key, title shown on the top row, y limits)
_columns = (
    ('loss', "Total Loss", [1, 3]),
    ('loss_classifier', "Classifier Loss", [0.2, 0.7]),
    ('loss_box_reg', "Box Reg. Loss", [0.3, 0.8]),
    ('loss_mask', "Mask Loss", [0.2, 0.9]),
    ('loss_objectness', "Objectness Loss", [0, 0.7]),
    ('loss_rpn_box_reg', "RPN Box Reg. Loss", [0, 0.3]),
)

# One row per model: (train x, train history, val x, val history, y label)
_rows = (
    (x_tr_1, loss_tr_1, x_cv_1, loss_cv_1, "Loss (Model 1)"),
    (x_tr_2, loss_tr_2, x_cv_2, loss_cv_2, "Loss (Model 2)"),
)

for _row, (_x_tr, _tr, _x_cv, _cv, _ylabel) in enumerate(_rows):
  for _col, (_key, _title, _ylim) in enumerate(_columns):
    plt.subplot(2, 6, _row * len(_columns) + _col + 1)
    plt.plot(_x_tr, _tr[_key], color='lightgreen', linewidth=8)  # training
    plt.plot(_x_cv, _cv[_key], color='blue', linewidth=8)        # validation
    if _col == 0:
      plt.ylabel(_ylabel)  # only the first column names the model
    if _row == 0:
      plt.title(_title)    # titles on the top row only
    else:
      plt.xlabel("Epoch")  # x labels on the bottom row only
    plt.ylim(_ylim)

In [11]:
# Create testing dataset (same for both models, no need to replicate)
test_dataset = my_dataset(annotations_path, images_path, mode='tt', class_sel=1)

In [6]:
# Gathering results from models
# Inference runs on the CPU with both models in evaluation mode.
cpu_device = torch.device("cpu")
for _model in (model_1, model_2):
  _model.eval()
  _model.to(cpu_device)

# Select mode and mask threshold
mode = 'tt'  # 'cv' for validation, 'tt' for testing
threshold = 0.5  # mask value threshold for forming the polygons and binary masks

# Testing and validation datasets are the same for both models, no need to replicate
# NOTE(review): the image counts are hard-coded; presumably they equal
# len(eval_dataset) — confirm.
if mode != 'cv':
  num_images = 9
  eval_dataset = test_dataset
else:
  num_images = 6
  eval_dataset = val_dataset_1

# Create polygons and prepare results
def create_polygon_and_binary_mask(mask):
  """Binarize one predicted mask and extract the outline of its main blob.

  The module-level `threshold` decides which mask values count as foreground.

  Args:
    mask: soft mask of a single instance. The axis swaps below move axis 0 to
      the end, so this assumes a channel-first (1, H, W) array -- TODO confirm
      against the model output.

  Returns:
    A tuple (polygon, binary_mask). `polygon` is a flat array
    [x0, y0, x1, y1, ...] tracing the largest contour of the binary mask, or
    an empty array when no pixel passes the threshold. `binary_mask` is the
    thresholded mask as uint8 (values 0 and 1).
  """
  mask_ = copy.copy(mask)
  mask_ = mask_.swapaxes(0,2).swapaxes(0,1)
  _, mask_ = cv2.threshold(mask_, threshold, 1, cv2.THRESH_BINARY)
  mask_ = np.array(mask_, dtype=np.uint8)
  contours, _ = cv2.findContours(mask_, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

  # A mask that never reaches the threshold has no contour at all; report an
  # empty polygon instead of failing on contours[0].
  if len(contours) == 0:
    return np.empty(0, dtype=np.int32), mask_

  # The first contour returned is not necessarily the main blob (it can be a
  # speck or a hole), so the one enclosing the largest area is used.
  main_contour = max(contours, key=cv2.contourArea)
  polygon_ = np.squeeze(main_contour).reshape(-1)

  return polygon_, mask_

def gather_results(model, dataset):
  """Run `model` on the first `num_images` samples of `dataset`.

  Relies on the module-level `num_images` (how many samples to process) and
  `mode` (in 'cv' mode a dataset item is indexed with [0] to get the image,
  otherwise the item itself is passed to the model).

  Args:
    model: detection model called with a list holding one image; its first
      output must provide 'boxes', 'labels' and 'masks'.
    dataset: object providing `get_data(i)['image']` and item access.

  Returns:
    Dict with keys 'images', 'boxes', 'labels', 'masks' and 'polygons'; each
    value is a list with one entry per image. Apart from 'images', every
    entry is a list with one element per retained instance. Instances whose
    thresholded mask is empty are dropped from all four per-instance lists so
    they stay index-aligned.
  """
  results = {
      'images': [],
      'boxes': [],
      'labels': [],
      'masks': [],
      'polygons': [],
  }
  for i in range(num_images):
    data = dataset.get_data(i)
    sample = dataset[i]
    model_input = sample[0] if mode == 'cv' else sample
    # Pure inference: no autograd graph is needed, which saves memory and time
    with torch.no_grad():
      out = model([model_input])
    prediction = out[0]
    raw_boxes = prediction['boxes'].cpu().detach().numpy()
    raw_labels = prediction['labels'].cpu().detach().numpy()
    masks_non_binary = prediction['masks'].cpu().detach().numpy()

    boxes_ = []
    labels_ = []
    masks_ = []
    polygons_ = []
    for box, label, soft_mask in zip(raw_boxes, raw_labels, masks_non_binary):
      polygon, mask = create_polygon_and_binary_mask(soft_mask)
      if polygon.size == 0:
        # Nothing above the threshold: there is no shape to report
        continue
      boxes_.append(box)
      labels_.append(label)
      masks_.append(mask)
      polygons_.append(polygon)

    results['images'].append(data['image'])
    results['boxes'].append(boxes_)
    results['labels'].append(labels_)
    results['masks'].append(masks_)
    results['polygons'].append(polygons_)

  return results

results_1 = gather_results(model_1, eval_dataset)
results_2 = gather_results(model_2, eval_dataset)

# Backups of the raw predictions. They must be deep copies: copy.copy only
# duplicates the outer dict, so the nested per-image lists would be shared
# with results_1 / results_2 and any in-place edit of those lists would
# silently alter the backups as well.
results_1_save = copy.deepcopy(results_1)
results_2_save = copy.deepcopy(results_2)

In [7]:
# Model combination algorithm
#
# For every image, each model-1 instance is compared with the model-2
# instances that are still alive. Two instances conflict when their masks
# share more than `coexistance_threhold` of the smaller mask's area. On a
# conflict:
#   - if either mask is larger than `boundary`, the model-2 instance loses;
#   - otherwise the model-1 instance loses and the comparison stops (model-2
#     instances it had beaten so far are then kept).
# Survivors of both models are concatenated, with model-2 labels shifted by
# one so they do not collide with the model-1 label.
#
# results_1 / results_2 are only read, never edited, so this cell can be
# re-run without deleting further instances or shifting the labels again.
boundary = 450  # boundary for selection in case of overlapping (area in pixels)
coexistance_threhold = 0.5  # maximum percentage of area of the smaller prediction that overlaps to accept both instances

final_results = {
    'images': [],
    'boxes': [],
    'labels': [],
    'masks': [],
    'polygons': [],
  }

for i in range(num_images):
  masks_1 = results_1['masks'][i]
  masks_2 = results_2['masks'][i]
  areas_2 = [np.sum(mask_2) for mask_2 in masks_2]  # computed once per image

  keep_1 = []                          # indices of surviving model-1 instances
  keep_2 = list(range(len(masks_2)))   # indices of surviving model-2 instances
  for j, mask_1 in enumerate(masks_1):
    area_1 = np.sum(mask_1)
    beaten_2 = set()
    drop_1 = False
    for k in keep_2:
      # Binary masks: a pixel of the sum exceeds 1 only where both are set
      overlap = np.count_nonzero((mask_1 + masks_2[k]) > 1)
      # If masks overlap more than the accepted percentage, apply the boundary rule
      if overlap > 0 and overlap > coexistance_threhold * min(area_1, areas_2[k]):
        if area_1 > boundary or areas_2[k] > boundary:
          beaten_2.add(k)
        else:
          drop_1 = True
          break
    if drop_1:
      continue
    keep_1.append(j)
    keep_2 = [k for k in keep_2 if k not in beaten_2]

  final_results['images'].append(results_1['images'][i])
  final_results['boxes'].append(np.array(
      [results_1['boxes'][i][j] for j in keep_1]
      + [results_2['boxes'][i][k] for k in keep_2]))
  final_results['labels'].append(
      [results_1['labels'][i][j] for j in keep_1]
      + [results_2['labels'][i][k] + 1 for k in keep_2])
  final_results['masks'].append(
      [masks_1[j] for j in keep_1] + [masks_2[k] for k in keep_2])
  final_results['polygons'].append(
      [results_1['polygons'][i][j] for j in keep_1]
      + [results_2['polygons'][i][k] for k in keep_2])

images = final_results['images']
boxes = final_results['boxes']
polygons = final_results['polygons']
masks = final_results['masks']
labels = final_results['labels']

In [16]:
# Plot functions of predictions

def plot_polygons(image, polygons, labels):
  """Show `image` and draw every polygon on top of it as a closed outline.

  Args:
    image: picture handed to plt.imshow.
    polygons: sequence of flat [x0, y0, x1, y1, ...] coordinate lists.
    labels: class label of each polygon; label 1 is drawn in 'lightgreen',
      label 2 in 'blue' and label 3 in 'orangered'.
  """
  channel_of_label = [1, 2, 0]
  palette = ['orangered', 'lightgreen', 'blue']
  plt.imshow(image)
  for idx, polygon in enumerate(polygons):
    vertices = np.array(polygon).reshape((-1, 2))
    # Repeat the first vertex at the end so the outline closes on itself
    outline = np.vstack((vertices, vertices[0]))
    line_color = palette[channel_of_label[labels[idx] - 1]]
    plt.plot(outline[:, 0], outline[:, 1], line_color)

def plot_mask(image, masks, labels):
  """Render the instance masks as colored regions on a light-gray canvas.

  Args:
    image: only its first two shape entries are used, to size the canvas.
    masks: sequence of binary masks, one per instance (squeezed before use).
    labels: class label of each mask; label 1 fills the green channel,
      label 2 the blue channel and label 3 the red channel.
  """
  channel_of_label = [1, 2, 0]
  canvas = np.full((image.shape[0], image.shape[1], 3), 0.75, dtype=np.float32)
  for idx, instance in enumerate(masks):
    footprint = np.squeeze(instance)
    # Remove the gray background under the instance in all three channels...
    canvas -= (footprint * 0.75)[:, :, np.newaxis]
    # ...then light up the single channel that encodes its class
    canvas[:, :, channel_of_label[labels[idx] - 1]] += footprint
  plt.imshow(canvas)

In [None]:
# Polygons plots: one row per image. In validation mode a second column shows
# the dataset's own polygon plot (presumably the ground truth) next to the
# prediction; in testing mode only the prediction is drawn.
side_by_side = mode == 'cv'
n_cols = 2 if side_by_side else 1
plt.figure(figsize=(10 * n_cols, 60))

for i in range(num_images):
  height, width = images[i].shape[0], images[i].shape[1]

  plt.subplot(num_images, n_cols, 1 + n_cols * i)
  plot_polygons(images[i], polygons[i], labels[i])
  plt.xlim([0, width])
  plt.ylim([0, height])

  if side_by_side:
    plt.subplot(num_images, 2, 2 + 2 * i)
    eval_dataset.plot_polygons(i)
    plt.xlim([0, width])
    plt.ylim([0, height])

In [None]:
# Mask plots: one row per image. In validation mode a second column shows the
# dataset's own mask plot (presumably the ground truth) next to the
# prediction; in testing mode only the prediction is drawn.
side_by_side = mode == 'cv'
n_cols = 2 if side_by_side else 1
plt.figure(figsize=(10 * n_cols, 60))

for i in range(num_images):
  height, width = images[i].shape[0], images[i].shape[1]

  plt.subplot(num_images, n_cols, 1 + n_cols * i)
  plot_mask(images[i], masks[i], labels[i])
  plt.xlim([0, width])
  plt.ylim([0, height])

  if side_by_side:
    plt.subplot(num_images, 2, 2 + 2 * i)
    eval_dataset.plot_mask(i)
    plt.xlim([0, width])
    plt.ylim([0, height])

In [None]:
# Check number of detected instances per class in comparison to ground truth (only for validation)
# Slot 0 counts label 1, slot 1 counts label 2, slot 2 counts label 3.
predicted = np.zeros(3, dtype=int)
truth = np.zeros(3, dtype=int)
if mode != 'cv':
  print('No ground-truth results for testing!')
else:
  for i in range(num_images):
    for true_label in eval_dataset.labels_all[i]:
      truth[true_label - 1] += 1
    for predicted_label in labels[i]:
      predicted[predicted_label - 1] += 1
  print('Number of predicted...  Buildings:', predicted[0], ' Houses:', predicted[1], 'Sheds/Garages:', predicted[2])
  print('Ground-truth number of...  Buildings:', truth[0], ' Houses:', truth[1], 'Sheds/Garages:', truth[2])

In [36]:
# Save the results into the desired format

# JSON encoder
class my_JSONEncoder(json.JSONEncoder):
  """JSON encoder that also accepts NumPy integers, floats and arrays."""

  def default(self, obj):
    """Convert NumPy values to plain Python ones; defer anything else to the base class."""
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    if isinstance(obj, np.integer):
      return int(obj)
    if isinstance(obj, np.floating):
      return float(obj)
    # Unknown type: the base implementation raises TypeError
    return super().default(obj)

# Convert polygons to original image size and save results
# One JSON file is written per test image; image_id_idx[i] names the i-th image.
image_id_idx = ['6_6', '6_7', '6_8', '7_6', '7_7', '7_8', '8_6', '8_7', '8_8']
if mode != 'tt':
  print('This step is only for testing data!')
else:
  for i in range(num_images):
    save_dict = {
        'filename': image_id_idx[i] + '.png',
        'labels': [
            {'name': class_name, 'annotations': []}
            for class_name in ('Buildings', 'Houses', 'Sheds/Garages')
        ],
    }
    for j, polygon in enumerate(final_results['polygons'][i]):
      # Undo the dataset's resize factor, then truncate to whole pixels
      rescaled = np.array(polygon / eval_dataset.resize, dtype=int)
      annotation = {
          'id': int(str(i) + str(j)),  # image index and instance index, concatenated
          'type': 'polygon',
          'segmentation': list(rescaled),
      }
      # Label n is stored under the n-th entry of 'labels'
      class_slot = final_results['labels'][i][j] - 1
      save_dict['labels'][class_slot]['annotations'].append(annotation)
    out_path = '/content/drive/MyDrive/Colab Notebooks/' + image_id_idx[i] + '.png-annotated.json'
    with open(out_path, 'w') as f:
      json.dump(save_dict, f, cls=my_JSONEncoder)

In [None]:
# The End!
# Thank you for considering my application.
# Jorge J. Bennasar Vazquez