
# Assignment 3

This is a template notebook for Assignment 3.


## Install dependencies and initialization

In [None]:
# install dependencies: 
!pip install pyyaml==5.1 pycocotools>=2.0.1
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

In [None]:
# Quick sanity check of the runtime before doing any work.
!pwd # shows current directory
!ls  # shows all files in this directory
!nvidia-smi # shows the specs and the current status of the allocated GPU

In [None]:
# import some common libraries
from google.colab.patches import cv2_imshow
from sklearn.metrics import jaccard_score
from PIL import Image, ImageDraw
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import datetime
import random
import json
import cv2
import csv
import os
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import tensorflow as tf
# import some common pytorch utilities
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import torch
import copy
# import some common detectron2 utilities
import detectron2
from detectron2.data import detection_utils as utils
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.engine import DefaultTrainer
from detectron2.engine import DefaultPredictor
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import ColorMode
from detectron2.utils.visualizer import Visualizer
from detectron2.data import build_detection_test_loader,build_detection_train_loader
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import transforms as T
setup_logger()

In [None]:
# Make sure that a GPU is available for your notebook (the cell should display True).
# Otherwise, update the settings in Runtime -> Change runtime type -> Hardware accelerator.
torch.cuda.is_available()

In [None]:
# You need to mount your Google Drive in order to load the data:
from google.colab import drive
drive.mount('/content/drive')
# Put all the corresponding data files in a data folder, and put the data folder in the same directory as this notebook.
# Also create an output directory for your files, such as the trained models and the output images.

In [6]:
# Define the location of the working directory, which should contain data/train, data/test, and data/train.json.
# BASE_DIR points into the mounted Google Drive; adjust it to your own folder.
BASE_DIR = '/content/drive/My Drive/CMPT_CV_lab3'
# Trained models and other outputs are written under BASE_DIR/output.
OUTPUT_DIR = '{}/output'.format(BASE_DIR)
os.makedirs(OUTPUT_DIR, exist_ok=True)

## Part 1: Object Detection

### Data Loader

In [7]:
'''
# This function should return a list of data samples in which each sample is a dictionary. 
# Make sure to select the correct bbox_mode for the data
# For the test data, you only have access to the images, therefore, the annotations should be empty.
# Other values could be obtained from the image files.
# TODO: approx 35 lines
'''

def get_detection_data(set_name):
  """Build the list of detectron2-style dataset records for one split.

  Args:
    set_name: "test" lists the .png files found in BASE_DIR/data/test and
      returns records without annotations. Any other value (e.g. "train" or
      "val") returns the annotated records built from BASE_DIR/data/train.json.

  Returns:
    A list of dicts. Every record has "file_name", "height", "width" and
    "annotations"; annotated records also carry "image_id". Each annotation
    holds "bbox", "bbox_mode" (BoxMode.XYWH_ABS), "segmentation" and
    "category_id" (always 0, the single "plane" class).
  """
  data_dirs = '{}/data'.format(BASE_DIR)
  train_dirs = '{}/data/train'.format(BASE_DIR)
  test_dirs = '{}/data/test'.format(BASE_DIR)

  if set_name == "test":
    dataset = []
    # Sorted so the record order is reproducible; os.listdir order is arbitrary.
    for image_name in sorted(os.listdir(test_dirs)):
      if not image_name.endswith(".png"):
        continue
      filename = os.path.join(test_dirs, image_name)
      # Only the size is needed; close the file handle right away.
      with Image.open(filename) as img:
        width, height = img.size
      dataset.append({
          "file_name": filename,
          "height": height,
          "width": width,
          # A fresh list per record so records never share annotation state.
          "annotations": [],
      })
    return dataset

  json_file = os.path.join(data_dirs, "train.json")
  with open(json_file) as f:
    imgs_anns = json.load(f)

  # train.json holds one entry per object; group the entries by image file.
  # The dict keeps first-seen order, so the output order follows the file even
  # if the entries of one image are not stored next to each other.
  records = {}
  for ann in imgs_anns:
    filename = os.path.join(train_dirs, ann["file_name"])
    record = records.get(filename)
    if record is None:
      # Open each image once (not once per annotation) to read its size.
      with Image.open(filename) as img:
        width, height = img.size
      record = {
          "file_name": filename,
          "image_id": ann["image_id"],
          "height": height,
          "width": width,
          "annotations": [],
      }
      records[filename] = record

    record["annotations"].append({
        "bbox": ann["bbox"],
        "bbox_mode": BoxMode.XYWH_ABS,
        "segmentation": ann["segmentation"],
        "category_id": 0,
    })

  return list(records.values())

In [8]:
'''
# Remember to add your dataset to DatasetCatalog and MetadataCatalog
# Consdier "data_detection_train" and "data_detection_test" for registration
# You can also add an optional "data_detection_val" for your validation by spliting the training data
# TODO: approx 5 lines
'''

# Register the three splits. DatasetCatalog.register refuses a name that is
# already registered, so the guard keeps this cell safe to re-run.
# NOTE: get_detection_data treats every split other than "test" as the training
# data, so "plane_val" currently yields the same records as "plane_train".
for d in ["train", "val","test"]:
  if "plane_" + d not in DatasetCatalog.list():
    # `d=d` binds the current split name; a plain closure would see only the last one.
    DatasetCatalog.register("plane_" + d, lambda d=d: get_detection_data(d))
  MetadataCatalog.get("plane_" + d).set(thing_classes=["plane"])
# Metadata (class names) handed to the Visualizer in the cells below.
balloon_metadata = MetadataCatalog.get("plane_train")

In [None]:
'''
# Visualize some samples using Visualizer to make sure that the function works correctly
# TODO: approx 5 lines
'''
# Draw the ground-truth annotations of three random training images.
dataset_dicts = get_detection_data("train")
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    
    # cv2 loads BGR; [:, :, ::-1] reverses the channel order for the Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    # Reverse the channels again before handing the image to cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])


### Set Configs

In [9]:
'''
# Set the configs for the detection part in here.
# TODO: approx 15 lines
'''
# Start from detectron2's default config; checkpoints are written to BASE_DIR/output/.
cfg = get_cfg()
cfg.OUTPUT_DIR = "{}/output/".format(BASE_DIR)

# Backbone/config choice: Faster R-CNN with ResNeXt-101 32x8d FPN (the R-101 variant is kept for reference).
#cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("plane_train",)
cfg.DATASETS.TEST = ("plane_test",)

cfg.DATALOADER.NUM_WORKERS = 4
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")  # Let training initialize from model zoo
# Solver settings: images per batch, base learning rate, and number of training iterations.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 
cfg.SOLVER.MAX_ITER = 7000  
# RoI head settings: RoIs sampled per image, and a single foreground class ("plane").
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  



### Training

In [None]:
"""
custom data augmentation
"""
def custom_mapper(dataset_dict):
  """Dataset mapper that applies the custom augmentations to one record.

  Works on a deep copy of `dataset_dict`, reads the image as BGR, applies the
  photometric jitters, a resize to 512x512 and random vertical/horizontal
  flips, then stores the augmented image under "image" and the transformed,
  non-crowd annotations under "instances".
  """
  record = copy.deepcopy(dataset_dict)
  bgr_image = utils.read_image(record["file_name"], format="BGR")

  # The order of this list is the order in which the augmentations are applied.
  augmentations = [
      T.RandomBrightness(0.8, 1.8),
      T.RandomContrast(0.6, 1.3),
      T.RandomSaturation(0.8, 1.4),
      T.RandomLighting(0.7),
      T.Resize((512,512)),
      T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
      T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
  ]
  bgr_image, applied = T.apply_transform_gens(augmentations, bgr_image)

  # HWC -> CHW float32 tensor.
  chw_image = bgr_image.transpose(2, 0, 1).astype("float32")
  record["image"] = torch.as_tensor(chw_image)

  image_hw = bgr_image.shape[:2]
  kept_annotations = []
  for obj in record.pop("annotations"):
    # Crowd annotations are dropped; the rest follow the image transforms.
    if obj.get("iscrowd", 0) == 0:
      kept_annotations.append(
          utils.transform_instance_annotations(obj, applied, image_hw))

  instances = utils.annotations_to_instances(kept_annotations, image_hw)
  record["instances"] = utils.filter_empty_instances(instances)
  return record

In [None]:
class CustomTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training loader uses `custom_mapper`."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training data loader built with the augmenting mapper."""
        train_loader = build_detection_train_loader(cfg, mapper=custom_mapper)
        return train_loader

In [None]:
'''
# Create a DefaultTrainer using the above config and train the model
# TODO: approx 5 lines
'''
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# The plain DefaultTrainer is used here; swap in CustomTrainer to train with the custom augmentations.
trainer = DefaultTrainer(cfg) 
#trainer = CustomTrainer(cfg)
# resume=True: continue from the last checkpoint found in cfg.OUTPUT_DIR, if any.
trainer.resume_or_load(resume=True)   #default False

trainer.train()



### Evaluation and Visualization

In [11]:
'''
# After training the model, you need to update cfg.MODEL.WEIGHTS
# Define a DefaultPredictor
'''
# Point the config at the weights produced by training and build the inference wrapper.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
'''
# Visualize the output for 3 random test samples
# TODO: approx 10 lines
'''
# Run the detector on three random test images and draw the predicted boxes.
dataset_dicts = get_detection_data("test")
for d in random.sample(dataset_dicts, 3):  
    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # cv2 loads BGR; [:, :, ::-1] reverses the channel order for the Visualizer.
    v = Visualizer(im[:, :, ::-1],
                   metadata=balloon_metadata, 
                   scale=0.3, 
    )
    # Predictions are moved to the CPU before drawing.
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
"""  
dataset_dicts = get_detection_data("test")
sample1 = dataset_dicts[1]
sample2 = dataset_dicts[4]
sample3 = dataset_dicts[25]
im = cv2.imread(sample1["file_name"])
#cv2.imwrite("predictor1.png", im)
im2 = cv2.imread(sample2["file_name"])
#cv2.imwrite("predictor2.png", im2)
im3 = cv2.imread(sample3["file_name"])
#cv2.imwrite("predictor3.png", im3)



outputs = predictor(im3)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format


v = Visualizer(im3[:, :, ::-1],
                   metadata=balloon_metadata, 
                   scale=0.3, 
                    # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2_imshow(out.get_image()[:, :, ::-1])
cv2.imwrite("predictor3.png", out.get_image()[:, :, ::-1])
"""

In [None]:
'''
# Use COCOEvaluator and build_detection_train_loader
# You can save the output predictions using inference_on_dataset
# TODO: approx 5 lines
'''
print(OUTPUT_DIR)

# Evaluation runs on "plane_train" (the test records carry no annotations).
# NOTE(review): the evaluator writes to "./output/" relative to the current
# working directory, not to the OUTPUT_DIR printed above — confirm this is intended.
evaluator = COCOEvaluator("plane_train", cfg, False, output_dir= "./output/")
val_loader = build_detection_test_loader(cfg, "plane_train")
# NOTE(review): uses `trainer.model`, so the training cell must have been run in this session.
print(inference_on_dataset(trainer.model, val_loader, evaluator))

### Improvements

For this part, you can add any improvements you have made, either by adding new input parameters to the previous functions or by defining new functions and variables.

In [None]:
'''
# Bring any changes and updates regarding the improvement in here
'''


## Part 2: Semantic Segmentation

### Data Loader

In [None]:
### Caching test data; this cell must be run before proceeding.
# For every test image, run the detector and store one 128x128 crop per predicted
# box in `test_crop` (same record layout as the training cache, plus the box itself).
test_crop = []
test_dicts = get_detection_data("test")
for pic in test_dicts:
  record = {}
  record["file_name"] = pic["file_name"]

  objs = []

  print(pic["file_name"])
  # The detector gets the cv2 image; the crops are cut from the PIL image.
  img = cv2.imread(pic["file_name"])
  outputs = predictor(img)
  pil_img = Image.open(pic["file_name"])
  for i in range(len(outputs["instances"])):
    # Box of the i-th detection as a numpy array of four values.
    one_plane = outputs["instances"][i].pred_boxes
    one_plane = one_plane.tensor[0].cpu().numpy()
    # Despite their names, `height` and `width` hold box values 2 and 3, which are
    # passed below as the right and lower edges of the PIL crop box.
    height = (one_plane[2])
    width = (one_plane[3])
    im1 = pil_img.crop((one_plane[0], one_plane[1], height, width))
    obj_img = im1.resize((128,128),Image.ANTIALIAS)

    # No ground truth for test data: placeholder mask of zeros.
    obj_mask = np.zeros((128,128))
    

    # NOTE(review): np.resize only reinterprets the flat pixel data as (128,128,3);
    # this presumably assumes the crop is already a 3-channel image — confirm.
    obj_img = np.resize(obj_img,(128,128,3))

    obj = {
        "bbox":one_plane,
        "obj_img":obj_img,
        "obj_mask":obj_mask,
    }  
    objs.append(obj)

  record["annotations"]=objs
  test_crop.append(record)    

In [None]:
### Caching train data; this cell must be run before proceeding.
# For every annotated object, store a 128x128 crop of the image and the matching
# 128x128 binary mask in `cropped`.
image_croppingset = get_detection_data("train")
cropped = []
for v in image_croppingset:
  record = {}
  record["file_name"] = v["file_name"]
  img = Image.open(v["file_name"])
  objs = []
  for i in v["annotations"]:
    img_seg = i["segmentation"]
    img_bbox = i["bbox"]
    # bbox is [x, y, w, h] (get_detection_data tags it XYWH_ABS), so `height`
    # actually holds the box width and `width` the box height. The crop and
    # GenericMask calls below pass them in positions that compensate for this.
    height = int(img_bbox[2])
    width = int(img_bbox[3])
    new_seg = []
  
  
    # Shift the polygon coordinates so they are relative to the box origin.
    # Note: this inner loop rebinds `i` (from annotation to polygon) and rewrites
    # the coordinate lists of `image_croppingset` in place.
    for i in img_seg:
      new_row = []
      for j in range(0,len(i)-1,2):
        i[j] = i[j]-img_bbox[0]
        i[j+1] = i[j+1]-img_bbox[1]
        new_row.append(i[j])
        new_row.append(i[j+1])
      new_seg.append(new_row)        
    im1 = img.crop((img_bbox[0], img_bbox[1], img_bbox[0]+height, img_bbox[1]+width))
    obj_img = im1.resize((128,128),Image.ANTIALIAS)
    # NOTE(review): np.resize only reinterprets the flat pixel data as (128,128,3);
    # this presumably assumes the crop is already a 3-channel image — confirm.
    obj_img = np.resize(obj_img,(128,128,3)) 
    # Rasterise the shifted polygons at the box size, then resize to 128x128.
    obj_mask = detectron2.utils.visualizer.GenericMask(new_seg,width,height).mask
    obj_mask = cv2.resize(obj_mask, (128, 128))
    obj = {
        "obj_img":obj_img,
        "obj_mask":obj_mask,
    }
   
    objs.append(obj)     
  record["annotations"]=objs
  cropped.append(record)   
"""
plt.imshow(cropped[2]["annotations"][1]["obj_img"], cmap='gray')
plt.figure()
plt.imshow(cropped[2]["annotations"][1]["obj_mask"], cmap='gray')
plt.figure()
  #display(cropped[0]["annotations"][0]["obj_img"])
"""

In [None]:
# Number of training records and number of cached test records.
print(len(image_croppingset))
print(len(test_crop))

In [None]:
'''
# Write a function that returns the cropped image and corresponding mask regarding the target bounding box
# idx is the index of the target bbox in the data
# high-resolution image could be passed or could be load from data['file_name']
# You can use detectron2.utils.visualizer.GenericMask to convert the segmentation annotations to binary masks
# TODO: approx 10 lines
'''

def get_instance_sample(data, idx, prepared_imageset):
  """Return the cached crop and mask of one instance of an image.

  Args:
    data: dataset record; only data["file_name"] is read.
    idx: index of the target instance within the image's cached annotations.
    prepared_imageset: list of cached records, each with "file_name" and
      "annotations" (a list of dicts holding "obj_img" and "obj_mask").

  Returns:
    (obj_img, obj_mask) of the first cached record whose file name matches.

  Raises:
    LookupError: if no cached record matches data["file_name"].
    IndexError: if idx is out of range for the matching record.
  """
  target_name = data["file_name"]
  for cached in prepared_imageset:
    if cached["file_name"] == target_name:
      instance = cached["annotations"][idx]
      return instance["obj_img"], instance["obj_mask"]
  # Without a match there is nothing to return; fail with a clear message.
  raise LookupError(
      "no cached crops found for image {!r}".format(target_name))




In [None]:
""" Test test obj_img
my_data_list = DatasetCatalog.get("plane_{}".format('test'))
sample = my_data_list[0]
print(len(test_crop[2]["annotations"]))
someimg, somemask = get_instance_sample(sample, 0, test_crop)
plt.imshow(someimg, cmap='gray')

plt.figure()
"""

In [None]:

'''
# We have provided a template data loader for your segmentation training
# You need to complete the __getitem__() function before running the code
# You may also need to add data augmentation or normalization in here
'''

class PlaneDataset(Dataset):
  """Per-instance dataset of cached 128x128 plane crops and their masks.

  Every annotation of every record in `data_list` becomes one sample. The
  crop and mask are looked up in `prepared_imageset` with get_instance_sample.
  """

  def __init__(self, set_name, data_list,prepared_imageset):
    """
    Args:
      set_name: name of the split; stored but not otherwise used by this class.
      data_list: dataset records; each needs "file_name" and "annotations".
      prepared_imageset: cached crops/masks consumed by get_instance_sample.
    """
    self.transforms = transforms.Compose([
        transforms.ToTensor(), # Converting the image to tensor and change the image format (Channels-Last => Channels-First)
    ])
    self.set_name = set_name
    self.data = data_list
    self.predata = prepared_imageset
    # One [record index, annotation index] pair per instance.
    self.instance_map = []
    for i, d in enumerate(self.data):
      for j in range(len(d['annotations'])):
        self.instance_map.append([i,j])

  # You can change the returned length to a small number like 10 to debug the
  # training procedure and check overfitting.
  # Make sure to use the correct length for the final training.
  def __len__(self):
    return len(self.instance_map)

  @staticmethod
  def _to_float_tensor(value):
    """Return an independent float32 tensor copy of a tensor or array-like."""
    if torch.is_tensor(value):
      # torch.tensor(<tensor>) emits a copy-construct UserWarning on every
      # call; clone().detach() is the recommended way to copy a tensor.
      return value.clone().detach().to(torch.float)
    return torch.tensor(value, dtype=torch.float)

  def numpy_to_tensor(self, img, mask):
    """Apply the transforms to `img` and return (img, mask) as float tensors."""
    if self.transforms is not None:
      img = self.transforms(img)
    return self._to_float_tensor(img), self._to_float_tensor(mask)

  def __getitem__(self, idx):
    """Return (img, mask) tensors shaped (3,128,128) and (1,128,128)."""
    if torch.is_tensor(idx):
      idx = idx.tolist()
    record_index, ann_index = self.instance_map[idx]
    data = self.data[record_index]
    img, mask = get_instance_sample(data, ann_index, self.predata)

    img, mask = self.numpy_to_tensor(img, mask)
    img = img.reshape((3,128,128))
    mask = mask.reshape((1,128,128))

    return img, mask

def get_plane_dataset(set_name, prepared_imageset,batch_size=2):
    """Build a shuffled loader over the registered "plane_<set_name>" split.

    Returns:
        (loader, dataset): the DataLoader and the PlaneDataset behind it.
    """
    catalog_key = "plane_{}".format(set_name)
    records = DatasetCatalog.get(catalog_key)
    dataset = PlaneDataset(set_name, records, prepared_imageset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    return loader, dataset

def get_prediction_dataset(set_name, data_list,prepared_imageset,batch_size=2):
    """Build an in-order (unshuffled) loader over an explicit list of records.

    Returns:
        (loader, dataset): the DataLoader and the PlaneDataset behind it.
    """
    dataset = PlaneDataset(set_name, data_list, prepared_imageset)
    # shuffle=False keeps predictions in the same order as the instances.
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    return loader, dataset

### Network

In [None]:
'''
# convolution module as a template layer consists of conv2d layer, batch normalization, and relu activation
'''
class conv(nn.Module):
    """3x3 convolution (padding 1), optionally followed by BatchNorm and ReLU.

    With activation=False only the convolution is kept, so the raw outputs
    can be used as logits.
    """

    def __init__(self, in_ch, out_ch, activation=True):
        super().__init__()
        modules = [nn.Conv2d(in_ch, out_ch, 3, padding=1)]
        if activation:
            modules.append(nn.BatchNorm2d(out_ch))
            modules.append(nn.ReLU(inplace=True))
        # Same sub-module order as before, so state_dict keys are unchanged.
        self.layer = nn.Sequential(*modules)

    def forward(self, x):
        return self.layer(x)

'''
# downsampling module equal to a conv module followed by a max-pool layer
'''

class down(nn.Module):
    """Encoder stage: two conv modules followed by a 2x2 max-pool."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = (
            conv(in_ch, out_ch),
            conv(out_ch, out_ch),
            nn.MaxPool2d(2),
        )
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        return self.layer(x)

'''
# upsampling module equal to a upsample function followed by a conv module
'''
class up(nn.Module):
    """Decoder stage: 2x upsampling followed by a conv module.

    Upsampling is bilinear interpolation when bilinear=True, otherwise a
    stride-2 transposed convolution that keeps the channel count.
    """

    def __init__(self, in_ch, out_ch, bilinear=False):
        super().__init__()
        self.up = (
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            if bilinear
            else nn.ConvTranspose2d(in_ch, in_ch, 2, stride=2)
        )
        self.conv = conv(in_ch, out_ch)

    def forward(self, x):
        return self.conv(self.up(x))

'''
# the main model which you need to complete by using above modules.
# you can also modify the above modules in order to improve your results.
'''
class MyModel(nn.Module):
    """Encoder-decoder segmentation network producing one logit per pixel.

    An input conv is followed by four `down` stages (32 -> 512 channels) and
    four `up` stages (512 -> 4 channels); the output conv maps to 1 channel.
    """

    def __init__(self):
        super().__init__()

        # Encoder
        self.input_conv = conv(3, 32)
        self.down = down(32, 64)
        self.down1 = down(64, 128)
        self.down2 = down(128, 256)
        self.down3 = down(256, 512)

        # Decoder
        self.up = up(512, 256)
        self.up1 = up(256, 128)
        self.up2 = up(128, 64)
        self.up3 = up(64, 4)
        # ReLU activation is removed to keep the logits for the loss function
        self.output_conv = conv(4, 1, False)

    def forward(self, input):
        # Run the stages strictly in sequence: encoder, decoder, output conv.
        pipeline = (
            self.input_conv,
            self.down, self.down1, self.down2, self.down3,
            self.up, self.up1, self.up2, self.up3,
            self.output_conv,
        )
        y = input
        for stage in pipeline:
            y = stage(y)
        return y

### Training

In [None]:
'''
# The following is a basic training procedure to train the network
# You need to update the code to get the best performance
# TODO: approx ? lines
'''

# Set the hyperparameters
num_epochs = 250
batch_size = 4
learning_rate = 0.001
weight_decay = 1e-5

model = MyModel() # initialize the model
model = model.cuda() # move the model to GPU
loader, _ = get_plane_dataset('train', cropped,batch_size) # initialize data_loader
crit = nn.BCEWithLogitsLoss() # Define the loss function
optim = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # Initialize the optimizer as Adam

# start the training procedure
for epoch in range(num_epochs):
  total_loss = 0.0
  for (img, mask) in tqdm(loader):
    # The loader already yields float tensors, so just move them to the GPU.
    # Re-wrapping them with torch.tensor(..., requires_grad=True) copies the
    # data, warns on every batch and tracks gradients for inputs and targets
    # that the optimizer never uses.
    img = img.cuda()
    mask = mask.cuda()
    pred = model(img)
    loss = crit(pred, mask)
    optim.zero_grad()
    loss.backward()
    optim.step()
    # .item() accumulates a plain Python float instead of a tensor.
    total_loss += loss.item()
  print("Epoch: {}, Loss: {}".format(epoch, total_loss/len(loader)))
  # A checkpoint is written after every epoch.
  torch.save(model.state_dict(), '{}/output/{}_segmentation_model.pth'.format(BASE_DIR, epoch))

# Saving the final model
torch.save(model.state_dict(), '{}/output/final_segmentation_model.pth'.format(BASE_DIR))


### Evaluation and Visualization

In [None]:
"""
# Before starting the evaluation, you need to set the model mode to eval
# You may load the trained model again, in case if you want to continue your code later
# TODO: approx 15 lines
"""
batch_size = 8
model = MyModel().cuda()
model.load_state_dict(torch.load('{}/output/final_segmentation_model.pth'.format(BASE_DIR)))
model = model.eval() # changing the model to evaluation mode will fix the batchnorm layers
loader, dataset = get_plane_dataset('train',cropped, batch_size)

count = 0          # number of evaluated instances
overall_score = 0  # sum of the per-instance IoU values

# Kept for the visualisation cell below: input crop (first channel),
# ground-truth mask and binarised prediction of every evaluated instance.
img_data = []
mask_data = []
pred_data = []

m = nn.Sigmoid()

for (img, mask) in tqdm(loader):

  with torch.no_grad():
    img = img.cuda()
    mask = mask.cuda()
    pred = model(img)

  # Iterate over the actual number of samples in this batch (the last batch
  # may be smaller); `batch_size` itself is left untouched.
  for batch in range(pred.shape[0]):
    # np.resize keeps the first 128*128 values, i.e. the first channel.
    img_original = np.resize(img[batch].cpu().numpy(),(128,128))
    img_data.append(img_original)

    # Logits -> probabilities -> binary mask (threshold 0.4).
    pred_mk = m(pred[batch].cpu()).numpy()
    pred_mk = np.squeeze(pred_mk,0)
    pred_mk = (pred_mk >= 0.4).astype(np.float32)
    pred_data.append(pred_mk)

    mk = np.resize(mask[batch].cpu().numpy(),(128,128))
    mask_data.append(mk)

    intersect = np.logical_and(pred_mk,mk)
    union = np.logical_or(pred_mk,mk)
    union_pixels = np.sum(union)
    # Both masks empty: count it as a perfect match rather than letting 0/0
    # produce NaN and poison the mean.
    score = np.sum(intersect)/union_pixels if union_pixels > 0 else 1.0
    overall_score += score
    count+=1

mean_iou = overall_score/count if count else float("nan")
print("IoU:",mean_iou)

print("\n #images: {}, Mean IoU: {}".format(count, mean_iou))


In [None]:

"""   test eval
m = nn.Sigmoid()
batch_size = pred.shape
count = 0
overall_score = 0

for batch in range(batch_size[0]):
  img1 = pred[batch].cpu()
  img1 = m(img1)
  img1 = img1.numpy()
  img1 = np.squeeze(img1,0)
  for i in img1:
  #print(i)
    i[i<0.6] = 0
    i[i>= 0.6] = 1
  img = mask[batch].cpu()
  img = img.numpy()
  img = np.resize(img,(128,128))

  plt.imshow(img, cmap='gray')
  plt.figure()
  plt.imshow(img1, cmap='gray')
  plt.figure()
  #display(img)
  intersect = np.logical_and(img1,img)
  union = np.logical_or(img1,img)
  score = np.sum(intersect)/np.sum(union)
  print(score)
  overall_score += score
  count +=1
print(overall_score/count)

#test = pred.shape
#print(test[0])
#plt.imshow(img, cmap='gray')
#plt.imshow(img1, cmap='gray')
#print(pred.shape)
print(mask.shape)
"""

In [None]:
'''
# Visualize 3 sample outputs
# TODO: approx 5 lines
'''

# Show three random evaluated instances: input crop, ground-truth mask, prediction.
for i in range(3):
  r = random.randint(0, len(mask_data)-1)

  # Each plt.figure() call opens a new figure for the next imshow.
  plt.imshow(img_data[r], cmap='gray')
  plt.figure()
  plt.imshow(mask_data[r], cmap='gray')
  plt.figure()
  plt.imshow(pred_data[r], cmap='gray')
  plt.figure()

## Part 3: Instance Segmentation

In this part, you need to obtain the instance segmentation results for the test data by using the trained segmentation model in the previous part and the detection model in Part 1.

### Get Prediction

In [None]:
'''
# Define a new function to obtain the prediction mask by passing a sample data
# For this part, you need to use all the previous parts (predictor, get_instance_sample, data preprocessings, etc)
# TODO: approx 35 lines
'''

def get_prediction_mask(data,prepared_imageset, model=None):
  """Predict the instance-label mask of one annotated image.

  Every annotated box of `data` is segmented with the segmentation network
  (using the crops cached in `prepared_imageset`), resized back to the box
  size and pasted into a full-size canvas with label i+1 for the i-th box.

  Args:
    data: dataset record with "file_name", "width", "height" and
      "annotations" (each with an XYWH "bbox" and a "segmentation").
    prepared_imageset: cached crops/masks, as consumed by get_instance_sample.
    model: optional, already loaded segmentation model. When None (default)
      the final checkpoint is loaded from BASE_DIR/output on every call.

  Returns:
    (img, gt_mask, pred): the PIL image, the ground-truth mask (a tensor, or
    the all-background numpy canvas when `data` has no annotations) and the
    predicted label mask as a CUDA tensor of shape (height, width).

  Raises:
    ValueError: if predictions exist but `data` has no boxes to place them.
  """
  annotations = data["annotations"]
  width = data["width"]
  height = data["height"]

  if len(annotations) == 0:
    # Use the cached record(s) of this image; each one is added exactly once.
    sample = [rec for rec in prepared_imageset
              if rec["file_name"] == data["file_name"]]
    gt_mask = None
  else:
    sample = [data]
    # Ground truth: first polygon of every annotation, rasterised together.
    all_plane = [ann["segmentation"][0] for ann in annotations]
    gt_mask = detectron2.utils.visualizer.GenericMask(all_plane,height,width).mask

  batch_size = 8
  if model is None:
    model = MyModel().cuda()
    model.load_state_dict(torch.load('{}/output/final_segmentation_model.pth'.format(BASE_DIR)))
  model = model.eval() # changing the model to evaluation mode will fix the batchnorm layers

  loader, dataset = get_prediction_dataset("prediction", sample,prepared_imageset,batch_size)

  # One binarised 128x128 mask per instance, in annotation order.
  pred_data = []
  m = nn.Sigmoid()
  for (img, mask) in tqdm(loader):
    with torch.no_grad():
      probs = m(model(img.cuda())).cpu().numpy()
    for prob in probs:
      pred_mk = np.squeeze(prob,0)
      pred_data.append((pred_mk >= 0.4).astype(np.float32))

  if len(pred_data) > len(annotations):
    raise ValueError(
        "{} has {} predicted instance(s) but only {} annotated box(es); "
        "use get_prediction_test for un-annotated images".format(
            data["file_name"], len(pred_data), len(annotations)))

  gt = np.zeros((height, width))
  for i, instance_mask in enumerate(pred_data):
    box_x, box_y, box_w, box_h = (int(c) for c in annotations[i]["bbox"][:4])
    if box_w <= 0 or box_h <= 0:
      continue  # degenerate box: nothing to paste
    # cv2.resize takes (width, height) and returns an array of shape (h, w).
    resized = cv2.resize(instance_mask,(box_w,box_h))
    # Clip the box to the canvas so partially outside boxes cannot index
    # out of bounds (or wrap around through negative indices).
    y0, x0 = max(box_y, 0), max(box_x, 0)
    y1, x1 = min(box_y + box_h, height), min(box_x + box_w, width)
    if y1 <= y0 or x1 <= x0:
      continue
    patch = resized[y0-box_y:y1-box_y, x0-box_x:x1-box_x]
    # Later instances overwrite earlier ones where they overlap.
    gt[y0:y1, x0:x1][patch > 0.5] = i+1

  img = Image.open(data["file_name"])

  if len(annotations)!=0:
    gt_mask = torch.from_numpy(gt_mask)
  else:
    gt_mask = gt
  gt = torch.from_numpy(gt)
  gt = gt.cuda()
  return img, gt_mask, gt
  

In [None]:
### function for getting mask for test image
def get_prediction_test(data,prepared_imageset, model=None):
  """Predict the instance-label mask of one (un-annotated) test image.

  The detected boxes and crops cached in `prepared_imageset` are segmented
  with the segmentation network, resized back to the box size and pasted
  into a full-size canvas with label i+1 for the i-th detection. The 128x128
  predictions are also written back into the cached records as "obj_mask".

  Args:
    data: dataset record with "file_name", "width" and "height".
    prepared_imageset: cached records whose annotations hold a
      (x1, y1, x2, y2) "bbox", "obj_img" and "obj_mask".
    model: optional, already loaded segmentation model. When None (default)
      the final checkpoint is loaded from BASE_DIR/output on every call.

  Returns:
    (original_img, gt_mask, pred): the PIL image, the predicted label mask as
    a numpy array, and the same mask as a CUDA tensor.

  Raises:
    LookupError: if `prepared_imageset` has no record for this image.
  """
  original_img = Image.open(data["file_name"])
  print(data["file_name"])

  sample = [rec for rec in prepared_imageset
            if rec["file_name"] == data["file_name"]]
  if not sample:
    raise LookupError(
        "no cached crops found for image {!r}".format(data["file_name"]))
  # Boxes are read from the last matching record.
  instance_sample = sample[-1]

  print(len(instance_sample["annotations"]))

  width = data["width"]
  height = data["height"]

  batch_size = 8
  if model is None:
    model = MyModel().cuda()
    model.load_state_dict(torch.load('{}/output/final_segmentation_model.pth'.format(BASE_DIR)))
  model = model.eval() # changing the model to evaluation mode will fix the batchnorm layers

  loader, dataset = get_prediction_dataset("prediction", sample,prepared_imageset, batch_size)

  # One binarised 128x128 mask per detection, in detection order.
  pred_data = []
  m = nn.Sigmoid()
  for (img, mask) in tqdm(loader):
    with torch.no_grad():
      probs = m(model(img.cuda())).cpu().numpy()
    for prob in probs:
      pred_mk = np.squeeze(prob,0)
      pred_data.append((pred_mk >= 0.4).astype(np.float32))

  # Store the predicted masks in the cache, once, after all batches are done.
  for rec in sample:
    for i, pred_mk in enumerate(pred_data):
      rec["annotations"][i]["obj_mask"] = pred_mk

  gt = np.zeros((height, width))
  for i, instance_mask in enumerate(pred_data):
    x_min, y_min, x_max, y_max = (
        int(c) for c in instance_sample["annotations"][i]["bbox"][:4])
    box_w, box_h = x_max - x_min, y_max - y_min
    if box_w <= 0 or box_h <= 0:
      continue  # degenerate box: nothing to paste
    # cv2.resize takes (width, height) and returns an array of shape (h, w).
    resized = cv2.resize(instance_mask,(box_w,box_h))
    # Clip the box to the canvas so partially outside boxes cannot index
    # out of bounds (or wrap around through negative indices).
    y0, x0 = max(y_min, 0), max(x_min, 0)
    y1, x1 = min(y_max, height), min(x_max, width)
    if y1 <= y0 or x1 <= x0:
      continue
    patch = resized[y0-y_min:y1-y_min, x0-x_min:x1-x_min]
    # Later detections overwrite earlier ones where they overlap.
    gt[y0:y1, x0:x1][patch > 0.5] = i+1

  gt_mask = gt
  gt = torch.from_numpy(gt)
  gt = gt.cuda()
  return original_img, gt_mask, gt

In [None]:
""" test train image mask
my_data_list = DatasetCatalog.get("plane_{}".format('train'))
sample = my_data_list[0]
#print(len(sample["annotations"]))
img, gt_mask, pred_mask = get_prediction_mask(sample,cropped)

plt.imshow(img, cmap='gray')

plt.figure()

plt.imshow(gt_mask, cmap='gray')
#plt.imsave('filename.png', gt_mask, cmap=cm.gray)
plt.figure()
plt.imshow(pred_mask.cpu(), cmap='gray')
#plt.imsave('filename.png', pred_mask.cpu(), cmap=cm.gray)
plt.figure()



"""

In [None]:
""" test test image mask
my_data_list = DatasetCatalog.get("plane_{}".format('test'))
sample = my_data_list[27]
#print(len(sample["annotations"]))
img, gt_mask, pred_mask = get_prediction_test(sample,test_crop)


plt.imshow(img, cmap='gray')
#plt.imsave('testimage1.png', img#, cmap=cm.gray)
plt.figure()

plt.imshow(gt_mask, cmap='gray')
plt.imsave('testpredict3.png', gt_mask, cmap=cm.gray)
plt.figure()
plt.imshow(pred_mask.cpu(), cmap='gray')
#plt.imsave('filename.png', pred_mask, cmap=cm.gray)
plt.figure()
"""

In [None]:
 #visualize test obj_mask
"""
my_data_list = DatasetCatalog.get("plane_{}".format('test'))
sample = my_data_list[11]
#print(len(sample["annotations"]))
img, gt_mask, pred_mask = get_prediction_test(sample,test_crop)
 

obj_img, obj_mask = get_instance_sample(sample, 9, test_crop)
plt.imshow(obj_img, cmap='gray')
plt.imsave('plane3.png', obj_img, cmap=cm.gray)
plt.figure()
plt.imshow(obj_mask, cmap='gray')
plt.imsave('planepredict3.png', obj_mask, cmap=cm.gray)
plt.figure()
"""

### Visualization and Submission

In [None]:
'''
# Visualise the output prediction as well as the GT Mask and Input image for a sample input
# TODO: approx 10 lines
'''



In [None]:
'''
# ref: https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
# https://www.kaggle.com/c/airbus-ship-detection/overview/evaluation
'''
def rle_encoding(x):
    '''
    Run-length encode a binary mask.

    x: mask as a torch tensor, or anything torch.as_tensor accepts (e.g. a
       numpy array). Elements equal to 1 after conversion to integers are
       foreground; everything else is background.
    Returns: [] when the mask has no foreground element; otherwise a string
       "start length start length ..." with one pair per run, where start is
       the 0-based position of the run in the flattened mask.
    '''
    x = torch.as_tensor(x)
    dots = torch.where(torch.flatten(x.long()) == 1)[0]
    if len(dots) == 0:
        return []
    # Build the helper tensors on the mask's own device so the function works
    # on CPU and GPU tensors alike instead of requiring CUDA.
    device = dots.device
    # A new run begins wherever a foreground position is not the successor of
    # the previous foreground position.
    breaks = torch.where(dots[1:] != dots[:-1] + 1)[0] + 1
    run_first = torch.cat((torch.zeros(1, dtype=torch.long, device=device), breaks))
    run_end = torch.cat((breaks, torch.tensor([len(dots)], dtype=torch.long, device=device)))
    starts = dots[run_first]
    lengths = run_end - run_first
    # Interleave as start0, length0, start1, length1, ...
    pairs = torch.stack((starts, lengths), dim=1).flatten()
    return ' '.join(str(value) for value in pairs.tolist())

In [None]:
# Build the Kaggle submission file.
# You need to upload the resulting csv file on kaggle.
# The speed of your code in the previous parts highly affects the running time of this part.
#
# Each row pairs an image id with the run-length encoding of one predicted
# instance. Predictions for the training set are written first, then the test set.

preddic = {"ImageId": [], "EncodedPixels": []}

# Both splits go through the same steps; they differ only in the prediction
# function and the crop argument handed to it.
for split, predict_fn, crop in (("train", get_prediction_mask, cropped),
                                ("test", get_prediction_test, test_crop)):
  my_data_list = DatasetCatalog.get("plane_{}".format(split))
  for i in tqdm(range(len(my_data_list)), position=0, leave=True):
    sample = my_data_list[i]
    # Image id = file name without its directory and 4-character extension.
    # It is stored on the sample before predicting, in case the prediction
    # function reads it (TODO confirm against its definition).
    sample['image_id'] = sample['file_name'].split("/")[-1][:-4]

    img, true_mask, pred_mask = predict_fn(sample, crop)
    inds = torch.unique(pred_mask)
    instance_ids = inds[inds != 0]  # id 0 is treated as background
    if len(instance_ids) == 0:
      # Nothing predicted for this image: keep one placeholder row for it.
      preddic['ImageId'].append(sample['image_id'])
      preddic['EncodedPixels'].append([])
      continue
    for index in instance_ids:
      encPix = rle_encoding(pred_mask == index)
      preddic['ImageId'].append(sample['image_id'])
      preddic['EncodedPixels'].append(encPix)

# Give pandas the path so it opens and closes the file itself, even on error.
pd.DataFrame(preddic).to_csv("{}/pred.csv".format(BASE_DIR), index=False)


## Part 4: Mask R-CNN

For this part, follow the same procedure as in Part 2, but with the Mask R-CNN configs; the other parts are generally the same as in Part 2.

### Data Loader

In [14]:
# Part 4 configuration: fine-tune a model-zoo Mask R-CNN on the plane dataset.
# One zoo entry names both the config file and the pretrained checkpoint.
MASK_RCNN_ZOO_ENTRY = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.OUTPUT_DIR = "{}/output/".format(BASE_DIR)
cfg.merge_from_file(model_zoo.get_config_file(MASK_RCNN_ZOO_ENTRY))

# Every override below comes after merge_from_file so the zoo config cannot overwrite it.

# Datasets and data loading
cfg.DATASETS.TRAIN = ("plane_train",)
cfg.DATASETS.TEST = ("plane_test",)
cfg.DATALOADER.NUM_WORKERS = 4

# Model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MASK_RCNN_ZOO_ENTRY)  # Let training initialize from model zoo
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

# Solver
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1500



### Network

### Training

In [None]:
# Train Mask R-CNN using the settings assembled in `cfg`.
# Make sure the output directory exists before the trainer is created.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
# resume=False (the default): per the detectron2 docs this loads cfg.MODEL.WEIGHTS
# instead of continuing from a previous checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)   #default False

trainer.train()



### Evaluation and Visualization

In [17]:
# Build a predictor from the weights saved in cfg.OUTPUT_DIR.
# Note: both assignments modify the shared `cfg` in place, so any cell run after
# this one that reads cfg.MODEL.WEIGHTS sees model_final.pth, not the model-zoo URL.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
# Run COCO-style evaluation of the trained model and print the resulting metrics.
# NOTE(review): OUTPUT_DIR is not assigned anywhere in this part (only cfg.OUTPUT_DIR is);
# presumably it comes from an earlier cell - confirm it exists on a fresh kernel.
print(OUTPUT_DIR)

# NOTE(review): the evaluator writes to "./output/" relative to the working directory,
# which is not necessarily cfg.OUTPUT_DIR.
evaluator = COCOEvaluator("plane_train", cfg, False, output_dir= "./output/")
# Despite its name, val_loader iterates the "plane_train" split.
val_loader = build_detection_test_loader(cfg, "plane_train")
# Uses trainer.model, so the training cell must have been run in this kernel session.
print(inference_on_dataset(trainer.model, val_loader, evaluator))

In [None]:
# Draw the predictor's output on three randomly chosen test images, show each
# rendering, and save it as part4predict<N>.png in the working directory.
dataset_dicts = get_detection_data("test")
for count, d in enumerate(random.sample(dataset_dicts, 3), start=1):
    im = cv2.imread(d["file_name"])
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError("Could not read image: {}".format(d["file_name"]))
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # NOTE(review): balloon_metadata is not defined in this part; presumably it
    # comes from an earlier cell - confirm it exists on a fresh kernel.
    # The [:, :, ::-1] slices reverse the channel order going into the
    # Visualizer and again coming out of it.
    v = Visualizer(im[:, :, ::-1], metadata=balloon_metadata, scale=0.3)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    rendered = out.get_image()[:, :, ::-1]
    cv2_imshow(rendered)
    cv2.imwrite("part4predict{}.png".format(count), rendered)
   