# Detectron2 for Instance Segmentation of Individual Slum Units

### Install detectron2 and import other dependencies

Acknowledgement: much of this code is lifted and amended from [Detectron2's Colab Guide](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5). 

All comments are my own.

In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import detectron2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.utils.logger import setup_logger
import os, cv2, json, torch, random, shutil, glob
from detectron2 import model_zoo
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.engine import DefaultTrainer, DefaultPredictor, hooks
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader, MetadataCatalog, DatasetCatalog



### Data Preprocessing

The detectron2 library requires its input .json files to have a specific file structure, as indicated [here](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html).

The following code cells first copy the .json file of the slum dataset from input to working. The get_COCOFormat_data() function is used to process the .json file, which was obtained from makesense.ai (object detection, labeling, followed by exporting to COCO-formatted .json file). Note the line 'bbox_mode': BoxMode.XYWH_ABS. This means that the bounding box annotations are of the form (x1,y1,width,height). This is the correct form to use for makesense's output. Read [here](https://detectron2.readthedocs.io/en/latest/modules/structures.html?highlight=boxmode#detectron2.structures.BoxMode) for more details if your dataset is in another format.

In [None]:
# Stage a copy of the slum annotation .json in the working directory so that
# later cells can read (and, if needed, modify) it there.
source_json_kd = 'kibera_dharavi_json.json'
source_path_kd, copy_to_kd = '/kaggle/input/kibera-dharavi/kibera_dharavi/', '/kaggle/working/'

# Same file name on both sides; only the containing folder differs.
source_file, target_file = (
    os.path.join(folder, source_json_kd) for folder in (source_path_kd, copy_to_kd)
)

shutil.copy(source_file, target_file)
print(f'Json file copied to {copy_to_kd}')


In [None]:
def get_COCOFormat_data(json_file, path_to_images):
    """Turn a COCO-style annotation file into a list of per-image records.

    Args:
        json_file: path to a .json file containing 'images' and 'annotations'
            lists.
        path_to_images: directory that is joined in front of every image's
            'file_name'.

    Returns:
        A list with one dict per annotated image, in order of first appearance
        in the annotation list. Each dict has 'file_name', 'image_id',
        'height', 'width' and 'annotations'. Images without annotations are
        not included, and annotations whose 'image_id' is not listed under
        'images' are skipped.
    """
    with open(json_file, 'r') as handle:
        coco = json.load(handle)

    # image id -> the few image fields the records need
    images_by_id = {}
    for img in coco['images']:
        images_by_id[img['id']] = {
            'file_name': img['file_name'],
            'width': img['width'],
            'height': img['height'],
        }

    records = {}
    for ann in tqdm(coco['annotations']):
        img_id = ann['image_id']
        if img_id not in images_by_id:
            continue

        instance = {
            'bbox': ann['bbox'],
            'bbox_mode': BoxMode.XYWH_ABS,  # bbox of form (x1,y1,width,height)
            'segmentation': ann.get('segmentation', []),
            # every annotation in this file is a slum -> class 0
            # (detectron2 requires the max category id < num_classes)
            'category_id': 0,
        }

        record = records.get(img_id)
        if record is None:
            # First annotation seen for this image: open its record.
            meta = images_by_id[img_id]
            record = {
                'file_name': os.path.join(path_to_images, meta['file_name']),
                'image_id': img_id,
                'height': meta['height'],
                'width': meta['width'],
                'annotations': [],
            }
            records[img_id] = record
        record['annotations'].append(instance)

    return list(records.values())

# Build the slum (Kibera / Dharavi) records from the staged annotation file.
kd_dict_data = get_COCOFormat_data(
    json_file='/kaggle/working/kibera_dharavi_json.json',
    path_to_images='/kaggle/input/kibera-dharavi/kibera_dharavi/kibera_dharavi_tiles',
)

In [None]:
# iSAID Data

def get_iSAID_data():
    """Load the iSAID validation annotations as a list of per-image records.

    Returns:
        A list with one dict per annotated image, in order of first appearance
        in the annotation list. Each dict has 'file_name', 'image_id',
        'height', 'width' and 'annotations'. Each annotation keeps its own
        category id from the file ('category_id' is -1 when the id is not
        listed under 'categories'). Annotations whose 'image_id' is not listed
        under 'images' are skipped.
    """
    # this .json file fixes an error in image 2678's (h,w) annotation
    annotation_path = '/kaggle/input/isaid-coco/iSAID_COCO_with_fixed_val/iSAID_COCO_with_fixed_val/iSAID_val_fixed.json'
    # where the real images actually are
    image_root = '/kaggle/input/isaid-dota-validation-images/images'

    with open(annotation_path, 'r') as fh:
        isaid = json.load(fh)

    images_by_id = {
        img['id']: {
            'file_name': img['file_name'],
            'width': img['width'],
            'height': img['height'],
        }
        for img in isaid['images']
    }

    # do int(category['id'])-1 if you have no category id of 0
    category_ids = {cat['id']: int(cat['id']) for cat in isaid['categories']}

    records = {}
    for ann in tqdm(isaid['annotations']):
        img_id = ann['image_id']
        if img_id not in images_by_id:
            continue

        instance = {
            'bbox': ann['bbox'],
            'bbox_mode': BoxMode.XYWH_ABS,
            'segmentation': ann.get('segmentation', []),
            'category_id': category_ids.get(ann['category_id'], -1),  # -1 for not found
        }

        if img_id in records:
            records[img_id]['annotations'].append(instance)
            continue

        # First annotation seen for this image: open its record.
        meta = images_by_id[img_id]
        records[img_id] = {
            'file_name': os.path.join(image_root, meta['file_name']),
            'image_id': img_id,
            'height': meta['height'],
            'width': meta['width'],
            'annotations': [instance],
        }

    return list(records.values())

# Build the iSAID records once; they are merged with the slum records when the data is split.
iSAID_dict_data = get_iSAID_data()

### Splitting and registering the data

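The following code cells split the data into training, validation, and test sets. The datasets are then registered. Note for registration: detectron2 expects a function that takes no arguments, so if your data-loading function needs arguments, wrap it in a lambda, e.g. DatasetCatalog.register(_train, lambda: get_data_dict(arg1, arg2))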


In [None]:
random.seed(10)  # fixed seed so the shuffle, and therefore the split, is reproducible

# Pool the slum and iSAID records, then shuffle so every split mixes both sources.
image_data = kd_dict_data + iSAID_dict_data
print("Merged dataset size:", len(image_data))
random.shuffle(image_data)

# 1/2 for training, 1/6 for validation, whatever remains (~1/3) for testing.
n_images = len(image_data)
train_size, val_size = n_images // 2, n_images // 6
test_size = n_images - train_size - val_size

train_data = image_data[:train_size]
val_data = image_data[train_size:][:val_size]
test_data = image_data[train_size + val_size:]

print('Data split.')
print("Train set size:", len(train_data))
print("Validation set size:", len(val_data))
print("Test set size:", len(test_data))


# Zero-argument callables, one per split. These are the functions handed to
# DatasetCatalog.register when the datasets are registered with detectron2.
def get_train_data():
    """Return the module-level ``train_data`` list (the training split)."""
    return train_data

def get_val_data():
    """Return the module-level ``val_data`` list (the validation split)."""
    return val_data

def get_test_data():
    """Return the module-level ``test_data`` list (the test split)."""
    return test_data


In [None]:
# Register the train / val / test splits with detectron2.

# A dataset name can only be registered once per session, so bump this version
# number whenever you need to re-run this cell without restarting.
v = 6
_train = f'train_slums_iSAID_v{v}'
_val = f'val_slums_iSAID_v{v}'
_test = f'test_slums_iSAID_v{v}'

# replace as needed
classes = [
    'slum',
    'storage_tank',
    'Large_Vehicle',
    'Small_Vehicle',
    'ship',
    'Harbor',
    'baseball_diamond',
    'Ground_Track_Field',
    'Soccer_ball_field',
    'Swimming_pool',
    'Roundabout',
    'tennis_court',
    'basketball_court',
    'plane',
    'Helicopter',
    'Bridge',
]

# You MUST pass a function which returns the correctly formatted data
# (wrap it in a lambda if your function takes arguments).
for _name, _loader in (
    (_train, get_train_data),
    (_val, get_val_data),
    (_test, get_test_data),
):
    DatasetCatalog.register(_name, _loader)

# The class names are attached to each split's metadata; needed for future visualizations.
train_slums_iSAID_metadata = MetadataCatalog.get(_train)
val_slums_iSAID_metadata = MetadataCatalog.get(_val)
test_slums_iSAID_metadata = MetadataCatalog.get(_test)
for _split_metadata in (
    train_slums_iSAID_metadata,
    val_slums_iSAID_metadata,
    test_slums_iSAID_metadata,
):
    _split_metadata.set(thing_classes=classes)

print('Datasets registered.')

In [None]:
#Keep this around if you want to clear your directory for whatever reason.

#shutil.rmtree("/kaggle/working/output")

### Model Training

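This version of the notebook contains code to train the Cascade Mask R-CNN (X-152), with the option of resuming from the 1000th iteration of my previous training. `cfg.MODEL.WEIGHTS` is the setting that decides which weights training starts from, which is why the path to that earlier checkpoint (`weight_path_152`) is kept in the cell below.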

In [None]:
# Fine-tuning the Cascade Mask R-CNN X-152 (MRCNN_X152).
# The R101 config name is kept so the model can be swapped easily.

MRCNN_R101_FPN_3x = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
MRCNN_X152 = "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml"

# Total iterations to run, and how many of them each trainer in the loop below runs
total_iterations = 20
iterations_per_loop = 10
# Checkpoint from my earlier 1000-iteration run; assign it to cfg.MODEL.WEIGHTS to continue from it
weight_path_152 = '/kaggle/input/152-1000-model-final/152_1000_model_final.pth'

setup_logger()
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MRCNN_X152))  # model configuration (architecture + defaults)
cfg.DATASETS.TRAIN = (_train,)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 4 # Faster than 2 or 8
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MRCNN_X152)  # pre-trained weights (or use weight_path_152)
# Alternative: start from the last model trained in this session
#cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.SOLVER.IMS_PER_BATCH = 1  # Kaggle's P100 can seem to handle only 1
cfg.SOLVER.BASE_LR = 0.00025  # set small due to fine-tuning, increase if randomly initialized model or adjust accordingly.
cfg.SOLVER.MAX_ITER = total_iterations
cfg.SOLVER.STEPS = []
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 16
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


# Training loop to mitigate chances of a Kaggle-crash: train in chunks of
# `iterations_per_loop`, resuming from the previous chunk's checkpoint.
# Set resume=False to train the model just the one time, and take it out of the loop
for iteration in range(0, total_iterations, iterations_per_loop):
    # DefaultTrainer reads cfg.SOLVER.MAX_ITER when it is constructed, so the
    # stopping point for this chunk must be set BEFORE building the trainer.
    # Capped so the last chunk never runs past total_iterations.
    cfg.SOLVER.MAX_ITER = min(iteration + iterations_per_loop, total_iterations)
    trainer = DefaultTrainer(cfg)
    # First chunk loads cfg.MODEL.WEIGHTS; later chunks resume from the last checkpoint in OUTPUT_DIR.
    trainer.resume_or_load(resume=iteration > 0)
    trainer.train()

# After training, set the weights for inference
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")


### Inference

The following code cells return the COCO evaluation of the model on the test set, and also visualize an image and its corresponding predicted labels. If you've just trained your model a little bit, and loss is still high, you might get a no predictions error. Consider training for longer, or decreasing cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST (set to 0.5 below) to display annotations which have a low likelihood of being correct.

In [None]:
# COCO-style evaluation of the current cfg.MODEL.WEIGHTS on the test split.
# Feel free to re-point cfg.MODEL.WEIGHTS first, e.g.
#   cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)
test_loader = build_detection_test_loader(cfg, _test)
evaluator = COCOEvaluator(_test, output_dir="./output")
coco_results = inference_on_dataset(predictor.model, test_loader, evaluator)
print(coco_results)

In [None]:
# Run the trained model on a single tile and save the visualised predictions.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold

predictor = DefaultPredictor(cfg)

image_path = "/kaggle/input/kibera-dharavi/kibera_dharavi/kibera_dharavi_tiles/dharavi_tile_5_2.jpg"
im = cv2.imread(image_path)  # OpenCV loads images in BGR channel order
if im is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable
    raise FileNotFoundError(f"Could not read image: {image_path}")
outputs = predictor(im)

# The Visualizer works in RGB, hence the channel flip of the BGR image.
# NOTE: `v` is also the name of the dataset-version number in the registration cell.
v = Visualizer(im[:, :, ::-1],
               metadata=test_slums_iSAID_metadata,
               scale=0.5,
               instance_mode=ColorMode.IMAGE_BW)

# Output and save prediction
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# get_image() is already RGB, which is what matplotlib expects. Flipping it
# again (needed only for OpenCV display functions) would swap red and blue.
plt.imshow(out.get_image())
output_path = '/kaggle/working/output_image.jpg'
plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
plt.close()


In [None]:
# Please email me at raphael@uni.minerva.edu if you have any questions