In [None]:
from davis import *

## 1. Load up the `test_results_dict` downloaded from the Google Drive.

If the predictions were downloaded in fragments, combine the dictionaries into a single dictionary first.  
`train_results_dict` and `val_results_dict` can be loaded the same way (if predictions were generated for those splits).

In [None]:
import pickle

# SECURITY NOTE(review): pickle.load can execute arbitrary code embedded in
# the file. These pickles are downloaded artifacts, so only load files whose
# origin you trust.

# Predictions for the test split; later cells index it by image_id and read
# the 'masks', 'rois', 'class_ids' and 'scores' entries.
test_results_dict = {}
with open('test_results_dict.pickle', 'rb') as fp:
    test_results_dict = pickle.load(fp)
    
# Predictions for the training split.
train_results_dict = {}
with open('train_results_dict.pickle', 'rb') as fp:
    train_results_dict = pickle.load(fp)
    
# Predictions for the validation split.
val_results_dict = {}
with open('val_results_dict.pickle', 'rb') as fp:
    val_results_dict = pickle.load(fp)

In [None]:
# The two lists are parallel and their order matters: get_mask() indexes
# results_dicts with the position of a dataset in `datasets`
# (0 - train, 1 - val, 2 - test).
datasets = [dataset_train, dataset_val, dataset_test]
results_dicts = [train_results_dict, val_results_dict, test_results_dict]

In [None]:
"""
The folder name to which each prediction is to be written is
added on to the dataset. This could've been done when the
images were actually loaded to the dataset (load_images).
"""
for dataset in datasets:
    for image_id in dataset.image_ids:
        info = dataset.image_info[image_id]
        # The parent directory of the frame is the name of its sequence.
        path_parts = info['path'].split('/')
        info['folder_name'] = path_parts[-2]

## 2. Write the segmentations and masks to local folders.

The target root folders should be given to `write_segmentations()` and `write_transformed_masks()` respectively.

In [None]:
# Filled in by write_segmentations(): maps a sequence (folder) name to
# {'image_ids': [...], 'dataset': <dataset object>}.
sequence_info = {}

def get_mask(i, image_id):
    """
    Given a dataset number and an image_id, returns the predicted
    mask as a 3-channel array of shape (H, W, 3), where (H, W) is
    the spatial size of the stored prediction (832 x 832 for the
    predictions used in this notebook).

    Inputs -
        i - dataset number (position in `results_dicts`):
            0 - train
            1 - val
            2 - test
        image_id - the image_id for which the mask is to
            be fetched.

    Working - Only the mask at index 0 of the predicted masks
        (described by the original author as the most certain
        one) is used; its single channel is replicated to three
        channels. If nothing was predicted (zero masks), an
        all-zero mask of the same spatial size is returned.
        This need not be the best way to proceed.

    A possible solution - Consider all the different
        predicted masks to form the final mask. A
        threshold should be set, each mask has to
        have a p% overlap with the actual image to
        be considered for the final mask.

    Returns -
        mask - a float np.ndarray of shape (H, W, 3),
            the final predicted mask for the image.

    """
    image_masks = results_dicts[i][image_id]['masks']
    if image_masks.shape[-1] == 0:
        # No instance predicted: return pure background with the same
        # height and width instead of assuming a fixed 832 x 832 frame.
        mask = np.zeros(tuple(image_masks.shape[:2]) + (3,))
    else:
        # (H, W, 1) * (1, 3) broadcasts the single channel to (H, W, 3).
        mask = image_masks[:, :, 0:1] * np.ones((1, 3))
    return mask

def write_mask(mask, mask_path):
    """
    Saves a mask as an image file.

    Inputs -
        mask - a np.ndarray holding the mask to be saved
            (in this notebook, of shape (832, 832, 3)).
        mask_path - a local location to which the
            mask image should be written.

    Working -
        The values are scaled by 255 and cast to uint8
        before writing; without the scaling a near-black
        image would be saved.
    """
    scaled = (mask * 255).astype('uint8')
    cv2.imwrite(mask_path, scaled)

def write_segmentations(segmentations_path='segmentations'):
    """
    Write the predicted segmentations in the proper folder structure:
    <segmentations_path>/<folder_name>/<pic_name>.

    Inputs -
        segmentations_path - root folder under which one sub-folder
            per sequence is created.

    Side effects -
        1. Every image_info entry gets a 'segmentation_path' key
            holding the path the prediction was written to.
        2. The global `sequence_info` gets, per sequence name, the
            list of its image_ids and the dataset they belong to.
            An image_id is recorded only once, so calling this
            function again does not duplicate frames.
    """
    def _add_to_sequence_info(sequence_name, image_id, dataset):
        # The dataset stored is the one seen first for this sequence.
        entry = sequence_info.setdefault(sequence_name, {
            'image_ids': [],
            'dataset': dataset,
        })
        # Guard against duplicates when the cell is executed repeatedly.
        if image_id not in entry['image_ids']:
            entry['image_ids'].append(image_id)

    for i, dataset in enumerate(datasets):
        for image_id in dataset.image_ids:
            image_info = dataset.image_info[image_id]
            folder_path = os.path.join(segmentations_path,
                                       image_info['folder_name'])
            _add_to_sequence_info(image_info['folder_name'], image_id, dataset)
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
            # 'pic_name' is presumably the frame's file name, set when the
            # dataset was loaded - confirm against the davis module.
            mask_path = os.path.join(folder_path,
                                     image_info['pic_name'])
            # `i` selects the results dict matching this dataset.
            mask = get_mask(i, image_id)
            dataset.image_info[image_id]['segmentation_path'] = mask_path
            write_mask(mask, mask_path)

def write_transformed_masks(masks_path, scale, padding, crop):
    """
    Resizes every ground truth mask with the given scale, padding
    and crop and writes it to <masks_path>/<folder_name>/<pic_name>,
    so that it can be compared with the predictions.

    The path written to is stored on the image_info entry under
    'transformed_mask_path'.
    """
    for dataset in datasets:
        for image_id in dataset.image_ids:
            info = dataset.image_info[image_id]
            # load_mask returns a sequence; only its first item is used here.
            ground_truth = dataset.load_mask(image_id=image_id)[0]
            resized = utils.resize_mask(ground_truth, scale, padding, crop)
            target_dir = os.path.join(masks_path, info['folder_name'])
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_path = os.path.join(target_dir, info['pic_name'])
            info['transformed_mask_path'] = target_path
            write_mask(resized, target_path)

In [None]:
"""
Need to get the scale, padding and crop used to resize
the image. The original image was 854X480. This needed to
be resized into an image of equal height and width and the
dimension should have been divisible by 2 at least 6 times.
832X832 was chosen as it was the nearest number to 854 that
satisfies this criterion.

This same transformation needs to be applied to the ground
truth masks as well in order to be consistent.
"""
# Load one frame; the next cell derives the resize parameters from it.
# NOTE(review): this assumes every frame has the same size as frame 0 of
# the test dataset - confirm.
image = dataset_test.load_image(0)

In [None]:
# Resize the reference frame with the model configuration. `image` is
# rebound to the resized frame; scale, padding and crop are reused later
# to transform the ground truth masks.
image, window, scale, padding, crop = utils.resize_image(
    image,
    min_dim=config.IMAGE_MIN_DIM,
    min_scale=config.IMAGE_MIN_SCALE,
    max_dim=config.IMAGE_MAX_DIM,
    mode=config.IMAGE_RESIZE_MODE)

In [None]:
"""
Writing the segmentations and transformed ground truth masks
in the proper folder structure as required. Need not be run
multiple times.

TODO: Alter the functions called here to skip writing the
files if the directory is found in the specified location, so
that no time is wasted even if these functions are called
multiple times.
"""
# Besides writing the files, these calls record the written paths on every
# image_info entry ('segmentation_path' / 'transformed_mask_path'); the IoU
# cells read those keys, so this cell must run before them.
write_segmentations('segmentations')
write_transformed_masks('transformed_masks', scale, padding, crop)

## 3. Methods for computing IoU and mean IoU - Computing mean IoUs over various datasets

In [None]:
def _foreground(mask):
    """Return channel 0 of `mask` as a 2-D boolean foreground array."""
    channel = np.asarray(mask)[:, :, 0]
    if channel.dtype == np.bool_:
        return channel
    # 0-255 images: foreground is anything above mid-grey, so faint noise
    # (e.g. from a lossy image format) is not counted. 0/1 masks: any
    # non-zero pixel is foreground.
    threshold = 127 if channel.size and channel.max() > 1 else 0
    return channel > threshold


def get_iou(prediction, actual):
    """
    Given the prediction and the ground truth, computes
    and returns the Jaccard index (Intersection Over Union)
    for the pair.

    The sizes of the intersection and the union are also
    returned to facilitate the aggregation over directories.

    Inputs -
        prediction - an np.ndarray with the predicted mask.
        actual - an np.ndarray with the ground truth mask.

    Assumptions -
        1. There is only a single mask of white colour
            on a black background.
        2. The prediction and the actual are of same shape
            (H, W, C); right now (832, 832, 3).

    Working -
        1. Reduce the masks to 1 channel only, this is
            because all the channels would be same, and
            binarise them (boolean, 0/1 and 0-255 masks are
            all accepted).
        2. Count the pixels of prediction & actual (intersection).
        3. Count the pixels of prediction | actual (union).
        4. Divide intersection by union. If both masks are
            empty the union is empty too; the two masks then
            agree perfectly and the IoU is defined as 1.0
            instead of dividing by zero.

    Returns -
        iou - a np.float64, the Intersection Over Union value
            for the pair of images.
        intersection_size - int, the count of pixels in intersection.
        union_size - int, the count of pixels in union.
    """
    prediction = _foreground(prediction)
    actual = _foreground(actual)
    # Counting boolean pixels yields true pixel counts; summing the raw
    # 0/255 values would inflate both sizes by a factor of 255.
    intersection_size = int(np.count_nonzero(prediction & actual))
    union_size = int(np.count_nonzero(prediction | actual))
    if union_size == 0:
        iou = np.float64(1.0)
    else:
        iou = np.float64(intersection_size) / union_size
    return iou, intersection_size, union_size

In [None]:
def get_mean_iou(prediction_paths, actual_paths):
    """
    Given the paths of predictions and of the respective
    ground truths, computes the mean IoU over all the pairs
    of images.

    Inputs -
        prediction_paths - list of str, each item in the list
            a path to a predicted segmentation.
        actual_paths - list of str, each item in the list a
            path to a ground truth.

    Assumptions -
        1. The order is maintained: the ith item in
            prediction_paths is the segmentation of the ith
            item in actual_paths.

    Working -
        1. Iterate through the lists, and keep calling
            get_iou() for each pair of items.
        2. Keep adding intersection_size and union_size
            returned by get_iou() in two variables,
            total_i and total_u respectively.
        3. mean_iou = total_i / total_u

    Returns -
        mean_iou - float, the mean Intersection Over Union over
            the given predictions and ground truths; NaN when
            there is nothing to average (no paths given, or no
            foreground pixel in any image).

    Raises -
        ValueError - if the two lists differ in length.
        IOError - if an image cannot be read.
    """
    if len(prediction_paths) != len(actual_paths):
        raise ValueError(
            'prediction_paths and actual_paths must have the same length '
            '(%d != %d)' % (len(prediction_paths), len(actual_paths)))

    total_i = 0
    total_u = 0
    for prediction_path, actual_path in zip(prediction_paths, actual_paths):
        prediction = cv2.imread(prediction_path)
        actual = cv2.imread(actual_path)
        # cv2.imread reports an unreadable file by returning None
        # instead of raising; fail here with the offending path.
        if prediction is None:
            raise IOError('Could not read prediction: %s' % prediction_path)
        if actual is None:
            raise IOError('Could not read ground truth: %s' % actual_path)
        _, intersection_size, union_size = get_iou(prediction, actual)
        total_i += intersection_size
        total_u += union_size

    if total_u == 0:
        # Undefined ratio; avoid ZeroDivisionError on empty input.
        return float('nan')
    return float(total_i) / float(total_u)

In [None]:
# Mean IoU over the training split
prediction_paths, actual_paths = [], []
for info in dataset_train.image_info:
    prediction_paths.append(info['segmentation_path'])
    actual_paths.append(info['transformed_mask_path'])
mean_training_iou = get_mean_iou(prediction_paths, actual_paths)

In [None]:
# Mean IoU over the validation split
prediction_paths, actual_paths = [], []
for info in dataset_val.image_info:
    prediction_paths.append(info['segmentation_path'])
    actual_paths.append(info['transformed_mask_path'])
mean_validation_iou = get_mean_iou(prediction_paths, actual_paths)

In [None]:
# Mean IoU over the test split (only the paths are collected; the
# computation itself is left disabled, as in the original cell)
prediction_paths, actual_paths = [], []
for info in dataset_test.image_info:
    prediction_paths.append(info['segmentation_path'])
    actual_paths.append(info['transformed_mask_path'])
# mean_testing_iou = get_mean_iou(prediction_paths, actual_paths)

In [None]:
# Mean IoU over all three splits together
prediction_paths = [
    info['segmentation_path']
    for split in datasets
    for info in split.image_info
]
actual_paths = [
    info['transformed_mask_path']
    for split in datasets
    for info in split.image_info
]
mean_overall_iou = get_mean_iou(prediction_paths, actual_paths)

In [None]:
# Getting the mean IOU over each video sequence - 
from collections import defaultdict

def get_sequence_wise_ious(datasets):
    """
    Computes the mean IoU of every video sequence.

    Inputs -
        datasets - iterable of datasets; every entry of their
            image_info must carry 'folder_name',
            'segmentation_path' and 'transformed_mask_path'.

    Returns -
        dict mapping each sequence (folder) name to the value
        get_mean_iou() returns for the frames of that sequence.
    """
    paths_by_sequence = {}
    for dataset in datasets:
        for info in dataset.image_info:
            # One bucket of path lists per sequence, created on first use.
            bucket = paths_by_sequence.setdefault(info['folder_name'],
                                                  defaultdict(list))
            bucket['prediction_paths'].append(info['segmentation_path'])
            bucket['actual_paths'].append(info['transformed_mask_path'])

    return {
        name: get_mean_iou(bucket['prediction_paths'], bucket['actual_paths'])
        for name, bucket in paths_by_sequence.items()
    }

In [None]:
# Per-sequence mean IoU, keyed by sequence (folder) name. The result is bound
# to both names because later cells read it as `sequence_wise_ious` as well
# as `mean_ious`; previously only `mean_ious` was defined, which raised a
# NameError on a fresh kernel.
sequence_wise_ious = get_sequence_wise_ious(datasets)
mean_ious = sequence_wise_ious

## 4. Getting mean IoU over different video sequence attributes.

db_info.yml is provided along with the DAVIS dataset. This file provides information on the different attributes encountered in the different video sequences.

In [None]:
import yaml
from collections import defaultdict

def get_db_info():
    """
    Reads db_info.yml (shipped with the DAVIS dataset) from the
    current working directory and returns its parsed content.
    """
    with open("db_info.yml", 'r') as stream:
        # safe_load only builds plain Python objects. Calling yaml.load
        # without an explicit Loader is deprecated and rejected by newer
        # PyYAML releases.
        db_info = yaml.safe_load(stream)
    return db_info

db_info = get_db_info()

In [None]:
# One set of sequence (folder) names per split, in the same order as `datasets`.
train_sequences, val_sequences, test_sequences = set(), set(), set()
sequences = [train_sequences, val_sequences, test_sequences]
for ix, dataset in enumerate(datasets):
    print (ix)
    sequences[ix].update(
        dataset.image_info[image_id]['folder_name']
        for image_id in dataset.image_ids
    )

In [None]:
# List copy of the test sequence names; a set has no defined order, so the
# order of this list is arbitrary.
test_sequences_list = list(test_sequences)

In [None]:
# Label every evaluated sequence with the split it belongs to; anything that
# is neither a train nor a validation sequence is labelled 'test'.
sequence_dataset_dict = {
    sequence: ('train' if sequence in train_sequences
               else 'validation' if sequence in val_sequences
               else 'test')
    for sequence in sequence_wise_ious.keys()
}

In [None]:
# Per-sequence mean IoUs, filtered down to each split in turn.
train_ious, val_ious, test_ious = (
    {name: value for name, value in sequence_wise_ious.items() if name in split}
    for split in (train_sequences, val_sequences, test_sequences)
)

In [None]:
# Split the db_info entries by their 'set' field: entries marked 'test' are
# treated as validation sequences here, every other entry as training.
# NOTE: from this cell on, train_sequences / val_sequences are lists of
# db_info entries (dicts), no longer sets of sequence names.
val_sequences = [entry for entry in db_info['sequences']
                 if entry['set'] == 'test']
train_sequences = [entry for entry in db_info['sequences']
                   if entry['set'] != 'test']

In [None]:
# sequence name -> list of attributes, per db_info split
train_attributes_dict = {entry['name']: entry['attributes'] for entry in train_sequences}
val_attributes_dict = {entry['name']: entry['attributes'] for entry in val_sequences}


attributes = db_info['attributes']

# attribute -> names of the sequences having it
train_attribute_sequences_dict = defaultdict(list)
val_attribute_sequences_dict = defaultdict(list)
for inverted, by_sequence in (
        (train_attribute_sequences_dict, train_attributes_dict),
        (val_attribute_sequences_dict, val_attributes_dict)):
    for sequence_name, tags in by_sequence.items():
        for tag in tags:
            inverted[tag].append(sequence_name)

# attribute -> number of sequences having it
train_attributes = {tag: len(names) for tag, names in train_attribute_sequences_dict.items()}
val_attributes = {tag: len(names) for tag, names in val_attribute_sequences_dict.items()}

In [None]:
# Attributes of the sequences actually present in the train / val datasets.
# The membership test needs sequence *names*. By this point train_sequences
# and val_sequences hold db_info entries (dicts), so testing a name against
# them never matched and both dicts came out empty. The names are therefore
# taken from the datasets themselves.
# NOTE(review): assumes the intent was the dataset-derived splits - confirm.
train_sequence_names = {info['folder_name'] for info in dataset_train.image_info}
val_sequence_names = {info['folder_name'] for info in dataset_val.image_info}

final_train_attributes_dict = {sequence['name']: sequence['attributes'] for sequence in db_info['sequences'] if
                                  sequence['name'] in train_sequence_names}
final_val_attributes_dict = {sequence['name']: sequence['attributes'] for sequence in db_info['sequences'] if
                                  sequence['name'] in val_sequence_names}

In [None]:
def get_attributes_count(attributes):
    """
    Counts how often each attribute occurs.

    Inputs -
        attributes - iterable of attribute lists (one list
            per sequence).

    Returns -
        dict mapping every attribute to the number of lists
        entries it appears in (repeats within a list count too).
    """
    tally = {}
    every_tag = (tag for tags in attributes for tag in tags)
    for tag in every_tag:
        if tag in tally:
            tally[tag] += 1
        else:
            tally[tag] = 1
    return tally

In [None]:
# attribute -> number of sequences having it. Note that this rebinds
# train_attributes / val_attributes, which an earlier cell already computed
# from the db_info split.
train_attributes = get_attributes_count(final_train_attributes_dict.values())
val_attributes = get_attributes_count(final_val_attributes_dict.values())

In [None]:
# Sequences held out as the test split; the remaining sequences of
# val_attributes_dict form the actual validation split.
test_folders = ['soapbox', 'scooter-black', 'parkour', 'paragliding-launch', 'motocross-jump']
test_attributes_dict = {}
split_val_attributes_dict = {}
for sequence_name, tags in val_attributes_dict.items():
    if sequence_name in test_folders:
        test_attributes_dict[sequence_name] = tags
    else:
        split_val_attributes_dict[sequence_name] = tags

In [None]:
# attribute -> names of the sequences having it, for the test split and for
# the remaining validation split
test_attribute_sequences_dict = defaultdict(list)
split_val_attribute_sequences_dict = defaultdict(list)
for inverted, by_sequence in (
        (test_attribute_sequences_dict, test_attributes_dict),
        (split_val_attribute_sequences_dict, split_val_attributes_dict)):
    for sequence_name, tags in by_sequence.items():
        for tag in tags:
            inverted[tag].append(sequence_name)

# attribute -> number of sequences having it
test_attributes = {tag: len(names) for tag, names in test_attribute_sequences_dict.items()}
split_val_attributes = {tag: len(names) for tag, names in split_val_attribute_sequences_dict.items()}

In [None]:
# sequence name -> list of attributes, over all db_info sequences
sequence_attributes_dict = {entry['name']: entry['attributes'] for entry in db_info['sequences']}
attributes = db_info['attributes']

# attribute -> names of the sequences having it
attribute_sequences_dict = defaultdict(list)
tag_name_pairs = ((tag, sequence_name)
                  for sequence_name, tags in sequence_attributes_dict.items()
                  for tag in tags)
for tag, sequence_name in tag_name_pairs:
    attribute_sequences_dict[tag].append(sequence_name)

In [None]:
def get_paths(sequence_names, sequence_info):
    """
    Collects, for the given sequences, the paths of the predicted
    segmentations and of the transformed ground truth masks of
    all their frames.

    Input -
        sequence_names - list of str, names of video sequences.
        sequence_info - dict keyed by sequence name; each value
            holds the 'dataset' the sequence belongs to and the
            'image_ids' of its frames.
    Returns -
        prediction_paths, actual_paths - two lists of str in the
            same frame order: the prediction paths and the
            ground truth paths for these sequences.
    """
    frame_infos = [
        sequence_info[name]['dataset'].image_info[image_id]
        for name in sequence_names
        for image_id in sequence_info[name]['image_ids']
    ]
    prediction_paths = [info['segmentation_path'] for info in frame_infos]
    actual_paths = [info['transformed_mask_path'] for info in frame_infos]
    return prediction_paths, actual_paths

In [None]:
# Mean IoU over all frames of the sequences sharing an attribute
attribute_wise_mean_ious = {
    attribute: get_mean_iou(
        *get_paths(attribute_sequences_dict[attribute], sequence_info))
    for attribute in attributes
}
# Attributes ordered from lowest to highest mean IoU
sorted(attribute_wise_mean_ious, key=attribute_wise_mean_ious.get)

In [None]:
# Per-sequence summary: {'attributes': [...], 'mean_iou': ...}.
# The attributes come from sequence_attributes_dict (sequence name -> list of
# attributes for every db_info sequence); the name `attributes_dict` used
# here before is not defined anywhere in the notebook.
sequences = {
    name: {'attributes': tags}
    for name, tags in sequence_attributes_dict.items()
}
for name, mean_iou in mean_ious.items():
    # setdefault keeps a sequence that has an IoU but no db_info entry
    # instead of failing with a KeyError.
    sequences.setdefault(name, {})['mean_iou'] = mean_iou

## 5. Appendix - visualisation

In [None]:
def get_ax(rows=1, cols=1, size=8):
    """Create a Matplotlib figure and return its Axes (or array of
    Axes), so that all visualizations in the notebook share one
    place controlling the graph sizes.

    `size` is the edge length used per subplot; change its default
    to control the size of rendered images.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

class InferenceConfig(DAVISConfig):
    """DAVISConfig with the overrides used for the visual checks below."""
    # NOTE(review): one GPU with one image per GPU presumably yields a batch
    # size of 1 - confirm against the base configuration.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Presumably detections scoring below this confidence are dropped -
    # TODO confirm against the base configuration.
    DETECTION_MIN_CONFIDENCE = 0.8

inference_config = InferenceConfig()

In [None]:
# Checking on a random image from test dataset
# (the id is fixed, so the same frame is shown on every run)
image_id = 1300
# Load the frame together with its ground truth annotations.
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_test, inference_config, 
                           image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Draw the ground truth boxes and masks on the frame.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))

In [None]:
# Draw the stored prediction for the same frame.
r = results = test_results_dict[image_id]
visualize.display_instances(
    original_image,
    r['rois'], r['masks'], r['class_ids'],
    dataset_train.class_names,
    r['scores'],
    ax=get_ax())