In [1]:
import os, json
import numpy as np
import ceruleanml.data as data
from pathlib import Path
import skimage.io as skio
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from icevision.all import *
import icedata
import skimage
import pandas as pd

# Absolute working directory; the annotation output folder is addressed relative to it.
working_dir = os.path.abspath(os.getcwd())
ml_data_path = os.path.join(
    working_dir,
    "../../data/cv2_transfer/outputs/skytruth_annotation/first_phase/",
)
path = Path(ml_data_path)
# Everything matched by "*/" directly under that folder
# (presumably one sub-folder per class -- TODO confirm).
class_folders = list(path.glob("*/"))

# Load the annotation JSON into a plain Python object.
with open("../../data/cv2_transfer/instances_slicks_test_v2.json") as annotation:
    data_js = json.load(annotation)

In [2]:
# Display name -> integer class id. The six slick classes are numbered from 1
# in the order listed here.
class_map = {
    label: class_id
    for class_id, label in enumerate(
        [
            "Infrastructure",
            "Natural Seep",
            "Coincident Vessel",
            "Recent Vessel",
            "Old Vessel",
            "Ambiguous",
        ],
        start=1,
    )
}
# The negative class gets id 0 and is inserted last so the key order is unchanged.
class_map["Hard Negatives"] = 0

class_map

{'Infrastructure': 1,
 'Natural Seep': 2,
 'Coincident Vessel': 3,
 'Recent Vessel': 4,
 'Old Vessel': 5,
 'Ambiguous': 6,
 'Hard Negatives': 0}

In [3]:
# Display the category entries declared in the loaded annotation JSON.
list(data_js['categories'])

[{'supercategory': 'slick', 'id': 1, 'name': 'infra_slick'},
 {'supercategory': 'slick', 'id': 2, 'name': 'natural_seep'},
 {'supercategory': 'slick', 'id': 3, 'name': 'coincident_vessel'},
 {'supercategory': 'slick', 'id': 4, 'name': 'recent_vessel'},
 {'supercategory': 'slick', 'id': 5, 'name': 'old_vessel'},
 {'supercategory': 'slick', 'id': 6, 'name': 'ambiguous'}]

In [4]:
# One independent, initially empty bucket per slick category.
(
    infra_slick_stats,
    natural_seep_stats,
    coincident_vessel_stats,
    recent_vessel_stats,
    old_vessel_stats,
    ambiguous_stats,
) = ([] for _ in range(6))

In [5]:
# Route every annotation into the bucket that matches its category id.
# Annotations whose id is not one of 1-6 are left out, as before.
stats_by_category_id = {
    1: infra_slick_stats,
    2: natural_seep_stats,
    3: coincident_vessel_stats,
    4: recent_vessel_stats,
    5: old_vessel_stats,
    6: ambiguous_stats,
}
for annotation_entry in data_js['annotations']:
    bucket = stats_by_category_id.get(annotation_entry['category_id'])
    if bucket is not None:
        bucket.append(annotation_entry)

In [6]:
# Mask parser reading the annotation JSON together with the tiled image directory.
parser = parsers.COCOMaskParser(
    annotations_filepath="../../data/cv2_transfer/instances_slicks_test_v2.json",
    img_dir="../../data/tiled_image_slicks_test_v2",
)

In [7]:
# Parse the annotations, using ceruleanml's SingleSplitSplitter as the data splitter.
single_splitter = data.SingleSplitSplitter()
train_records = parser.parse(data_splitter=single_splitter)
# Keep the first record of the first returned split at hand for inspection.
record = train_records[0][0]

  0%|          | 0/487 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m


  0%|          | 0/487 [00:00<?, ?it/s]

In [56]:
# Per-class accumulators, one independent empty list per (class, property) pair.

# Segment (mask) areas.
(
    infra_slick_stats_area_segment,
    natural_seep_stats_area_segment,
    coincident_vessel_stats_area_segment,
    recent_vessel_stats_area_segment,
    old_vessel_stats_area_segment,
    ambiguous_stats_area_segment,
) = ([] for _ in range(6))

# Bounding-box areas.
(
    infra_slick_stats_area_bbox,
    natural_seep_stats_area_bbox,
    coincident_vessel_stats_area_bbox,
    recent_vessel_stats_area_bbox,
    old_vessel_stats_area_bbox,
    ambiguous_stats_area_bbox,
) = ([] for _ in range(6))

# Major-axis lengths.
(
    infra_slick_stats_major_axis_length_segment,
    natural_seep_stats_major_axis_length_segment,
    coincident_vessel_stats_major_axis_length_segment,
    recent_vessel_stats_major_axis_length_segment,
    old_vessel_stats_major_axis_length_segment,
    ambiguous_stats_major_axis_length_segment,
) = ([] for _ in range(6))

# Minor-axis lengths.
(
    infra_slick_stats_minor_axis_length_segment,
    natural_seep_stats_minor_axis_length_segment,
    coincident_vessel_stats_minor_axis_length_segment,
    recent_vessel_stats_minor_axis_length_segment,
    old_vessel_stats_minor_axis_length_segment,
    ambiguous_stats_minor_axis_length_segment,
) = ([] for _ in range(6))

In [57]:
def get_minor_axis_length_proxy(bbox):
    """Approximate a region's minor-axis length by the shorter bounding-box side.

    Args:
        bbox: Flat sequence of bounding-box coordinates with all minimum
            coordinates first and all maximum coordinates after them, e.g.
            ``(min_0, min_1, max_0, max_1)`` for a 2-D region or
            ``(min_0, min_1, min_2, max_0, max_1, max_2)`` for a 3-D one.
            The length is expected to be even and at least 4.

    Returns:
        The smaller of the bounding-box extents along the first two axes.
        Any further axes are ignored.

    Raises:
        IndexError: If ``bbox`` is too short to hold two axes.
    """
    # The maximum coordinates start halfway through the sequence. Deriving the
    # offset from the length (instead of hard-coding 3) supports 2-D boxes too.
    n_dims = len(bbox) // 2
    extent_axis0 = bbox[n_dims] - bbox[0]
    extent_axis1 = bbox[n_dims + 1] - bbox[1]
    return min(extent_axis0, extent_axis1)

In [58]:
def get_props_segments(d, category_list, category_prop_area_seg_list, category_prop_area_bbox_list, category_major_axis_length_list, category_minor_axis_length_list):
    """Measure the first mask of a record and append the results to the given lists.

    Args:
        d: Record dictionary. Reads ``d['detection']['masks'][0]`` and
            ``d['common']['height']`` / ``d['common']['width']``.
        category_list: Accepted for call compatibility; not used by this function.
        category_prop_area_seg_list: Receives the region's ``area``.
        category_prop_area_bbox_list: Receives the region's ``area_bbox``.
        category_major_axis_length_list: Receives the region's ``axis_major_length``.
        category_minor_axis_length_list: Receives the bounding-box based
            minor-axis proxy.

    Only the first mask of the record and the first region reported by
    ``regionprops`` are measured.
    """
    first_mask = d['detection']['masks'][0]
    common = d['common']
    mask_array = first_mask.to_mask(common['height'], common['width']).data
    # Move the leading axis of the mask array to the end before measuring.
    first_region = skimage.measure.regionprops(mask_array.transpose(1, 2, 0))[0]

    category_prop_area_seg_list.append(first_region['area'])
    category_prop_area_bbox_list.append(first_region['area_bbox'])
    category_major_axis_length_list.append(first_region['axis_major_length'])
    # 'axis_minor_length' is deliberately not read from regionprops; it is problematic, see
    # https://github.com/scikit-image/scikit-image/issues/2625
    # The shorter bounding-box side is used as a proxy instead.
    category_minor_axis_length_list.append(
        get_minor_axis_length_proxy(first_region['bbox'])
    )

Region properties of interest (from `skimage.measure.regionprops`): `axis_major_length`, `axis_minor_length`, `area`, and `area_bbox`.

In [59]:
# Label -> (annotation bucket, segment areas, bbox areas, major-axis lengths,
# minor-axis lengths), in the argument order get_props_segments expects after
# the record dict.
props_targets_by_label = {
    'infra_slick': (infra_slick_stats, infra_slick_stats_area_segment, infra_slick_stats_area_bbox, infra_slick_stats_major_axis_length_segment, infra_slick_stats_minor_axis_length_segment),
    'natural_seep': (natural_seep_stats, natural_seep_stats_area_segment, natural_seep_stats_area_bbox, natural_seep_stats_major_axis_length_segment, natural_seep_stats_minor_axis_length_segment),
    'coincident_vessel': (coincident_vessel_stats, coincident_vessel_stats_area_segment, coincident_vessel_stats_area_bbox, coincident_vessel_stats_major_axis_length_segment, coincident_vessel_stats_minor_axis_length_segment),
    'recent_vessel': (recent_vessel_stats, recent_vessel_stats_area_segment, recent_vessel_stats_area_bbox, recent_vessel_stats_major_axis_length_segment, recent_vessel_stats_minor_axis_length_segment),
    'old_vessel': (old_vessel_stats, old_vessel_stats_area_segment, old_vessel_stats_area_bbox, old_vessel_stats_major_axis_length_segment, old_vessel_stats_minor_axis_length_segment),
    'ambiguous': (ambiguous_stats, ambiguous_stats_area_segment, ambiguous_stats_area_bbox, ambiguous_stats_major_axis_length_segment, ambiguous_stats_minor_axis_length_segment),
}

for record_idx in range(len(train_records[0])):
    record = train_records[0][record_idx]
    record_dict = record.as_dict()
    for label, targets in props_targets_by_label.items():
        # Only records whose label list is exactly this single label are measured.
        if record_dict['detection']['labels'] == [label]:
            get_props_segments(record_dict, *targets)
            break

In [60]:
def _max_mean_std(values):
    """Return ``(max, mean, std)`` of ``values``.

    The standard deviation is ``np.std`` with its default settings. For an
    empty sequence all three results are NaN, so a class without any records
    shows up as NaN in the summary instead of making ``max()`` raise ValueError.
    """
    if len(values) == 0:
        return np.nan, np.nan, np.nan
    return max(values), np.mean(values), np.std(values)


# Per class and per property: maximum, mean and standard deviation.
max_infra_slick_stats_area_bbox, avg_infra_slick_stats_area_bbox, stdv_infra_slick_stats_area_bbox = _max_mean_std(infra_slick_stats_area_bbox)
max_infra_slick_stats_area_segment, avg_infra_slick_stats_area_segment, stdv_infra_slick_stats_area_segment = _max_mean_std(infra_slick_stats_area_segment)
max_infra_slick_stats_major_axis_length_segment, avg_infra_slick_stats_major_axis_length_segment, stdv_infra_slick_stats_major_axis_length_segment = _max_mean_std(infra_slick_stats_major_axis_length_segment)
max_infra_slick_stats_minor_axis_length_segment, avg_infra_slick_stats_minor_axis_length_segment, stdv_infra_slick_stats_minor_axis_length_segment = _max_mean_std(infra_slick_stats_minor_axis_length_segment)

max_natural_seep_stats_area_bbox, avg_natural_seep_stats_area_bbox, stdv_natural_seep_stats_area_bbox = _max_mean_std(natural_seep_stats_area_bbox)
max_natural_seep_stats_area_segment, avg_natural_seep_stats_area_segment, stdv_natural_seep_stats_area_segment = _max_mean_std(natural_seep_stats_area_segment)
max_natural_seep_stats_major_axis_length_segment, avg_natural_seep_stats_major_axis_length_segment, stdv_natural_seep_stats_major_axis_length_segment = _max_mean_std(natural_seep_stats_major_axis_length_segment)
max_natural_seep_stats_minor_axis_length_segment, avg_natural_seep_stats_minor_axis_length_segment, stdv_natural_seep_stats_minor_axis_length_segment = _max_mean_std(natural_seep_stats_minor_axis_length_segment)

max_coincident_vessel_stats_area_bbox, avg_coincident_vessel_stats_area_bbox, stdv_coincident_vessel_stats_area_bbox = _max_mean_std(coincident_vessel_stats_area_bbox)
max_coincident_vessel_stats_area_segment, avg_coincident_vessel_stats_area_segment, stdv_coincident_vessel_stats_area_segment = _max_mean_std(coincident_vessel_stats_area_segment)
max_coincident_vessel_stats_major_axis_length_segment, avg_coincident_vessel_stats_major_axis_length_segment, stdv_coincident_vessel_stats_major_axis_length_segment = _max_mean_std(coincident_vessel_stats_major_axis_length_segment)
max_coincident_vessel_stats_minor_axis_length_segment, avg_coincident_vessel_stats_minor_axis_length_segment, stdv_coincident_vessel_stats_minor_axis_length_segment = _max_mean_std(coincident_vessel_stats_minor_axis_length_segment)

max_recent_vessel_stats_area_bbox, avg_recent_vessel_stats_area_bbox, stdv_recent_vessel_stats_area_bbox = _max_mean_std(recent_vessel_stats_area_bbox)
max_recent_vessel_stats_area_segment, avg_recent_vessel_stats_area_segment, stdv_recent_vessel_stats_area_segment = _max_mean_std(recent_vessel_stats_area_segment)
max_recent_vessel_stats_major_axis_length_segment, avg_recent_vessel_stats_major_axis_length_segment, stdv_recent_vessel_stats_major_axis_length_segment = _max_mean_std(recent_vessel_stats_major_axis_length_segment)
max_recent_vessel_stats_minor_axis_length_segment, avg_recent_vessel_stats_minor_axis_length_segment, stdv_recent_vessel_stats_minor_axis_length_segment = _max_mean_std(recent_vessel_stats_minor_axis_length_segment)

max_old_vessel_stats_area_bbox, avg_old_vessel_stats_area_bbox, stdv_old_vessel_stats_area_bbox = _max_mean_std(old_vessel_stats_area_bbox)
max_old_vessel_stats_area_segment, avg_old_vessel_stats_area_segment, stdv_old_vessel_stats_area_segment = _max_mean_std(old_vessel_stats_area_segment)
max_old_vessel_stats_major_axis_length_segment, avg_old_vessel_stats_major_axis_length_segment, stdv_old_vessel_stats_major_axis_length_segment = _max_mean_std(old_vessel_stats_major_axis_length_segment)
max_old_vessel_stats_minor_axis_length_segment, avg_old_vessel_stats_minor_axis_length_segment, stdv_old_vessel_stats_minor_axis_length_segment = _max_mean_std(old_vessel_stats_minor_axis_length_segment)

max_ambiguous_stats_area_bbox, avg_ambiguous_stats_area_bbox, stdv_ambiguous_stats_area_bbox = _max_mean_std(ambiguous_stats_area_bbox)
max_ambiguous_stats_area_segment, avg_ambiguous_stats_area_segment, stdv_ambiguous_stats_area_segment = _max_mean_std(ambiguous_stats_area_segment)
max_ambiguous_stats_major_axis_length_segment, avg_ambiguous_stats_major_axis_length_segment, stdv_ambiguous_stats_major_axis_length_segment = _max_mean_std(ambiguous_stats_major_axis_length_segment)
max_ambiguous_stats_minor_axis_length_segment, avg_ambiguous_stats_minor_axis_length_segment, stdv_ambiguous_stats_minor_axis_length_segment = _max_mean_std(ambiguous_stats_minor_axis_length_segment)



In [61]:
# Integer ids and display names of the six slick classes, in matching order.
class_list = list(range(1, 7))
class_list_string = [
    'Infrastructure',
    'Natural Seep',
    'Coincident Vessel',
    'Recent Vessel',
    'Old Vessel',
    'Ambiguous',
]

In [62]:
# Per-property lists of class means, ordered like class_list:
# infra_slick, natural_seep, coincident_vessel, recent_vessel, old_vessel, ambiguous.
avg_area_segment_list = [
    avg_infra_slick_stats_area_segment,
    avg_natural_seep_stats_area_segment,
    avg_coincident_vessel_stats_area_segment,
    avg_recent_vessel_stats_area_segment,
    avg_old_vessel_stats_area_segment,
    avg_ambiguous_stats_area_segment,
]
avg_area_bbox_list = [
    avg_infra_slick_stats_area_bbox,
    avg_natural_seep_stats_area_bbox,
    avg_coincident_vessel_stats_area_bbox,
    avg_recent_vessel_stats_area_bbox,
    avg_old_vessel_stats_area_bbox,
    avg_ambiguous_stats_area_bbox,
]
avg_major_axis_length_segment_list = [
    avg_infra_slick_stats_major_axis_length_segment,
    avg_natural_seep_stats_major_axis_length_segment,
    avg_coincident_vessel_stats_major_axis_length_segment,
    avg_recent_vessel_stats_major_axis_length_segment,
    avg_old_vessel_stats_major_axis_length_segment,
    avg_ambiguous_stats_major_axis_length_segment,
]
avg_minor_axis_length_segment_list = [
    avg_infra_slick_stats_minor_axis_length_segment,
    avg_natural_seep_stats_minor_axis_length_segment,
    avg_coincident_vessel_stats_minor_axis_length_segment,
    avg_recent_vessel_stats_minor_axis_length_segment,
    avg_old_vessel_stats_minor_axis_length_segment,
    avg_ambiguous_stats_minor_axis_length_segment,
]

In [63]:
# Per-property lists of class standard deviations, ordered like class_list:
# infra_slick, natural_seep, coincident_vessel, recent_vessel, old_vessel, ambiguous.
stdv_area_segment_list = [
    stdv_infra_slick_stats_area_segment,
    stdv_natural_seep_stats_area_segment,
    stdv_coincident_vessel_stats_area_segment,
    stdv_recent_vessel_stats_area_segment,
    stdv_old_vessel_stats_area_segment,
    stdv_ambiguous_stats_area_segment,
]
stdv_area_bbox_list = [
    stdv_infra_slick_stats_area_bbox,
    stdv_natural_seep_stats_area_bbox,
    stdv_coincident_vessel_stats_area_bbox,
    stdv_recent_vessel_stats_area_bbox,
    stdv_old_vessel_stats_area_bbox,
    stdv_ambiguous_stats_area_bbox,
]
stdv_major_axis_length_segment_list = [
    stdv_infra_slick_stats_major_axis_length_segment,
    stdv_natural_seep_stats_major_axis_length_segment,
    stdv_coincident_vessel_stats_major_axis_length_segment,
    stdv_recent_vessel_stats_major_axis_length_segment,
    stdv_old_vessel_stats_major_axis_length_segment,
    stdv_ambiguous_stats_major_axis_length_segment,
]
stdv_minor_axis_length_segment_list = [
    stdv_infra_slick_stats_minor_axis_length_segment,
    stdv_natural_seep_stats_minor_axis_length_segment,
    stdv_coincident_vessel_stats_minor_axis_length_segment,
    stdv_recent_vessel_stats_minor_axis_length_segment,
    stdv_old_vessel_stats_minor_axis_length_segment,
    stdv_ambiguous_stats_minor_axis_length_segment,
]


In [64]:
# Column label -> per-class values. The dict order determines the column order
# of the summary table; every list holds one entry per class.
stats_columns = {
    'classes (integer)': class_list,
    'classes (string)': class_list_string,
    'avg. area of segment': avg_area_segment_list,
    'avg. area of bbox': avg_area_bbox_list,
    'avg. major axis length of segment': avg_major_axis_length_segment_list,
    'avg. minor axis length of segment': avg_minor_axis_length_segment_list,
    'std. dev. area of segment': stdv_area_segment_list,
    'std. dev. area of bbox': stdv_area_bbox_list,
    'std. dev. major axis length of segment': stdv_major_axis_length_segment_list,
    'std. dev. minor axis length of segment': stdv_minor_axis_length_segment_list,
}
stats_df = pd.DataFrame(stats_columns)

In [65]:
# Display the per-class summary table (one row per class).
stats_df

Unnamed: 0,classes (integer),classes (string),avg. area of segment,avg. area of bbox,avg. major axis length of segment,avg. minor axis length of segment,std. dev. area of segment,std. dev. area of bbox,std. dev. major axis length of segment,std. dev. minor axis length of segment
0,1,Infrastructure,2946.290323,16050.419355,164.227503,79.548387,3874.543346,21626.373261,118.792378,60.107344
1,2,Natural Seep,5172.342205,83756.78327,414.415061,201.634981,6765.375961,87189.957753,282.743027,159.583309
2,3,Coincident Vessel,1824.48,31084.31,269.817058,101.66,2626.433542,50526.50895,217.732971,103.680492
3,4,Recent Vessel,920.0,19445.666667,252.794146,108.333333,253.452165,3875.489159,15.596585,13.767918
4,5,Old Vessel,2012.2625,23843.5375,266.909915,86.625,2762.08003,37354.747627,196.810739,75.752455
5,6,Ambiguous,3459.3,13514.3,124.887436,82.7,5117.603913,17653.337821,93.779341,66.726382
