# Checking that the obstacle areas in the offline carla dataset are reasonably split by the small - medium - large split by the default coco eval API

In [34]:
from pathlib import Path
import numpy as np
import json
import pandas as pd

In [67]:
cutoffs = [1024, 9216]

In [99]:
RUNS = \
    list(filter(lambda p: p.is_dir() and p.stem == "ClearNoon" and any(p.iterdir()),
           Path("/data/ges/faster-rcnn-driving/training_data").rglob("*")))

In [81]:
def read_json(path):
    with open(path, 'r') as f:
        return json.load(f)

def get_areas(row):
    class_label, bbox_coords = row
    bbox_coords = np.array(bbox_coords).T
    arg_order = bbox_coords.reshape(-1)  # xmn,xmx,ymn,ymx
    xmn, xmx, ymn, ymx = arg_order
    return (xmx-xmn) * (ymx-ymn)

def get_label(row):
    return row[0]

## Table of one row per obstacle

In [71]:
all_areas = []
area_by_run = {}
for r in RUNS:
    for p in r.glob("*.json"):
        areas = [get_areas(row) for row in read_json(p)]
        run = p.parent.parent.parent.stem
        town = run.split("_")[0]
        all_areas.extend([{"town": town, "run": run, "area": area} for area in areas])
        if (town, run) in area_by_run:
            area_by_run[(town, run)].extend(areas)
        else:
            area_by_run[(town, run)] = areas

In [72]:
df = pd.DataFrame(all_areas)

In [73]:
df

Unnamed: 0,town,run,area
0,town02,town02_start80,532
1,town02,town02_start80,20995
2,town02,town02_start80,262866
3,town02,town02_start80,46800
4,town02,town02_start80,27510
...,...,...,...
8817,town01,town01_start1,3318
8818,town01,town01_start1,4389
8819,town01,town01_start1,6106
8820,town01,town01_start1,247


## Table of run per row, fraction obstacles per size category in columns

In [79]:
df = []
for (town, run), areas in area_by_run.items():
    areas = np.array(areas)
    small = np.sum(areas < 1024) / len(areas)
    medium = np.sum(np.logical_and(areas < 9216, areas > 1024)) / len(areas)
    large = np.sum(areas > 9216) / len(areas)
    df.append({"town": town, "run": run, "small": small, "medium": medium, "large": large})
df = pd.DataFrame(df)

In [80]:
df

Unnamed: 0,town,run,small,medium,large
0,town02,town02_start80,0.210638,0.434043,0.355319
1,town02,town02_start1,0.258278,0.421634,0.317881
2,town02,town02_start99,0.264317,0.453744,0.281938
3,town01,town01_start20,0.306667,0.413333,0.273333
4,town03,town03_start250,0.2277,0.478873,0.293427
5,town03,town03_start10,0.389444,0.383738,0.226819
6,town01,town01_start50,0.234043,0.404255,0.361702
7,town01,town01_start30,0.398998,0.288815,0.310518
8,town03,town03_start150,0.114883,0.360313,0.524804
9,town02,town02_start60,0.16875,0.55625,0.275


## Set of categories being used in offline carla dataset, seems to match the pylot label map

In [82]:
all_cateogries = set()

In [None]:
all_cateogries.union()

In [105]:
all_cateogries = set()
for r in RUNS:
    for p in r.glob("*.json"):
        labels = set([get_label(row) for row in read_json(p)])
        all_cateogries = all_cateogries.union(set(labels))

In [107]:
all_cateogries

{'bicycle',
 'car',
 'motorcycle',
 'person',
 'speed limit 30',
 'speed limit 60',
 'speed limit 90'}