In [23]:
import json

In [24]:
def import_file(path):
    with open(path, "r") as file:
        c = json.load(file)
        file.close()
        return c
    
def create_relations(annotations):
    """
    For every image, creates a list of triples in the form of (s, o, p)
    where s and o are the indexes of the annotations within the image and p is the index of the relation_category
    """

def get_bbox_relation(a, b):
    """
    Simple heuristic to return a rel_cat id to two bounding boxes
    """
    a_xyxy = xywh_to_xyxy(a)
    b_xyxy = xywh_to_xyxy(b)
    if box_similarity(a_xyxy, b_xyxy) > 0.6:
        return 0
    elif box_similarity(a_xyxy, b_xyxy) > 0.3:
        if a[1] > b[1]: # When a is below b
            return 1
        else:
            return 2
    return None
    
def xywh_to_xyxy(a):
    return (a[0] + a[2], a[0], a[1] + a[3], a[1])

def box_similarity(a, b):
    """
    Returns fraction of overlap with b to own box. 1 if perfect overlap, 0 if no overlap
    a == b == [xmax, xmin, ymax, ymin]
    """
    a_a = (a[0] - a[1]) * (a[2] - a[3])
    a_b = (b[0] - b[1]) * (b[2] - b[3])
    overlap = max(min(a[0], b[0]) - max(a[1], b[1]), 0) * \
              max(min(a[2], b[2]) - max(a[3], b[3]), 0)

    if a_a > a_b:
        return overlap / a_a
    else:
        return overlap / a_b

rel_cat = ["on", "below", "above"]

In [25]:
train = import_file("train.json")

In [26]:
img_id2id = dict()
for i, img in enumerate(train["images"]):
    idx = img["id"]
    img_id2id[idx] = i
    train["images"][i]["id"] = i

In [27]:
new_annotations = []
for i, ann in enumerate(train["annotations"]):
    idx = ann["image_id"]
    ann["image_id"] = img_id2id[idx]
    new_annotations.append(ann)
train["annotations"] = new_annotations

In [28]:
for ann in train["annotations"]:
    ann["area"] = ann["bbox"][2] * ann["bbox"][3]

In [30]:
image2ann = dict()
for a in train["annotations"]:
    image_id = a["image_id"]
    if image_id in image2ann.keys():
        image2ann[image_id].append(a)
    else:
        image2ann[image_id] = [a]

In [31]:
# For every image, examine their annotiosw
image_rel = dict()
for img, ann_list in image2ann.items():
    triples = []
    for i, a in enumerate(ann_list):
        for j, b in enumerate(ann_list):
            if i >= j:
                continue
            rel = get_bbox_relation(a["bbox"], b["bbox"])
            if rel:
                triples.append([i, j, rel])
    image_rel[img] = triples

In [32]:
train["images"][0]

{'license': 1,
 'file_name': 'Video 1 ezgif-frame-001 (1).jpg',
 'height': 1920,
 'width': 1080,
 'date_captured': '2022-07-14 12:59:58',
 'id': 0,
 'coco_url': 'https://storage.labelbox.com/cl5jgpg6c2fs508ub88aa3748%2F63c21fac-d49f-7072-1767-c8e9c57c2d51-Video%201%20ezgif-frame-001%20(1).jpg?Expires=1673986338674&KeyName=labelbox-assets-key-3&Signature=g-O_Or88jf5eYGrELX5Do-9sAJ0'}

In [33]:
image_rel

{1068: [],
 986: [],
 1058: [],
 1003: [],
 757: [],
 597: [[4, 5, 1]],
 815: [],
 146: [[3, 10, 2], [6, 7, 2], [7, 8, 2]],
 550: [],
 886: [[0, 16, 2]],
 198: [],
 621: [],
 255: [],
 283: [[3, 4, 1], [4, 5, 1], [5, 6, 1]],
 838: [[4, 9, 1]],
 985: [],
 549: [],
 1064: [],
 598: [[1, 2, 1]],
 1057: [],
 254: [],
 997: [],
 756: [],
 145: [[0, 3, 1], [6, 7, 2], [7, 8, 2], [8, 9, 2]],
 1069: [],
 660: [],
 0: [[4, 13, 1],
  [4, 27, 1],
  [7, 10, 1],
  [10, 26, 2],
  [12, 30, 2],
  [13, 19, 2],
  [19, 27, 1]],
 894: [[6, 7, 2]],
 699: [],
 916: [],
 731: [],
 1010: [[1, 3, 2]],
 41: [],
 966: [[0, 1, 2], [0, 11, 1]],
 698: [],
 825: [[1, 11, 1], [9, 12, 2]],
 28: [],
 893: [[14, 15, 2]],
 830: [[0, 7, 2]],
 1025: [[2, 3, 1]],
 4: [[8, 30, 1],
  [11, 31, 1],
  [13, 14, 2],
  [13, 25, 2],
  [14, 16, 1],
  [16, 25, 2]],
 684: [],
 915: [],
 732: [[0, 11, 2]],
 892: [[14, 15, 2]],
 1089: [],
 777: [[0, 1, 1]],
 235: [],
 798: [],
 799: [],
 249: [],
 703: [],
 842: [[3, 12, 2]],
 234: [],
 8

In [34]:
train_img = list(image_rel.keys())[:1000]
val_img = list(image_rel.keys())[1001:]

In [35]:
rel_train = dict()
rel_val = dict()
for i in train_img:
    rel_train[i] = image_rel[i]
    
for i in val_img:
    rel_val[i] = image_rel[i]

In [36]:
out_rel = {"train": rel_train, "val": rel_val, "rel_categories": rel_cat}

In [37]:
with open("oic_rel.json", "w") as file:
    json.dump(out_rel, file)
    file.close()

In [38]:
val_img = set(val_img)
train_img = set(train_img)
img_train = []
img_val = []
for i in train["images"]:
    if i["id"] in val_img:
        img_val.append(i)
    elif i["id"] in train_img: # we cannot be sure only with complement
        img_train.append(i)

In [39]:
ann_train = []
ann_val = []
for a in train["annotations"]:
    if a["image_id"] in val_img:
        ann_val.append(a)
    elif a["image_id"] in train_img: # we cannot be sure only with complement
        ann_train.append(a)

In [40]:
print(len(img_val))
print(len(img_train))

186
1000


In [41]:
print(len(ann_val))
print(len(ann_train))
print(len(train["annotations"]))

2375
12585
14973


In [42]:
image_dict_train = {"info": train["info"], "licenses": train["licenses"], "images": img_train, "annotations": ann_train, "categories": train["categories"]}
image_dict_val = {"info": train["info"], "licenses": train["licenses"], "images": img_val, "annotations": ann_val, "categories": train["categories"]}

In [43]:
with open("oic_train.json", "w") as file:
    json.dump(image_dict_train, file)
    file.close()

In [44]:
with open("oic_val.json", "w") as file:
    json.dump(image_dict_val, file)
    file.close()

In [27]:
train["images"]

[{'license': 1,
  'file_name': 'Video 1 ezgif-frame-001 (1).jpg',
  'height': 1920,
  'width': 1080,
  'date_captured': '2022-07-14 12:59:58',
  'id': 0,
  'coco_url': 'https://storage.labelbox.com/cl5jgpg6c2fs508ub88aa3748%2F63c21fac-d49f-7072-1767-c8e9c57c2d51-Video%201%20ezgif-frame-001%20(1).jpg?Expires=1673986338674&KeyName=labelbox-assets-key-3&Signature=g-O_Or88jf5eYGrELX5Do-9sAJ0'},
 {'license': 1,
  'file_name': 'Video 1 ezgif-frame-001 (87).jpg',
  'height': 1920,
  'width': 1080,
  'date_captured': '2022-07-14 12:59:58',
  'id': 1,
  'coco_url': 'https://storage.labelbox.com/cl5jgpg6c2fs508ub88aa3748%2Fad77ff31-d64f-dac3-d5f3-a637fc77a07d-Video%201%20ezgif-frame-001%20(87).jpg?Expires=1673986338674&KeyName=labelbox-assets-key-3&Signature=zM6kxZej8OXssGbFtUu5OAbSLes'},
 {'license': 1,
  'file_name': 'Video 1 ezgif-frame-001 (88).jpg',
  'height': 1920,
  'width': 1080,
  'date_captured': '2022-07-14 12:59:58',
  'id': 2,
  'coco_url': 'https://storage.labelbox.com/cl5jgpg6c2f

In [7]:
from urllib.request import urlopen
import tqdm

In [8]:
for img in tqdm.tqdm(train["images"]):
    response = urlopen(img["coco_url"])
    data = response.read()
    with open("img/" + img["file_name"], "wb") as file:
        file.write(data)
        file.close()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1187/1187 [13:20<00:00,  1.48it/s]
