#### Research Notes
+ Required orientation: 0, 45, 90, 135, 180, 225, 270, 315
- MEBOW Homepage: https://chenyanwu.github.io/MEBOW/

In [None]:
import json
import os
import re
import shutil
from collections import Counter

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [None]:
# The 8 orientation labels (in degrees) kept for the reduced dataset: every 45 degrees from 0 to 315
required_orientation = list(range(0, 360, 45))

In [None]:
# Paths to the orientation annotation JSON files for the train and val splits
# (presumably the MEBOW annotation files linked in the notes above — TODO confirm)
annotation_train_fp = "dataset/train_hoe.json"
annotation_val_fp = "dataset/val_hoe.json"

#### COCO Annotation Extraction

In [None]:
# Paths to the COCO 2017 instance annotation files (train / val);
# they are loaded below to look up bounding boxes by annotation id
coco_annotation_train_fp = "dataset/coco2017_annotations/instances_train2017.json"
coco_annotation_val_fp = "dataset/coco2017_annotations/instances_val2017.json"

In [None]:
# Load the COCO train instance annotations.
# JSON is UTF-8 by specification, so do not rely on the platform's default encoding.
with open(coco_annotation_train_fp, encoding="utf-8") as json_file:
    annotation_coco_train = json.load(json_file)

In [None]:
# Load the COCO val instance annotations.
# JSON is UTF-8 by specification, so do not rely on the platform's default encoding.
with open(coco_annotation_val_fp, encoding="utf-8") as json_file:
    annotation_coco_val = json.load(json_file)

In [None]:
# Only annotations with this category id are kept (id 1 is "person" in the COCO category list)
PERSON_CATEGORY_ID = 1


def _person_bboxes(coco_annotations):
    """Index the kept-category annotations of a COCO instances dict by annotation id.

    Args:
        coco_annotations: Parsed COCO instances JSON; its "annotations" list
            entries must provide "id", "bbox", "image_id" and "category_id".

    Returns:
        Dict mapping annotation id -> {"bbox": ..., "image_id": ...}.
    """
    return {
        ann["id"]: {"bbox": ann["bbox"], "image_id": ann["image_id"]}
        for ann in coco_annotations["annotations"]
        if ann["category_id"] == PERSON_CATEGORY_ID
    }


# Same extraction for both splits, keyed by annotation (instance) id for O(1) lookup
bbox_train = _person_bboxes(annotation_coco_train)
bbox_val = _person_bboxes(annotation_coco_val)

#### Training Set

In [None]:
# Load the training orientation annotations.
# JSON is UTF-8 by specification, so do not rely on the platform's default encoding.
with open(annotation_train_fp, encoding="utf-8") as json_file:
    annotation_train = json.load(json_file)

In [None]:
# Collect the distinct orientation labels that occur in the training annotations
unique_orientation = {*annotation_train.values()}
print(f"Unique Orientation: {unique_orientation}")

In [None]:
# Count how many training annotations carry each orientation in a single pass
# (the previous version rescanned every annotation once per unique label).
# Labels are cast to int as before; labels that truncate to the same int are
# now summed instead of overwriting each other. Sorting by orientation keeps
# the resulting table order reproducible.
annotation_count = dict(
    sorted(Counter(int(val) for val in annotation_train.values()).items())
)

In [None]:
# Tabulate the per-orientation training counts and preview the first rows
annotation_count_df = pd.DataFrame(
    data=list(annotation_count.items()),
    columns=["orientation", "count"],
)
annotation_count_df.head(5)

In [None]:
# Bar chart of the number of training annotations per orientation
plt.figure(figsize=(12, 6))
# Pass the frame through ``data=``: seaborn releases before 0.12 do not accept
# the DataFrame as the first positional argument, so the keyword form is portable.
ax = sns.barplot(data=annotation_count_df, x="orientation", y="count")
ax.tick_params(axis="x", rotation=75)  # tilt the tick labels so they do not overlap
ax.set(xlabel="orientation", ylabel="count")
plt.show()

In [None]:
# Summary statistics of the numeric columns of the training count table
annotation_count_df.describe()

In [None]:
# Show the training counts restricted to the 8 required orientations
annotation_count_df.loc[
    lambda frame: frame["orientation"].isin(required_orientation)
]

In [None]:
# Total number of training annotations across the required orientations,
# computed from the count table instead of hand-copied numbers so it stays
# correct when the annotations change
annotation_count_df.loc[
    annotation_count_df["orientation"].isin(required_orientation), "count"
].sum()

#### Validation Set

In [None]:
# Load the validation orientation annotations.
# JSON is UTF-8 by specification, so do not rely on the platform's default encoding.
with open(annotation_val_fp, encoding="utf-8") as json_file:
    annotation_val = json.load(json_file)

In [None]:
# Collect the distinct orientation labels that occur in the validation annotations
unique_orientation_val = set(annotation_val.values())
# Report the validation set itself (the training set was printed here by mistake)
print(f"Validation Unique Orientation: {unique_orientation_val}")

In [None]:
# Count how many validation annotations carry each orientation in a single pass
# (the previous version rescanned every annotation once per unique label).
# Labels are cast to int as before; labels that truncate to the same int are
# now summed instead of overwriting each other. Sorting by orientation keeps
# the resulting table order reproducible.
annotation_count_val = dict(
    sorted(Counter(int(val) for val in annotation_val.values()).items())
)

In [None]:
# Tabulate the per-orientation validation counts and preview the first rows
annotation_count_val_df = pd.DataFrame(
    data=list(annotation_count_val.items()),
    columns=["orientation", "count"],
)
annotation_count_val_df.head(5)

In [None]:
# Bar chart of the number of validation annotations per orientation
plt.figure(figsize=(12, 6))
# Pass the frame through ``data=``: seaborn releases before 0.12 do not accept
# the DataFrame as the first positional argument, so the keyword form is portable.
ax = sns.barplot(data=annotation_count_val_df, x="orientation", y="count")
ax.tick_params(axis="x", rotation=75)  # tilt the tick labels so they do not overlap
ax.set(xlabel="orientation", ylabel="count")
plt.show()

In [None]:
# Summary statistics of the numeric columns of the validation count table
annotation_count_val_df.describe()

In [None]:
# Show the validation counts restricted to the 8 required orientations
annotation_count_val_df.loc[
    lambda frame: frame["orientation"].isin(required_orientation)
]

In [None]:
# Spot-check a single validation entry: bbox_val is keyed by COCO annotation id
bbox_val[510346]

#### Dataset Preparation

In [None]:
# File names present in the train / val image directories
# (os.listdir already returns a fresh list, so no copy is needed)
train_fname_ls = os.listdir("./dataset/train2017/")
val_fname_ls = os.listdir("./dataset/val2017/")

In [None]:
def annotation_mapping(fname_ls, annotations, coco_annotations, orientations=None):
    """Map image file names to the orientation-labelled instances they contain.

    Args:
        fname_ls: Iterable of image file names to match against.
        annotations: Dict mapping "<image_id>_<instance_id>" keys to an
            orientation label.
        coco_annotations: Dict mapping an integer instance id to a dict that
            holds at least a "bbox" entry.
        orientations: Collection of labels to keep. Defaults to the
            module-level ``required_orientation`` list.

    Returns:
        Dict mapping each matched file name to a list of
        ``{"instance_id": ..., "bbox": ..., "label": ...}`` dicts, one per
        kept instance. Annotations whose image id matches no file name are
        skipped.

    Raises:
        KeyError: If a kept, matched instance id is missing from
            ``coco_annotations``.
    """
    if orientations is None:
        orientations = required_orientation

    sorted_fnames = sorted(fname_ls)
    mapping = {}

    # Walk the annotations in key order so the output order is deterministic.
    for key in sorted(annotations):
        label = annotations[key]
        if label not in orientations:
            continue

        parts = key.split("_")
        image_id = parts[0]
        instance_id = int(parts[1])

        # Literal substring match; the first hit in sorted order wins. This
        # avoids building a regex per file and treats the id as plain text.
        # TODO: match on f"{image_id}." once the full file name is available.
        fname = next((name for name in sorted_fnames if image_id in name), None)
        if fname is None:
            continue

        mapping.setdefault(fname, []).append(
            {
                "instance_id": instance_id,
                "bbox": coco_annotations[instance_id]["bbox"],
                "label": label,
            }
        )

    return mapping

In [None]:
# Build the file name -> labelled instances mapping for each split
train_mapping = annotation_mapping(
    fname_ls=train_fname_ls,
    annotations=annotation_train,
    coco_annotations=bbox_train,
)
val_mapping = annotation_mapping(
    fname_ls=val_fname_ls,
    annotations=annotation_val,
    coco_annotations=bbox_val,
)

In [None]:
# Output directories for the filtered 8-orientation image subsets.
# NOTE(review): the train directory is spelled "orienation" while the val
# directory is spelled "orientation" — confirm which spelling is intended.
new_train_fp = "dataset/coco2017_8orienation_train_new"
new_val_fp = "dataset/coco2017_8orientation_val_new"

In [None]:
def create_dataset(mapping_file, dataset_dir, new_dataset_dir):
    """Copy every image named in ``mapping_file`` into ``new_dataset_dir``.

    Args:
        mapping_file: Dict whose keys are image file names located in
            ``dataset_dir``; the values are not used here.
        dataset_dir: Directory that holds the source images.
        new_dataset_dir: Destination directory; created (including parents)
            when it does not exist yet.

    Raises:
        FileNotFoundError: If a listed file is missing from ``dataset_dir``.
    """
    # exist_ok makes the call a no-op for an existing directory, so a single
    # copy loop serves both the "already there" and "just created" cases.
    os.makedirs(new_dataset_dir, exist_ok=True)
    for fname in mapping_file:
        shutil.copy(os.path.join(dataset_dir, fname), new_dataset_dir)

In [None]:
# Copy the training images referenced by the mapping into the new train directory
create_dataset(
    mapping_file=train_mapping,
    dataset_dir="./dataset/train2017/",
    new_dataset_dir=new_train_fp,
)

In [None]:
# Copy the validation images referenced by the mapping into the new val directory
create_dataset(
    mapping_file=val_mapping,
    dataset_dir="./dataset/val2017/",
    new_dataset_dir=new_val_fp,
)

In [None]:
# Serialize the train mapping dict and store it as a JSON file
with open("dataset/train_coco2017_annotation_new.json", mode="w") as json_file:
    json_file.write(json.dumps(train_mapping))

In [None]:
# Serialize the val mapping dict and store it as a JSON file
with open("dataset/val_coco2017_annotation_new.json", mode="w") as json_file:
    json_file.write(json.dumps(val_mapping))

In [1]:
# Zip the newly extracted 8-orientation train directory (-q: quiet, -r: recurse into the directory)
!zip -q -r dataset/coco2017_8orienation_train_new.zip dataset/coco2017_8orienation_train_new/ 

In [None]:
# Zip the newly extracted 8-orientation val directory (-q: quiet, -r: recurse into the directory)
# NOTE(review): the archive name is spelled "orienation" while the source directory is
# spelled "orientation" — confirm the mismatch is intended
!zip -q -r dataset/coco2017_8orienation_val_new.zip dataset/coco2017_8orientation_val_new//