In [1]:
import argparse
import os
#from IPython import embed
import json
import time
from PIL import Image
import numpy as np

import pandas as pd

from shapely.geometry import Polygon

def num(s):
    """Convert ``s`` to an ``int``, truncating any fractional part.

    ``int(s)`` is tried first; if that is rejected (for example for the
    string ``"3.7"``), the value is parsed as a float and truncated.

    Args:
        s: A number or a string holding an integer or decimal literal.

    Returns:
        The integer value of ``s`` (truncated toward zero for decimals).

    Raises:
        ValueError: If ``s`` is a string that is neither an int nor a float literal.
        TypeError: If ``s`` is of a type that cannot be converted to a number.
    """
    try:
        return int(s)
    except (TypeError, ValueError):
        # Only conversion failures trigger the float fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        return int(float(s))

# System initialization

Define here the type of data that you want to fetch, the number of keypoints, the thresholds, etc.

In [2]:
# Conversion settings read by the functions defined later in this notebook.
super_category = "vehicle" #input categories whose "super_category" equals this value are converted (the cars are filed under "vehicle")
number_keypoints = 14 #Number of keypoints of the cars (one per entry of COCO_KEYPOINTS)
threshold = 1 # minimum number of visible keypoints for a car to keep its keypoint annotation; below it the car is annotated from its bounding box only


train_test_ratio = 1.0 #fraction of the scenes used for training + validation, the remainder being the testing split (1.0 leaves the testing split empty)
train_val_ratio = 0.7 #fraction of the training + validation scenes that goes to training

# Keypoint names, in the order in which they are written to the annotations.
# The two trailing columns are the 1-based id (the numbering used in SKELETON)
# and the 0-based index in this list.
COCO_KEYPOINTS = [

    'front_left_wheel',         #1          0
    'front_right_wheel',        #2          1
    'back_left_wheel',          #3          2
    'back_right_wheel',         #4          3
    'front_left_light',         #5          4
    'front_right_light',        #6          5
    'back_left_light',          #7          6
    'back_right_light',         #8          7
    'left_mirror',              #9          8
    'right_mirror',             #10         9
    'upper_left_windshield',    #11         10
    'upper_right_windshield',   #12         11
    'upper_left_rear',          #13         12
    'upper_right_rear',         #14         13
]

# Pairs of 1-based keypoint ids that are connected in the car skeleton.
SKELETON = [
            [1, 2], [1,3], [2,4], [3,4],    #links between the wheels
            [1,5], [2,6],[3,7], [4,8],      #links between the wheels and the lights
            [5,6], [7,8],                   #links between the lights
            [5,9], [6,10],                  #links between the mirrors and the front lights
            [5,11],[6,12], [7,13],[8,14],   #links between the lights and the windshield/rear
            [11,12],[11,13],[12,14],[13,14] #links between the rear and the windshield
            ]

# Outputs and inputs of the system

In [3]:
# Folder that contains one sub-folder of images per scene.
scenes_dir = "/data/bonnesoeur-data/data/samsung/scenes"

# Folder that contains the JSON annotation files of those scenes.
annotations_dir = "/data/bonnesoeur-data/data/samsung/json_samsung"

# Output directory where the generated JSON files are saved.
output_dir = os.path.join(os.getcwd(), "results_coco")

# Names of the JSON files holding the training, validation and testing annotations.
jsons = ['car_train.json', 'car_val.json', 'car_test.json']

# Full output path of each file. The loop variable is deliberately not called
# `json`, so that it can never be confused with (or shadow) the json module.
jsons_paths = [os.path.join(output_dir, json_name) for json_name in jsons]

# Box and keypoint converter

This function arranges the data in a COCO-style format.

In [5]:
def getAnnotation(instance, total_keypoints ,width, height):
    """Build the COCO-style fields of one object from its keypoints.

    The bounding box and the segmentation are derived from the convex hull of
    the visible keypoints, clipped to the image frame. The box origin is moved
    up/left by 10 % of the hull size and its extent is enlarged to 125 % of
    the hull size, then clipped to the image.

    Args:
        instance: Array of shape ``(total_keypoints, 3)`` whose columns are
            x, y and a visibility flag (a keypoint is visible when the flag
            is > 0).
        total_keypoints: Number of keypoints per object.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A tuple ``(bbox, seg, keypoints, num_keypoints)``:
        ``bbox`` is ``[x, y, w, h]`` as ints, ``seg`` is the flat list
        ``[x0, y0, x1, y1, ...]`` of hull vertices, ``keypoints`` is the flat
        list of ``total_keypoints * 3`` ints and ``num_keypoints`` is the
        number of visible keypoints.

        When the annotation cannot be built (fewer than three visible
        keypoints, degenerate hull, ...), the result is
        ``([0, 0, 0, 0], [], [0, ...], 0)``. The count is 0 in that case so
        that it agrees with the zeroed keypoints and lets the caller fall
        back to another source for the box.
    """
    visible = instance[:,2]>0
    num_keypoints = int(np.sum(visible))

    def _failed():
        # Fresh lists on every call so callers can safely mutate the result.
        return [0, 0, 0, 0], [], [0] * (total_keypoints*3), 0

    # A polygon needs at least three vertices; fewer visible keypoints can
    # never yield a hull, so skip the geometry work altogether.
    if num_keypoints < 3:
        return _failed()

    keypoints = np.zeros((total_keypoints,3), dtype=np.int32)
    try:
        hull = Polygon([(x[0], x[1]) for x in instance[visible, :2]]).convex_hull
        frame = Polygon([(0, 0), (width, 0), (width, height), (0, height)])
        hull = hull.intersection(frame).convex_hull

        bbox = hull.bounds
        w, h = bbox[2]-bbox[0], bbox[3]-bbox[1]
        x_o = max(bbox[0]-(w/10),0)
        y_o = max(bbox[1]-(h/10),0)
        x_i = min(x_o+(w/4)+w,width)
        y_i = min(y_o+(h/4)+h,height)
        bbox = [int(x_o), int(y_o), int(x_i - x_o), int(y_i - y_o)]

        # The last exterior coordinate repeats the first one; drop it.
        segmentation = list(hull.convex_hull.exterior.coords)[:-1]
        segmentation = [[int(x[0]), int(x[1])] for x in segmentation]

        keypoints[:, :] = instance[:, :]

    except Exception:
        # Best effort: degenerate geometry (collinear points, hull outside the
        # frame, ...) yields the "failed" result instead of aborting the
        # conversion. Exception (not a bare except) keeps Ctrl-C working.
        return _failed()

    keypoints = [int(x) for x in np.reshape(keypoints, (total_keypoints*3,)).tolist()]

    # Flatten [[x, y], ...] into [x0, y0, x1, y1, ...].
    seg = [coord for point in segmentation for coord in point]

    return bbox, seg, keypoints, num_keypoints

# Processing data

In [6]:
def json_saver(data, json_path,count_scenes, output_dir):
    """Serialize ``data`` to JSON and write it to ``json_path``.

    Args:
        data: JSON-serializable object to save.
        json_path: Path of the file to write (overwritten if it exists).
        count_scenes: Number of processed scenes; only printed next to the path.
        output_dir: Directory that is created (with any missing parents)
            before the file is written.

    Raises:
        TypeError: If ``data`` is not JSON-serializable. Serialization happens
            before the file is opened, so an existing file is left untouched.
        OSError: If the directory or the file cannot be created.
    """
    json_str = json.dumps(data)
    print(json_path,count_scenes)
    # makedirs(exist_ok=True) also creates missing parent folders and has no
    # race between an existence check and the creation.
    os.makedirs(output_dir, exist_ok=True)
    with open(json_path, 'w') as f:
        f.write(json_str)
    

In [7]:
def data_init():
    """Create the empty skeleton of the output dataset dictionary.

    Returns:
        A dict with the keys ``"info"``, ``"categories"``, ``"licenses"``,
        ``"images"`` and ``"annotations"``. The last two are empty lists to
        be filled by the caller. The single ``car`` category uses the
        module-level ``SKELETON`` and names its keypoints ``"0"`` ...
        ``str(number_keypoints - 1)``.
    """
    # One UTC timestamp for both fields: the date string carries a literal
    # "+0000" offset, so it must be formatted from UTC rather than local time.
    now = time.gmtime()

    my_data = {}

    my_data["info"] = {
            'url': "https://www.epfl.ch/labs/vita/",
            'year': now.tm_year,
            'date_created': time.strftime("%a, %d %b %Y %H:%M:%S +0000", now),
            'description': "This is a keypoint dataset for object detection.",
            'version': '1.0',
            'contributor': 'VITA laboratory'}

    my_data["categories"] = [{'name': 'car',
        'id': 1,
        'skeleton': SKELETON,
        'supercategory': 'car',
        'keypoints': [str(x) for x in range(number_keypoints)]}]

    my_data["licenses"] = [{'id': 1,
                'name': "unknown",
                'url': "unknown"}]

    # Filled in by the caller while the scenes are processed.
    my_data["images"] = []
    my_data["annotations"] = []

    return my_data

In [10]:
def _padded_bbox(bounds, width, height):
    """Turn ``(minx, miny, maxx, maxy)`` bounds into a padded, clipped ``[x, y, w, h]`` int box."""
    w, h = bounds[2]-bounds[0], bounds[3]-bounds[1]
    x_o = max(bounds[0]-(w/10),0)
    y_o = max(bounds[1]-(h/10),0)
    x_i = min(x_o+(w/4)+w,width)
    y_i = min(y_o+(h/4)+h,height)
    return [int(x_o), int(y_o), int(x_i - x_o), int(y_i - y_o)]


def _annotation_from_box(box, width, height):
    """Build ``(bbox, segmentation)`` from a box dict with centre ``x``/``y`` and size ``w``/``h``."""
    half_w, half_h = 1/2*box["w"], 1/2*box["h"]
    hull = Polygon([(box["x"]-half_w, box["y"]-half_h), (box["x"]+half_w, box["y"]-half_h),
                    (box["x"]+half_w, box["y"]+half_h), (box["x"]-half_w, box["y"]+half_h)]).convex_hull

    # Clip the box to the image frame before measuring it.
    frame = Polygon([(0, 0), (width, 0), (width, height), (0, height)])
    hull = hull.intersection(frame).convex_hull
    bbox = _padded_bbox(hull.bounds, width, height)

    # The last exterior coordinate repeats the first one; drop it and flatten
    # the vertices into [x0, y0, x1, y1, ...].
    segmentation = []
    for vertex in list(hull.convex_hull.exterior.coords)[:-1]:
        segmentation.append(int(vertex[0]))
        segmentation.append(int(vertex[1]))

    return bbox, segmentation


def data_processor(images_dir, labels_dir, output_dir, json_path, separations):
    """Convert a slice of the scenes into one COCO-style JSON file.

    For every scene folder in the selected slice, the first label file whose
    name contains the scene folder name is loaded, and each projection of each
    annotation belonging to a ``super_category`` category becomes one output
    annotation. Objects with at least ``threshold`` visible keypoints are
    annotated from their keypoints; all others are annotated from the labelled
    bounding box with all-zero keypoints.

    Args:
        images_dir: Folder containing one sub-folder of images per scene.
        labels_dir: Folder containing the JSON label files.
        output_dir: Folder in which ``json_path`` is written (passed to ``json_saver``).
        json_path: Path of the JSON file to write.
        separations: ``[start, stop]`` indices selecting the scenes to process
            out of the sorted list of scene folders.

    Returns:
        None. The result is written with ``json_saver`` and counts are printed.
    """
    obj_id = 0
    image_id = 0
    image_dico = {}   # image path -> id already assigned to that image
    count_scenes = 0

    my_data = data_init()

    # os.listdir returns entries in arbitrary order: sort so that the
    # train/val/test slices are reproducible from one call (and machine) to
    # the next. The label folder is listed once instead of once per scene.
    scene_names = sorted(os.listdir(images_dir))
    label_files = sorted(os.listdir(labels_dir))

    for image_dir in scene_names[separations[0]:separations[1]]:

        count_scenes+=1

        json_file = [s for s in label_files if image_dir in s]
        if len(json_file) == 0:
            # No label file for this scene: nothing to convert.
            continue

        with open(os.path.join(labels_dir,json_file[0])) as json_data:
            data = json.load(json_data)

        #search for the vehicle category
        cat_ids = [cat["cat_id"] for cat in data["categories"]
                   if cat["super_category"] == super_category]

        #get the size of the images from the first file of the scene folder
        # NOTE(review): assumes every image of a scene has the same size - confirm.
        scene_path = os.path.join(images_dir,image_dir)
        with Image.open(os.path.join(scene_path, os.listdir(scene_path)[0])) as im:
            width, height = im.size

        #cycle through the annotations
        for annotation in data["annotations"]:

            #To prevent an error in the json
            if "cat_id" not in annotation:
                continue

            if annotation["cat_id"] not in cat_ids:
                continue

            for projection in annotation["projections"]:

                frame_info = data["frames"][annotation["pair_id"]-1]
                # NOTE(review): characters -27..-10 of the point-cloud path are
                # used as the time stamp of the frame - confirm the naming scheme.
                time_stamp = frame_info["point_cloud"][-27:-10]
                image_name = os.path.join(scene_path, time_stamp+"_"+projection["sensor_id"]+"_R.jpg")
                num_keypoints = 0

                if "keypoints" in projection and len(projection["keypoints"]) != 0:

                    instance = np.zeros((number_keypoints, 3), dtype=np.int32)

                    for i, part_type in enumerate(COCO_KEYPOINTS):
                        keypoint = projection["keypoints"][part_type]
                        instance[i,0] = keypoint[0] # X coordinate
                        instance[i,1] = keypoint[1] # Y coordinate
                        instance[i,2] = keypoint[2] # Visible point

                        if keypoint[0] <= 0 or keypoint[1] > height or keypoint[1] <= 0 or keypoint[0] > width:
                            instance[i,2] = 0  #Identify the keypoints outside of the frame of the image

                    bbox, segmentation, keypoints, num_keypoints = getAnnotation(instance, number_keypoints, width, height)

                    if num_keypoints<threshold:
                        num_keypoints = 0

                # Add the cars that do not have enough or any keypoint (categorize them by their bounding box)
                if num_keypoints == 0:
                    keypoints = [0 for a in range(number_keypoints*3)]
                    bbox, segmentation = _annotation_from_box(projection['bbox'], width, height)

                #Check if the image is already in the list
                if image_name not in image_dico:
                    image_dico[image_name] = image_id
                    image_id+=1

                    my_data["images"].append({
                        'coco_url': frame_info["point_cloud"],
                        'file_name': image_name,
                        'id': image_dico[image_name],
                        'license':1,
                        'date_captured': "unknown",
                        'width': width,
                        'height': height})

                # NOTE(review): 'iscrowd' used to appear twice in this dict, so a
                # value derived from projection["difficulty"] was always
                # overwritten by 0. The effective value (0) is kept - confirm
                # that "difficulty" should not be exported.
                my_data["annotations"].append({
                    'image_id': image_dico[image_name],
                    'category_id': 1,
                    'iscrowd': 0,
                    'id': obj_id,
                    'area': bbox[2]*bbox[3],
                    'bbox': bbox,
                    'num_keypoints': num_keypoints,
                    'keypoints': keypoints,
                    'segmentation': [segmentation]})

                obj_id+=1


    print(f"The total nuber of annotated images is: {image_id} ")
    print(f"The total nuber of annotation is: {obj_id} ")

    json_saver(my_data, json_path,count_scenes, output_dir)

## Run this to process the data 

In [11]:
# Split the scene folders into consecutive training / validation / testing
# slices and write one JSON file per slice.
number_scenes = len(os.listdir(scenes_dir))

# Cumulative fractions of the scenes at which each split starts and ends.
separations = [0, train_val_ratio*train_test_ratio, train_test_ratio, 1.0]

# The same boundaries expressed as (truncated) scene indices.
separation = [int(fraction*number_scenes) for fraction in separations]

for i, json_path in enumerate(jsons_paths):
    data_processor(
        scenes_dir,
        annotations_dir,
        output_dir,
        json_path,
        [separation[i], separation[i+1]],
    )
    

The total nuber of annotated images is: 2622 
The total nuber of annotation is: 12408 
/home/bonnesoe/semester_project/samsung_to_coco/results_coco/car_train.json 14
The total nuber of annotated images is: 895 
The total nuber of annotation is: 5565 
/home/bonnesoe/semester_project/samsung_to_coco/results_coco/car_val.json 6
The total nuber of annotated images is: 0 
The total nuber of annotation is: 0 
/home/bonnesoe/semester_project/samsung_to_coco/results_coco/car_test.json 0
