In [1]:
import argparse
import os
import json
import time
from PIL import Image
import numpy as np

from shapely.geometry import Polygon

def num(s):
    """Convert a numeric token to ``int``.

    Integer-looking input is converted directly. Anything else is parsed as a
    float first and then truncated toward zero (``"3.7"`` -> ``3``).

    Raises:
        ValueError: if ``s`` is a string that is not a valid number.
    """
    try:
        return int(s)
    except ValueError:
        # Not an integer literal (e.g. "12.0" or "1e2"): go through float.
        # Only ValueError is caught so that KeyboardInterrupt and unrelated
        # errors are not silently swallowed.
        return int(float(s))


# Objective 

The objective of this notebook is to process the CarFusion dataset so that it can be read by OpenPifPaf. 

The keypoints are converted to the COCO format, and a car detection with ImageAI is run to find the cars that have no bounding box. Indeed, the CarFusion dataset only provides a list of keypoints and no information about the other, non-annotated cars. 

When the ImageAI detection is run on the images, an IOU threshold (default = 0.3) is used to discard the detections that overlap a car already annotated with keypoints, so that only the cars without keypoints receive a detection-based annotation.

# ImageAi 
The objective is to detect the cars which are not annotated, so that a bounding box can be created for them. 

In [2]:
import tensorflow as tf
from imageai.Detection import ObjectDetection

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


Settings of the detector

In [3]:
# Build the ImageAI detector used by the conversion loops and load its weights.
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("./model/yolo.h5") # local path to the YOLOv3 weights file used for the car detection
# You can download it here : https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5
detector.loadModel()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


The detector is configured to detect only cars, buses and trucks.

# TO MODIFY

In [6]:
# User settings read by the cells below.
IOU = 0.3                            # IOU value used as the overlap threshold between annotated cars and detections
car_only = False                     # False: detect cars, trucks and buses; True: detect cars only
dir_carfusion="./datasets/carfusion" # Root directory of the carfusion dataset (contains the "train" and "test" folders)
number_keypoints = 14 # Number of keypoints per car

If you want to remove some keypoints from the CarFusion dataset, you can use the span0 and span1 variables: the keypoints in the interval ]span0, span1] are removed.

In [7]:

# Keypoints in the interval ]span0, span1] are dropped from every annotation.
# Equal values keep all of them; e.g. span0 = 8 and span1 = 10 drops the 9th
# and 10th keypoints of the exported list.
span0 = 0
span1 = 0

# Suffix of the output file name: the IOU, followed by the removed span if any.
text = f"{IOU}" if span0 == span1 else f"{IOU}_{span0}_{span1}"

# A car is exported only if it has strictly more labelled keypoints than this
# value, in both the training and the validation dataset.
keypoints_threshold = 1

# Scratch image written by the detector for every frame; pick any name/place
# (only used during processing, nothing is kept).
out = "./1.jpg"

# Classes the detector reports: always cars, plus trucks and buses unless
# car_only is set.
custom = detector.CustomObjects(car=True, truck=not car_only, bus=not car_only)

In [8]:
# Pairs of 1-based keypoint ids (numbering of COCO_KEYPOINTS below) that are
# linked in the car skeleton.
skeleton =[
            [1, 2], [1,3], [2,4], [3,4],    # links between the wheels
            [1,5], [2,6],[3,7], [4,8],      # links between the wheels and the lights
            #[5,6], [7,8],                   # links between the lights (disabled)
            [5,9], [6,10],                  # links between the front lights and the mirrors
            [5,11],[6,12], [7,13],[8,14],   # links between the lights and the windshield/rear corners
            [11,12],[11,13],[12,14],[13,14] # links between the windshield and the rear corners
            ]


# Keypoint names; the first number in each comment is the 1-based id used by
# `skeleton`, the second one is the 0-based list index.
COCO_KEYPOINTS = [
    'front_left_wheel',         #1          0
    'front_right_wheel',        #2          1
    'back_left_wheel',          #3          2
    'back_right_wheel',         #4          3
    'front_left_light',         #5          4
    'front_right_light',        #6          5
    'back_left_light',          #7          6
    'back_right_light',         #8          7
    'left_mirror',              #9          8
    'right_mirror',             #10         9
    'upper_left_windshield',    #11         10
    'upper_right_windshield',   #12         11
    'upper_left_rear',          #13         12
    'upper_right_rear',         #14         13
]

# Row permutation applied to each instance in getAnnotation: output row i is
# taken from input row reorder_keypoints[i].
# NOTE(review): presumably maps the carfusion keypoint order onto the order of
# COCO_KEYPOINTS above -- confirm against the dataset documentation.
reorder_keypoints = [1,0,3,2,5,4,7,6,13,8,10,9,12,11]

## Annotation analyser

In [9]:
def getAnnotation(instance, total_keypoints ,width, height):
    """Build the COCO-style fields of one annotated car.

    Args:
        instance: array of shape (total_keypoints, 3) with one (x, y, flag)
            row per keypoint. Rows whose flag is 1 or 2 count as labelled.
        total_keypoints: number of keypoint rows; must match the length of
            the module-level `reorder_keypoints`.
        width, height: image size, used to clip the hull and the box.

    Returns:
        A tuple (bbox, seg, keypoints, num_keypoints):
        bbox is [x, y, w, h] around the labelled keypoints, enlarged and
        clipped to the image; seg is the flat [x0, y0, x1, y1, ...] outline
        of their convex hull; keypoints is the flat, reordered list of
        total_keypoints*3 ints; num_keypoints is the number of labelled rows.

        When the geometry cannot be built (e.g. too few labelled keypoints
        for a polygon) bbox is [0, 0, 0, 0], seg is empty and keypoints is
        all zeros, while num_keypoints still reports the labelled count.
    """
    # A keypoint counts as labelled when its flag (third column) is 1 or 2.
    visible = np.logical_or(instance[:, 2] == 2, instance[:, 2] == 1)
    num_keypoints = int(np.sum(visible))

    keypoints = np.zeros((total_keypoints,3), dtype=np.int32)
    try:
        # Convex hull of the labelled keypoints, clipped to the image frame.
        hull = Polygon([(x[0], x[1]) for x in instance[visible, :2]]).convex_hull
        frame = Polygon([(0, 0), (width, 0), (width, height), (0, height)])
        hull = hull.intersection(frame).convex_hull

        # Enlarge the hull bounds: shift the origin by 10% of the size, make
        # the box 125% of the size, and keep everything inside the image.
        bbox = hull.bounds
        w, h = bbox[2]-bbox[0], bbox[3]-bbox[1]
        x_o = max(bbox[0]-(w/10),0)
        y_o = max(bbox[1]-(h/10),0)
        x_i = min(x_o+(w/4)+w,width)
        y_i = min(y_o+(h/4)+h,height)
        bbox = [int(x_o), int(y_o), int(x_i - x_o), int(y_i - y_o)]

        # Drop the last coordinate, which repeats the first one.
        segmentation = list(hull.convex_hull.exterior.coords)[:-1]
        segmentation = [[int(x[0]), int(x[1])] for x in segmentation]

        keypoints[:, :] = instance[:, :]

    except Exception:
        # Best effort: fall back to an empty annotation when shapely cannot
        # build the geometry. `Exception` (not a bare except) so that
        # KeyboardInterrupt still stops a long conversion run.
        bbox = [0, 0, 0, 0]
        segmentation = []

    # Reorder the rows, then flatten to [x0, y0, v0, x1, y1, v1, ...].
    keypoints = keypoints[reorder_keypoints,:]
    keypoints = np.reshape(keypoints, (total_keypoints*3,))
    keypoints = [int(x) for x in keypoints.tolist()]

    # Flatten the outline the same way: [x0, y0, x1, y1, ...].
    seg = [coord for point in segmentation for coord in point]

    return bbox, seg, keypoints, num_keypoints

### Simple IOU

In [10]:
def compute_IOU(boxA, boxB):
    """Return the intersection over union of two boxes.

    Args:
        boxA, boxB: boxes given as ``[x, y, width, height]``.

    Returns:
        The IOU as a float in [0, 1]; 0.0 when the boxes do not overlap or
        when their union has no area.
    """
    # Corners of the intersection rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0]+boxA[2], boxB[0]+boxB[2])
    yB = min(boxA[1]+boxA[3], boxB[1]+boxB[3])

    # No "+ 1" here: the box areas below are plain width*height, so the
    # intersection must use the same convention. Mixing the two made
    # identical boxes score above 1 and merely touching boxes score above 0.
    interArea = max(0, xB - xA) * max(0, yB - yA)

    boxAArea = boxA[2]*boxA[3]
    boxBArea = boxB[2]*boxB[3]

    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        # Both boxes are degenerate (e.g. the [0, 0, 0, 0] fallback box).
        return 0.0

    # return the intersection over union value
    return interArea / float(unionArea)


## Definition of our data structure and parameters

In [11]:
# Split converted by the cells below: 'train' selects the "train" folder, any
# other value falls back to the "test" folder.
data_type='test'

# Overlap threshold used by the conversion loop to discard detections.
iou_threshold = IOU

In [12]:
# Name of the annotation file: "<prefix><split><text>".
output_filename = ('car_only_' if car_only else 'car_') + data_type + text

# Any value other than 'train' falls back to the test split.
image_dir = os.path.join(dir_carfusion, "train" if data_type=='train' else "test")

output_dir = os.path.join(os.getcwd(), "annotations")
path_dir = dir_carfusion


# Skeleton of the COCO dictionary; the conversion loop fills "images" and
# "annotations".
data = {}

data["info"] = {
        'url': "https://www.epfl.ch/labs/vita/",
        'year': time.localtime().tm_year,
        'date_created': time.strftime("%a, %d %b %Y %H:%M:%S +0000",
            time.localtime()),
        'description': "This is a keypoint dataset for object detection.",
        'version': '1.0',
        'contributor': 'VITA laboratory'}

data["categories"] = [{'name': 'car',
    'id': 1,
    'skeleton':skeleton,
    'supercategory': 'car',
    # One label per keypoint, driven by the configured count instead of a
    # hard-coded 14 so the two cannot drift apart.
    'keypoints': [str(x) for x in range(number_keypoints)]}]

data["licenses"] = [{'id': 1,
            'name': "unknown",
            'url': "unknown"}]


# Running counters used by the conversion loop.
obj_id = 0         # annotation id
# expect sub-folder for subsets
data["images"] = []
data["annotations"] = []
json_name = output_filename+'.json'
loop=0             # index of the sequence folder, part of the image id
count_images=0     # number of label files processed

# Data analysis

In [15]:
# Convert every sequence folder of the selected split: one COCO image entry per
# label file, one annotation per labelled car, plus one keypoint-less annotation
# per detected vehicle that does not overlap a labelled car.
for sub_dir in os.listdir(image_dir):
    # Only extracted "car*" sequence folders are processed; archives such as
    # "car_xxx.zip" and any other entry are skipped.
    if sub_dir[:3] != 'car' or ".zip" in sub_dir:
        continue

    im_size = True  # the image size is read from the first frame of the sequence only
    loop = loop+1
    im_dir = os.path.join(image_dir,sub_dir)+'/images_jpg'
    labels_dir = os.path.join(image_dir,sub_dir) + '/gt/'
    print(im_dir)

    # List the label folder once; the count is reused by the progress message.
    label_files = os.listdir(labels_dir)
    for i,file_name in enumerate(label_files):

        if i%50 == 0:
            print(f"there is {i} images processed on the {len(label_files)} available.\n")

        if(file_name[-3:]!='txt'):
            continue
        count_images =count_images+1
        file_str = file_name.split('.')[0]

        # The label name is split as "<video>_<frame>"; both parts, together
        # with the folder index, make up the image id.
        vid_str, id_str  = file_str.split('_')
        frame_id = int(id_str)
        video_id = int(vid_str)
        image_id = int(loop*1e8+video_id*1e5+frame_id)

        image_name = os.path.join(im_dir, "{}.jpg".format(file_str))

        if im_size: # Get the size of the images
            im_size = False
            # Context manager so the image file handle is closed right away.
            with Image.open(image_name) as im:
                width, height = im.size

        data["images"].append({
            'coco_url': "unknown",
            'file_name': image_name,
            'id': image_id,
            'license':1,
            #'has_visible_keypoints':True,
            'date_captured': "unknown",
            'width': width,
            'height': height})

        # One comma-separated row per keypoint: x, y, keypoint id, car id, status.
        with open(os.path.join(labels_dir, file_str+'.txt')) as f:
            label_rows = [list(map(num, line.split(','))) for line in f.readlines()]

        #detect the cars in the image
        detections = detector.detectCustomObjectsFromImage(custom_objects=custom, input_image=image_name, output_image_path=out, minimum_percentage_probability=30)
        # Convert each detection from corner points to [x, y, width, height].
        boxes_imageAI = []
        for obj in detections:
            box = obj["box_points"]
            boxes_imageAI.append([box[0], box[1], box[2]-box[0], box[3]-box[1]])

        assert len(label_rows)!=0, f"empty label file: {file_name}"

        # Group the rows per car: one (number_keypoints, 3) array of (x, y, flag).
        instances = {}
        for keypoint in label_rows:
            if keypoint[3] not in instances: #check if keypoint is in the list
                instances[keypoint[3]] = np.zeros((number_keypoints, 3), dtype=np.int32)
            car_keypoints = instances[keypoint[3]]
            row = keypoint[2]-1  # keypoint ids in the file are 1-based

            car_keypoints[row,0] = keypoint[0] # X coordinate
            car_keypoints[row,1] = keypoint[1] # Y Coordinate
            # Status 2 is stored as flag 1, status 1 or 3 as flag 2.
            if keypoint[4] == 2:
                car_keypoints[row,2] = 1
            elif keypoint[4] == 1 or keypoint[4] == 3:
                car_keypoints[row,2] = 2

            if keypoint[0] <= 0 or keypoint[1] > height or keypoint[1] <= 0 or keypoint[0] > width:
                car_keypoints[row,2] = 0  #Identify the keypoints outside of the frame of the image

        for instance in instances.values():

            bbox, segmentation, car_kp_list, num_keypoints = getAnnotation(instance, number_keypoints, width, height)

            if num_keypoints > keypoints_threshold:

                # Discard the detections that overlap this annotated car. A new
                # list is built instead of calling remove() while iterating,
                # which skipped the element following every removed box.
                boxes_imageAI = [box for box in boxes_imageAI
                                 if not (compute_IOU(bbox,box) > iou_threshold)]

                data["annotations"].append({
                    'image_id': image_id,
                    'category_id': 1,
                    'iscrowd': 0,
                    #'has_visible_keypoints': True,
                    'id': obj_id,
                    'area': bbox[2]*bbox[3],
                    'bbox': bbox,
                    'num_keypoints': num_keypoints,
                    'keypoints': car_kp_list[:span0*3] + car_kp_list[span1*3:], # We can remove some keypoints that we deem not necessary here.
                    'segmentation': [segmentation]})

            # The id advances for every car, exported or not.
            obj_id += 1

        # Remaining detections: vehicles without keypoint labels, exported with
        # an all-zero keypoint list.
        for box in boxes_imageAI:

            empty_keypoints = [0 for a in range(number_keypoints*3)]
            # The corners are not listed in ring order; convex_hull turns them
            # into a proper rectangle before clipping to the image frame.
            hull = Polygon([(box[0],box[1]),(box[0] + box[2], box[1]),
                            (box[0],box[1] + box[3]),(box[0] + box[2],box[1] + box[3])]).convex_hull
            frame = Polygon([(0, 0), (width, 0), (width, height), (0, height)])
            hull = hull.intersection(frame).convex_hull
            # Same enlargement as for the annotated cars.
            bbox = hull.bounds
            w, h = bbox[2]-bbox[0], bbox[3]-bbox[1]
            x_o = max(bbox[0]-(w/10),0)
            y_o = max(bbox[1]-(h/10),0)
            x_i = min(x_o+(w/4)+w,width)
            y_i = min(y_o+(h/4)+h,height)
            bbox = [int(x_o), int(y_o), int(x_i - x_o), int(y_i - y_o)]

            # Flat outline [x0, y0, x1, y1, ...] without the repeated closing point.
            seg = list(hull.convex_hull.exterior.coords)[:-1]
            segmentation = []
            for s in seg:
                segmentation.append(int(s[0]))
                segmentation.append(int(s[1]))

            data["annotations"].append({
                    'image_id': image_id,
                    'category_id': 1,
                    'iscrowd': 0,
                    #'has_visible_keypoints': True,
                    'id': obj_id,
                    'area': bbox[2]*bbox[3],
                    'bbox': bbox,
                    'num_keypoints': 0,
                    'keypoints': empty_keypoints[:span0*3] + empty_keypoints[span1*3:], # We can remove some keypoints that we deem not necessary here.
                    'segmentation': [segmentation]})

            obj_id += 1

./datasets/carfusion/test/car_penn1/images_jpg
there is 0 images processed on the 6919 available.



ValueError: Ensure you specified correct input image, input type, output type and/or output image path 

In [74]:
# Serialise the COCO dictionary and write it to <output_dir>/<json_name>.
json_str = json.dumps(data)

print(json_name,count_images)
ann_file = os.path.join(output_dir, json_name)
# makedirs with exist_ok=True creates missing parent folders too and does not
# fail when the folder already exists (no check-then-create race).
os.makedirs(output_dir, exist_ok=True)
with open(ann_file, 'w') as f:
    f.write(json_str)
        

car_only_test0.3_8_10.json 12761


# Training Phase :

In [75]:
# Switch the following cells to the training split.
data_type='train'

In [77]:
# Name of the annotation file: "<prefix><split><text>".
output_filename = ('car_only_' if car_only else 'car_') + data_type + text

# Any value other than 'train' falls back to the test split.
image_dir = os.path.join(dir_carfusion, "train" if data_type=='train' else "test")

output_dir = os.path.join(os.getcwd(), "annotations")
path_dir = dir_carfusion


# Skeleton of the COCO dictionary; the conversion loop fills "images" and
# "annotations".
data = {}

data["info"] = {
        'url': "https://www.epfl.ch/labs/vita/",
        'year': time.localtime().tm_year,
        'date_created': time.strftime("%a, %d %b %Y %H:%M:%S +0000",
            time.localtime()),
        'description': "This is a keypoint dataset for object detection.",
        'version': '1.0',
        'contributor': 'VITA laboratory'}

data["categories"] = [{'name': 'car',
    'id': 1,
    'skeleton':skeleton,
    'supercategory': 'car',
    # One label per keypoint, driven by the configured count instead of a
    # hard-coded 14 so the two cannot drift apart.
    'keypoints': [str(x) for x in range(number_keypoints)]}]

data["licenses"] = [{'id': 1,
            'name': "unknown",
            'url': "unknown"}]


# Running counters used by the conversion loop.
obj_id = 0         # annotation id
# expect sub-folder for subsets
data["images"] = []
data["annotations"] = []
json_name = output_filename+'.json'
loop=0             # index of the sequence folder, part of the image id
count_images=0     # number of label files processed

# Data analysis

In [None]:
# Convert every sequence folder of the selected split: one COCO image entry per
# label file, one annotation per labelled car, plus one keypoint-less annotation
# per detected vehicle that does not overlap a labelled car.
for sub_dir in os.listdir(image_dir):
    # Only extracted "car*" sequence folders are processed; archives such as
    # "car_xxx.zip" and any other entry are skipped (an archive has no gt/
    # folder to list).
    if sub_dir[:3] != 'car' or ".zip" in sub_dir:
        continue

    im_size = True  # the image size is read from the first frame of the sequence only
    loop = loop+1
    im_dir = os.path.join(image_dir,sub_dir)+'/images_jpg'
    labels_dir = os.path.join(image_dir,sub_dir) + '/gt/'
    print(im_dir)

    # List the label folder once; the count is reused by the progress message.
    label_files = os.listdir(labels_dir)
    for i,file_name in enumerate(label_files):

        if i%50 == 0:
            print(f"there is {i} images processed on the {len(label_files)} available.\n")

        if(file_name[-3:]!='txt'):
            continue
        count_images =count_images+1
        file_str = file_name.split('.')[0]

        # The label name is split as "<video>_<frame>"; both parts, together
        # with the folder index, make up the image id.
        vid_str, id_str  = file_str.split('_')
        frame_id = int(id_str)
        video_id = int(vid_str)
        image_id = int(loop*1e8+video_id*1e5+frame_id)

        image_name = os.path.join(im_dir, "{}.jpg".format(file_str))

        if im_size: # Get the size of the images
            im_size = False
            # Context manager so the image file handle is closed right away.
            with Image.open(image_name) as im:
                width, height = im.size

        data["images"].append({
            'coco_url': "unknown",
            'file_name': image_name,
            'id': image_id,
            'license':1,
            #'has_visible_keypoints':True,
            'date_captured': "unknown",
            'width': width,
            'height': height})

        # One comma-separated row per keypoint: x, y, keypoint id, car id, status.
        with open(os.path.join(labels_dir, file_str+'.txt')) as f:
            label_rows = [list(map(num, line.split(','))) for line in f.readlines()]

        #detect the cars in the image
        detections = detector.detectCustomObjectsFromImage(custom_objects=custom, input_image=image_name, output_image_path=out, minimum_percentage_probability=30)
        # Convert each detection from corner points to [x, y, width, height].
        boxes_imageAI = []
        for obj in detections:
            box = obj["box_points"]
            boxes_imageAI.append([box[0], box[1], box[2]-box[0], box[3]-box[1]])

        assert len(label_rows)!=0, f"empty label file: {file_name}"

        # Group the rows per car: one (number_keypoints, 3) array of (x, y, flag).
        instances = {}
        for keypoint in label_rows:
            if keypoint[3] not in instances: #check if keypoint is in the list
                instances[keypoint[3]] = np.zeros((number_keypoints, 3), dtype=np.int32)
            car_keypoints = instances[keypoint[3]]
            row = keypoint[2]-1  # keypoint ids in the file are 1-based

            car_keypoints[row,0] = keypoint[0] # X coordinate
            car_keypoints[row,1] = keypoint[1] # Y Coordinate
            # Status 2 is stored as flag 1, status 1 or 3 as flag 2.
            if keypoint[4] == 2:
                car_keypoints[row,2] = 1
            elif keypoint[4] == 1 or keypoint[4] == 3:
                car_keypoints[row,2] = 2

            if keypoint[0] <= 0 or keypoint[1] > height or keypoint[1] <= 0 or keypoint[0] > width:
                car_keypoints[row,2] = 0  #Identify the keypoints outside of the frame of the image

        for instance in instances.values():

            bbox, segmentation, car_kp_list, num_keypoints = getAnnotation(instance, number_keypoints, width, height)

            if num_keypoints > keypoints_threshold:

                # Discard the detections that overlap this annotated car. A new
                # list is built instead of calling remove() while iterating,
                # which skipped the element following every removed box.
                boxes_imageAI = [box for box in boxes_imageAI
                                 if not (compute_IOU(bbox,box) > iou_threshold)]

                data["annotations"].append({
                    'image_id': image_id,
                    'category_id': 1,
                    'iscrowd': 0,
                    #'has_visible_keypoints': True,
                    'id': obj_id,
                    'area': bbox[2]*bbox[3],
                    'bbox': bbox,
                    'num_keypoints': num_keypoints,
                    'keypoints': car_kp_list[:span0*3] + car_kp_list[span1*3:], # We can remove some keypoints that we deem not necessary here.
                    'segmentation': [segmentation]})

            # The id advances for every car, exported or not.
            obj_id += 1

        # Remaining detections: vehicles without keypoint labels, exported with
        # an all-zero keypoint list.
        for box in boxes_imageAI:

            empty_keypoints = [0 for a in range(number_keypoints*3)]
            # The corners are not listed in ring order; convex_hull turns them
            # into a proper rectangle before clipping to the image frame.
            hull = Polygon([(box[0],box[1]),(box[0] + box[2], box[1]),
                            (box[0],box[1] + box[3]),(box[0] + box[2],box[1] + box[3])]).convex_hull
            frame = Polygon([(0, 0), (width, 0), (width, height), (0, height)])
            hull = hull.intersection(frame).convex_hull
            # Same enlargement as for the annotated cars.
            bbox = hull.bounds
            w, h = bbox[2]-bbox[0], bbox[3]-bbox[1]
            x_o = max(bbox[0]-(w/10),0)
            y_o = max(bbox[1]-(h/10),0)
            x_i = min(x_o+(w/4)+w,width)
            y_i = min(y_o+(h/4)+h,height)
            bbox = [int(x_o), int(y_o), int(x_i - x_o), int(y_i - y_o)]

            # Flat outline [x0, y0, x1, y1, ...] without the repeated closing point.
            seg = list(hull.convex_hull.exterior.coords)[:-1]
            segmentation = []
            for s in seg:
                segmentation.append(int(s[0]))
                segmentation.append(int(s[1]))

            data["annotations"].append({
                    'image_id': image_id,
                    'category_id': 1,
                    'iscrowd': 0,
                    #'has_visible_keypoints': True,
                    'id': obj_id,
                    'area': bbox[2]*bbox[3],
                    'bbox': bbox,
                    'num_keypoints': 0,
                    'keypoints': empty_keypoints[:span0*3] + empty_keypoints[span1*3:], # We can remove some keypoints that we deem not necessary here.
                    'segmentation': [segmentation]})

            obj_id += 1

/data/bonnesoeur-data/data/carfusion/train/car_butler1/images_jpg
there is 0 images processed on the 1172 available.

there is 50 images processed on the 1172 available.

there is 100 images processed on the 1172 available.

there is 150 images processed on the 1172 available.

there is 200 images processed on the 1172 available.

there is 250 images processed on the 1172 available.

there is 300 images processed on the 1172 available.

there is 350 images processed on the 1172 available.

there is 400 images processed on the 1172 available.

there is 450 images processed on the 1172 available.

there is 500 images processed on the 1172 available.

there is 550 images processed on the 1172 available.

there is 600 images processed on the 1172 available.

there is 650 images processed on the 1172 available.

there is 700 images processed on the 1172 available.

there is 750 images processed on the 1172 available.

there is 800 images processed on the 1172 available.

there is 850 images

In [None]:
# Serialise the COCO dictionary and write it to <output_dir>/<json_name>.
json_str = json.dumps(data)

print(json_name,count_images)
ann_file = os.path.join(output_dir, json_name)
# makedirs with exist_ok=True creates missing parent folders too and does not
# fail when the folder already exists (no check-then-create race).
os.makedirs(output_dir, exist_ok=True)
with open(ann_file, 'w') as f:
    f.write(json_str)
        