# Initialize notebook

In [8]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import os
import math
import seaborn as sn
import statistics
from tqdm import tqdm
import time
from functools import partial
from operator import is_not
import pickle
from sklearn.metrics import mean_squared_error
from scipy.optimize import curve_fit
from math import sqrt

pd.set_option('display.max_columns', None)

In [9]:
# Constants
# Root folder of the KITTI dataset. Set the KITTI_DATASET environment variable to
# point somewhere else; the original local path is kept as the default.
# os.path.join(..., '') guarantees the trailing separator that the plain string
# concatenations used throughout the notebook (KITTI_DATASET+'...') rely on.
KITTI_DATASET = os.path.join(
    os.environ.get('KITTI_DATASET', '/media/robesafe/SSD_SATA/KITTI_DATASET/'), '')

# Load dataframe and pkl files

In [10]:
# Read the three CSV tables stored at the dataset root: the full table and,
# judging by the file names, its train and validation splits.
df, df_train, df_val = [
    pd.read_csv(KITTI_DATASET + csv_name)
    for csv_name in ('kitti_gt.csv', 'kitti_train_gt.csv', 'kitti_val_gt.csv')
]

In [11]:
def objective_0(x):
    """Null model: ignore ``x`` and always return 0."""
    zero_value = 0
    return zero_value

def objective_const(x, a):
    """Constant model: return ``a`` unchanged, whatever ``x`` is."""
    constant = a
    return constant

def objective_log_f(x, a, b, c):
    """Logarithmic model ``a * ln(x)**b + c`` (natural logarithm via ``np.log``)."""
    log_power = np.log(x) ** b
    scaled = a * log_power
    return scaled + c

def objective_f(x, a, b):
    """Linear model ``a * x + b``."""
    slope_term = a * x
    return slope_term + b

def objective_2f(x, a, b, c):
    """Degree-2 polynomial ``a*x**2 + b*x + c``."""
    quadratic = a * x**2
    linear = b * x
    return quadratic + linear + c
    
def objective_3f(x, a, b, c, d):
    """Degree-3 polynomial ``a*x**3 + b*x**2 + c*x + d``."""
    cubic = a * x**3
    quadratic = b * x**2
    linear = c * x
    return cubic + quadratic + linear + d

def objective_4f(x, a, b, c, d, e):
    """Degree-4 polynomial ``a*x**4 + b*x**3 + c*x**2 + d*x + e``."""
    quartic = a * x**4
    cubic = b * x**3
    quadratic = c * x**2
    linear = d * x
    return quartic + cubic + quadratic + linear + e

class Regression_model:
    """A fitted curve: the objective function plus the parameters found for it."""

    def __init__(self, function, variables):
        # function: callable evaluated as function(x, *variables)
        # variables: sequence of fitted parameters for that callable
        self.function, self.variables = function, variables

    def __str__(self):
        described = (self.function, self.variables)
        return "function = %s, variables = %s" % described

class Regression_model_class:
    """Container holding one model per object class (car, pedestrian, cyclist)."""

    def __init__(self, model_car, model_pedestrian, model_cyclist):
        self.car = model_car
        self.pedestrian = model_pedestrian
        self.cyclist = model_cyclist

    def __str__(self):
        template = "car: {%s},\npedestrian: {%s},\ncyclist: {%s}"
        per_class = (self.car, self.pedestrian, self.cyclist)
        return template % per_class
        
class Regression_model_bb:
    """Container holding one model for complete and one for incomplete bounding boxes."""

    def __init__(self, bb_complete, bb_incomplete):
        self.bb_complete, self.bb_incomplete = bb_complete, bb_incomplete

    def __str__(self):
        template = "bb_complete: {%s},\nbb_incomplete: {%s}"
        return template % (self.bb_complete, self.bb_incomplete)

In [12]:
# Load the two previously fitted models from the working directory.
# NOTE: unpickling can execute arbitrary code; only open trusted .pkl files.
height_model = pc_proj_refinement_model = None
with open('height_model.pkl', 'rb') as f:
    height_model = pickle.loads(f.read())
with open('pc_proj_refinement_model.pkl', 'rb') as f:
    pc_proj_refinement_model = pickle.loads(f.read())

In [13]:
# print() rather than a bare expression: the model containers defined above only
# implement __str__, which is the nested "car: {...}" text shown in the output.
print(height_model)

car: {bb_complete: {function = <function objective_log_f at 0x7fd92b6df378>, variables = [689.26985181  -2.0374056  -16.72954044]},
bb_incomplete: {function = <function objective_log_f at 0x7fd92b6df378>, variables = [651.02767472  -1.7506046  -29.32080446]}},
pedestrian: {function = <function objective_log_f at 0x7fd92b6df378>, variables = [1173.22871064   -2.52031575  -10.67344022]},
cyclist: {function = <function objective_log_f at 0x7fd92b6df378>, variables = [1050.87275107   -2.40355944  -12.05918045]}


In [14]:
# print() rather than a bare expression: the model containers defined above only
# implement __str__. This model holds one correction function per object class.
print(pc_proj_refinement_model)

car: {function = <function objective_4f at 0x7fd92c028c80>, variables = [-9.13991144e-07  1.16832794e-04 -5.13379294e-03  9.95424688e-02
  9.08476367e-01]},
pedestrian: {function = <function objective_log_f at 0x7fd92b6df378>, variables = [-1.22541728  0.37735961  1.7788618 ]},
cyclist: {function = <function objective_const at 0x7fd979724730>, variables = [-0.28281803]}


# Execute both models

## Execute height model

In [15]:
def approximate_distance_height(type_name, top, bottom, bb_complete, model):
    """Estimate an object's distance from the pixel height of its 2D bounding box.

    Parameters
    ----------
    type_name : str
        'Car', 'Pedestrian' or 'Cyclist'.
    top, bottom : number
        Vertical box limits; the height fed to the model is ``bottom - top``.
    bb_complete : bool
        Only used for cars: selects ``model.car.bb_complete`` when truthy,
        ``model.car.bb_incomplete`` otherwise.
    model : object
        Exposes ``car.bb_complete``, ``car.bb_incomplete``, ``pedestrian`` and
        ``cyclist``, each with a ``function`` and its ``variables``.

    Returns
    -------
    The value of the selected ``function(height, *variables)``.

    Raises
    ------
    ValueError
        If ``type_name`` is not one of the three supported classes.
    """
    pixel_height = bottom - top

    if type_name == 'Car':
        fitted = model.car.bb_complete if bb_complete else model.car.bb_incomplete
    elif type_name == 'Pedestrian':
        fitted = model.pedestrian
    elif type_name == 'Cyclist':
        fitted = model.cyclist
    else:
        raise ValueError("Type must be Car, Pedestrian or Cyclist")

    return fitted.function(pixel_height, *fitted.variables)

In [16]:
# Add the height-based distance estimate to both splits. A box is treated as
# complete when it is not flagged 'incomplete_2d_horizontal'.
for df_split in (df_train, df_val):
    df_split['distance_height'] = df_split.apply(
        lambda row: approximate_distance_height(row['type'],
                                                row['top'],
                                                row['bottom'],
                                                not row['incomplete_2d_horizontal'],
                                                height_model),
        axis=1)

In [17]:
# Display the training split, now including the 'distance_height' column
df_train

Unnamed: 0,frame,id,type,truncated,occluded,alpha,left,top,right,bottom,height,width,length,x,y,z,ry,distance,height_image,width_image,theta_ray,incomplete_2d_horizontal,incomplete_2d_bottom,distance_height
0,0,0,Pedestrian,0.00,0.0,-0.20,712.40,143.00,810.73,307.92,1.89,0.48,1.20,1.84,1.47,8.41,0.01,8.733533,164.92,98.33,0.21,False,False,8.597865
1,1,0,Car,0.00,0.0,1.85,387.63,181.54,423.81,203.12,1.67,1.87,3.69,-16.53,2.39,58.49,1.57,60.827897,21.58,36.18,-0.28,False,False,53.316196
2,1,1,Cyclist,0.00,3.0,-1.65,676.60,163.95,688.98,193.93,1.86,0.60,2.02,4.59,1.32,45.84,-1.55,46.088134,29.98,12.38,0.10,False,False,43.396508
3,2,0,Car,0.00,0.0,-1.67,657.39,190.13,700.07,223.39,1.41,1.58,4.36,3.18,2.27,34.38,-1.58,34.601296,33.26,42.68,0.09,False,False,36.825652
4,3,0,Car,0.00,0.0,1.55,614.24,181.78,727.31,284.77,1.57,1.73,4.15,1.00,1.75,13.22,1.62,13.372767,102.99,113.07,0.07,False,False,13.570701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27993,5983,8,Car,0.00,2.0,-1.72,668.22,167.79,726.04,215.88,1.56,1.48,3.36,2.95,1.42,25.30,-1.61,25.510956,48.09,57.82,0.11,False,False,26.950301
27994,5983,9,Car,0.00,1.0,-1.66,640.25,174.03,671.68,198.24,1.38,1.55,3.60,2.70,1.47,43.39,-1.60,43.498770,24.21,31.43,0.06,False,False,48.262606
27995,5983,10,Car,0.77,3.0,2.27,0.00,196.98,95.57,298.45,1.35,1.59,3.41,-9.21,1.73,10.06,1.55,13.748476,101.47,95.57,-0.72,True,False,15.359191
27996,5983,11,Car,0.00,2.0,-1.57,594.88,172.76,633.46,207.96,1.52,1.67,3.61,0.16,1.54,33.23,-1.56,33.266050,35.20,38.58,0.01,False,False,35.102942


## Execute pointcloud projection model

In [18]:
def get_point_cloud_projected(image_id):
    """Project the LiDAR scan of a frame onto its image plane.

    Reads ``calib/<id>.txt``, ``velodyne/<id>.bin`` and ``images/<id>.png`` under
    ``KITTI_DATASET``, where ``<id>`` is ``image_id`` zero-padded to 6 digits.

    Parameters
    ----------
    image_id : int
        Frame number.

    Returns
    -------
    numpy.matrix
        3 x N matrix with rows u, v (pixel coordinates after dividing by the third
        homogeneous coordinate) and z (that third coordinate). Points with negative
        LiDAR x, negative z, or a projection outside ``[0, width] x [0, height]``
        are removed.
    """
    name = '%06d'%image_id # 6 digit zeropadding
    img = KITTI_DATASET+'images/'+name+'.png'
    binary = KITTI_DATASET+'velodyne/'+name+'.bin'
    with open(KITTI_DATASET+'calib/'+name+'.txt','r') as f:
        calib = f.readlines()

    def _row_values(line):
        # Drop the leading "<name>:" token and parse the remaining numbers
        return [float(x) for x in line.strip('\n').split(' ')[1:]]

    # P2 (3 x 4) for left eye (intrinsic matrix in homogeneous coordinates)
    P2 = np.matrix(_row_values(calib[2])).reshape(3,4)
    R0_rect = np.matrix(_row_values(calib[4])).reshape(3,3)
    # Add a 1 in bottom-right, reshape to 4 x 4
    R0_rect = np.insert(R0_rect,3,values=[0,0,0],axis=0)
    R0_rect = np.insert(R0_rect,3,values=[0,0,0,1],axis=1)
    Tr_velo_to_cam = np.matrix(_row_values(calib[5])).reshape(3,4)
    Tr_velo_to_cam = np.insert(Tr_velo_to_cam,3,values=[0,0,0,1],axis=0)

    # read raw data from binary
    scan = np.fromfile(binary, dtype=np.float32).reshape((-1,4))
    points = scan[:, 0:3] # lidar xyz (front, left, up)
    # TODO: use fov filter?
    # Homogeneous coordinates, one point per column; drop points behind the sensor
    velo = np.insert(points,3,1,axis=1).T
    velo = np.delete(velo,np.where(velo[0,:]<0)[0],axis=1)

    cam = P2 * R0_rect * Tr_velo_to_cam * velo
    # cam is a np.matrix, so cam[2,:] is 1 x N and np.where returns (rows, cols):
    # only the column indices ([1]) identify points.
    cam = np.delete(cam,np.where(cam[2,:]<0)[1],axis=1)
    # get u,v,z
    cam[:2] /= cam[2,:]
    png = mpimg.imread(img)
    IMG_H,IMG_W,_ = png.shape
    # filter point out of canvas
    u,v,_ = cam
    u_out = np.logical_or(u<0, u>IMG_W)
    v_out = np.logical_or(v<0, v>IMG_H)
    outlier = np.logical_or(u_out, v_out)
    # Use the column indices only. Passing the whole (rows, cols) tuple would also
    # delete column 0, because every row index of the 1 x N mask is 0.
    cam = np.delete(cam,np.where(outlier)[1],axis=1)
    return cam

In [19]:
def get_bounding_boxes_2D(image_id):
    """Return the 2D boxes of a frame as ``(left, top, right, bottom)`` tuples.

    Rows are taken from the global ``df`` where ``frame == image_id``, in the
    order they appear in the frame; an unknown frame gives an empty list.
    """
    frame_rows = df[df['frame'] == image_id]
    return [(row['left'], row['top'], row['right'], row['bottom'])
            for _, row in frame_rows.iterrows()]

In [20]:
_HEIGHT_MODEL_CACHE = {}


def _load_height_model(path='height_model.pkl'):
    """Unpickle the height model stored at ``path`` once and reuse it afterwards."""
    if path not in _HEIGHT_MODEL_CACHE:
        # NOTE: unpickling can execute arbitrary code; only use trusted files.
        with open(path, 'rb') as f:
            _HEIGHT_MODEL_CACHE[path] = pickle.load(f)
    return _HEIGHT_MODEL_CACHE[path]


def approximate_distance_pc(image_id, model, height_model=None):
    """Estimate the distance of every 2D box of a frame from the projected point cloud.

    Boxes are processed from nearest to farthest, ordered by the height-based
    estimate. For each box the median z of the projected points falling strictly
    inside it is taken, the class-specific correction from ``model`` is added, and
    those points are then removed so that farther boxes cannot reuse them.

    Parameters
    ----------
    image_id : int
        Frame number.
    model : object
        Exposes ``car``, ``pedestrian`` and ``cyclist``, each with a ``function``
        and its ``variables``; ``function(distance, *variables)`` is the correction
        added to the median distance.
    height_model : object, optional
        Model used only to order the boxes. When omitted, 'height_model.pkl' is
        loaded from the working directory (once; later calls reuse it).

    Returns
    -------
    dict
        Keys 'frame', 'left', 'top', 'right', 'bottom', 'type' and 'distance',
        each a list with one entry per box in processing order. 'distance' is
        ``None`` for boxes that contain no projected point.
    """
    bbs = get_bounding_boxes_2D(image_id)
    projected_pc = get_point_cloud_projected(image_id)
    frame_rows = df[df['frame'] == image_id]
    types = frame_rows['type'].tolist()
    incomplete = frame_rows['incomplete_2d_horizontal'].tolist()

    if height_model is None:
        height_model = _load_height_model()

    # Sort bbs by the distance predicted by the height regression (nearest first).
    # approximate_distance_height raises ValueError for unsupported types.
    order = sorted(range(len(bbs)),
                   key=lambda i: approximate_distance_height(types[i], bbs[i][1], bbs[i][3],
                                                             not incomplete[i], height_model))
    bbs = [bbs[i] for i in order]
    types = [types[i] for i in order]

    # Create output dict
    bbs_distance = {'frame':[image_id]*len(types),
                    'left':[bb[0] for bb in bbs],
                    'top':[bb[1] for bb in bbs],
                    'right':[bb[2] for bb in bbs],
                    'bottom':[bb[3] for bb in bbs],
                    'type':types,
                    'distance':[]}

    # Iterate over bb
    for type_name, bb in zip(types, bbs):
        # Points whose projection lies strictly inside the box
        in_bb = (bb[0] < projected_pc[0,:]) & (bb[1] < projected_pc[1,:]) &\
            (projected_pc[0,:] < bb[2]) & (projected_pc[1,:] < bb[3])
        pc_distance_bb = projected_pc[2,:][in_bb]

        if pc_distance_bb.size != 0: # Checks if there's any point on the bb 2D
            # Approximate distance
            distance = np.median(np.squeeze(np.asarray(pc_distance_bb)))
            if type_name in ('Car', 'Pedestrian', 'Cyclist'):
                # model.car / model.pedestrian / model.cyclist
                class_model = getattr(model, type_name.lower())
                distance += class_model.function(distance, *class_model.variables)
            else:
                print("Type must be Car, Pedestrian or Cyclist")

            bbs_distance['distance'].append(distance)
        else:
            bbs_distance['distance'].append(None)

        # Removing points from the pointcloud in the bb
        keep = ~in_bb
        projected_pc = np.concatenate((projected_pc[0,:][keep],
                                       projected_pc[1,:][keep],
                                       projected_pc[2,:][keep]), axis=0)

    return bbs_distance

In [None]:
# Run the point-cloud method on every frame and collect one row per bounding box.
# dict_items is defined before the loop because it is also needed afterwards to
# build the DataFrame, even when there are no frames.
dict_items = ['frame','left','top','right','bottom','type','distance']
distance_approx = {item: [] for item in dict_items}
frames = df['frame'].unique()
loading_bar = tqdm(total=len(frames), position=0, leave=True)
for frame in frames:
    frame_approx = approximate_distance_pc(frame, pc_proj_refinement_model)
    for item in dict_items:
        distance_approx[item].extend(frame_approx[item])
    loading_bar.update(1)
loading_bar.close()
# Same columns as dict_items, with 'distance' exposed as 'distance_pointcloud'
df_approx = pd.DataFrame(columns = dict_items[:-1]+['distance_pointcloud'],
                         data = list(zip(*(distance_approx[item] for item in dict_items))))

 45%|████▌     | 3379/7481 [01:01<01:17, 53.13it/s]

In [None]:
# Attach the point-cloud estimate to each split. Rows are matched on frame, box
# coordinates and type; the inner join drops rows without a match in df_approx.
# A 'distance_pointcloud' column left by a previous run of this cell is dropped
# first, so re-running does not create '_x'/'_y' suffixed duplicates.
merge_keys = ['frame','left','top','right','bottom','type']
df_train = pd.merge(df_train.drop(columns=['distance_pointcloud'], errors='ignore'),
                    df_approx, how='inner', on=merge_keys)
df_val = pd.merge(df_val.drop(columns=['distance_pointcloud'], errors='ignore'),
                  df_approx, how='inner', on=merge_keys)

In [None]:
# Display the validation split after merging in 'distance_pointcloud'
df_val

# Analyze model error as dependent on attributes

In [None]:
# Signed error of each method: reference 'distance' minus the method's estimate
for df_split in (df_train, df_val):
    for method in ('height', 'pointcloud'):
        df_split['error_distance_' + method] = df_split['distance'] - df_split['distance_' + method]

In [None]:
# Display the training split with the two 'error_distance_*' columns
df_train

## Analyze height model error

### Error based on approximate distance

In [None]:
# Bin width and range (in metres of approximate distance) for the quantile curves
jump = 5
start = 5
end = 60

# Loop over classes
for class_name in ["Car","Cyclist","Pedestrian"]:
    df_train_class = df_train[df_train['type'] == class_name]

    # 10th / 50th / 90th percentile of the error inside each distance bin.
    # The subtraction is vectorised; an empty bin yields NaN quantiles, which
    # matplotlib draws as a gap.
    MDistanceE_distances = []
    for distance in range(start, end, jump):
        in_bin = (distance < df_train_class['distance_height']) & \
                 (df_train_class['distance_height'] < distance+jump)
        df_bin = df_train_class[in_bin]
        MDistanceE_distance = (df_bin['distance'] - df_bin['distance_height']).quantile(q=[0.1,0.5,0.9])
        MDistanceE_distances.append(MDistanceE_distance)
    q_0_1 = [quantiles.get(0.1) for quantiles in MDistanceE_distances]
    q_0_5 = [quantiles.get(0.5) for quantiles in MDistanceE_distances]
    q_0_9 = [quantiles.get(0.9) for quantiles in MDistanceE_distances]

    Y = df_train_class['error_distance_height'].values
    X = df_train_class['distance_height'].values

    plt.title("Class %s" % class_name)
    plt.xlabel('Approximate distance (m)')
    plt.ylabel('Error (m)')

    plt.scatter(X, Y, s=0.3, c='black')

    # Quantile curves are drawn at the bin centres. Labels are attached to each
    # line explicitly so the legend cannot be assigned to the scatter instead.
    x = np.array(list(range(start,end,jump)))+jump/2
    plt.plot(x, q_0_1, label='q 0.1')
    plt.plot(x, q_0_5, label='q 0.5')
    plt.plot(x, q_0_9, label='q 0.9')
    plt.legend()

    plt.xlim((0,70))
    plt.ylim((-10,10))

    plt.show()

### Error based on height

In [None]:
# Smallest and largest bounding-box height (pixels) in the full table
for extreme in (min, max):
    print(extreme(df['height_image']))

In [None]:
# Bin width and range (in pixels of bounding-box height) for the quantile curves
jump = 20
start = 0
end = 280

# Loop over classes
for class_name in ["Car","Cyclist","Pedestrian"]:
    df_train_class = df_train[df_train['type'] == class_name]

    # 10th / 50th / 90th percentile of the error inside each height bin.
    # The subtraction is vectorised; an empty bin yields NaN quantiles, which
    # matplotlib draws as a gap.
    MHeightE_heigths = []
    for height in range(start, end, jump):
        in_bin = (height < df_train_class['height_image']) & \
                 (df_train_class['height_image'] < height+jump)
        df_bin = df_train_class[in_bin]
        MHeightE_heigth = (df_bin['distance'] - df_bin['distance_height']).quantile(q=[0.1,0.5,0.9])
        MHeightE_heigths.append(MHeightE_heigth)
    q_0_1 = [quantiles.get(0.1) for quantiles in MHeightE_heigths]
    q_0_5 = [quantiles.get(0.5) for quantiles in MHeightE_heigths]
    q_0_9 = [quantiles.get(0.9) for quantiles in MHeightE_heigths]

    Y = df_train_class['error_distance_height'].values
    X = df_train_class['height_image'].values

    plt.title("Class %s" % class_name)
    plt.xlabel('Height (pixels)')
    plt.ylabel('Error (m)')

    plt.scatter(X, Y, s=0.3, c='black')

    # Quantile curves are drawn at the bin centres. Labels are attached to each
    # line explicitly so the legend cannot be assigned to the scatter instead.
    x = np.array(list(range(start,end,jump)))+jump/2
    plt.plot(x, q_0_1, label='q 0.1')
    plt.plot(x, q_0_5, label='q 0.5')
    plt.plot(x, q_0_9, label='q 0.9')
    plt.legend()

    plt.xlim((0,300))
    plt.ylim((-10,10))

    plt.show()

## Analyze pointcloud model error

In [None]:
def get_n_points_pc_projection_without_intersections(image_id):
    """Count the projected LiDAR points that fall inside each 2D box of a frame.

    Boxes are visited from tallest to shortest (``bottom - top``). Points counted
    for one box are removed before the next box is processed, so no point is
    counted twice.

    Returns
    -------
    list
        ``[boxes, counts]``: two parallel lists holding the ``(left, top, right,
        bottom)`` tuples that contain at least one point and the number of points
        found in each.
    """
    boxes = get_bounding_boxes_2D(image_id)
    remaining_pc = get_point_cloud_projected(image_id)

    kept_boxes, kept_counts = [], []
    # Sort bbs by height, tallest first
    for box in sorted(boxes, key=lambda b: b[3] - b[1], reverse=True):
        left, top, right, bottom = box
        # Points whose projection lies strictly inside the box
        inside = (left < remaining_pc[0,:]) & (top < remaining_pc[1,:]) &\
            (remaining_pc[0,:] < right) & (remaining_pc[1,:] < bottom)
        n_inside = remaining_pc[2,:][inside].size

        if n_inside != 0: # Checks if there's any point on the bb 2D
            kept_boxes.append(box)
            kept_counts.append(n_inside)

        # Removing points from the pointcloud in the bb
        outside = ~inside
        remaining_pc = np.concatenate((remaining_pc[0,:][outside],
                                       remaining_pc[1,:][outside],
                                       remaining_pc[2,:][outside]), axis=0)

    return [kept_boxes, kept_counts]

In [None]:
frame_list, left_list, top_list, right_list, bottom_list, n_points_list = [], [], [], [], [], []
loading_bar = tqdm(total=len(df['frame'].unique()), position=0, leave=True)
# Iterate over frames
for frame in df['frame'].unique():
    # Boxes holding at least one projected point, with their point counts
    bbs2D, n_points = get_n_points_pc_projection_without_intersections(frame)
    # One row per returned box: repeat the frame id, split the box into columns
    frame_list += [frame] * len(n_points)
    for left, top, right, bottom in bbs2D:
        left_list.append(left)
        top_list.append(top)
        right_list.append(right)
        bottom_list.append(bottom)
    n_points_list += n_points

    loading_bar.update(1)
df_approx = pd.DataFrame(data = list(zip(frame_list, left_list, top_list, right_list, bottom_list, n_points_list)),
                         columns = ['frame','left','top','right','bottom','n_points'])

In [None]:
# Display the per-box point counts (df_approx now holds 'n_points', not distances)
df_approx

In [None]:
# Attach the point count to each split, matching rows on frame and box coordinates.
# df_approx only lists boxes with at least one point, so the inner join drops the
# others. An 'n_points' column left by a previous run of this cell is dropped
# first, so re-running does not create '_x'/'_y' suffixed duplicates.
n_points_keys = ['frame','left','top','right','bottom']
df_train = pd.merge(df_train.drop(columns=['n_points'], errors='ignore'),
                    df_approx, how='inner', on=n_points_keys)
df_val = pd.merge(df_val.drop(columns=['n_points'], errors='ignore'),
                  df_approx, how='inner', on=n_points_keys)

In [None]:
# Display the validation split after merging in 'n_points'
df_val

### Error based on approximate distance

In [None]:
# Bin width and range (in metres of approximate distance) for the quantile curves
jump = 5
start = 0
end = 60

# Loop over classes
for class_name in ["Car","Cyclist","Pedestrian"]:
    df_train_class = df_train[df_train['type'] == class_name]

    # 10th / 50th / 90th percentile of the error inside each distance bin.
    # The subtraction is vectorised; an empty bin yields NaN quantiles, which
    # matplotlib draws as a gap.
    MDistanceE_distances = []
    for distance in range(start, end, jump):
        in_bin = (distance < df_train_class['distance_pointcloud']) & \
                 (df_train_class['distance_pointcloud'] < distance+jump)
        df_bin = df_train_class[in_bin]
        MDistanceE_distance = (df_bin['distance'] - df_bin['distance_pointcloud']).quantile(q=[0.1,0.5,0.9])
        MDistanceE_distances.append(MDistanceE_distance)
    q_0_1 = [quantiles.get(0.1) for quantiles in MDistanceE_distances]
    q_0_5 = [quantiles.get(0.5) for quantiles in MDistanceE_distances]
    q_0_9 = [quantiles.get(0.9) for quantiles in MDistanceE_distances]

    Y = df_train_class['error_distance_pointcloud'].values
    X = df_train_class['distance_pointcloud'].values

    plt.title("Class %s" % class_name)
    plt.xlabel('Approximate distance (m)')
    plt.ylabel('Error (m)')

    plt.scatter(X, Y, s=0.3, c='black')

    # Quantile curves are drawn at the bin centres. Labels are attached to each
    # line explicitly so the legend cannot be assigned to the scatter instead.
    x = np.array(list(range(start,end,jump)))+jump/2
    plt.plot(x, q_0_1, label='q 0.1')
    plt.plot(x, q_0_5, label='q 0.5')
    plt.plot(x, q_0_9, label='q 0.9')
    plt.legend()

    plt.xlim((0,70))
    plt.ylim((-10,10))

    plt.show()

### Error based on pointcloud density

In [None]:
# Smallest and largest number of projected points per box in the training split
for extreme in (min, max):
    print(extreme(df_train['n_points']))

In [None]:
# Histogram of the number of projected points per box in the training split
df_train.hist(column='n_points')

In [None]:
# Bin width and range (in number of projected points) for the quantile curves
jump = 250
start = 0
end = 3500

# Loop over classes
for class_name in ["Car","Cyclist","Pedestrian"]:
    df_train_class = df_train[df_train['type'] == class_name]

    # 10th / 50th / 90th percentile of the error inside each point-count bin.
    # The subtraction is vectorised; an empty bin yields NaN quantiles, which
    # matplotlib draws as a gap.
    MPointE_points = []
    for point in range(start, end, jump):
        in_bin = (point < df_train_class['n_points']) & \
                 (df_train_class['n_points'] < point+jump)
        df_bin = df_train_class[in_bin]
        MPointE_point = (df_bin['distance'] - df_bin['distance_pointcloud']).quantile(q=[0.1,0.5,0.9])
        MPointE_points.append(MPointE_point)
    q_0_1 = [quantiles.get(0.1) for quantiles in MPointE_points]
    q_0_5 = [quantiles.get(0.5) for quantiles in MPointE_points]
    q_0_9 = [quantiles.get(0.9) for quantiles in MPointE_points]

    Y = df_train_class['error_distance_pointcloud'].values
    # The x axis of this figure is the point count, matching the axis label, the
    # limits and the bins above (not the approximate distance).
    X = df_train_class['n_points'].values

    plt.title("Class %s" % class_name)
    plt.xlabel('Pointcloud density (n points)')
    plt.ylabel('Error (m)')

    plt.scatter(X, Y, s=0.3, c='black')

    # Quantile curves are drawn at the bin centres. Labels are attached to each
    # line explicitly so the legend cannot be assigned to the scatter instead.
    x = np.array(list(range(start,end,jump)))+jump/2
    plt.plot(x, q_0_1, label='q 0.1')
    plt.plot(x, q_0_5, label='q 0.5')
    plt.plot(x, q_0_9, label='q 0.9')
    plt.legend()

    plt.xlim((0,3500))
    plt.ylim((-10,10))

    plt.show()

# Create ensemble method

## Create error functions

Because a value that never occurred in the training dataset may appear at inference time, a function is fitted for each error model so that the error range can be inferred for any value.

In [None]:
class Regression_model_error:
    """Container for the four error models (two per distance-estimation method)."""

    def __init__(self, height_distance, height_2d, pointcloud_distance, pointcloud_n_points):
        self.height_distance = height_distance
        self.height_2d = height_2d
        self.pointcloud_distance = pointcloud_distance
        self.pointcloud_n_points = pointcloud_n_points

    def __str__(self):
        template = "height_distance: {%s},\nheight_2d: {%s},\npointcloud_distance: {%s},\npointcloud_n_points: {%s}"
        members = (self.height_distance, self.height_2d,
                   self.pointcloud_distance, self.pointcloud_n_points)
        return template % members

In [None]:
# Functions to fit
# objective_2f, objective_3f and objective_4f are already defined in the
# model-definition cell near the top of the notebook and are not redefined here.
# A second definition would rebind those names to new function objects, so the
# functions referenced by the unpickled models would no longer be the module-level
# ones and those models could not be pickled again.

def objective_5f(x, a, b, c, d, e, f):
    """Degree-5 polynomial, coefficients given from the highest power down."""
    return a * x**5 + b * x**4 + c * x**3 + d * x**2 + e * x + f

def objective_6f(x, a, b, c, d, e, f, g):
    """Degree-6 polynomial, coefficients given from the highest power down."""
    return a * x**6 + b * x**5 + c * x**4 + d * x**3 + e * x**2 + f * x + g

def objective_7f(x, a, b, c, d, e, f, g, h):
    """Degree-7 polynomial, coefficients given from the highest power down."""
    return a * x**7 + b * x**6 + c * x**5 + d * x**4 + e * x**3 + f * x**2 + g * x + h

def objective_8f(x, a, b, c, d, e, f, g, h, i):
    """Degree-8 polynomial, coefficients given from the highest power down."""
    return a * x**8 + b * x**7 + c * x**6 + d * x**5 + e * x**4 + f * x**3 + g * x**2 + h * x + i

def objective_9f(x, a, b, c, d, e, f, g, h, i, j):
    """Degree-9 polynomial, coefficients given from the highest power down."""
    return a * x**9 + b * x**8 + c * x**7 + d * x**6 + e * x**5 + f * x**4 + g * x**3 + h * x**2 + i * x + j

def objective_10f(x, a, b, c, d, e, f, g, h, i, j, k):
    """Degree-10 polynomial, coefficients given from the highest power down."""
    return a * x**10 + b * x**9 + c * x**8 + d * x**7 + e * x**6 + f * x**5 + g * x**4 + h * x**3 + i * x**2 + j * x + k
 
# Variables for the different error models
# One entry per error model, in the order of error_model_names:
# bin width (jumps), start of the first bin (starts) and end of the binned range (ends).
jumps = [5, 20, 5, 250]
starts = [0, 0, 0, 0]
ends = [65, 280, 60, 3500]

# Candidate polynomials (degree 2 to 8) tried for every (error model, class) pair
functions = [objective_2f, objective_3f, objective_4f, objective_5f, objective_6f, objective_7f, objective_8f]
error_model_names = ['height_distance', 'height_2d', 'pointcloud_distance', 'pointcloud_n_points']
# df_train column that is binned for each error model (same order as error_model_names)
model_columns = ['distance_height', 'height_image', 'distance_pointcloud', 'n_points']
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Loop over error models
for error_model_name, model_column, jump, start, end in zip(error_model_names, model_columns,jumps, starts, ends):
    # Loop over class names
    for class_name in class_names:
        
        # 'distance_height' or 'distance_pointcloud': the estimate whose error is analysed
        approximate_distance_column = 'distance_' + error_model_name.split("_")[0]
        
        df_train_class = df_train[df_train['type'] == class_name]
    
        # Calculate deciles
        # (10th and 90th percentile of distance - estimate inside each open bin (point, point+jump))
        MPointE_points = []
        for point in range(start, end, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.1,0.9])
            MPointE_points.append(MPointE_point)
        q_0_1 = list(map(lambda x: x.get(0.1), MPointE_points))
        q_0_9 = list(map(lambda x: x.get(0.9), MPointE_points))

        # Create axis
        # Y: width of the 10%-90% error band per bin; X: bin centres
        Y = [x1 - x2 for (x1, x2) in zip(q_0_9, q_0_1)]
        X = (np.array(list(range(start,end,jump)))+jump/2).tolist()
        
        # Fit every candidate to (X, Y); the MSE is computed on the same points used for fitting
        popt_functions = list(map(lambda function: curve_fit(function, X, Y,maxfev = 1000000)[0], functions))
        mses_class_model_error = list(map(lambda function, popt: mean_squared_error(Y, list(map(lambda x: function(x, *popt), X))), functions, popt_functions))
        
        # Keep the candidate with the lowest MSE (first one on ties, via list.index)
        min_mse = min(mses_class_model_error)
        id_function = mses_class_model_error.index(min_mse)
        function = functions[id_function]
        popt = popt_functions[id_function]
        
        # Abscissas spaced by 1 (offset by 0.5) used to draw the selected curve
        X_line = (np.array(list(range(start,end,1)))+1/2).tolist()
        
        print(error_model_name)
        print(class_name)
        print(function)
        print(popt)
        print(mses_class_model_error)
        # Binned band width (solid) against the selected fit (red dashed)
        plt.plot(X, Y)
        plt.plot(X_line, list(map(lambda x: function(x, *popt), X_line)), '--', color='red')
        plt.xlim((start,end))
        plt.show()

Reduce the overfitting

In [None]:
# Variables for the different error models
# One entry per error model, in the order of error_model_names:
# bin width (jumps), start of the first bin (starts) and end of the binned range (ends).
jumps = [5, 20, 5, 250]
starts = [0, 0, 0, 0]
ends = [65, 280, 60, 3500]

# Candidate polynomials (degree 2 to 10) tried for every (error model, class) pair
functions = [objective_2f, objective_3f, objective_4f, objective_5f, objective_6f, objective_7f, objective_8f, objective_9f, objective_10f]
error_model_names = ['height_distance', 'height_2d', 'pointcloud_distance', 'pointcloud_n_points']
# df_train column that is binned for each error model (same order as error_model_names)
model_columns = ['distance_height', 'height_image', 'distance_pointcloud', 'n_points']
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Loop over error models
for error_model_name, model_column, jump, start, end in zip(error_model_names, model_columns,jumps, starts, ends):
    # Loop over class names
    for class_name in class_names:
        
        # 'distance_height' or 'distance_pointcloud': the estimate whose error is analysed
        approximate_distance_column = 'distance_' + error_model_name.split("_")[0]
        
        df_train_class = df_train[df_train['type'] == class_name]
    
        # Calculate deciles
        # (10th and 90th percentile of distance - estimate inside each open bin (point, point+jump))
        MPointE_points = []
        for point in range(start, end, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.1,0.9])
            MPointE_points.append(MPointE_point)
        q_0_1 = list(map(lambda x: x.get(0.1), MPointE_points))
        q_0_9 = list(map(lambda x: x.get(0.9), MPointE_points))

        # Create axis
        # Y: width of the 10%-90% error band per bin; X: bin centres
        Y = [x1 - x2 for (x1, x2) in zip(q_0_9, q_0_1)]
        X = (np.array(list(range(start,end,jump)))+jump/2).tolist()
        
        # Only one function with training data is to be created, so the validation dataset is not necessary
        # Instead, the same training rows are re-binned with the bin edges moved half a
        # bin inwards (int() truncates the half-bin offset); these bins give X_val / Y_val.
        start_ = int(start + jump/2)
        end_ = int(end - jump/2)
        MPointE_points_ = []
        for point in range(start_, end_, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.1,0.9])
            MPointE_points_.append(MPointE_point)
        q_0_1_ = list(map(lambda x: x.get(0.1), MPointE_points_))
        q_0_9_ = list(map(lambda x: x.get(0.9), MPointE_points_))
        Y_val = [x1 - x2 for (x1, x2) in zip(q_0_9_, q_0_1_)]
        X_val = (np.array(list(range(start_,end_,jump)))+jump/2).tolist()
        
        
        # Fit every candidate on (X, Y) but score it on the shifted bins (X_val, Y_val)
        popt_functions = list(map(lambda function: curve_fit(function, X, Y,maxfev = 1000000)[0], functions))
        mses_class_model_error = list(map(lambda function, popt: mean_squared_error(Y_val, list(map(lambda x: function(x, *popt), X_val))), functions, popt_functions))
        
        # Keep the candidate with the lowest MSE (first one on ties, via list.index)
        min_mse = min(mses_class_model_error)
        id_function = mses_class_model_error.index(min_mse)
        function = functions[id_function]
        popt = popt_functions[id_function]
        
        # Abscissas spaced by 1 (offset by 0.5) used to draw the selected curve
        X_line = (np.array(list(range(start,end,1)))+1/2).tolist()
        
        print(error_model_name)
        print(class_name)
        print(function)
        print(popt)
        print(mses_class_model_error)
        # Binned band width (solid) against the selected fit (red dashed)
        plt.plot(X, Y)
        plt.plot(X_line, list(map(lambda x: function(x, *popt), X_line)), '--', color='red')
        plt.xlim((start,end))
        plt.ylim((0,15))
        plt.show()

Reduce overfitting in the first and last values

In [None]:
# Selected Regression_model for every (error model, class) pair; filled in by the loop below
dict_models = {'height_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'height_2d':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_n_points':{'Car':None,'Cyclist':None,'Pedestrian':None}}

# Variables for the different error models
# One entry per error model, in the order of error_model_names:
# bin width (jumps), start of the first bin (starts) and end of the binned range (ends).
# Unlike the previous cells, the first bin no longer starts at 0.
jumps = [5, 20, 5, 250]
starts = [3, 10, 3, 120]
ends = [65, 280, 60, 3500]

# Candidate polynomials (degree 2 to 10) tried for every (error model, class) pair
functions = [objective_2f, objective_3f, objective_4f, objective_5f, objective_6f, objective_7f, objective_8f, objective_9f, objective_10f]
error_model_names = ['height_distance', 'height_2d', 'pointcloud_distance', 'pointcloud_n_points']
# df_train column that is binned for each error model (same order as error_model_names)
model_columns = ['distance_height', 'height_image', 'distance_pointcloud', 'n_points']
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Loop over error models
for error_model_name, model_column, jump, start, end in zip(error_model_names, model_columns,jumps, starts, ends):
    # Loop over class names
    for class_name in class_names:
        
        # 'distance_height' or 'distance_pointcloud': the estimate whose error is analysed
        approximate_distance_column = 'distance_' + error_model_name.split("_")[0]
        
        df_train_class = df_train[df_train['type'] == class_name]
    
        # Calculate deciles
        # (10th and 90th percentile of distance - estimate inside each open bin (point, point+jump))
        MPointE_points = []
        for point in range(start, end, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.1,0.9])
            MPointE_points.append(MPointE_point)
        q_0_1 = list(map(lambda x: x.get(0.1), MPointE_points))
        q_0_9 = list(map(lambda x: x.get(0.9), MPointE_points))

        # Create axis
        # Y: width of the 10%-90% error band per bin; X: bin centres
        Y = [x1 - x2 for (x1, x2) in zip(q_0_9, q_0_1)]
        X = (np.array(list(range(start,end,jump)))+jump/2).tolist()
        
        # Only one function with training data is to be created, so the validation dataset is not necessary
        # Instead, the same training rows are re-binned with the bin edges moved half a
        # bin outwards (int() truncates toward zero); these bins give X_val / Y_val.
        start_ = int(start - jump/2)
        end_ = int(end + jump/2)
        MPointE_points_ = []
        for point in range(start_, end_, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.1,0.9])
            MPointE_points_.append(MPointE_point)
        q_0_1_ = list(map(lambda x: x.get(0.1), MPointE_points_))
        q_0_9_ = list(map(lambda x: x.get(0.9), MPointE_points_))
        Y_val = [x1 - x2 for (x1, x2) in zip(q_0_9_, q_0_1_)]
        X_val = (np.array(list(range(start_,end_,jump)))+jump/2).tolist()
        
        
        # Fit every candidate on (X, Y) but score it on the shifted bins (X_val, Y_val)
        popt_functions = list(map(lambda function: curve_fit(function, X, Y,maxfev = 1000000)[0], functions))
        mses_class_model_error = list(map(lambda function, popt: mean_squared_error(Y_val, list(map(lambda x: function(x, *popt), X_val))), functions, popt_functions))
        
        # Keep the candidate with the lowest MSE (first one on ties, via list.index)
        min_mse = min(mses_class_model_error)
        id_function = mses_class_model_error.index(min_mse)
        function = functions[id_function]
        popt = popt_functions[id_function]
        
        # Abscissas spaced by 1 (offset by 0.5) used to draw the selected curve
        X_line = (np.array(list(range(start,end,1)))+1/2).tolist()
        
        # Store the selected function and its fitted parameters for this pair
        dict_models[error_model_name][class_name] = Regression_model(function, popt)
        
        print(error_model_name)
        print(class_name)
        print(function)
        print(popt)
        print(mses_class_model_error)
        # Binned band width (solid) against the selected fit (red dashed)
        plt.plot(X, Y)
        plt.plot(X_line, list(map(lambda x: function(x, *popt), X_line)), '--', color='red')
        plt.xlim((start,end))
        plt.ylim((0,15))
        plt.show()

In [None]:
# Display the nested dict of selected models (error model -> class -> Regression_model)
dict_models

In [None]:
def get_error_model(dict_models):
    """Assemble the per-class regression models into a Regression_model_error.

    Args:
        dict_models: nested dict ``{error_model_name: {class_name: model}}``
            with the error model names 'height_distance', 'height_2d',
            'pointcloud_distance' and 'pointcloud_n_points' and the class
            names 'Car', 'Cyclist' and 'Pedestrian'.

    Returns:
        A Regression_model_error built from four Regression_model_class
        objects, passed in the order height_distance, height_2d,
        pointcloud_distance, pointcloud_n_points.
    """
    def per_class(models, cyclist_key='Cyclist', pedestrian_key='Pedestrian'):
        # Keyword arguments on purpose: Regression_model_class takes
        # (model_car, model_pedestrian, model_cyclist), so passing
        # Car, Cyclist, Pedestrian positionally swaps the last two classes.
        return Regression_model_class(model_car=models['Car'],
                                      model_pedestrian=models[pedestrian_key],
                                      model_cyclist=models[cyclist_key])

    error_model = Regression_model_error(
        per_class(dict_models['height_distance']),
        per_class(dict_models['height_2d']),
        # NOTE(review): the Car model is reused for every class here, exactly
        # as before. Confirm whether this is deliberate or a copy-paste slip.
        per_class(dict_models['pointcloud_distance'], cyclist_key='Car', pedestrian_key='Car'),
        per_class(dict_models['pointcloud_n_points']))
    return error_model

In [None]:
# Bundle the fitted models into the object that ajust_distance() reads
error_model = get_error_model(dict_models)

In [None]:
# Text dump of the assembled model (format depends on the __str__ of the model classes)
print(error_model)

In [None]:
#with open('error_model.pkl', 'wb') as f:
    #f.write(pickle.dumps(error_model))

# Validate ensemble method

## Usage of the new method

In [None]:
def ajust_distance(error_model, height_distance, height_2d, pointcloud_distance, pointcloud_n_points, class_name):
    """Blend the height-based and pointcloud-based distances of one object.

    Four regression models (selected by object class) predict an error from
    height_distance, height_2d, pointcloud_distance and pointcloud_n_points.
    Each distance estimate is weighted by one minus the share its two
    predicted errors take of the total predicted error.

    Args:
        error_model: object exposing height_distance, height_2d,
            pointcloud_distance and pointcloud_n_points, each with car,
            cyclist and pedestrian models (``.function`` and ``.variables``).
        height_distance: distance estimated from the object height.
        height_2d: input of the height_2d error model.
        pointcloud_distance: distance estimated from the pointcloud.
        pointcloud_n_points: input of the pointcloud_n_points error model.
        class_name: 'Car', 'Cyclist' or 'Pedestrian'.

    Returns:
        The weighted combination of height_distance and pointcloud_distance.

    Raises:
        ValueError: if class_name is not one of the three supported classes.
    """
    # Resolve which per-class attribute to read from every error model
    attr_by_class = (('Car', 'car'), ('Cyclist', 'cyclist'), ('Pedestrian', 'pedestrian'))
    attr = next((name for label, name in attr_by_class if class_name == label), None)
    if attr is None:
        raise ValueError("Type must be Car, Pedestrian or Cyclist")

    hd_model = getattr(error_model.height_distance, attr)
    h2d_model = getattr(error_model.height_2d, attr)
    pc_model = getattr(error_model.pointcloud_distance, attr)
    pcn_model = getattr(error_model.pointcloud_n_points, attr)

    # Predicted error of each metric for this object
    hd_err = hd_model.function(height_distance, *hd_model.variables)
    h2d_err = h2d_model.function(height_2d, *h2d_model.variables)
    pc_err = pc_model.function(pointcloud_distance, *pc_model.variables)
    pcn_err = pcn_model.function(pointcloud_n_points, *pcn_model.variables)

    # The two weights add up to one; the larger predicted error gets the smaller weight
    total_err = hd_err + h2d_err + pc_err + pcn_err
    height_weight = 1 - (hd_err + h2d_err)/total_err
    pointcloud_weight = 1 - (pc_err + pcn_err)/total_err
    return height_weight * height_distance + pointcloud_weight * pointcloud_distance

In [None]:
# Smoke test: a single call with hand-picked inputs for a 'Car'
ajust_distance(error_model, 30, 40, 20, 5, 'Car')

In [None]:
# Ensemble distance for every object, falling back to the height-based
# estimate when no pointcloud distance is available.
# Missing values read from CSV are NaN and `NaN != None` is always True,
# so the check has to be pd.notna() (which also covers None).
df_train['distance_ensemble'] = df_train.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)
df_val['distance_ensemble'] = df_val.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)

In [None]:
# Inspect the training frame with the new 'distance_ensemble' column
df_train

## Validate using KITTI difficulties

In [None]:
def define_difficulty(row):
    """Classify one labelled object into a difficulty level.

    Args:
        row: mapping with the keys 'top', 'bottom', 'occluded' and 'truncated'.

    Returns:
        0 when the box is at least 40 high, occluded == 0 and truncated <= 0.15;
        1 when at least 25 high, occluded <= 1 and truncated <= 0.3;
        2 when at least 25 high, occluded <= 2 and truncated <= 0.5;
        3 otherwise.
    """
    bbox_height = row['bottom'] - row['top']
    occ = row['occluded']
    trunc = row['truncated']

    # Strictest level first
    if bbox_height >= 40 and occ == 0 and trunc <= 0.15:
        return 0
    # Both remaining levels share the same minimum height
    if not bbox_height >= 25:
        return 3
    if occ <= 1 and trunc <= 0.3:
        return 1
    if occ <= 2 and trunc <= 0.5:
        return 2
    return 3

In [None]:
# Tag every object of both splits with its difficulty level (0-3)
for _split in (df_train, df_val):
    _split['difficulty'] = _split.apply(define_difficulty, axis=1)

In [None]:
# Inspect the validation frame with the new 'difficulty' column
df_val

In [None]:
def evaluate(distance_column, df=None):
    """Print the MSE of a distance column per difficulty level and class.

    The levels are cumulative: "Moderate" also contains the "Easy" objects and
    "Hard" contains both (rows are selected with ``difficulty <= level``).
    The MSE is pooled over all selected objects, not averaged per frame.

    Args:
        distance_column: name of the column holding the estimated distance,
            compared against the 'distance' column.
        df: frame to evaluate; it needs the columns 'difficulty', 'type',
            'distance' and ``distance_column``. Defaults to the global df_val.

    Returns:
        None. The results are printed.
    """
    if df is None:
        df = df_val

    difficulties = [0, 1, 2]
    classes = ["Car","Cyclist","Pedestrian"]
    text_difficulties = ["Easy", "Moderate", "Hard"]

    print("################################", end="\n\n")

    # Iterate over difficulties
    for difficulty, text_difficulty in zip(difficulties, text_difficulties):
        # Iterate over classes
        for class_name in classes:
            df_difficulty = df[(df['difficulty'] <= difficulty) & (df['type'] == class_name)]

            # mean_squared_error raises on empty input, so report and move on
            if df_difficulty.empty:
                print("\nNo objects for %s difficulty, %s class: MSE not computed\n" % (text_difficulty, class_name))
                continue

            start_time = time.time()
            mse_value = mean_squared_error(df_difficulty['distance'].tolist(),
                                           df_difficulty[distance_column].tolist())
            print("--- %s seconds to calculate MSE, %s difficulty, %s class  ---" % (time.time() - start_time, text_difficulty, class_name))

            print("\nMSE over all objects, %s difficulty, %s class = %s\n" % (text_difficulty, class_name, mse_value))

        print("################################", end="\n\n")

In [None]:
# Validation MSE of the ensemble built on the decile-based error models
evaluate('distance_ensemble')

# Use of the squared error in the ensemble model

In [None]:
def ajust_distance2(error_model, height_distance, height_2d, pointcloud_distance, pointcloud_n_points, class_name):
    """Blend both distance estimates using the squared predicted errors.

    Works like ajust_distance, except that the output of every regression
    model is squared before the weights are computed.

    Args:
        error_model: object exposing height_distance, height_2d,
            pointcloud_distance and pointcloud_n_points, each with car,
            cyclist and pedestrian models (``.function`` and ``.variables``).
        height_distance: distance estimated from the object height.
        height_2d: input of the height_2d error model.
        pointcloud_distance: distance estimated from the pointcloud.
        pointcloud_n_points: input of the pointcloud_n_points error model.
        class_name: 'Car', 'Cyclist' or 'Pedestrian'.

    Returns:
        The weighted combination of height_distance and pointcloud_distance.

    Raises:
        ValueError: if class_name is not one of the three supported classes.
    """
    # Find the per-class attribute name; the else branch runs when nothing matched
    for label, attr in (('Car', 'car'), ('Cyclist', 'cyclist'), ('Pedestrian', 'pedestrian')):
        if class_name == label:
            break
    else:
        raise ValueError("Type must be Car, Pedestrian or Cyclist")

    hd_model = getattr(error_model.height_distance, attr)
    h2d_model = getattr(error_model.height_2d, attr)
    pc_model = getattr(error_model.pointcloud_distance, attr)
    pcn_model = getattr(error_model.pointcloud_n_points, attr)

    # Squared predicted error of each metric
    hd_sq = hd_model.function(height_distance, *hd_model.variables) ** 2
    h2d_sq = h2d_model.function(height_2d, *h2d_model.variables) ** 2
    pc_sq = pc_model.function(pointcloud_distance, *pc_model.variables) ** 2
    pcn_sq = pcn_model.function(pointcloud_n_points, *pcn_model.variables) ** 2

    # Weight each estimate by one minus its share of the total squared error
    total_sq = hd_sq + h2d_sq + pc_sq + pcn_sq
    height_weight = 1 - (hd_sq + h2d_sq)/total_sq
    pointcloud_weight = 1 - (pc_sq + pcn_sq)/total_sq
    return height_weight * height_distance + pointcloud_weight * pointcloud_distance

In [None]:
# Ensemble distance using squared predicted errors, falling back to the
# height-based estimate when no pointcloud distance is available.
# Missing values read from CSV are NaN and `NaN != None` is always True,
# so the check has to be pd.notna() (which also covers None).
df_train['distance_ensemble2'] = df_train.apply(
    lambda row: ajust_distance2(error_model,
                                row['distance_height'],
                                row['height_image'],
                                row['distance_pointcloud'],
                                row['n_points'],
                                row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)
df_val['distance_ensemble2'] = df_val.apply(
    lambda row: ajust_distance2(error_model,
                                row['distance_height'],
                                row['height_image'],
                                row['distance_pointcloud'],
                                row['n_points'],
                                row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)

In [None]:
# Validation MSE when the predicted errors are squared before weighting
evaluate('distance_ensemble2')

# Using centiles instead of deciles to create the error function

In [None]:
# Container for the selected model of every (error model, class) pair; filled by the loop below
dict_models = {'height_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'height_2d':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_n_points':{'Car':None,'Cyclist':None,'Pedestrian':None}}

# Variables for the different error models, one entry per name in error_model_names:
# bin width (jump), first bin edge (start) and upper limit of the bin edges (end)
jumps = [5, 20, 5, 250]
starts = [3, 10, 3, 120]
ends = [65, 280, 60, 3500]

# Candidate functions: every one is fitted and the one with the lowest MSE is kept
functions = [objective_2f, objective_3f, objective_4f, objective_5f, objective_6f, objective_7f, objective_8f, objective_9f, objective_10f]
error_model_names = ['height_distance', 'height_2d', 'pointcloud_distance', 'pointcloud_n_points']
# Column that is binned for each error model (same order as error_model_names)
model_columns = ['distance_height', 'height_image', 'distance_pointcloud', 'n_points']
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Loop over error models
for error_model_name, model_column, jump, start, end in zip(error_model_names, model_columns,jumps, starts, ends):
    # Loop over class names
    for class_name in class_names:
        
        # 'distance_height' for the height_* models, 'distance_pointcloud' for the pointcloud_* models
        approximate_distance_column = 'distance_' + error_model_name.split("_")[0]
        
        df_train_class = df_train[df_train['type'] == class_name]
    
        # Calculate centiles: 1st and 99th percentile of the signed error
        # (ground truth - estimate) inside each bin. Both bounds are strict,
        # so values lying exactly on a bin edge are left out.
        MPointE_points = []
        for point in range(start, end, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.01,0.99])
            MPointE_points.append(MPointE_point)
        q_0_01 = list(map(lambda x: x.get(0.01), MPointE_points))
        q_0_99 = list(map(lambda x: x.get(0.99), MPointE_points))

        # Create axis: Y is the spread between both percentiles, X the bin centres
        Y = [x1 - x2 for (x1, x2) in zip(q_0_99, q_0_01)]
        X = (np.array(list(range(start,end,jump)))+jump/2).tolist()
        
        # Only one function with training data is to be created, so the validation dataset is not necessary
        # The candidates are scored on the same training rows, re-binned with
        # the edges shifted by half a bin (truncated to int).
        start_ = int(start - jump/2)
        end_ = int(end + jump/2)
        MPointE_points_ = []
        for point in range(start_, end_, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.01,0.99])
            MPointE_points_.append(MPointE_point)
        q_0_01_ = list(map(lambda x: x.get(0.01), MPointE_points_))
        q_0_99_ = list(map(lambda x: x.get(0.99), MPointE_points_))
        Y_val = [x1 - x2 for (x1, x2) in zip(q_0_99_, q_0_01_)]
        X_val = (np.array(list(range(start_,end_,jump)))+jump/2).tolist()
        
        
        # Fit every candidate to (X, Y), then score it on the shifted bins (X_val, Y_val)
        popt_functions = list(map(lambda function: curve_fit(function, X, Y,maxfev = 1000000)[0], functions))
        mses_class_model_error = list(map(lambda function, popt: mean_squared_error(Y_val, list(map(lambda x: function(x, *popt), X_val))), functions, popt_functions))
        
        # Keep the candidate with the lowest MSE
        min_mse = min(mses_class_model_error)
        id_function = mses_class_model_error.index(min_mse)
        function = functions[id_function]
        popt = popt_functions[id_function]
        
        # Grid with a step of 1, used only to draw the fitted curve
        X_line = (np.array(list(range(start,end,1)))+1/2).tolist()
        
        dict_models[error_model_name][class_name] = Regression_model(function, popt)
        
        print(error_model_name)
        print(class_name)
        print(function)
        print(popt)
        print(mses_class_model_error)
        # Binned spread (solid line) against the selected fit (red dashed line)
        plt.plot(X, Y)
        plt.plot(X_line, list(map(lambda x: function(x, *popt), X_line)), '--', color='red')
        plt.xlim((start,end))
        plt.ylim((0,30))
        plt.show()

In [None]:
# Rebuild the error model from the centile-based fits stored in dict_models
error_model = get_error_model(dict_models)

In [None]:
# Ensemble distance with the centile-based error models, falling back to the
# height-based estimate when no pointcloud distance is available.
# Both splits use the same guard. Missing values read from CSV are NaN and
# `NaN != None` is always True, so the check has to be pd.notna().
df_train['distance_ensemble3'] = df_train.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)
df_val['distance_ensemble3'] = df_val.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)

In [None]:
# Validation MSE of the ensemble built on the centile-based error models
evaluate('distance_ensemble3')

# Using centiles instead of deciles to create the error function and squared error in the ensemble model

In [None]:
# Container for the selected model of every (error model, class) pair; filled by the loop below
dict_models = {'height_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'height_2d':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_distance':{'Car':None,'Cyclist':None,'Pedestrian':None},
               'pointcloud_n_points':{'Car':None,'Cyclist':None,'Pedestrian':None}}

# Variables for the different error models, one entry per name in error_model_names:
# bin width (jump), first bin edge (start) and upper limit of the bin edges (end)
jumps = [5, 20, 5, 250]
starts = [3, 10, 3, 120]
ends = [65, 280, 60, 3500]

# Candidate functions: every one is fitted and the one with the lowest MSE is kept
functions = [objective_2f, objective_3f, objective_4f, objective_5f, objective_6f, objective_7f, objective_8f, objective_9f, objective_10f]
error_model_names = ['height_distance', 'height_2d', 'pointcloud_distance', 'pointcloud_n_points']
# Column that is binned for each error model (same order as error_model_names)
model_columns = ['distance_height', 'height_image', 'distance_pointcloud', 'n_points']
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Loop over error models
for error_model_name, model_column, jump, start, end in zip(error_model_names, model_columns,jumps, starts, ends):
    # Loop over class names
    for class_name in class_names:
        
        # 'distance_height' for the height_* models, 'distance_pointcloud' for the pointcloud_* models
        approximate_distance_column = 'distance_' + error_model_name.split("_")[0]
        
        df_train_class = df_train[df_train['type'] == class_name]
    
        # Calculate centiles: 1st and 99th percentile of the signed error
        # (ground truth - estimate) inside each bin. Both bounds are strict,
        # so values lying exactly on a bin edge are left out.
        MPointE_points = []
        for point in range(start, end, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.01,0.99])
            MPointE_points.append(MPointE_point)
        q_0_01 = list(map(lambda x: x.get(0.01), MPointE_points))
        q_0_99 = list(map(lambda x: x.get(0.99), MPointE_points))

        # Create axis: Y is the SQUARED spread between both percentiles, X the bin centres
        Y = [(x1 - x2)**2 for (x1, x2) in zip(q_0_99, q_0_01)]
        X = (np.array(list(range(start,end,jump)))+jump/2).tolist()
        
        # Only one function with training data is to be created, so the validation dataset is not necessary
        # The candidates are scored on the same training rows, re-binned with
        # the edges shifted by half a bin (truncated to int).
        start_ = int(start - jump/2)
        end_ = int(end + jump/2)
        MPointE_points_ = []
        for point in range(start_, end_, jump):
            MPointE_point = df_train_class[(point < df_train_class[model_column]) & \
                                               (df_train_class[model_column] < point+jump)].apply(lambda row: row['distance']-row[approximate_distance_column],
                                                                                                        axis=1).quantile(q=[0.01,0.99])
            MPointE_points_.append(MPointE_point)
        q_0_01_ = list(map(lambda x: x.get(0.01), MPointE_points_))
        q_0_99_ = list(map(lambda x: x.get(0.99), MPointE_points_))
        Y_val = [(x1 - x2)**2 for (x1, x2) in zip(q_0_99_, q_0_01_)]
        X_val = (np.array(list(range(start_,end_,jump)))+jump/2).tolist()
        
        
        # Fit every candidate to (X, Y), then score it on the shifted bins (X_val, Y_val)
        popt_functions = list(map(lambda function: curve_fit(function, X, Y,maxfev = 1000000)[0], functions))
        mses_class_model_error = list(map(lambda function, popt: mean_squared_error(Y_val, list(map(lambda x: function(x, *popt), X_val))), functions, popt_functions))
        
        # Keep the candidate with the lowest MSE
        min_mse = min(mses_class_model_error)
        id_function = mses_class_model_error.index(min_mse)
        function = functions[id_function]
        popt = popt_functions[id_function]
        
        # Grid with a step of 1, used only to draw the fitted curve
        X_line = (np.array(list(range(start,end,1)))+1/2).tolist()
        
        dict_models[error_model_name][class_name] = Regression_model(function, popt)
        
        print(error_model_name)
        print(class_name)
        print(function)
        print(popt)
        print(mses_class_model_error)
        # Binned squared spread (solid line) against the selected fit (red dashed line)
        plt.plot(X, Y)
        plt.plot(X_line, list(map(lambda x: function(x, *popt), X_line)), '--', color='red')
        plt.xlim((start,end))
        plt.ylim((0,500))
        plt.show()

In [None]:
# Rebuild the error model from the fits on squared centile spreads stored in dict_models
error_model = get_error_model(dict_models)

In [None]:
# Assemble the error model from dict_models.
# Keyword arguments on purpose: Regression_model_class takes
# (model_car, model_pedestrian, model_cyclist), so passing
# Car, Cyclist, Pedestrian positionally swaps the last two classes.
error_model = Regression_model_error(
    Regression_model_class(model_car=dict_models['height_distance']['Car'],
                           model_pedestrian=dict_models['height_distance']['Pedestrian'],
                           model_cyclist=dict_models['height_distance']['Cyclist']),
    Regression_model_class(model_car=dict_models['height_2d']['Car'],
                           model_pedestrian=dict_models['height_2d']['Pedestrian'],
                           model_cyclist=dict_models['height_2d']['Cyclist']),
    # NOTE(review): the Car model is reused for every class here, exactly as
    # before. Confirm whether this is deliberate or a copy-paste slip.
    Regression_model_class(model_car=dict_models['pointcloud_distance']['Car'],
                           model_pedestrian=dict_models['pointcloud_distance']['Car'],
                           model_cyclist=dict_models['pointcloud_distance']['Car']),
    Regression_model_class(model_car=dict_models['pointcloud_n_points']['Car'],
                           model_pedestrian=dict_models['pointcloud_n_points']['Pedestrian'],
                           model_cyclist=dict_models['pointcloud_n_points']['Cyclist']))

In [None]:
# Ensemble distance with the models fitted on squared centile spreads,
# falling back to the height-based estimate when no pointcloud distance exists.
# Missing values read from CSV are NaN and `NaN != None` is always True,
# so the check has to be pd.notna() (which also covers None).
df_train['distance_ensemble4'] = df_train.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)
df_val['distance_ensemble4'] = df_val.apply(
    lambda row: ajust_distance(error_model,
                               row['distance_height'],
                               row['height_image'],
                               row['distance_pointcloud'],
                               row['n_points'],
                               row['type']) if pd.notna(row['distance_pointcloud']) else row['distance_height'],
    axis=1)

In [None]:
# Validation MSE with the models fitted on squared centile spreads
evaluate('distance_ensemble4')

# Error of the ensemble model

In [None]:
# Signed error of the ensemble estimate (ground truth - estimate) for both splits
for _split in (df_train, df_val):
    _split['error_distance_ensemble4'] = _split['distance'] - _split['distance_ensemble4']

In [None]:
# Bin width, first bin edge and upper limit of the bins over 'distance_ensemble4'
jump = 5
start = 5
end = 60

# Loop over classes
for class_name in ["Car","Cyclist","Pedestrian"]:
    df_train_class = df_train[df_train['type'] == class_name]
    
    # Calculate the 0.1, 0.5 and 0.9 quantiles of the signed error
    # (ground truth - ensemble estimate) inside each distance bin.
    # Both bounds are strict, so values exactly on a bin edge are left out.
    MDistanceE_distances = []
    for distance in range(start, end, jump):
        MDistanceE_distance = df_train_class[(distance < df_train_class['distance_ensemble4']) & \
                                           (df_train_class['distance_ensemble4'] < distance+jump)].apply(lambda row: row['distance']-row['distance_ensemble4'],
                                                                                                    axis=1).quantile(q=[0.1,0.5,0.9])
        MDistanceE_distances.append(MDistanceE_distance)
    q_0_1 = list(map(lambda x: x.get(0.1), MDistanceE_distances))
    q_0_5 = list(map(lambda x: x.get(0.5), MDistanceE_distances))
    q_0_9 = list(map(lambda x: x.get(0.9), MDistanceE_distances))

    # Per-object values for the scatter plot
    Y = df_train_class['error_distance_ensemble4'].values
    X = df_train_class['distance_ensemble4'].values

    plt.title("Class %s" % class_name)
    plt.xlabel('Approximate distance (m)')
    plt.ylabel('Error (m)')

    plt.scatter(X, Y, s=0.3, c='black')
    
    # Quantile curves are drawn at the bin centres
    x = np.array(list(range(start,end,jump)))+jump/2
    plt.plot(x, q_0_1)
    plt.plot(x, q_0_5)
    plt.plot(x, q_0_9)
    plt.legend(['q 0.1', 'q 0.5', 'q 0.9'])
    
    plt.xlim((0,70))
    plt.ylim((-10,10))
    
    #plt.grid(alpha=0.5)
    
    plt.show()

In [None]:
# Quantiles of the signed ensemble error (ground truth - estimate) on the validation split
df_val['error_distance_ensemble4'].quantile(q=[0.01,0.05,0.5,0.95,0.99])

In [None]:
# Same quantiles, restricted to validation objects whose 'distance' is below 50
df_val[df_val['distance'] < 50]['error_distance_ensemble4'].quantile(q=[0.01,0.05,0.5,0.95,0.99])

# Save last model

In [None]:
# Assemble the final error model from dict_models before saving it.
# Keyword arguments on purpose: Regression_model_class takes
# (model_car, model_pedestrian, model_cyclist), so passing
# Car, Cyclist, Pedestrian positionally swaps the last two classes.
error_model = Regression_model_error(
    Regression_model_class(model_car=dict_models['height_distance']['Car'],
                           model_pedestrian=dict_models['height_distance']['Pedestrian'],
                           model_cyclist=dict_models['height_distance']['Cyclist']),
    Regression_model_class(model_car=dict_models['height_2d']['Car'],
                           model_pedestrian=dict_models['height_2d']['Pedestrian'],
                           model_cyclist=dict_models['height_2d']['Cyclist']),
    # NOTE(review): the Car model is reused for every class here, exactly as
    # before. Confirm whether this is deliberate or a copy-paste slip.
    Regression_model_class(model_car=dict_models['pointcloud_distance']['Car'],
                           model_pedestrian=dict_models['pointcloud_distance']['Car'],
                           model_cyclist=dict_models['pointcloud_distance']['Car']),
    Regression_model_class(model_car=dict_models['pointcloud_n_points']['Car'],
                           model_pedestrian=dict_models['pointcloud_n_points']['Pedestrian'],
                           model_cyclist=dict_models['pointcloud_n_points']['Cyclist']))

In [None]:
# Persist the final error model in the current working directory.
# pickle stores functions and classes by reference, so loading the file
# requires the same objective_* functions and Regression_model* classes
# to be importable under the same module name.
with open('error_model.pkl', 'wb') as model_file:
    pickle.dump(error_model, model_file)