# This notebook explains how the LiDAR .pkl file and the annotation .png file should be structured for use with the CLFCN and CLFT networks

In [1]:
import os 
import cv2
import pickle
import numpy as np

In [3]:
# Sample inputs used throughout this notebook, given relative to its folder:
# the annotation mask (.png) and the LiDAR frame (.pkl).
anno_path = '../test_images/test_anno.png'
lidar_path = '../test_images/test_lidar.pkl'

In [5]:
# Load the sample LiDAR frame. The context manager closes the file even when
# unpickling raises, so the handle cannot leak.
# NOTE(review): pickle.load can execute arbitrary code; only open .pkl files
# that come from a source you trust.
with open(lidar_path, 'rb') as lidar_file:
    lidar_data = pickle.load(lidar_file)

### The .pkl file holds a single dict with three keys: '3d_points', 'class_instance', and 'camera_coordinates'.

In [8]:
# Show the loaded dict as the cell output; NumPy abbreviates the long arrays
# with '...' so only the first and last rows of each are displayed.
lidar_data

{'3d_points': array([[-5.2509296e+01,  1.0832564e+01,  4.3221998e+00],
        [-5.2449059e+01,  1.0953444e+01,  4.3208528e+00],
        [-5.2428661e+01,  1.1082363e+01,  4.3210974e+00],
        ...,
        [-1.3185942e+00,  2.3033498e-01,  5.4545771e-02],
        [-1.3414596e+00, -2.0684135e-01,  5.1952340e-02],
        [-1.3731146e+00, -1.8327464e-01,  4.2206958e-02]], dtype=float32),
 'class_instance': array([[5, 0],
        [5, 0],
        [5, 0],
        ...,
        [5, 0],
        [5, 0],
        [5, 0]], dtype=uint8),
 'camera_coordinates': array([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]], dtype=uint16)}

In [14]:
# For each array in the dict, print its row count, then its largest and
# smallest value, in the same key order as the dict display.
for key in ('3d_points', 'class_instance', 'camera_coordinates'):
    values = lidar_data[key]
    print(len(values))
    print(np.max(values))
    print(np.min(values))

155111
76.0497
-72.484505
155111
83
0
155111
1919
0


### From the max and min of these three arrays, you can see that '3d_points' holds the LiDAR readings in meters, 'class_instance' holds the label of each LiDAR point (this is how Waymo labels its data), and 'camera_coordinates' holds the pixel coordinates of each LiDAR point's projection onto the camera image (Waymo's front camera image is 1920x1280).

### Therefore, the most important step here is working out each LiDAR point's corresponding camera projection. This requires the camera-LiDAR extrinsic calibration to obtain the transformation matrix. If you use a large-scale open dataset, however, this has already been done, and the LiDAR-to-camera projection is usually provided as a 'range image'.

### One more thing: the Waymo .pkl file in this repo contains the full 360-degree LiDAR sweep, and the first column of 'camera_coordinates' records which camera plane each point falls on (six distinct values, 0-5, as shown below).

In [23]:
# Pull the two arrays used in the rest of the notebook out of the dict.
points_3d = lidar_data['3d_points']
camera_coord = lidar_data['camera_coordinates']
# Distinct values found in column 0 of the camera coordinates (cell output).
np.unique(camera_coord[:, 0])

array([0, 1, 2, 3, 4, 5], dtype=uint16)

### But in the pipeline, only the front camera (index 1) and its corresponding LiDAR points are used.

In [26]:
# Rows whose camera index (column 0) equals 1 belong to the front camera.
mask = np.equal(camera_coord[:, 0], 1)
# Apply the same row mask to both arrays so they stay aligned point-for-point;
# columns 1 and 2 of camera_coord are kept as the per-point camera coordinates.
front_camera_coord = camera_coord[mask, 1:3]
front_points = points_3d[mask]
# Both counts must match: one camera coordinate per selected LiDAR point.
print(len(front_points), len(front_camera_coord), sep='\n')

17223
17223


### Now each LiDAR point has a corresponding coordinate on the camera image plane.

In [29]:
# Print one arbitrary sample (row 100): first the 3D LiDAR point, then the
# camera coordinate stored for that same row.
for arr in (front_points, front_camera_coord):
    print(arr[100])

[ 65.50708   -14.290524    4.7711673]
[1428  550]


### The annotation .png file is 480x320, because the camera image .png files are also 480x320.

### It is single-channel, and the pixel values are the class indices. In our case: 0 -> ignore, 1 -> vehicle, 2 -> pedestrian, 3 -> sign, 4 -> cyclist, 5 -> background.

In [30]:
# Read the annotation mask exactly as stored (IMREAD_UNCHANGED keeps the file's
# own channel count and bit depth instead of converting to 3-channel BGR).
anno = cv2.imread(anno_path, cv2.IMREAD_UNCHANGED)
# cv2.imread does not raise on a missing or undecodable file; it returns None,
# which would otherwise surface later as an obscure AttributeError.
if anno is None:
    raise FileNotFoundError(f"Could not read annotation image: {anno_path}")

In [33]:
# Summarise the mask: its (rows, cols) shape, then the distinct pixel values
# (class indices) present in this particular image.
print(anno.shape, np.unique(anno), sep='\n')

(320, 480)
[0 1 3 5]
