In [73]:
import os
import os.path as osp

import numpy as np
import struct
import easydict

import open3d as o3d

In [74]:
def read_lidar_info(file_path):
    size = os.path.getsize(file_path)
    point_num = int(size / 16)
    assert point_num * 16 == size, "invalid binary structure"

    lidar_pt_list = []
    with open(file_path, "rb") as f:
        bin_data = None
        while True:
            bin_data = f.read(4)
            if len(bin_data) < 4:
                break
            lidar_pt_list.append(struct.unpack('f', bin_data))
    return np.array(lidar_pt_list).reshape((-1, 4))

In [75]:
sample_index = "004369"

In [76]:
lidar_pt = read_lidar_info(f"data/training/velodyne/{sample_index}.bin")
print(lidar_pt)

[[35.11199951  4.33099985  1.40499997  0.        ]
 [78.68599701  9.98900032  2.89100003  0.        ]
 [78.71299744 10.24400043  2.89299989  0.        ]
 ...
 [ 3.79999995 -1.40400004 -1.773       0.        ]
 [16.11199951 -4.49900007 -7.68400002  0.23      ]
 [16.2329998  -4.47800016 -7.73600006  0.        ]]


In [77]:
pcd = pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(lidar_pt[:, :3])
pcd.colors = o3d.utility.Vector3dVector(np.ones((len(lidar_pt), 3)) * lidar_pt[:, 3].reshape((-1, 1)))
pcd_ds = pcd.voxel_down_sample(0.05)
o3d.io.write_point_cloud("data/output/output.ply", pcd_ds)

True

![snapshot](data/images/Snipaste_2024-11-28_20-27-59.png)
![snapshot](data/images/Snipaste_2024-11-29_10-28-22.png)


# label file field explanation

- `<object_type>:` The type of the annotated object. This can be one of the following: 'Car', 'Van', 'Truck', 'Pedestrian', - 'Person_sitting', 'Cyclist', 'Tram', 'Misc', or 'DontCare'. 'DontCare' is used for objects that are present but ignored for evaluation.
- `<truncation>`: The fraction of the object that is visible. It is a float value in the range [0.0, 1.0]. A value of 0.0 means the - object is fully visible, and 1.0 means the object is completely outside the image frame.
- `<occlusion>`: The level of occlusion of the object. It is an integer value indicating the degree of occlusion, where 0 means fully - visible, and higher values indicate increasing levels of occlusion.
- `<alpha>`: The observation angle of the object in radians, relative to the camera. It is the angle between the object's heading - direction and the positive x-axis of the camera.
- `<left>, <top>, <right>, <bottom>`: The 2D bounding box coordinates of the object in the image. They represent the pixel locations of - the top-left and bottom-right corners of the bounding box.
- `<height>, <width>, <length>`: The 3D dimensions of the object (height, width, and length) in meters.
- `<x>, <y>, <z>`: The 3D location of the object's centroid in the camera coordinate system (in meters).
- `<rotation_y>`: The rotation of the object around the y-axis in camera coordinates, in radians.

see also: https://medium.com/@abdulhaq.ah/explain-label-file-of-kitti-dataset-738528de36f4

In [None]:
# label the thing with colors
def is_float(item):
    try:
        item = float(item)
    except Exception as e:
        return False
    return True

label_field_name_list = [
    "object_type",
    "truncation",
    "occlusion",
    "alpha",
    "left", "top", "right", "bottom",
    "height", "width", "length",
    "x", "y", "z",
    "rotation_y"
]
label_list = []
with open(f"data/training/label_2/{sample_index}.txt", 'r') as f:
    label_list = [tuple([float(item) if is_float(item) else item for item in line.strip().split()]) for line in f.readlines()]
# transform each label to dictionary item
label_list = [
    easydict.EasyDict({field:value for (field, value) in zip(label_field_name_list, item)}) for item in label_list
]

calib_field_name_list = [
    "P0", "P1", "P2", "P3",
    "R0_rect", "Tr_velo_to_cam", "Tr_imu_to_velo"
]
calib_info = []
with open(f"data/training/calib/{sample_index}.txt", 'r') as f:
    calib_list = [line.strip().split() for line in f.readlines() if len(line.strip()) > 0]
calib_info = easydict.EasyDict(
    {seq[0][:-1]:[float(x) for x in seq[1:]] for seq in calib_list}
)

print(*label_list, sep='\n')
for key, val in calib_info.items():
    print(key, val)

{'object_type': 'Car', 'truncation': 0.0, 'occlusion': 0.0, 'alpha': -1.65, 'left': 634.89, 'top': 184.77, 'right': 675.04, 'bottom': 213.79, 'height': 1.32, 'width': 1.63, 'length': 4.1, 'x': 2.15, 'y': 1.93, 'z': 35.89, 'rotation_y': -1.6}
{'object_type': 'Car', 'truncation': 0.0, 'occlusion': 2.0, 'alpha': 0.21, 'left': 394.31, 'top': 178.6, 'right': 507.49, 'bottom': 219.61, 'height': 1.58, 'width': 1.56, 'length': 4.25, 'x': -6.44, 'y': 1.84, 'z': 29.32, 'rotation_y': -0.0}
{'object_type': 'Car', 'truncation': 0.0, 'occlusion': 1.0, 'alpha': -0.3, 'left': 733.33, 'top': 182.14, 'right': 880.89, 'bottom': 236.7, 'height': 1.5, 'width': 1.62, 'length': 3.88, 'x': 5.71, 'y': 1.8, 'z': 21.31, 'rotation_y': -0.04}
{'object_type': 'Car', 'truncation': 0.0, 'occlusion': 2.0, 'alpha': -2.83, 'left': 202.13, 'top': 171.25, 'right': 419.02, 'bottom': 258.03, 'height': 1.77, 'width': 1.68, 'length': 4.12, 'x': -6.54, 'y': 1.76, 'z': 16.01, 'rotation_y': 3.07}
{'object_type': 'Car', 'truncati

In [None]:
def R_mat(angle: float, axis, radian: bool=True):
    """
    generate the 3x3 rotation matrix with the specified axis and angle

    - param angle: rotation angle, in radian
    - param axis: rotation axis, [1,3] or [3]
    - return: rotation matrix R [3,3]
    """
    # angle to radian
    angle_rad = angle
    if not radian:
        angle_rad = np.radians(angle)

    # normalize rotation axis
    if type(axis) is not np.ndarray:
        axis = np.array(axis)
    axis = axis / np.linalg.norm(axis)

    # compute each component
    x, y, z = axis
    c = np.cos(angle_rad)
    s = np.sin(angle_rad)
    C = 1 - c

    # build the rotation matrix
    rotation_matrix = np.array([
        [x*x*C + c,   x*y*C - z*s, x*z*C + y*s],
        [y*x*C + z*s, y*y*C + c,   y*z*C - x*s],
        [z*x*C - y*s, z*y*C + x*s, z*z*C + c  ]
    ])

    return rotation_matrix


def T_mat(values: list):
    return np.array(values).reshape((-1, 1))

# add label colors

Note that `<x,y,z>` coordinates in the KITTI object detection dataset are placed under the camera coordinate, if it's to be used with lidar point cloud data, you should do the coordinate transformation first

![kitti_coor](data/images/kitti_coord.png)

In [None]:
points = lidar_pt[:, :3]
colors = np.zeros(points.shape)

anchor_points = []
anchor_colors = []
for label in label_list:
    if (label.object_type == "DontCare"):
        continue

    radn = label.rotation_y
    axis = [0, 1, 0]
    R = R_mat(radn, axis)
    T = T_mat([label.x, label.y, label.z])
    
    # transform from camera coordinate to velodyn coordinate
    vly2cam = np.array(calib_info.Tr_velo_to_cam).reshape((3, 4))
    T = vly2cam[:3,:3].T @ (T - vly2cam[:, 3].reshape((-1, 1)))
    
    # visualize center point
    anchor_points.append(T.T)
    anchor_colors.append([0.0, 1.0, 0.0])

    # visualize bounding box corner
    lb_coord = np.array([-label.width/2, -label.length/2, 0.0])
    rt_coord = np.array([+label.width/2, +label.length/2, +label.height])
    anchor_points.append(lb_coord @ R.T + T.T)
    anchor_points.append(rt_coord @ R.T + T.T)
    anchor_colors.append([1.0, 0.0, 0.0])
    anchor_colors.append([1.0, 0.0, 0.0])

    # visualize detection area
    points_r = (points - T.T) @ R
    mask = np.all((points_r >= lb_coord) & (points_r <= rt_coord), axis=1)
    print(label.object_type, "includes point num:", mask.astype(np.int32).sum())

    colors[mask] = np.array([0.0, 0.0, 1.0])
    

points = np.vstack([points, np.vstack(anchor_points)]) # add other points to main part
colors = np.vstack([colors, np.vstack(anchor_colors)]) # add other colors to main part

Car 12
Car 103
Car 311
Car 6
Car 165
Car 596
Car 272
Car 20
Car 494
Car 1124


In [81]:
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
pcd.colors = o3d.utility.Vector3dVector(colors)
pcd_ds = pcd.voxel_down_sample(0.05)
o3d.io.write_point_cloud("data/output/gdth.ply", pcd_ds)

True

# bounding box segmentation result
![bbox_visualization](data/images/bbox_seg.png)

Green dots represent the center of the bottom of the bounding box, red dots represent left bottom and right top corners of the bounding box, blue dots represent the `object_type`

In [82]:
a = np.array([[1,1,1]]).reshape((-1, 1))
b = np.array([[1,1,1]]).reshape((-1, 1))

print(a.shape, b.shape)
c = a - b

print(c)

(3, 1) (3, 1)
[[0]
 [0]
 [0]]
