# See how many points are inside and outside of the labels, before and after filtering

In [132]:
# Libraries
import os
import random
import numpy as np
import pandas as pd
from pathlib import Path

In [133]:
# List of paths to dataset
BASE_DIR = '../../data/eval_data'
ARCS_DIR = 'arcs'
ARCS_FILTERED_DIR = 'arcs_azimuth_filtered'
ARCS_LABEL_FILTERED_DIR = 'arcs_label_filtered'
ARCS_LABEL_DIR = 'labels'

In [134]:
evaluation_area = [21.0175, 11.717, -0.3925, 40.035, 55.9349, 7.535, 0.5]

In [135]:
# Evaluate filter (before eval script)
BASE_DIR = '../../data'
ARCS_DIR = 'velodyne_points'
DIST_FILTERED_DIR = 'arcs_distance_filtered'
HEIGHT_FILTERED_DIR = 'filtered_points'
ARCS_LABEL_FILTERED_DIR = 'label_filtered_points'
ARCS_LABEL_DIR = 'pillar_pred_format_labels'

In [136]:
# test_dir = 'test_folder'

In [137]:
def point_is_in_an_roi_box(point, roi_box_list):
    # Extract the point coordinates
    x, y, z = point
    
    for bbox in roi_box_list:
        # Extract the box parameters
        # dz, dy, dx, cx, cy, cz, yaw = bbox
        cx, cy, cz, dx, dy, dz, yaw = bbox
        
        # Create the rotation matrix for the yaw angle
        cos_yaw = np.cos(-yaw)  # Negative for the inverse rotation
        sin_yaw = np.sin(-yaw)
        
        rotation_matrix = np.array([
            [cos_yaw, -sin_yaw, 0],
            [sin_yaw,  cos_yaw, 0],
            [0,       0,       1]
        ])
        
        # Translate the point to the box's coordinate frame
        translated_point = np.array([x - cx, y - cy, z - cz])
        
        # Rotate the point to align with the box
        rotated_point = rotation_matrix.dot(translated_point)
        
        # Check if the point is within the box dimensions
        half_lengths = [dz / 2, dy / 2, dx / 2]
        in_box = all([
            -half_lengths[i] <= rotated_point[i] <= half_lengths[i] for i in range(3)
        ])
    
        if in_box:
            return True
    
    return False

In [138]:
def get_roi_boxes(label_file):
    bboxes = []
    with open(label_file, 'r') as file:
        for line in file:
            parts = line.strip().split()
            bbox = [float(value) for value in parts[8:15]]  # Extract the bounding box dimensions and location            
            bboxes.append(bbox)
    return np.array(bboxes)

In [139]:
def is_label_point(point, label_file_path):
    list_roi_boxes = get_roi_boxes(label_file_path)
    if point_is_in_an_roi_box(point, list_roi_boxes):
        return True
    return False

In [140]:
def convert_to_dataframe(bin_path):
    pre_filtered_data = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) 
    columns = ['x', 'y', 'z', 'intensity']
    df = pd.DataFrame(pre_filtered_data, columns=columns)
    return df

In [141]:
def get_frame_point_statistics(data_file_path, label_file_path):
    # Running totals
    total_points = 0
    label_points = 0

    points_df = convert_to_dataframe(data_file_path)
    
    # For each point in the data
    for index, row in points_df.iterrows():
    # for index, row in points_df.iloc[:100].iterrows():
        point = (row['x'], row['y'], row['z'])
        if point_is_in_an_roi_box(point, [evaluation_area]):
            # Increment the total
            total_points += 1
            
            # If the point is a label point
            if is_label_point(point, label_file_path):
                # Increment the label total
                label_points += 1

    return total_points, label_points

In [142]:
# test_dir_path = Path(BASE_DIR, test_dir)
test_dir_path = Path(BASE_DIR, DIST_FILTERED_DIR)
files = [f for f in os.listdir(test_dir_path) if f.endswith('.bin')]
random.shuffle(files)
files = files[:100]
print(files)

['001182.bin', '002793.bin', '005393.bin', '004071.bin', '006230.bin', '005686.bin', '000044.bin', '002442.bin', '004025.bin', '001716.bin', '001424.bin', '000197.bin', '001378.bin', '005798.bin', '006381.bin', '000528.bin', '004066.bin', '005456.bin', '003693.bin', '006099.bin', '002516.bin', '005878.bin', '000190.bin', '002861.bin', '003158.bin', '001348.bin', '003413.bin', '003668.bin', '002052.bin', '002719.bin', '004602.bin', '006187.bin', '006035.bin', '004121.bin', '000260.bin', '003186.bin', '001904.bin', '000284.bin', '002463.bin', '004633.bin', '000851.bin', '000548.bin', '001821.bin', '005244.bin', '000467.bin', '000108.bin', '001817.bin', '000706.bin', '001888.bin', '001153.bin', '000374.bin', '004572.bin', '006382.bin', '004679.bin', '003464.bin', '005826.bin', '004408.bin', '005256.bin', '000876.bin', '000819.bin', '004504.bin', '001240.bin', '003070.bin', '003410.bin', '001336.bin', '005245.bin', '001012.bin', '005420.bin', '006067.bin', '002629.bin', '006053.bin', '0029

In [143]:
def get_dataset_point_statistics(data_path, label_path):
    # Running totals
    num_frames = 0
    total_num_points = 0
    total_num_label_points = 0
    total_file_size = 0
    
    for filename in files:
        # Get file index
        file_id, extension = os.path.splitext(filename)
        print(file_id, end=' ')
        
        # Append file name to locations
        data_file_path = Path(data_path, filename)
        label_filename = file_id + '.txt'
        label_file_path = Path(label_path, label_filename)

        # Get the number of points, and the number of points inside the labels
        num_frame_points, num_frame_label_points = get_frame_point_statistics(data_file_path, label_file_path)

        # Get the file size
        size = os.path.getsize(data_file_path)
        
        # Add to running total
        num_frames += 1
        total_num_points += num_frame_points
        total_num_label_points += num_frame_label_points
        total_file_size += size

    # return total_num_points / num_frames, total_num_label_points / num_frames
    return num_frames, total_num_points, total_num_label_points, total_file_size

In [144]:
def evaluate_filter(dir, dataset_name):
    # Create directory path
    data_path = Path(BASE_DIR, dir)
    label_path = Path(BASE_DIR, ARCS_LABEL_DIR)
    print(data_path)

    # Get the dataset statistics
    num_frames, total_num_points, total_num_label_points, total_file_size = get_dataset_point_statistics(data_path, label_path)

    results = {
        'dataset_name': dataset_name,
        'num_frames': num_frames,
        'total_num_points': total_num_points,
        'total_num_label_points': total_num_label_points,
        'total_num_non_label_points': total_num_points - total_num_label_points,
        'avg_frame_pts': total_num_points / num_frames,
        'avg_frame_label_pts': total_num_label_points / num_frames,
        'avg_frame_non_label_pts': (total_num_points - total_num_label_points) / num_frames,
        'total_files_size': total_file_size
    }

    return results

### Run datasets through filter evaluator, print results

In [146]:
results = evaluate_filter(ARCS_DIR, 'ARCS')

..\..\data\velodyne_points
001182 002793 005393 004071 006230 005686 000044 002442 004025 001716 001424 000197 001378 005798 006381 000528 004066 005456 003693 006099 002516 005878 000190 002861 003158 001348 003413 003668 002052 002719 004602 006187 006035 004121 000260 003186 001904 000284 002463 004633 000851 000548 001821 005244 000467 000108 001817 000706 001888 001153 000374 004572 006382 004679 003464 005826 004408 005256 000876 000819 004504 001240 003070 003410 001336 005245 001012 005420 006067 002629 006053 002969 003218 000134 004223 000855 000203 001095 001030 004497 001400 001866 006127 005169 002500 003178 005013 004798 005665 000052 003465 002665 000456 004586 006450 001516 005940 005670 005990 001595 

In [147]:
distance_filter_results = evaluate_filter(DIST_FILTERED_DIR, 'Height Filtered ARC')
# filter_results = evaluate_filter(ARCS_FILTERED_DIR, 'Filtered ARCS')

..\..\data\arcs_distance_filtered
001182 002793 005393 004071 006230 005686 000044 002442 004025 001716 001424 000197 001378 005798 006381 000528 004066 005456 003693 006099 002516 005878 000190 002861 003158 001348 003413 003668 002052 002719 004602 006187 006035 004121 000260 003186 001904 000284 002463 004633 000851 000548 001821 005244 000467 000108 001817 000706 001888 001153 000374 004572 006382 004679 003464 005826 004408 005256 000876 000819 004504 001240 003070 003410 001336 005245 001012 005420 006067 002629 006053 002969 003218 000134 004223 000855 000203 001095 001030 004497 001400 001866 006127 005169 002500 003178 005013 004798 005665 000052 003465 002665 000456 004586 006450 001516 005940 005670 005990 001595 

In [148]:
height_filter_results = evaluate_filter(HEIGHT_FILTERED_DIR, 'Distance Filtered ARC')

..\..\data\filtered_points
001182 002793 005393 004071 006230 005686 000044 002442 004025 001716 001424 000197 001378 005798 006381 000528 004066 005456 003693 006099 002516 005878 000190 002861 003158 001348 003413 003668 002052 002719 004602 006187 006035 004121 000260 003186 001904 000284 002463 004633 000851 000548 001821 005244 000467 000108 001817 000706 001888 001153 000374 004572 006382 004679 003464 005826 004408 005256 000876 000819 004504 001240 003070 003410 001336 005245 001012 005420 006067 002629 006053 002969 003218 000134 004223 000855 000203 001095 001030 004497 001400 001866 006127 005169 002500 003178 005013 004798 005665 000052 003465 002665 000456 004586 006450 001516 005940 005670 005990 001595 

In [149]:
label_filter_results = evaluate_filter(ARCS_LABEL_FILTERED_DIR, 'Label Filtered ARCS')

..\..\data\label_filtered_points
001182 002793 005393 004071 006230 005686 000044 002442 004025 001716 001424 000197 001378 005798 006381 000528 004066 005456 003693 006099 002516 005878 000190 002861 003158 001348 003413 003668 002052 002719 004602 006187 006035 004121 000260 003186 001904 000284 002463 004633 000851 000548 001821 005244 000467 000108 001817 000706 001888 001153 000374 004572 006382 004679 003464 005826 004408 005256 000876 000819 004504 001240 003070 003410 001336 005245 001012 005420 006067 002629 006053 002969 003218 000134 004223 000855 000203 001095 001030 004497 001400 001866 006127 005169 002500 003178 005013 004798 005665 000052 003465 002665 000456 004586 006450 001516 005940 005670 005990 001595 

In [150]:
results['percent_of_non_label_pts_retained'] = results['total_num_non_label_points'] / results['total_num_non_label_points']
results['percent_of_label_pts_retained'] = results['total_num_label_points'] / results['total_num_label_points']
results['percent_of_original_file_size'] = results['total_files_size'] / results['total_files_size']

In [151]:
# filter_results['percent_of_non_label_pts_retained'] = filter_results['total_num_non_label_points'] / results['total_num_non_label_points']
# filter_results['percent_of_label_pts_retained'] = filter_results['total_num_label_points'] / results['total_num_label_points']
# filter_results['percent_of_original_file_size'] = filter_results['total_files_size'] / results['total_files_size']

distance_filter_results['percent_of_non_label_pts_retained'] = distance_filter_results['total_num_non_label_points'] / results['total_num_non_label_points']
distance_filter_results['percent_of_label_pts_retained'] = distance_filter_results['total_num_label_points'] / results['total_num_label_points']
distance_filter_results['percent_of_original_file_size'] = distance_filter_results['total_files_size'] / results['total_files_size']

In [152]:
height_filter_results['percent_of_non_label_pts_retained'] = height_filter_results['total_num_non_label_points'] / results['total_num_non_label_points']
height_filter_results['percent_of_label_pts_retained'] = height_filter_results['total_num_label_points'] / results['total_num_label_points']
height_filter_results['percent_of_original_file_size'] = height_filter_results['total_files_size'] / results['total_files_size']

In [153]:
label_filter_results['percent_of_non_label_pts_retained'] = label_filter_results['total_num_non_label_points'] / results['total_num_non_label_points']
label_filter_results['percent_of_label_pts_retained'] = label_filter_results['total_num_label_points'] / results['total_num_label_points']
label_filter_results['percent_of_original_file_size'] = label_filter_results['total_files_size'] / results['total_files_size']

In [154]:
results_df = pd.DataFrame([results, distance_filter_results, height_filter_results, label_filter_results])
# results_df = pd.DataFrame([results, filter_results, label_filter_results])
# results_df = pd.DataFrame([results, filter_results])
columns_to_display = ['dataset_name',
                      'avg_frame_non_label_pts',
                      'avg_frame_label_pts',
                      'percent_of_non_label_pts_retained',
                      'percent_of_label_pts_retained',
                      'percent_of_original_file_size']
display(results_df[columns_to_display])

Unnamed: 0,dataset_name,avg_frame_non_label_pts,avg_frame_label_pts,percent_of_non_label_pts_retained,percent_of_label_pts_retained,percent_of_original_file_size
0,ARCS,921.96,76.68,1.0,1.0,1.0
1,Height Filtered ARC,90.26,67.05,0.0979,0.874413,0.166239
2,Height Filtered ARC,11.97,62.03,0.012983,0.808946,0.077163
3,Label Filtered ARCS,0.0,76.68,0.0,1.0,0.016941


In [155]:
columns_to_display = ['dataset_name',
                      'percent_of_non_label_pts_retained',
                      'percent_of_label_pts_retained',
                      'percent_of_original_file_size']
display(results_df[columns_to_display])

Unnamed: 0,dataset_name,percent_of_non_label_pts_retained,percent_of_label_pts_retained,percent_of_original_file_size
0,ARCS,1.0,1.0,1.0
1,Height Filtered ARC,0.0979,0.874413,0.166239
2,Height Filtered ARC,0.012983,0.808946,0.077163
3,Label Filtered ARCS,0.0,1.0,0.016941


In [156]:
# test_dir = '../../data/eval_data/test_folder'
# results = evaluate_filter(test_dir, 'test')
# print(results)

In [157]:
# import matplotlib.pyplot as plt
# # These tests were done with azimuth_resolution = 0.01, height_resolution = 0.25

# # Defining the parameter changes and their corresponding results
# parameters = ['+0.2', '0', '-0.1', '-0.2', '-0.5', '-1']
# non_label_points_retained = [0.888438, 0.690376, 0.195896, 0.165541, 0.142986, 0.136502]
# label_points_retained = [0.965184, 0.933963, 0.88492, 0.874891, 0.855174, 0.837658]

# # Creating the plot
# plt.figure(figsize=(10, 5))
# plt.plot(parameters, non_label_points_retained, label='Non-label Points Retained', marker='o')
# plt.plot(parameters, label_points_retained, label='Label Points Retained', marker='o')
# plt.title('Effect of Distance Parameter Adjustment on Point Retention')
# plt.xlabel('Parameter Adjustment')
# plt.ylabel('Retention Rate')
# plt.legend()
# plt.grid(True)
# plt.show()