# Filter each stationary frame

In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import os
import itertools
from mpl_toolkits.mplot3d import Axes3D
from pathlib import Path

## Constants and configuration

In [8]:
# Set background map resolutions
# Width of one azimuth bin in degrees (angles are produced with np.degrees);
# used as the default azimuth step of create_mappings.
azimuth_resolution = 1
# Width of one elevation ("height") bin in degrees; default height step of create_mappings.
height_resolution = 0.5
# NOTE(review): not referenced by any code shown in this notebook — presumably a
# leftover from an earlier standard-deviation based cutoff; confirm before removing.
std_dev_cutoff = 1.5

In [9]:
# Root data directory. The functions below read lidar frames from its
# `velodyne_points` sub-folder and write results to a `filtered_points` sub-folder.
DATA_DIR_ROOT = '../data'

In [10]:
# Set to True if you want to make a new map, otherwise, will load previous map.
# make_new_distances_df: True  -> rebuild the per-cell distance lists from the lidar
#                                 files and pickle them as "new_df_distances.pkl";
#                        False -> load "df_distances.pkl".
make_new_distances_df = False
# make_new_lookup_table: read by create_background_lookup_table;
#                        False -> load "dbscan_lookup_table.pkl".
make_new_lookup_table = False

## First, functions for building the background map

In [13]:
# Create mappings for azimuth and height using integers
def create_mappings(azimuth_step=azimuth_resolution, height_step=height_resolution):
    """Build the integer-key -> grid-position maps for azimuth and height.

    Angles are sampled from -180 to 180 degrees (azimuth) and from -30 to 10
    degrees (height), both inclusive, and each sampled angle is keyed by the
    angle multiplied by 10 and converted to an integer (so -29.5 degrees has
    key -295).

    Args:
        azimuth_step: Spacing of the azimuth samples in degrees.
        height_step: Spacing of the height samples in degrees.

    Returns:
        A tuple ``(azimuth_map, height_map)`` of dicts mapping the integer key
        to the position of that angle in its sampled range.
    """
    def scaled_key(angle):
        # Round instead of truncating: np.arange accumulates float error, and
        # int() truncates toward zero, so e.g. -298.99999999999994 would
        # become -298 and collide with (or skip) a neighbouring key.
        return int(round(float(angle) * 10))

    azimuth_range = np.arange(-180, 180 + azimuth_step, azimuth_step)
    height_range = np.arange(-30, 10 + height_step, height_step)
    azimuth_map = {scaled_key(az): idx for idx, az in enumerate(azimuth_range)}
    height_map = {scaled_key(ht): idx for idx, ht in enumerate(height_range)}
    return azimuth_map, height_map

In [14]:
# Create the grid DataFrames
def create_grid_dataframes():
    """Create the empty distance and intensity grids.

    Both grids are DataFrames with one row per height key and one column per
    azimuth key (as produced by create_mappings); every cell holds its own
    empty list.

    Returns:
        ``(df_distances, df_intensities, azimuth_map, height_map)``.
    """
    azimuth_map, height_map = create_mappings()
    row_keys = list(height_map)
    column_keys = list(azimuth_map)

    def empty_grid():
        # A fresh list object per cell, so cells never share storage
        cells = {}
        for column_key in column_keys:
            cells[column_key] = [[] for _ in row_keys]
        return pd.DataFrame(cells, index=row_keys)

    df_distances = empty_grid()
    df_intensities = empty_grid()
    return df_distances, df_intensities, azimuth_map, height_map

In [15]:
# Process file into grid
def process_files_to_grid(data_dir):
    """Collect the range measurements of every lidar frame into the background grid.

    Every ``.bin`` file in ``<data_dir>/velodyne_points`` is read as float32
    rows of (x, y, z, intensity). Each point is converted to azimuth, height
    (elevation angle) and distance, and its distance is appended to the list
    stored in the matching grid cell. Points whose keys are not part of the
    grid, or whose coordinates are not finite, are skipped.

    Args:
        data_dir: Directory containing the ``velodyne_points`` folder.

    Returns:
        The distances DataFrame (rows: height keys, columns: azimuth keys,
        cells: lists of distances). Intensities are not collected.
    """
    # Create empty grid (the intensity grid is created but neither filled nor returned)
    df_distances, _df_intensities, azimuth_map, height_map = create_grid_dataframes()
    azimuth_keys = np.array(list(azimuth_map))
    height_keys = np.array(list(height_map))

    lidar_dir = Path(data_dir, 'velodyne_points')
    # Only `.bin` entries are frames (same rule as filter_frames); sorting makes
    # the processing order reproducible.
    frame_paths = sorted(p for p in lidar_dir.iterdir()
                         if p.is_file() and p.name.endswith('.bin'))

    for file_path in frame_paths:
        print('-', end='')
        data = np.fromfile(file_path, dtype=np.float32).reshape(-1, 4)
        # Non-finite coordinates cannot be binned
        data = data[np.isfinite(data[:, :3]).all(axis=1)]
        x, y, z = data[:, 0], data[:, 1], data[:, 2]

        # Convert to azimuth, height, distance format (whole frame at once)
        distance = np.sqrt(x**2 + y**2 + z**2)
        azimuth = np.degrees(np.arctan2(y, x))
        height = np.degrees(np.arctan2(z, np.sqrt(x**2 + y**2)))

        # Convert and scale
        azimuth_idx = (np.floor((azimuth + 180) / azimuth_resolution)
                       * azimuth_resolution * 10).astype(int) - 1800
        # NOTE(review): unlike the azimuth key, the height key is not rescaled by
        # `height_resolution * 10`, while create_mappings keys rows by angle * 10.
        # With height_resolution = 0.5 only bin numbers that are multiples of 5
        # hit an existing row key, so most points are discarded. Left unchanged
        # because add_lookup_coords_to_xyz bins the same way and previously
        # pickled tables depend on it — fix both together.
        height_idx = np.floor((height + 30) / height_resolution).astype(int) - 300

        in_grid = np.isin(azimuth_idx, azimuth_keys) & np.isin(height_idx, height_keys)
        if not in_grid.any():
            continue
        azimuth_idx = azimuth_idx[in_grid]
        height_idx = height_idx[in_grid]
        distance = distance[in_grid]

        # Group the points by cell so each cell's list is extended once per frame.
        # lexsort is stable, so distances keep their in-file order within a cell.
        order = np.lexsort((azimuth_idx, height_idx))
        azimuth_idx = azimuth_idx[order]
        height_idx = height_idx[order]
        distance = distance[order]
        cell_changes = np.flatnonzero(
            (np.diff(height_idx) != 0) | (np.diff(azimuth_idx) != 0)) + 1
        starts = np.concatenate(([0], cell_changes))
        stops = np.concatenate((cell_changes, [len(distance)]))
        for start, stop in zip(starts, stops):
            cell = df_distances.at[int(height_idx[start]), int(azimuth_idx[start])]
            cell.extend(distance[start:stop])

    return df_distances

In [None]:
def get_distances_dataframe(dir):
    """Return the per-cell background distances, rebuilding or loading them.

    When ``make_new_distances_df`` is true the lidar files under ``dir`` are
    processed with process_files_to_grid and the result is pickled to
    "new_df_distances.pkl"; otherwise "df_distances.pkl" is loaded.

    Args:
        dir: Data directory handed to process_files_to_grid (only used when
            rebuilding).

    Returns:
        The distances DataFrame.
    """
    # NOTE(review): a rebuilt frame is saved as "new_df_distances.pkl" but the load
    # branch reads "df_distances.pkl" — presumably so a rebuild never overwrites the
    # known-good file; confirm this is intended.
    if make_new_distances_df:
        # Get the distances of the background map from the lidar files
        df_distances = process_files_to_grid(dir)

        # Save the DataFrame
        print('\nSaving dataframe')
        df_distances.to_pickle("new_df_distances.pkl")
    else:
        # Load the DataFrame
        print('\nLoading dataframe')
        df_distances = pd.read_pickle("df_distances.pkl")
    return df_distances

In [16]:
def files_exist(*files):
    """Return True when every given path exists (True for no paths at all)."""
    for candidate in files:
        if not os.path.exists(candidate):
            return False
    return True

In [67]:
from sklearn.cluster import DBSCAN

def _background_cutoff(distances):
    """Return the background distance cutoff for one grid cell, or NaN.

    The distances are clustered in one dimension with DBSCAN; the cutoff is the
    smallest distance of the farthest cluster (the cluster whose minimum is
    largest). NaN is returned when the cell is empty or when every sample is
    labelled as noise, i.e. there is no cluster to take a cutoff from.
    """
    if len(distances) == 0:
        return np.nan

    samples = np.asarray(distances).reshape(-1, 1)

    # Note: Adjust eps and min_samples based on the distribution of your data
    labels = DBSCAN(eps=10, min_samples=2).fit(samples).labels_

    # Nearest member of every real cluster (label -1 marks noise)
    cluster_minimums = [samples[labels == label].min()
                        for label in np.unique(labels) if label != -1]
    if not cluster_minimums:
        # e.g. a single measurement with min_samples=2: all noise, no cutoff
        return np.nan

    # Farthest cluster = largest minimum; its nearest member is the cutoff
    return max(cluster_minimums)


def create_background_lookup_table(df_distances):
    """Return the background lookup table of per-cell distance cutoffs.

    When ``make_new_lookup_table`` is false the table pickled as
    "dbscan_lookup_table.pkl" is loaded and returned unchanged. Otherwise a
    table with the same index and columns as ``df_distances`` is computed
    (one cutoff per cell, see _background_cutoff), pickled to
    "new_lookup_table.pkl" and returned.

    Args:
        df_distances: DataFrame whose cells are lists of distances. For
            backward compatibility a data-directory path is also accepted; the
            distances are then built with process_files_to_grid.

    Returns:
        DataFrame of cutoffs; NaN where no cutoff could be determined.
    """
    if not make_new_lookup_table:
        # The pickle already holds finished cutoffs, so it must not be clustered again
        print('\nLoading lookup table')
        return pd.read_pickle("dbscan_lookup_table.pkl")

    if not isinstance(df_distances, pd.DataFrame):
        # Older call sites pass the data directory instead of the distances
        df_distances = process_files_to_grid(df_distances)

    # Create a new DataFrame with the same index and columns as df_distances
    lookup_table = pd.DataFrame(index=df_distances.index, columns=df_distances.columns)
    # Iterate through each cell in df_distances
    for (height, azimuth), distances in df_distances.stack().items():
        lookup_table.at[height, azimuth] = _background_cutoff(distances)

    # Save the computed cutoffs (not the raw distances) under the lookup-table name
    print('\nSaving lookup table')
    lookup_table.to_pickle("new_lookup_table.pkl")
    return lookup_table

## Filtering and saving functions

In [61]:
def convert_to_dataframe(bin_path):
    """Load a lidar frame into a DataFrame with columns x, y, z, intensity.

    The file is read as a flat sequence of float32 values and reshaped to
    four values per row.
    """
    raw_values = np.fromfile(bin_path, dtype=np.float32)
    points = raw_values.reshape(-1, 4)
    return pd.DataFrame(points, columns=['x', 'y', 'z', 'intensity'])

In [62]:
def add_lookup_coords_to_xyz(points_df):
    """Add distance and lookup-table keys to a frame of points.

    Adds three columns to ``points_df`` **in place** and returns the same
    object: ``distance`` (Euclidean norm of x, y, z), ``azimuth_idx`` and
    ``height_idx`` (the integer keys used to address the background grid).

    Args:
        points_df: DataFrame with at least the columns ``x``, ``y`` and ``z``.

    Returns:
        ``points_df`` with the three columns added.
    """
    # Only the coordinates are needed; the intensity column is not read
    x, y, z = points_df['x'], points_df['y'], points_df['z']

    # Calculate the azimuth and height angles (degrees) using vectorized operations
    azimuth = np.degrees(np.arctan2(y, x))
    height = np.degrees(np.arctan2(z, np.sqrt(x**2 + y**2)))

    # Bin numbers counted from the lower edge of each angular range
    azimuth_bin = np.floor((azimuth + 180) / azimuth_resolution).astype(int)
    height_bin = np.floor((height + 30) / height_resolution).astype(int)

    # Add new columns to dataframe
    points_df['distance'] = np.sqrt(x**2 + y**2 + z**2)
    points_df['azimuth_idx'] = azimuth_bin * azimuth_resolution * 10 - 1800
    # NOTE(review): unlike the azimuth key, the height key is not rescaled by
    # `height_resolution * 10`, while create_mappings keys rows by angle * 10.
    # With height_resolution = 0.5 only bin numbers that are multiples of 5 hit
    # an existing row key. Left unchanged because process_files_to_grid bins the
    # same way and previously pickled tables depend on it — fix both together.
    points_df['height_idx'] = height_bin - 300

    return points_df

In [63]:
def filter_points(input_file, lookup_table):
    """Return the points of a frame that lie in front of the background.

    A point is kept only when its (height_idx, azimuth_idx) cell exists in
    ``lookup_table``, the cell's cutoff is not NaN, and the point's distance is
    strictly smaller than that cutoff.

    Args:
        input_file: Path of the lidar ``.bin`` frame to filter.
        lookup_table: DataFrame of distance cutoffs indexed by height key with
            azimuth keys as columns (both must be unique).

    Returns:
        DataFrame with the columns x, y, z, intensity of the kept points and a
        fresh 0..n-1 index. The four columns are present even when no point
        is kept, so the result can always be passed to save_as_binary.
    """
    output_columns = ['x', 'y', 'z', 'intensity']

    # Get dataframe from file and add lookup table coordinates
    points = add_lookup_coords_to_xyz(convert_to_dataframe(input_file))

    # Position of every point's cell in the table; -1 where the key is absent
    row_pos = lookup_table.index.get_indexer(points['height_idx'].astype(int).to_numpy())
    col_pos = lookup_table.columns.get_indexer(points['azimuth_idx'].astype(int).to_numpy())
    in_table = (row_pos >= 0) & (col_pos >= 0)

    # Cutoff per point; NaN for points outside the table or in cells without a cutoff
    table_values = lookup_table.to_numpy(dtype=float, na_value=np.nan)
    cutoffs = np.full(len(points), np.nan)
    cutoffs[in_table] = table_values[row_pos[in_table], col_pos[in_table]]

    # Comparisons against NaN are False, so those points are dropped
    keep = points['distance'].to_numpy() < cutoffs

    filtered_df = points.loc[keep, output_columns].astype(np.float64)
    return filtered_df.reset_index(drop=True)

In [64]:
def save_as_binary(df, bin_path):
    """Write the x, y, z, intensity columns of ``df`` to ``bin_path`` as raw float32.

    Values are written row by row in that column order, with no header.
    """
    # Fix the column order and element type expected by the binary format
    ordered = df.loc[:, ['x', 'y', 'z', 'intensity']]
    payload = ordered.to_numpy(dtype=np.float32)

    # Dump the raw values
    payload.tofile(bin_path)

In [65]:
def filter_frames(dir, background_lookup_table, max_frames=30):
    """Filter lidar frames against the background table and save the results.

    Reads ``.bin`` frames from ``<dir>/velodyne_points`` (in sorted file-name
    order), filters each with filter_points and writes the result under the
    same file name to ``<dir>/filtered_points``, which is created if missing.

    Args:
        dir: Data directory containing the ``velodyne_points`` folder.
        background_lookup_table: Table of distance cutoffs passed to
            filter_points.
        max_frames: Maximum number of frames to process; ``None`` processes
            every frame. Defaults to 30, the limit that was previously
            hard-coded.
    """
    output_columns = ['x', 'y', 'z', 'intensity']

    # Create a new folder for the filtered frames in the directory
    new_save_location = Path(dir, 'filtered_points')
    new_save_location.mkdir(exist_ok=True)

    lidar_dir = Path(dir, 'velodyne_points')

    # Get just the file names; os.listdir order is arbitrary, so sort to make
    # the selected frames reproducible
    files = sorted(f for f in os.listdir(lidar_dir) if f.endswith('.bin'))
    if max_frames is not None:
        files = files[:max_frames]

    for filename in files:
        print('.', end='')

        # Filter file
        filtered_df = filter_points(Path(lidar_dir, filename), background_lookup_table)

        # A frame with no surviving points may come back without columns;
        # normalise it so an empty file is written instead of raising
        filtered_df = filtered_df.reindex(columns=output_columns)

        # Convert back to binary and save under the same name in the new location
        save_as_binary(filtered_df, Path(new_save_location, filename))


In [69]:
# NOTE(review): `dir` shadows the builtin of the same name; kept because the
# cells below refer to it.
dir = Path(DATA_DIR_ROOT)
map_save_name = 'test_map'

# Create background lookup table for distance cutoffs
background_distance_lookup_table = create_background_lookup_table(dir)

MemoryError: bad allocation

In [73]:
# Filter the frames under `dir` against the background lookup table and save each
# filtered frame to <dir>/filtered_points (see filter_frames)
filter_frames(dir, background_distance_lookup_table)

..............................

In [74]:
# Persist the lookup table under the file name that create_background_lookup_table
# reads when make_new_lookup_table is False
background_distance_lookup_table.to_pickle("dbscan_lookup_table.pkl")