# Práctica 1: LiDAR Clustering



## Índice

1. Integrantes del equipo

2. Librerías

3. Carga de Datos <br>

    3.1. Dataset vehículos

4. Clustering DBSCAN
5. Conclusiones


## Integrantes del equipo

* Alejandro Cortijo Benito
* Alejandro García Mota

## <a id='imports'></a>Librerías

In [2]:
import os
import cv2
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.cluster import DBSCAN
from sklearn.linear_model import RANSACRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

import plotly.express as px
import plotly.graph_objects as go

from IPython.display import Video, display

## <a id='dataset'></a>Carga de datos

### Dataset vehículos

Cargamos todos los frames del dataset original y los ordenamos para evitar inconsistencias en los datos.

In [3]:
# Directory that holds the point-cloud CSV files (one file per LiDAR frame)
directory = './data/pointclouds/'

# Function to extract the numerical part of the filenames
def extract_timestamp(filename):
    """Return the digits embedded in a point-cloud file name as a single int.

    The 'pointcloud_' prefix, the '.csv' extension and any remaining
    underscores are stripped, in that order, and whatever is left is parsed
    with ``int``. A name that still contains non-numeric characters after
    stripping raises ``ValueError``.
    """
    digits = filename
    for token in ('pointcloud_', '.csv', '_'):
        digits = digits.replace(token, '')
    return int(digits)

dataframes = []

# Keep only CSV files *before* sorting: extract_timestamp() calls int() on the
# stripped name, so any stray directory entry (e.g. '.ipynb_checkpoints')
# would otherwise raise ValueError inside sorted().
csv_files = [name for name in os.listdir(directory) if name.endswith('.csv')]

# Order by the number extracted from each name, then take every 5th file
# from index 100 up to (but not including) index 115.
for filename in sorted(csv_files, key=extract_timestamp)[100:115:5]:
    file_path = os.path.join(directory, filename)  # Full path to the CSV
    dataframes.append(pd.read_csv(file_path))  # One DataFrame per frame

len(dataframes)

3

Usando los conocimientos de `LiDAR_Clustering` procedemos a clusterizar cada frame por separado. Hemos decidido usar **DBSCAN** ya que a priori desconocemos el número de vehículos. Por otro lado, para identificar si los vehículos que salen en escena son los mismos que en el frame anterior vamos a usar la diferencia entre los centroides para computar si el vehículo del instante `t+1` es el mismo o no.

In [4]:
def get_cluster(last_centroids, center_x, center_y, center_z, max_distance=2):
    """Match a detected centroid to a known track id, or open a new track.

    Args:
        last_centroids: dict mapping track id -> [x, y, z] of the position
            where that track was last matched. It is mutated in place.
        center_x, center_y, center_z: coordinates of the centroid to match.
        max_distance: a track is considered the same object only when the
            Euclidean distance to its stored centroid is strictly below this
            value. Defaults to 2, the value previously hard-coded.

    Returns:
        The id of the matched track, or a freshly allocated id
        (``max(existing ids) + 1``, or 0 for the very first detection).
    """
    position = [center_x, center_y, center_z]

    # First detection ever: nothing to compare against.
    if not last_centroids:
        last_centroids[0] = position
        return 0

    point = np.array(position)
    distances = {
        cluster: np.linalg.norm(point - np.array(centroid))
        for cluster, centroid in last_centroids.items()
    }
    closest_cluster = min(distances, key=distances.get)

    if distances[closest_cluster] < max_distance:
        # Refresh the stored position so the next call compares against where
        # the track was seen most recently, not where it first appeared.
        # Without this, an object that drifts more than max_distance from its
        # first sighting is given a new id.
        last_centroids[closest_cluster] = position
        return closest_cluster

    new_cluster_id = max(last_centroids) + 1
    last_centroids[new_cluster_id] = position
    return new_cluster_id

In [5]:
import os
import cv2
from plotly.subplots import make_subplots

# Per-frame pipeline: cluster the points with DBSCAN, compute an axis-aligned
# bounding box per cluster, match each box centre to a track via
# get_cluster(), and build a 3D figure with points, boxes, centres and paths.
# NOTE(review): the figure is built but never shown or written inside this
# cell; confirm whether a fig.show() / export call is intended.

centroids = []       # (track_id, x, y, z) for every detection of every frame
last_centroids = {}  # track_id -> centroid, filled in by get_cluster()
colors = px.colors.qualitative.Dark24  # Palette for the track paths

for i, frame in enumerate(tqdm(dataframes)):
    X = frame[['x', 'y', 'z']]

    dbscan = DBSCAN(eps=2, min_samples=10, n_jobs=-1)
    y_dbscan = dbscan.fit_predict(X)

    df_clustered = X.copy()
    df_clustered['cluster'] = y_dbscan

    # Drop noise points (DBSCAN labels them -1)
    df_clustered = df_clustered[df_clustered['cluster'] != -1]

    df_result = df_clustered[['x', 'y', 'z', 'cluster']].copy()

    # Calculate bounding boxes for each cluster
    bounding_boxes = {}

    # Iterate over the labels that survived the noise filter. Slicing
    # np.unique(y_dbscan)[1:] only works when the -1 label is present; in a
    # frame without noise it would silently discard cluster 0.
    for cluster in np.unique(df_result['cluster']):
        cluster_points = df_result[df_result['cluster'] == cluster]
        min_x, max_x = cluster_points['x'].min(), cluster_points['x'].max()
        min_y, max_y = cluster_points['y'].min(), cluster_points['y'].max()
        min_z, max_z = cluster_points['z'].min(), cluster_points['z'].max()
        bounding_boxes[cluster] = {
            'min_x': min_x, 'max_x': max_x,
            'min_y': min_y, 'max_y': max_y,
            'min_z': min_z, 'max_z': max_z
        }
        # Centre of the bounding box, used as the cluster's centroid
        center_x = (min_x + max_x) / 2
        center_y = (min_y + max_y) / 2
        center_z = (min_z + max_z) / 2
        # Store the detection together with the track id it was matched to
        centroids.append((get_cluster(last_centroids, center_x, center_y, center_z),
                          center_x, center_y, center_z))

    fig = go.Figure()

    # Add the clustered (noise-free) points, coloured by DBSCAN label
    fig.add_trace(go.Scatter3d(
        x=df_result['x'],
        y=df_result['y'],
        z=df_result['z'],
        mode='markers',
        marker=dict(
            size=3,
            color=df_result['cluster'],
            colorscale='Viridis',
            opacity=0.5
        ),
        name='Data Points'
    ))

    # Add bounding boxes: each one is a single polyline through the box corners
    for cluster, bbox in bounding_boxes.items():
        fig.add_trace(go.Scatter3d(
            x=[bbox['min_x'], bbox['max_x'], bbox['max_x'], bbox['min_x'], bbox['min_x'], bbox['min_x'], bbox['max_x'], bbox['max_x'], bbox['min_x'], bbox['min_x'], bbox['max_x'], bbox['max_x'], bbox['max_x'], bbox['max_x'], bbox['min_x'], bbox['min_x'], bbox['min_x']],
            y=[bbox['min_y'], bbox['min_y'], bbox['max_y'], bbox['max_y'], bbox['min_y'], bbox['min_y'], bbox['min_y'], bbox['max_y'], bbox['max_y'], bbox['max_y'], bbox['max_y'], bbox['max_y'], bbox['min_y'], bbox['min_y'], bbox['min_y'], bbox['min_y'], bbox['max_y']],
            z=[bbox['min_z'], bbox['min_z'], bbox['min_z'], bbox['min_z'], bbox['min_z'], bbox['max_z'], bbox['max_z'], bbox['max_z'], bbox['max_z'], bbox['min_z'], bbox['min_z'], bbox['max_z'], bbox['max_z'], bbox['min_z'], bbox['min_z'], bbox['max_z'], bbox['max_z']],
            mode='lines',
            line=dict(color='red', width=3),
            name=f'Vehiculo {cluster}'  # Labelled with the DBSCAN label, not the track id
        ))

    # Add the centre of every bounding box of this frame
    fig.add_trace(go.Scatter3d(
        x=[bbox['min_x'] + (bbox['max_x'] - bbox['min_x']) / 2 for bbox in bounding_boxes.values()],
        y=[bbox['min_y'] + (bbox['max_y'] - bbox['min_y']) / 2 for bbox in bounding_boxes.values()],
        z=[bbox['min_z'] + (bbox['max_z'] - bbox['min_z']) / 2 for bbox in bounding_boxes.values()],
        mode='markers',
        marker=dict(
            size=10,
            color='red',
            symbol='x'
        ),
        name='Centroids'
    ))

    # Draw one polyline per track through all of its detections so far
    # (current and earlier frames), each track in its own colour
    for cluster_id in last_centroids:
        cluster_centroids = [c for c in centroids if c[0] == cluster_id]
        if len(cluster_centroids) > 1:
            fig.add_trace(go.Scatter3d(
                x=[c[1] for c in cluster_centroids],
                y=[c[2] for c in cluster_centroids],
                z=[c[3] for c in cluster_centroids],
                mode='lines',
                line=dict(color=colors[cluster_id % len(colors)], width=2),
                name=f'Cluster {cluster_id} Path'
            ))

    # Update layout
    fig.update_layout(
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data',
            camera=dict(
                eye=dict(x=5, y=3, z=4)  # Camera position
            )
        ),
        title='Carretera',
        legend=dict(title='Legend')
    )

  0%|          | 0/3 [00:00<?, ?it/s]

 33%|███▎      | 1/3 [00:02<00:04,  2.08s/it]

 67%|██████▋   | 2/3 [00:03<00:01,  1.51s/it]

100%|██████████| 3/3 [00:04<00:00,  1.40s/it]
