**Test plot**

This file will be used to test the details and features of the used packages, including:
- numpy
- pandas
- plotly
- scikit-learn
- matplotlib

In [40]:
# Imports
import sys
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

**Data Dictionary**

In [54]:
# Data dictionary
data = {
    '1': r'1_coche.csv',
    '2': r'coche_coche.csv',
    'moto': r'coche_coche_moto.csv',
    'empty': r'carretera.csv'
}

**Data Preprocessing**

In [53]:
df = pd.read_csv('1_coche.csv')
# Substracting columns from one another"
'''
result = df1[['x', 'y', 'z']] - df2[['x', 'y', 'z']] + 0.1
result = df1[['x', 'y', 'z']].where(df1[['x', 'y', 'z']] > 0.1, np.nan)
result = pd.concat([result, df1.drop(['x', 'y', 'z'], axis=1)], axis=1)
result
'''
print(f'''We initially have a type {type(df['x'][4])} that has a size of {sys.getsizeof(df['x'][4])} bytes
The whole dataset has a size of {sys.getsizeof(df)} bytes with the first three columns having a size of {sys.getsizeof(df[['x','y','z']])} bytes
''')

# Convert from float64 to float16
df = df.astype('float16')

print(f'''After Data preprocessing (conversioon from float64 to float16) we have a dataframe of total size {sys.getsizeof(df)} bytes 
with the first three columns having a size of {sys.getsizeof(df[['x','y','z']])} bytes
''')

We initially have a type <class 'numpy.float64'> that has a size of 32 bytes
The whole dataset has a size of 9437348 bytes with the first three columns having a size of 3145892 bytes

After Data preprocessing (conversioon from float64 to float16) we have a dataframe of total size 2359460 bytes 
with the first three columns having a size of 786596 bytes




overflow encountered in cast



**Data Clustering with DBSCAN**

In [33]:
highway = pd.read_csv(data['empty'])
df = pd.read_csv(data['2'])


df[['x', 'y', 'z']] = df[['x', 'y', 'z']] - highway[['x', 'y', 'z']]
df[['x', 'y', 'z']] = df[['x', 'y', 'z']].where(df[['x', 'y', 'z']] > 0, np.nan)

# Crea un objeto DBSCAN
dbscan = DBSCAN(eps=0.3, min_samples=5)
def apply_dbscan(df, eps=0.3, min_samples=5):
    # Only use rows without NaN in x, y, z
    xyz = df[['x', 'y', 'z']].dropna()
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(xyz)
    # Map labels back to original dataframe index
    result = pd.Series(np.nan, index=df.index)
    result[xyz.index] = labels
    return result

y_dbscan = apply_dbscan(df)

# Zoom factor of the figure
zoom = 0.25
zoom = 1/zoom

fig = go.Figure(
    data = [
        go.Scatter3d(
            x = df['x'],
            y = df['z'],
            z = -df['y'],
            mode = 'markers',
            marker = dict(
                size = 1, color = y_dbscan, colorscale = 'hot', opacity = 0.5
            ),
        )
    ],
    layout = go.Layout(
        scene = dict(
            xaxis = dict(title = 'X = X', range=[-10*zoom, 10*zoom]),
            yaxis = dict(title = 'Y = Z', range=[-10*zoom, 10*zoom]),
            zaxis = dict(title = 'Z = -Y', range=[-15*zoom, 5*zoom])
        ),
        width = 700,
        height = 700,
    )
)
fig.update_layout(
    title = dict(
        text = "\"Highway Substraction\" LiDAR Plot",
        x = 0.5,
        y = 0.95,
        xanchor = 'center',
        yanchor = 'top',
        font = dict(
            family = "Verdana, sans-serif",
            size = 35,
            color = "Gray"
        )
    ),
    font = dict (
        family = "Arial, monospace", size = 10, color = "Black", variant = 'small-caps'
    ),
)


# Show the figure
fig.show()

In [None]:
data = {
    'x': [1, 2, 3],
    'y': [1, 1, 1],
    'z': [2, 3, 2]
}
df = pd.DataFrame(data)
df.loc[1] = np.nan
print(df)
fig = px.scatter_3d(df, x = 'x', y = 'y', z = 'z')
fig.show()

     x    y    z
0  1.0  1.0  2.0
1  NaN  NaN  NaN
2  3.0  1.0  2.0
