In [None]:
import pandas as pd
import pyarrow
import pyarrow.parquet

from paths import PROCESSED_DATA_DIR, RAW_DATA_DIR
from  zip_reader import read_zip
from preprocessing import preprocess_full, preprocess_partial

In [None]:
# Load the processed dataset stored under PROCESSED_DATA_DIR and convert the
# resulting Arrow table into a pandas DataFrame.
df = (
    pyarrow.parquet
    .read_table(PROCESSED_DATA_DIR)
    .to_pandas()
)

In [None]:
# Difference between each row's Timestamp and the previous Timestamp of the
# same MMSI (rows are ordered by Timestamp first). The first row of every
# vessel has no predecessor, so its value is missing. The result is aligned
# back onto df through the index.
df['ts_diff'] = (
    df.sort_values(by='Timestamp')
      .groupby('MMSI')['Timestamp']
      .diff()
)


In [None]:
# Display the frame (pandas truncates long output) to inspect the new ts_diff column.
df

In [None]:
import pandas as pd

# Distinct vessel identifiers present in the data.
unique_mmsi = df['MMSI'].unique()

# Randomly select up to 50 MMSIs (seeded so the selection is reproducible).
# Capping n at the population size avoids the ValueError that sample() raises
# when asked for more items than exist.
random_mmsi = pd.Series(unique_mmsi).sample(
    n=min(50, len(unique_mmsi)),
    random_state=42,
)

# Keep only the rows of the selected vessels. The explicit copy makes
# df_sampled an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning.
df_sampled = df[df['MMSI'].isin(random_mmsi)].copy()


In [None]:
# Add a column combining MMSI and Segment ("<MMSI>_<Segment>").
# assign() returns a new frame instead of writing into a filtered slice of df,
# which avoids SettingWithCopyWarning and makes re-running this cell harmless.
df_sampled = df_sampled.assign(
    VesselSegment=df_sampled['MMSI'].astype(str) + '_' + df_sampled['Segment'].astype(str)
)


In [None]:
# List the column names available in the full frame.
df.columns

In [None]:
import plotly.graph_objects as go
import pandas as pd

def _segment_hover_text(segment_df):
    """Build one hover label per row of a single vessel segment."""
    # Zipping the columns keeps each column's own dtype and avoids building a
    # Series per row, which is what DataFrame.apply(axis=1) does.
    columns = zip(
        segment_df['MMSI'],
        segment_df['Segment'],
        segment_df['SOG'],
        segment_df['COG'],
        segment_df['Timestamp'],
    )
    return [
        f"MMSI: {mmsi}<br>Segment: {segment}<br>SOG: {sog}<br>COG: {cog}<br>Time: {ts}"
        for mmsi, segment, sog, cog, ts in columns
    ]


def plot_vessel_tracks_with_filter(
    df: pd.DataFrame,
    center_lat: float = 56.0,
    center_lon: float = 8.0,
    zoom: float = 5.0,
) -> None:
    """Show vessel tracks on an interactive map with a per-vessel dropdown.

    One line trace is drawn for every (MMSI, Segment) pair. A dropdown menu
    offers "All vessels" plus one entry per vessel; choosing a vessel hides
    the traces of all other vessels and updates the figure title.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns MMSI, Segment, Timestamp, Latitude,
        Longitude, SOG and COG. The frame is not modified.
    center_lat, center_lon : float
        Initial map centre. Defaults match the previously hard-coded centre.
    zoom : float
        Initial map zoom level.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Sort so that every track is drawn in chronological order.
    df = df.sort_values(['MMSI', 'Segment', 'Timestamp'])

    # One trace per vessel segment; trace_owner[i] is the vessel label of traces[i].
    traces = []
    trace_owner = []
    # observed=True: only iterate over combinations that actually occur, also
    # when the key columns are categorical.
    for (vessel, _segment), segment_df in df.groupby(['MMSI', 'Segment'], observed=True):
        label = str(vessel)
        traces.append(
            go.Scattermap(
                lat=segment_df['Latitude'],
                lon=segment_df['Longitude'],
                mode='lines',
                line=dict(width=2),
                name=label,
                visible=True,
                hoverinfo='text',
                text=_segment_hover_text(segment_df),
            )
        )
        trace_owner.append(label)

    fig = go.Figure(data=traces)

    # Dropdown entries. "title.text" is updated instead of "title" so that the
    # centred title layout is kept and the update does not rely on string
    # titles, which newer plotly.js versions no longer accept.
    buttons = [dict(
        label="All vessels",
        method="update",
        args=[{"visible": [True] * len(traces)},
              {"title.text": "All Vessels"}],
    )]

    # One option per vessel, in trace order, showing only that vessel's segments.
    for label in dict.fromkeys(trace_owner):
        buttons.append(dict(
            label=label,
            method="update",
            args=[{"visible": [owner == label for owner in trace_owner]},
                  {"title.text": f"Vessel {label}"}],
        ))

    # Scattermap traces are drawn on the `map` subplot, so the style, centre
    # and zoom must be set through layout.map (the map_* keys), not
    # layout.mapbox.
    fig.update_layout(
        map_style="open-street-map",
        map_center={"lat": center_lat, "lon": center_lon},
        map_zoom=zoom,
        height=800,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        updatemenus=[dict(
            active=0,
            buttons=buttons,
            x=0,
            y=1.05,
            xanchor='left',
            yanchor='top',
        )],
        title=dict(
            text="Vessel Track using AIS Data",
            x=0.5,
            xanchor='center',
            yanchor='top',
        ),
    )

    fig.show()
# Render the interactive track map for the sampled vessels.
plot_vessel_tracks_with_filter(df_sampled)