In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import os

In [None]:
# Input data: despite its name, AIS_dir is the path of the pickle file itself, not of a folder.
AIS_dir = "../../../../data/test_correlation/AIS/AIS.pkl"
# NOTE(security): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
AIS = pd.read_pickle(AIS_dir)
# Folder for exported figures. The AIS_OUTPUT_DIR environment variable overrides the
# machine-specific default so the notebook can run on another workstation.
output_path = os.environ.get("AIS_OUTPUT_DIR", "/home/imonge/Documents/AIS")

In [None]:
# Overall length: one length value per vessel

# Rows that carry a length, reduced to the first such row of every MMSI
has_length = AIS["length"].notna()
df_length = AIS.loc[has_length].drop_duplicates(subset="mmsi", keep="first")

# Number of vessels represented in the histogram
total_vessels = len(df_length)

# 2 m wide bins spanning the observed length range
length_bins = {
    "start": df_length["length"].min(),
    "end": df_length["length"].max(),
    "size": 2,
}

# Vessel count written just above the top-right corner of the plotting area
length_note = {
    "text": f"Total vessels: {total_vessels}",
    "xref": "paper",
    "yref": "paper",
    "x": 1.0,
    "y": 1.1,
    "showarrow": False,
    "font": {"size": 14},
}

# Interactive histogram
fig = px.histogram(
    df_length,
    x="length",
    nbins=None,
    title="Overall length of boats",
    labels={"length": "Length (m)"},
)
fig.update_traces(
    xbins=length_bins,
    hovertemplate="Length: %{x} m<br>Number of boats: %{y}",
)
fig.update_layout(bargap=0.05, annotations=[length_note])

fig.show()

In [None]:
## Distribution of sog over all records
# Records that carry a speed value
AIS_speed = AIS[~AIS["sog"].isna()].copy()

plt.figure(figsize=(10, 6))

# Values of 30 knots or more are left out of the histogram
v = AIS_speed[AIS_speed["sog"] < 30]

plt.hist(v["sog"], bins=100)
# Title and axis labels so the figure can be read on its own
plt.title("Distribution of speed over ground (sog < 30 knots)")
plt.xlabel("Speed (knots)")
plt.ylabel("Number of records")
plt.show()

In [None]:
## Mean sog per vessel
# Keep records with a speed value above 3 knots
df_speed = AIS[(~AIS["sog"].isna()) & (AIS["sog"] > 3)]

# Average the remaining speeds per vessel (one row per mmsi)
df_speed = df_speed.groupby("mmsi", as_index=False)["sog"].mean()

# Number of vessels represented in the histogram
total_vessels = df_speed.shape[0]

# Plot interactive histogram
fig = px.histogram(
    df_speed,
    x="sog",
    nbins=None,
    title="Distribution of vessel mean speed (knots)",
    # The key must be the plotted column name; the former "speed" key matched
    # no column, so the axis label was never applied.
    labels={"sog": "Speed (knots)"},
)

fig.update_traces(
    # 0.5 knot wide bins spanning the observed range of mean speeds
    xbins=dict(
        start=df_speed["sog"].min(),
        end=df_speed["sog"].max(),
        size=0.5,
    ),
    hovertemplate="Speed: %{x} knots<br>Number of boats: %{y}"
)

fig.update_layout(
    bargap=0.05,
    # Vessel count written just above the top-right corner of the plotting area
    annotations=[
        dict(
            text=f"Total vessels: {total_vessels}",
            xref="paper", yref="paper",
            x=1.0, y=1.1,
            showarrow=False,
            font=dict(size=14)
        )
    ]
)

fig.show()

In [None]:
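# ## Max sog (cell currently disabled)
# # NOTE: before re-enabling, the xbins below must read df_speed["max_sog"] instead of
# # df_length["sog"], and the labels key must be "max_sog" instead of "speed".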
# # Keep occurrences with speed info
# df_speed = AIS[~AIS["sog"].isna()].groupby("mmsi", as_index=False)["sog"].max().rename(columns={"sog": "max_sog"})
#
# # Total number of counts
# total_vessels = df_speed.shape[0]
#
# # Plot interactive histogram
# fig = px.histogram(
#     df_speed,
#     x="max_sog",
#     nbins=None,
#     title="Distribution of vessel maximum speed (knots)",
#     labels={"speed": "Speed (knots)"},
# )
#
# fig.update_traces(
#     xbins=dict(
#         start=df_length["sog"].min(),
#         end=df_length["sog"].max(),
#         size=0.5,
#     ),
#     hovertemplate="Speed: %{x} knots<br>Number of boats: %{y}"
# )
#
# fig.update_layout(
#     bargap=0.05,
#     annotations=[
#         dict(
#             text=f"Total vessels: {total_vessels}",
#             xref="paper", yref="paper",
#             x=1.0, y=1.1,
#             showarrow=False,
#             font=dict(size=14)
#         )
#     ]
# )
#
# fig.show()

In [None]:
## Boat occurrences in time
# Define timestamp (daily)
AIS['day'] = AIS['datetime'].dt.floor('D')

# A vessel is "stopped" on a given day when every sog value recorded for it that day
# is below 3 knots, otherwise it is "moving".
# NOTE: NaN < 3 evaluates to False, so a vessel with a missing sog on a day is classed as moving.
daily_status = (
    AIS.groupby(['day', 'mmsi'])['sog']
    .agg(all_stopped=lambda x: (x < 3).all())
    .reset_index()
)
daily_status['category'] = daily_status['all_stopped'].map({True: 'stopped', False: 'moving'})

# Attach the daily category to every AIS row. A 'category' column left by a previous
# run of this cell is dropped first: merging on top of it would create
# category_x / category_y and make the cell fail when executed a second time.
AIS = AIS.drop(columns='category', errors='ignore').merge(
    daily_status[['day', 'mmsi', 'category']], on=['day', 'mmsi'], how='left'
)

# Unique vessels per day and category. daily_status holds one row per (day, mmsi),
# so it yields the same counts as the merged AIS table. Reindexing guarantees that
# both columns exist even when one category never occurs.
grouped = (
    daily_status.groupby(['day', 'category'])['mmsi']
    .nunique()
    .unstack(fill_value=0)
    .reindex(columns=['moving', 'stopped'], fill_value=0)
)

# Add column for total
grouped['total'] = grouped.sum(axis=1)

# Figure parameters
fig = go.Figure()

fig.add_bar(
    x=grouped.index,
    y=grouped['stopped'],
    name="stopped (<3 knots)",
    marker_color="cadetblue",
    # The daily total is carried in customdata and shown once, on the first trace
    hovertemplate="%{y}<br>Total: %{customdata}",
    customdata=grouped['total'].values.reshape(-1, 1),
)

fig.add_bar(
    x=grouped.index,
    y=grouped['moving'],
    name="moving (>=3 knots)",
    marker_color="orange",
    hovertemplate="%{y}",
)

fig.update_layout(
    barmode="stack",
    title="Daily boat occurrences",
    xaxis_title="Date",
    yaxis_title="Number of occurrences",
    hovermode='x unified',
    legend_title='Legend',
    height=700
)

fig.show()

In [None]:
# ## Boat occurrences in time
# AIS['day'] = AIS['datetime'].dt.floor('D')
#
# # Identify stopped boats
# daily_status = (AIS.groupby(['day', 'mmsi'])['sog'].agg(all_stopped=lambda x: (x < 3).all()).reset_index())
# daily_status['category'] = daily_status['all_stopped'].map({True: 'stopped', False: 'moving'})
#
# # Keep moving boats
# moving_mmsi = daily_status[daily_status['category'] == 'moving'][['day', 'mmsi']]
#
# # Merge with AIS dataset
# AIS_moving = AIS.merge(moving_mmsi, on=['day', 'mmsi'], how='inner')
#
# # Daily mean speed per mmsi
# daily_mmsi_speed = AIS_moving.groupby(['day', 'mmsi'])['sog'].mean().reset_index()
#
# # Daily speed for all mmsi
# daily_speed = daily_mmsi_speed.groupby('day')['sog'].mean()
#
# # Number of unique mmsi per day
# daily_count = daily_mmsi_speed.groupby('day')['mmsi'].nunique()
#
# # Combined dataframe
# daily_data = pd.DataFrame({
#     'count': daily_count,
#     'mean_sog': daily_speed
# })
#
# # Normalize mean speed for the 0 to 1 gradient
# norm_sog = (daily_data['mean_sog'] - daily_data['mean_sog'].min()) / (daily_data['mean_sog'].max() - daily_data['mean_sog'].min())
#
# # Convert the normalized speed into a color palette
# cmap = plt.get_cmap('jet')
# colors = [f'rgba({int(r*255)},{int(g*255)},{int(b*255)},1)' for r, g, b, _ in cmap(norm_sog)]
#
# # Figure parameters
# fig = go.Figure()
#
# fig.add_bar(
#     x=daily_data.index,
#     y=daily_data["count"],
#     marker=dict(
#         color=daily_data['mean_sog'], colorscale='jet', colorbar=dict(title='Mean speed (knots)', tickformat='.1f')),
#     customdata=np.round(daily_data['mean_sog'].values, 2).reshape(-1, 1),
#     hovertemplate="Number of boats : %{y}<br>Mean speed: %{customdata[0]} kn<extra></extra>"
# )
#
# fig.update_layout(
#     title="Daily boat occurrences with mean speed",
#     xaxis_title="Date",
#     yaxis_title="Number of occurrences",
#     hovermode='x unified',
#     legend_title='Legend',
#     height=700
# )
#
# fig.show()

In [None]:
## Monthly unique vessels and daily mean number of vessels, split into stopped / moving
# Time keys used for the two aggregation levels
AIS['month'] = AIS['datetime'].dt.to_period('M')
AIS['day'] = AIS['datetime'].dt.floor('D')

# Both columns are forced to exist below so that a period without any stopped
# (or any moving) vessel yields zeros instead of a KeyError or a scalar y value.
status_columns = ['moving', 'stopped']

# "stopped" = every sog value of the vessel within the day is below 3 knots
daily_status = (
    AIS.groupby(['day', 'mmsi'])['sog']
    .agg(all_stopped=lambda x: (x < 3).all())
    .reset_index()
)
daily_status['category'] = daily_status['all_stopped'].map({True: 'stopped', False: 'moving'})

# Same rule applied over the whole month
monthly_status = (
    AIS.groupby(['month', 'mmsi'])['sog']
    .agg(all_stopped=lambda x: (x < 3).all())
    .reset_index()
)
monthly_status['category'] = monthly_status['all_stopped'].map({True: 'stopped', False: 'moving'})

# Monthly unique mmsi
monthly_unique = (
    monthly_status.groupby(['month', 'category'])['mmsi']
    .nunique()
    .unstack(fill_value=0)
    .reindex(columns=status_columns, fill_value=0)
    .reset_index()
    .rename_axis(None, axis=1)
)
monthly_unique['month'] = monthly_unique['month'].dt.to_timestamp()
monthly_unique['total'] = monthly_unique['stopped'] + monthly_unique['moving']

# Daily mean of unique mmsi by month.
# Only days present in the data contribute to the mean.
daily_counts = (
    daily_status.groupby(['day', 'category'])['mmsi']
    .nunique()
    .unstack(fill_value=0)
    .reindex(columns=status_columns, fill_value=0)
)
daily_counts['month'] = daily_counts.index.to_period('M')

monthly_mean = daily_counts.groupby('month')[['stopped', 'moving']].mean().reset_index()
monthly_mean['month'] = monthly_mean['month'].dt.to_timestamp()


# Figure parameters
fig = go.Figure()

# Monthly mmsi histogram
fig.add_bar(
    x=monthly_unique['month'],
    y=monthly_unique['stopped'],
    name="Stopped (< 3 knots all month)",
    marker_color="cadetblue",
    customdata=monthly_unique['total'],
    hovertemplate='%{y}<br>Total: %{customdata}',
)

fig.add_bar(
    x=monthly_unique['month'],
    y=monthly_unique['moving'],
    name="Moving (>= 3 knots at least once in the month)",
    marker_color="orange",
    hovertemplate='%{y}',
)

# Daily mean curves. Their status is evaluated per day (daily_status), so the legends
# say "day"; the stopped legend previously read "> 3 knots", the opposite of the test.
fig.add_trace(go.Scatter(
    x=monthly_mean['month'],
    y=monthly_mean['stopped'],
    mode='lines+markers',
    name="Daily mean for stopped vessel (< 3 knots all day)",
    line=dict(color='darkslategray', width=2),
    hovertemplate='%{y:.1f}'
))

fig.add_trace(go.Scatter(
    x=monthly_mean['month'],
    y=monthly_mean['moving'],
    mode='lines+markers',
    name="Daily mean for moving vessel (>= 3 knots at least once in the day)",
    line=dict(color='darkorange', width=2),
    hovertemplate='%{y:.1f}'
))

fig.update_layout(
    title="Monthly sum and mean number of unique vessel transiting",
    xaxis_title="Month",
    yaxis_title="Number of boats (unique mmsi)",
    barmode='stack',
    hovermode='x unified',
    legend_title='Legend',
    height=700
)
fig.show()

In [None]:
## Monthly number of boats by length class
# One color per length class (cycled if there are more classes than colors)
set1_colors = [

    "#f781bf",  # pink
    "#a65628",  # brown
    "#377eb8",  # blue
    "#7e03a8",  # purple
    "#e6ab02",  # yellow
    "#f46d43",  # orange
    "#d9ef8b",  # green
    "#e41a1c",  # red
    "#999999",  # grey
]

# Create a column "month"
AIS['month'] = AIS['datetime'].dt.to_period('M')

# Define length classes: edges 1, 51, ..., 351, 401 with labels "1-50 m", ..., "351-400 m"
bins = list(range(1, 401, 50))
bins.append(401)
labels = [f"{bins[i]}-{bins[i+1]-1} m" for i in range(len(bins)-1)]

# right=False makes the intervals left-closed, [1, 51), [51, 101), ..., so that they
# match their labels. With right-closed intervals a 51 m vessel landed in "1-50 m",
# a 101 m vessel in "51-100 m", and so on. Lengths outside [1, 401) get no class.
AIS['length_class'] = pd.cut(AIS['length'], bins=bins, labels=labels, right=False)

# One row per vessel and month (the first record of the month is kept)
AIS_unique = AIS.drop_duplicates(subset=['month', 'mmsi'])

# Group by month and length class. observed=False is explicit so that empty classes
# are kept as zero columns regardless of the pandas default for categorical groupers,
# which keeps the class-to-color assignment stable.
grouped = (
    AIS_unique.groupby(['month', 'length_class'], observed=False)['mmsi']
    .nunique()
    .unstack(fill_value=0)
    .sort_index()
)


# Convert month to timestamp for plotly
grouped.index = grouped.index.to_timestamp()

# Figure parameters
fig = go.Figure()

for i, col in enumerate(grouped.columns):
    fig.add_bar(
        x=grouped.index,
        y=grouped[col],
        name=col,
        marker_color=set1_colors[i % len(set1_colors)],
        hovertemplate="Number of boats: %{y}",
    )

dates = grouped.index

# Only January and July get an x-axis tick
tickvals = [d for d in dates if d.month in [1, 7]]
ticktext = [d.strftime('%b %Y') for d in tickvals]

# Layout
fig.update_layout(
    title="Monthly number of boats by length class",
    yaxis_title="Number of boats",
    barmode='stack',
    hovermode='x unified',
    legend_title='Length classes',
    width=1000,
    height=500,
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black', size=16),
    legend=dict(
        font=dict(size=16)),
    xaxis=dict(
        tickvals=tickvals,
        ticktext=ticktext,
        ticks='outside',
        tickfont=dict(size=16)
    )
)

# Export into the folder configured at the top of the notebook instead of repeating
# the absolute path; the folder is created when missing so the export does not fail.
os.makedirs(output_path, exist_ok=True)
fig.write_image(os.path.join(output_path, "boat_class.png"))

#fig.show()