In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon

# --- Drammen Terminal Polygon ---
# Outline of the terminal area used for the `in_terminal` flag computed below.
# Vertices are (x, y) pairs; the containment test builds
# Point(longitude, latitude), so x is longitude and y is latitude.
# The ring is explicitly closed: the last vertex repeats the first.
drammen_terminal_poly = Polygon([
    (10.2371879,59.7395623),(10.2325317,59.7340905),
    (10.2366301,59.7344906),(10.2399774,59.735572),
    (10.2397629,59.7394974),(10.2371879,59.7395623)
])

# --- Read Drammen Data ---
# NOTE(review): `dayfirst` only influences columns that read_csv itself parses
# as dates (via `parse_dates`); none are requested here, so it looks like a
# no-op. The timestamp parsing that matters is the to_datetime call below --
# confirm the raw format and pass `dayfirst=True` there if it is DD/MM.
df = pd.read_csv('Drammen.csv', dayfirst=True, low_memory=False)
# Unparseable timestamps become NaT (errors='coerce') and are dropped, since
# every later step depends on a valid, timezone-aware report time.
df['date_time_utc'] = pd.to_datetime(df['date_time_utc'], utc=True, errors='coerce')
df = df.dropna(subset=['date_time_utc'])

# --- Add Terminal Polygon Inclusion Boolean ---
# Iterate over just the two coordinate columns instead of
# DataFrame.apply(axis=1), which materialises a full row Series per report.
# The explicit index keeps the flags aligned with the (non-contiguous, after
# dropna) row labels, and dtype=bool keeps the column boolean even when the
# frame is empty.
df['in_terminal'] = pd.Series(
    [
        drammen_terminal_poly.contains(Point(lon, lat))
        for lon, lat in zip(df['longitude'], df['latitude'])
    ],
    index=df.index,
    dtype=bool,
)

# --- Sort data for processing ---
# Chronological order within each vessel is what the visit/stop detection
# below relies on.
df = df.sort_values(['mmsi', 'date_time_utc']).reset_index(drop=True)

# One dict per (vessel, visit); turned into the KPI table afterwards.
summary_rows = []

# Consecutive reports of one vessel that are more than this many hours apart
# are treated as belonging to different visits.
VISIT_GAP_HOURS = 10
# A report whose speed_over_ground is below this value counts as "stopped".
STOP_SPEED_THRESHOLD = 1

# --- Process per Vessel ---
for mmsi, vessel_df in df.groupby('mmsi'):
    vessel_df = vessel_df.sort_values('date_time_utc').reset_index(drop=True)
    # Static vessel attributes are taken from the vessel's first report.
    vessel_name = vessel_df['ship_name'].iloc[0]
    vessel_type = vessel_df['ship_type'].iloc[0]
    vessel_length = vessel_df['length'].iloc[0]

    # Identify visits: split where the gap to the previous report exceeds
    # VISIT_GAP_HOURS. The first report has no predecessor, so its gap is 0.
    vessel_df['prev_time'] = vessel_df['date_time_utc'].shift(1)
    vessel_df['hour_gap'] = (
        (vessel_df['date_time_utc'] - vessel_df['prev_time']).dt.total_seconds() / 3600
    ).fillna(0)
    visit_breaks = vessel_df.index[vessel_df['hour_gap'] > VISIT_GAP_HOURS].tolist()
    # Row positions delimiting the visits: each visit is the half-open range
    # [visit_breaks[k], visit_breaks[k + 1]).
    visit_breaks = [0] + visit_breaks + [len(vessel_df)]

    # For each visit
    for visit_num, (visit_start, visit_end) in enumerate(
        zip(visit_breaks, visit_breaks[1:]), start=1
    ):
        visit_df = vessel_df.iloc[visit_start:visit_end].copy()
        visit_start_time = visit_df['date_time_utc'].iloc[0]
        visit_end_time = visit_df['date_time_utc'].iloc[-1]
        visit_duration_hr = (visit_end_time - visit_start_time).total_seconds() / 3600

        # Stops in port (speed below threshold), with breakdown.
        # A stop starts on the first stopped report after a non-stopped one
        # (or at the very first report of the visit, because the shifted
        # column is filled with False there). It ends on the first moving
        # report that follows, or on the visit's last report if the vessel is
        # still stopped then. Every end therefore has a matching start.
        visit_df['stopped'] = visit_df['speed_over_ground'] < STOP_SPEED_THRESHOLD
        visit_df['stopped_shift'] = visit_df['stopped'].shift(1, fill_value=False)
        stop_starts = visit_df.index[visit_df['stopped'] & ~visit_df['stopped_shift']].tolist()
        stop_ends = visit_df.index[~visit_df['stopped'] & visit_df['stopped_shift']].tolist()
        if visit_df['stopped'].iloc[-1]:
            stop_ends.append(visit_df.index[-1])

        stops = []
        for s, e in zip(stop_starts, stop_ends):
            stop_start_time = visit_df.loc[s, 'date_time_utc']
            stop_end_time = visit_df.loc[e, 'date_time_utc']
            stop_duration = (stop_end_time - stop_start_time).total_seconds() / 3600
            # Row `e` is usually the first *moving* report after the stop; it
            # marks the end time but must not decide whether the stop itself
            # happened in the terminal, so only stopped reports are tested.
            stop_rows = visit_df.loc[s:e]
            in_terminal = bool((stop_rows['stopped'] & stop_rows['in_terminal']).any())
            stops.append({
                "stop_start": stop_start_time,
                "stop_end": stop_end_time,
                "duration_hr": stop_duration,
                "in_terminal": in_terminal
            })

        terminal_stops = [stop for stop in stops if stop["in_terminal"]]
        stop_count = len(stops)
        stop_durations_total = sum(stop["duration_hr"] for stop in stops)
        stop_count_terminal = len(terminal_stops)
        stop_durations_terminal = sum(stop["duration_hr"] for stop in terminal_stops)

        # Time spent in terminal polygon for this visit: span between the
        # first and the last report located inside the polygon.
        terminal_times = visit_df[visit_df['in_terminal']]
        if not terminal_times.empty:
            terminal_duration = (
                terminal_times['date_time_utc'].iloc[-1] - terminal_times['date_time_utc'].iloc[0]
            ).total_seconds() / 3600
        else:
            terminal_duration = 0
        nonterminal_duration = visit_duration_hr - terminal_duration

        # Time since last visit: gap between this visit's first report and the
        # report immediately before it (the previous visit's last report).
        if visit_num > 1:
            last_end = vessel_df.loc[visit_start - 1, 'date_time_utc']
            time_since_last_visit_hr = (visit_start_time - last_end).total_seconds() / 3600
        else:
            time_since_last_visit_hr = np.nan

        # Compose summary row
        summary_rows.append({
            "mmsi": mmsi,
            "ship_name": vessel_name,
            "ship_type": vessel_type,
            "length": vessel_length,
            "visit_num": visit_num,
            "visit_start_time": visit_start_time,
            "visit_end_time": visit_end_time,
            "visit_duration_hr": round(visit_duration_hr, 2),
            "stops_count": stop_count,
            "stops_total_duration_hr": round(stop_durations_total, 2),
            "stops_terminal_count": stop_count_terminal,
            "stops_terminal_duration_hr": round(stop_durations_terminal, 2),
            "terminal_duration_hr": round(terminal_duration, 2),
            "nonterminal_duration_hr": round(nonterminal_duration, 2),
            # First visit of a vessel has no predecessor: leave the cell blank.
            "time_since_last_visit_hr": round(time_since_last_visit_hr, 2) if not np.isnan(time_since_last_visit_hr) else "",
            "stops_detail": str(stops)
        })

# Turn the collected per-visit dicts into the KPI table.
summary_df = pd.DataFrame(data=summary_rows)

# --- Save to CSV ---
# Written without the index so the file holds only the KPI columns.
kpi_csv_name = "Drammen_kpi_summary.csv"
summary_df.to_csv(kpi_csv_name, index=False)

print(f"Summary saved as {kpi_csv_name}")
# Notebook preview of the first ten visits.
display(summary_df.iloc[:10])


Summary saved as Drammen_kpi_summary.csv


Unnamed: 0,mmsi,ship_name,ship_type,length,visit_num,visit_start_time,visit_end_time,visit_duration_hr,stops_count,stops_total_duration_hr,stops_terminal_count,stops_terminal_duration_hr,terminal_duration_hr,nonterminal_duration_hr,time_since_last_visit_hr,stops_detail
0,209314000,JUTLAND,70.0,120.0,1,2024-04-26 18:32:29+00:00,2024-04-28 11:15:36+00:00,40.72,2,40.33,0,0.0,0.0,40.72,,[{'stop_start': Timestamp('2024-04-26 18:43:30...
1,209325000,RIX RIVER,79.0,88.0,1,2024-02-02 05:45:15+00:00,2024-02-02 20:41:26+00:00,14.94,4,14.08,0,0.0,0.0,14.94,,[{'stop_start': Timestamp('2024-02-02 06:06:25...
2,209336000,RIX ALLIANCE,70.0,88.0,1,2024-03-18 15:24:40+00:00,2024-03-19 21:13:38+00:00,29.82,1,29.11,1,29.11,29.11,0.71,,[{'stop_start': Timestamp('2024-03-18 15:51:51...
3,209535000,NORDEN,70.0,90.0,1,2024-03-09 17:07:12+00:00,2024-03-10 15:42:19+00:00,22.59,1,21.63,0,0.0,0.0,22.59,,[{'stop_start': Timestamp('2024-03-09 17:43:33...
4,209535000,NORDEN,70.0,90.0,2,2024-04-19 05:25:09+00:00,2024-04-20 03:57:56+00:00,22.55,1,21.74,0,0.0,0.0,22.55,949.71,[{'stop_start': Timestamp('2024-04-19 05:55:30...
5,209536000,KONGSDAL,79.0,90.0,1,2024-03-19 08:34:13+00:00,2024-03-20 07:49:49+00:00,23.26,2,22.45,0,0.0,0.0,23.26,,[{'stop_start': Timestamp('2024-03-19 09:01:33...
6,209726000,SONORO,70.0,100.0,1,2024-03-05 11:38:57+00:00,2024-03-06 15:14:42+00:00,27.6,2,27.09,0,0.0,0.0,27.6,,[{'stop_start': Timestamp('2024-03-05 11:53:58...
7,209974000,RIX BAY,79.0,90.0,1,2024-01-17 06:26:25+00:00,2024-01-18 17:41:50+00:00,35.26,3,34.2,0,0.0,0.0,35.26,,[{'stop_start': Timestamp('2024-01-17 06:38:25...
8,210382000,SWE-BULK,70.0,88.0,1,2024-02-24 22:20:24+00:00,2024-02-26 17:22:26+00:00,43.03,2,42.53,0,0.0,0.0,43.03,,[{'stop_start': Timestamp('2024-02-24 22:38:35...
9,211141000,BIANCA RAMBOW,70.0,134.0,1,2024-01-03 03:12:51+00:00,2024-01-03 09:59:22+00:00,6.78,1,6.07,1,6.07,6.11,0.66,,[{'stop_start': Timestamp('2024-01-03 03:46:13...


In [7]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from dash import Dash, dcc, html, dash_table, Input, Output
import dash_bootstrap_components as dbc

# Load summary and add month
summary = pd.read_csv("Drammen_kpi_summary.csv", parse_dates=['visit_start_time', 'visit_end_time'])
# Drop every visit of any vessel that has at least one visit with more than
# five stops.
vessels_to_exclude = summary.loc[summary['stops_count'] > 5, 'mmsi'].unique()
summary = summary[~summary['mmsi'].isin(vessels_to_exclude)].reset_index(drop=True)
# 'YYYY-MM' strings sort chronologically, which keeps the monthly bars ordered.
summary['month'] = summary['visit_start_time'].dt.strftime('%Y-%m')

# Unique vessels per month in port (bar)
monthly_counts = summary.groupby('month')['mmsi'].nunique().reset_index(name='unique_vessels')

# Unique vessels per month in terminal (bar)
summary['visited_terminal'] = summary['terminal_duration_hr'] > 0
terminal_visits_month = summary[summary['visited_terminal']].groupby('month')['mmsi'].nunique().reset_index(name='unique_terminal_vessels')

# Visits per vessel: the highest visit number of a vessel is its visit count;
# the histogram then counts how many vessels share each visit count.
visits_per_vessel = summary.groupby('mmsi')['visit_num'].max().reset_index(name='total_visits')
visits_hist = visits_per_vessel['total_visits'].value_counts().sort_index()

# Stops per visit
stops_per_visit = summary['stops_count']

# Box and scatter plots for turnaround time at terminal vs vessel length (core KPI)
terminal_visits = summary[summary['terminal_duration_hr'] > 0].copy()
# The top bin is open-ended so that it matches its "200m+" label; with a
# finite upper edge, longer vessels would fall outside every bin and get NaN.
terminal_visits['length_bin'] = pd.cut(
    terminal_visits['length'],
    bins=[0, 100, 150, 200, float('inf')],
    labels=["0-100m", "100-150m", "150-200m", "200m+"],
)

# Colors (custom palette)
bar_color = "#3A6351"
terminal_color = "#489FB5"
hist_color = "#F7B801"
scatter_color = "#005F73"

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _on_white(fig):
    """Give a figure the white plot area and light-grey y grid shared by all charts."""
    return fig.update_layout(plot_bgcolor='white', yaxis=dict(gridcolor='gainsboro'))


# --- Figures (built up front so the layout below stays readable) ---
port_vessels_fig = _on_white(px.bar(
    monthly_counts, x='month', y='unique_vessels',
    title="Unique Vessels Per Month (Port Area)",
    labels={"month": "Month", "unique_vessels": "Unique Vessels"},
    color_discrete_sequence=[bar_color],
))

terminal_vessels_fig = _on_white(px.bar(
    terminal_visits_month, x='month', y='unique_terminal_vessels',
    title="Unique Vessels Per Month (Terminal Area)",
    labels={"month": "Month", "unique_terminal_vessels": "Unique Vessels (Terminal)"},
    color_discrete_sequence=[terminal_color],
))

# x = number of visits, y = number of vessels with that many visits; the
# click callback reads the clicked bar's x value.
visits_per_vessel_fig = px.bar(
    x=visits_hist.index, y=visits_hist.values,
    title="Number of Port Visits per Vessel<br><span style='font-size:0.8em;color:gray'>Click any bar to see MMSIs</span>",
    labels={"x": "Number of Visits", "y": "Number of Vessels"},
    color_discrete_sequence=[terminal_color],
)
visits_per_vessel_fig.update_traces(marker_line_color='white', marker_line_width=1.5)
visits_per_vessel_fig = _on_white(visits_per_vessel_fig)

stops_hist_fig = _on_white(px.histogram(
    summary, x='stops_count', nbins=summary['stops_count'].max()+1,
    title="Stops Per Visit Distribution",
    labels={"stops_count": "Stops per Visit", "count": "Visit Count"},
    color_discrete_sequence=[hist_color],
))

turnaround_box_fig = _on_white(px.box(
    terminal_visits, x="length_bin", y="terminal_duration_hr",
    title="Turnaround Time at Terminal vs. Vessel Length",
    labels={"terminal_duration_hr": "Turnaround Time at Terminal (hrs)", "length_bin": "Vessel Length Bin"},
    color_discrete_sequence=[scatter_color],
))

turnaround_scatter_fig = px.scatter(
    terminal_visits, x="length", y="terminal_duration_hr",
    title="Turnaround Time at Terminal (Scatter: Length vs. Time)",
    labels={"length": "Vessel Length (m)", "terminal_duration_hr": "Turnaround Time at Terminal (hrs)"},
    color_discrete_sequence=[bar_color],
)
turnaround_scatter_fig.update_traces(marker=dict(size=9, opacity=0.6))
turnaround_scatter_fig = _on_white(turnaround_scatter_fig)

# --- KPI table (first 20 visits, native filtering and sorting) ---
kpi_table = dash_table.DataTable(
    data=summary.head(20).to_dict('records'),
    columns=[{"name": col, "id": col} for col in summary.columns],
    page_size=20,
    filter_action="native",
    sort_action="native",
    style_table={'overflowX': 'auto', 'margin-top': '20px'},
    style_cell={'textAlign': 'left', 'padding':'4px', 'fontSize': 13},
    style_header={'backgroundColor': '#E8E8E8', 'fontWeight': 'bold'},
)

# --- Page layout: heading, four chart rows, then the KPI table ---
app.layout = dbc.Container([
    html.H2("Drammen Port Vessel KPI Dashboard (Curated Visuals)", style={'color': '#22223b', 'marginBottom': 20}),
    dbc.Row([
        dbc.Col([dcc.Graph(id="bar-unique-vessels-port", figure=port_vessels_fig)], md=6),
        dbc.Col([dcc.Graph(id="bar-unique-vessels-terminal", figure=terminal_vessels_fig)], md=6),
    ]),
    dbc.Row([
        dbc.Col([
            dcc.Graph(id="bar-visits-per-vessel", figure=visits_per_vessel_fig),
            # Filled by the click callback with the MMSIs behind the clicked bar.
            html.Div(id='bar-click-mmsi-table'),
        ], md=12),
    ]),
    dbc.Row([
        dbc.Col([dcc.Graph(id="hist-stops-per-visit", figure=stops_hist_fig)], md=6),
        dbc.Col([dcc.Graph(id="box-turnaround-vs-length", figure=turnaround_box_fig)], md=6),
    ]),
    dbc.Row([
        dbc.Col([dcc.Graph(id="scatter-turnaround-vs-length", figure=turnaround_scatter_fig)], md=12),
    ]),
    html.H4("KPI Table (filterable, first 20 rows):", style={'marginTop': 30}),
    kpi_table,
], fluid=True)

# --- Callback: Show MMSIs for clicked bar on "visits per vessel" graph ---
@app.callback(
    Output('bar-click-mmsi-table', 'children'),
    Input('bar-visits-per-vessel', 'clickData')
)
def display_mmsis(clickData):
    """Render a table of the MMSIs behind the clicked "visits per vessel" bar.

    Args:
        clickData: Click payload of the bar chart; the x value of its first
            point is read as the visit count. Falsy when nothing was clicked.

    Returns:
        An ``html.Div`` with a heading and a one-column MMSI table, or an
        empty string when nothing was clicked or no vessel has that count.
    """
    if not clickData:
        return ""

    num_visits = clickData['points'][0]['x']
    has_that_many = visits_per_vessel['total_visits'] == num_visits
    vessels_list = visits_per_vessel.loc[has_that_many, 'mmsi'].tolist()
    if not vessels_list:
        return ""

    heading = html.H6(f"Vessels with {num_visits} visits:")
    mmsi_table = dash_table.DataTable(
        data=[dict(mmsi=m) for m in vessels_list],
        columns=[{"name": "mmsi", "id": "mmsi"}],
        page_size=10,
        style_table={'maxHeight': '180px', 'overflowY': 'auto'},
        style_cell={'fontSize': 12}
    )
    return html.Div([heading, mmsi_table], style={'marginTop': '15px'})

# Start the Dash server on port 8060 and show the app inline in the notebook.
# Dash.run() names the notebook display option `jupyter_mode`; the former
# `mode` keyword belonged to the separate JupyterDash package and is either
# ignored (inside a notebook) or forwarded to Flask, which rejects it.
app.run(debug=True, jupyter_mode='inline', port=8060)
