In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon

# --- Oslo terminal areas, combined into one MultiPolygon ---
# Each ring lists (x, y) vertices and repeats its first vertex at the end to
# close the outline. Points tested against these shapes are built as
# Point(longitude, latitude), so x is longitude and y is latitude.
_terminal_rings = [
    [
        (10.7198972, 59.9088206),
        (10.7098765, 59.9057866),
        (10.7111854, 59.9042049),
        (10.7217855, 59.9079599),
        (10.7198972, 59.9088206),
    ],
    [
        (10.7448538, 59.8860217),
        (10.7451757, 59.8829641),
        (10.7670625, 59.8834271),
        (10.765775, 59.8860217),
        (10.7625349, 59.8877442),
        (10.7448538, 59.8860217),
    ],
]
oslo_terminals = MultiPolygon([Polygon(ring) for ring in _terminal_rings])

# --- Read Oslo Data ---
# NOTE(review): `dayfirst=True` only affects columns parsed through
# `parse_dates`; none are requested here, so the timestamps are actually parsed
# by `pd.to_datetime` below with its default day/month handling. Confirm this
# matches the CSV's date format.
df = pd.read_csv('Oslo.csv', dayfirst=True, low_memory=False)
# Unparseable timestamps become NaT (errors='coerce'); those rows are dropped.
df['date_time_utc'] = pd.to_datetime(df['date_time_utc'], utc=True, errors='coerce')
df = df.dropna(subset=['date_time_utc'])

# --- Add Terminal Area Inclusion Boolean (either terminal) ---
# Walk the two coordinate columns directly instead of a row-wise
# DataFrame.apply(axis=1): the per-row result is the same, but no Series is
# built for every row, and an empty frame simply produces an empty column.
df['in_terminal'] = [
    oslo_terminals.contains(Point(lon, lat))
    for lon, lat in zip(df['longitude'], df['latitude'])
]

# --- Sort data for processing ---
df = df.sort_values(['mmsi', 'date_time_utc']).reset_index(drop=True)

# Accumulates one summary record (dict) per vessel visit.
summary_rows = []

# --- Process per Vessel ---
for mmsi, vessel_df in df.groupby('mmsi'):
    # Re-index each vessel's track 0..n-1 in time order: the visit boundaries
    # below mix positional (.iloc) and label (.loc) access and need both to agree.
    vessel_df = vessel_df.sort_values('date_time_utc').reset_index(drop=True)
    # Static attributes are taken from the vessel's earliest record.
    vessel_name = vessel_df['ship_name'].iloc[0]
    vessel_type = vessel_df['ship_type'].iloc[0]
    vessel_length = vessel_df['length'].iloc[0]
    visit_num = 0

    # Identify visits: Split where gap >10 hours
    vessel_df['prev_time'] = vessel_df['date_time_utc'].shift(1)
    vessel_df['hour_gap'] = (vessel_df['date_time_utc'] - vessel_df['prev_time']).dt.total_seconds()/3600
    # The first record has no predecessor; a gap of 0 keeps it from opening a new visit.
    vessel_df.loc[vessel_df['hour_gap'].isnull(), 'hour_gap'] = 0
    visit_breaks = vessel_df.index[vessel_df['hour_gap'] > 10].tolist()
    # Bracket with 0 and len() so consecutive pairs form [start, end) slices.
    visit_breaks = [0] + visit_breaks + [len(vessel_df)]

    for i in range(len(visit_breaks)-1):
        visit_num += 1
        visit_start = visit_breaks[i]
        visit_end = visit_breaks[i+1]
        visit_df = vessel_df.iloc[visit_start:visit_end].copy()
        visit_start_time = visit_df['date_time_utc'].iloc[0]
        visit_end_time = visit_df['date_time_utc'].iloc[-1]
        visit_duration_hr = (visit_end_time - visit_start_time).total_seconds()/3600

        # Stops in port (speed<1), with breakdown
        # NOTE(review): speed_over_ground is presumably in knots — confirm.
        visit_df['stopped'] = visit_df['speed_over_ground'] < 1
        # fill_value=False makes a visit that begins stationary register its
        # first record as a stop start, so every stop end has an earlier start
        # and the two lists always pair up one-to-one.
        visit_df['stopped_shift'] = visit_df['stopped'].shift(1, fill_value=False)
        stop_starts = visit_df.index[(visit_df['stopped']) & (~visit_df['stopped_shift'])].tolist()
        # A stop ends at the first record where the vessel is moving again ...
        stop_ends = visit_df.index[(~visit_df['stopped']) & (visit_df['stopped_shift'])].tolist()
        # ... or at the visit's last record if it is still stationary there.
        if visit_df['stopped'].iloc[-1]:
            stop_ends.append(visit_df.index[-1])
        stops = []
        stop_durations_total = 0
        stop_durations_terminal = 0
        stop_count = 0
        stop_count_terminal = 0
        for s, e in zip(stop_starts, stop_ends):
            stop_start_time = visit_df.loc[s, 'date_time_utc']
            stop_end_time = visit_df.loc[e, 'date_time_utc']
            stop_duration = (stop_end_time - stop_start_time).total_seconds()/3600
            # A stop counts as a terminal stop if any record in s..e (inclusive)
            # lies inside a terminal polygon.
            in_terminal = visit_df.loc[s:e, 'in_terminal'].any()
            stops.append({
                "stop_start": stop_start_time,
                "stop_end": stop_end_time,
                "duration_hr": stop_duration,
                "in_terminal": in_terminal
            })
            stop_count += 1
            stop_durations_total += stop_duration
            if in_terminal:
                stop_count_terminal += 1
                stop_durations_terminal += stop_duration

        # Time spent in terminal area for this visit (any terminal):
        # span between the first and last in-terminal record of the visit.
        terminal_times = visit_df[visit_df['in_terminal']]
        if not terminal_times.empty:
            terminal_duration = (terminal_times['date_time_utc'].iloc[-1] - terminal_times['date_time_utc'].iloc[0]).total_seconds()/3600
        else:
            terminal_duration = 0
        nonterminal_duration = visit_duration_hr - terminal_duration

        # Time since last visit: gap between the previous visit's final record
        # and this visit's first record; undefined (NaN) for the first visit.
        if i > 0:
            last_end = vessel_df.loc[visit_breaks[i]-1, 'date_time_utc']
            time_since_last_visit_hr = (visit_start_time - last_end).total_seconds()/3600
        else:
            time_since_last_visit_hr = np.nan

        summary_rows.append({
            "mmsi": mmsi,
            "ship_name": vessel_name,
            "ship_type": vessel_type,
            "length": vessel_length,
            "visit_num": visit_num,
            "visit_start_time": visit_start_time,
            "visit_end_time": visit_end_time,
            "visit_duration_hr": round(visit_duration_hr, 2),
            "stops_count": stop_count,
            "stops_total_duration_hr": round(stop_durations_total, 2),
            "stops_terminal_count": stop_count_terminal,
            "stops_terminal_duration_hr": round(stop_durations_terminal, 2),
            "terminal_duration_hr": round(terminal_duration, 2),
            "nonterminal_duration_hr": round(nonterminal_duration, 2),
            # Keep NaN (round() passes it through) rather than "" so the column
            # stays numeric; to_csv still writes NaN as an empty field.
            "time_since_last_visit_hr": round(time_since_last_visit_hr, 2),
            "stops_detail": str(stops)
        })

# Assemble the per-visit records into one table and write it to disk; the
# dashboard cell reloads this CSV.
summary_df = pd.DataFrame(data=summary_rows)
summary_df.to_csv("Oslo_kpi_summary.csv", index=False)

print("Summary saved as Oslo_kpi_summary.csv")
# Preview the first ten visits.
display(summary_df.iloc[:10])


Summary saved as Oslo_kpi_summary.csv


Unnamed: 0,mmsi,ship_name,ship_type,length,visit_num,visit_start_time,visit_end_time,visit_duration_hr,stops_count,stops_total_duration_hr,stops_terminal_count,stops_terminal_duration_hr,terminal_duration_hr,nonterminal_duration_hr,time_since_last_visit_hr,stops_detail
0,209207000,BF CARTAGENA,71.0,101.0,1,2024-03-12 13:13:00+00:00,2024-03-12 18:36:49+00:00,5.4,1,4.9,1,4.9,5.1,0.3,,[{'stop_start': Timestamp('2024-03-12 13:34:00...
1,209207000,BF CARTAGENA,71.0,101.0,2,2024-03-19 04:45:34+00:00,2024-03-19 11:26:33+00:00,6.68,2,6.33,2,6.33,6.43,0.26,154.15,[{'stop_start': Timestamp('2024-03-19 05:00:54...
2,209207000,BF CARTAGENA,71.0,101.0,3,2024-03-26 16:41:59+00:00,2024-03-27 00:19:25+00:00,7.62,1,7.22,1,7.22,7.27,0.35,173.26,[{'stop_start': Timestamp('2024-03-26 17:00:13...
3,209207000,BF CARTAGENA,71.0,101.0,4,2024-04-09 04:52:28+00:00,2024-04-09 11:49:45+00:00,6.95,1,6.5,1,6.5,6.65,0.3,316.55,[{'stop_start': Timestamp('2024-04-09 05:13:36...
4,209207000,BF CARTAGENA,71.0,101.0,5,2024-04-16 09:21:07+00:00,2024-04-16 13:16:07+00:00,3.92,1,3.43,1,3.43,3.57,0.35,165.52,[{'stop_start': Timestamp('2024-04-16 09:41:19...
5,209207000,BF CARTAGENA,71.0,101.0,6,2024-04-23 02:41:26+00:00,2024-04-23 09:11:45+00:00,6.51,1,6.0,1,6.0,6.16,0.35,157.42,[{'stop_start': Timestamp('2024-04-23 03:02:37...
6,209207000,BF CARTAGENA,71.0,101.0,7,2024-04-29 19:35:36+00:00,2024-04-30 06:28:02+00:00,10.87,2,10.57,2,10.57,10.62,0.25,154.4,[{'stop_start': Timestamp('2024-04-29 19:50:42...
7,209217000,OTELLO,80.0,104.0,1,2024-02-26 15:18:48+00:00,2024-02-27 20:16:49+00:00,28.97,3,28.19,1,11.14,11.19,17.77,,[{'stop_start': Timestamp('2024-02-26 15:41:01...
8,209332000,RIX PACIFIC,70.0,90.0,1,2024-02-23 00:01:19+00:00,2024-02-27 02:24:18+00:00,98.38,1,98.18,0,0.0,0.0,98.38,,[{'stop_start': Timestamp('2024-02-23 00:01:19...
9,209541000,BF PERCH,71.0,139.0,1,2024-03-16 20:45:54+00:00,2024-03-17 02:16:30+00:00,5.51,1,4.96,1,4.96,5.11,0.4,,[{'stop_start': Timestamp('2024-03-16 21:10:05...


In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from dash import Dash, dcc, html, dash_table, Input, Output
import dash_bootstrap_components as dbc

# --- Load Oslo KPI Summary ---
summary = pd.read_csv("Oslo_kpi_summary.csv", parse_dates=['visit_start_time', 'visit_end_time'])
# Drop every vessel that has at least one visit with more than 5 stops
# (all of that vessel's visits are removed, not only the offending visit).
vessels_to_exclude = summary.loc[summary['stops_count'] > 5, 'mmsi'].unique()
summary = summary[~summary['mmsi'].isin(vessels_to_exclude)].reset_index(drop=True)
summary['month'] = summary['visit_start_time'].dt.strftime('%Y-%m')

# Unique vessels per month in port (bar)
monthly_counts = summary.groupby('month')['mmsi'].nunique().reset_index(name='unique_vessels')

# Unique vessels per month in either terminal (bar)
summary['visited_terminal'] = summary['terminal_duration_hr'] > 0
terminal_visits_month = summary[summary['visited_terminal']].groupby('month')['mmsi'].nunique().reset_index(name='unique_terminal_vessels')

# Visits per vessel: the highest visit number a vessel reached is its visit total.
visits_per_vessel = summary.groupby('mmsi')['visit_num'].max().reset_index(name='total_visits')
# How many vessels share each visit total, ordered by visit total.
visits_hist = visits_per_vessel['total_visits'].value_counts().sort_index()

# Box and scatter plots for turnaround time at terminal vs vessel length
terminal_visits = summary[summary['terminal_duration_hr'] > 0].copy()
# The top bin is open-ended to match its "200m+" label; with a finite upper
# edge, longer vessels would get a NaN bin and vanish from the box plot.
terminal_visits['length_bin'] = pd.cut(
    terminal_visits['length'],
    bins=[0, 100, 150, 200, float('inf')],
    labels=["0-100m", "100-150m", "150-200m", "200m+"]
)

# Colors
bar_color = "#3A6351"
terminal_color = "#489FB5"
hist_color = "#F7B801"
scatter_color = "#005F73"

# App
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _on_white(fig):
    """Give ``fig`` the dashboard's white plot background and light-grey y-grid."""
    return fig.update_layout(plot_bgcolor='white', yaxis=dict(gridcolor='gainsboro'))


# Figures are built up front so the layout below only describes page structure.
fig_port_vessels = _on_white(
    px.bar(
        monthly_counts, x='month', y='unique_vessels',
        title="Unique Vessels Per Month (Port Area)",
        labels={"month": "Month", "unique_vessels": "Unique Vessels"},
        color_discrete_sequence=[bar_color]
    )
)
fig_terminal_vessels = _on_white(
    px.bar(
        terminal_visits_month, x='month', y='unique_terminal_vessels',
        title="Unique Vessels Per Month (Either Terminal)",
        labels={"month": "Month", "unique_terminal_vessels": "Unique Vessels (Terminal Area)"},
        color_discrete_sequence=[terminal_color]
    )
)
fig_visits_per_vessel = _on_white(
    px.bar(
        x=visits_hist.index, y=visits_hist.values,
        title="Number of Port Visits per Vessel<br><span style='font-size:0.8em;color:gray'>Click any bar to see MMSIs</span>",
        labels={"x": "Number of Visits", "y": "Number of Vessels"},
        color_discrete_sequence=[terminal_color]
    ).update_traces(marker_line_color='white', marker_line_width=1.5)
)
fig_stops_per_visit = _on_white(
    px.histogram(
        summary, x='stops_count', nbins=summary['stops_count'].max()+1,
        title="Stops Per Visit Distribution",
        labels={"stops_count": "Stops per Visit", "count": "Visit Count"},
        color_discrete_sequence=[hist_color]
    )
)
fig_turnaround_box = _on_white(
    px.box(
        terminal_visits, x="length_bin", y="terminal_duration_hr",
        title="Turnaround Time at Terminal vs. Vessel Length",
        labels={"terminal_duration_hr": "Turnaround Time at Terminal (hrs)", "length_bin": "Vessel Length Bin"},
        color_discrete_sequence=[scatter_color]
    )
)
fig_turnaround_scatter = _on_white(
    px.scatter(
        terminal_visits, x="length", y="terminal_duration_hr",
        title="Turnaround Time at Terminal (Scatter: Length vs. Time)",
        labels={"length": "Vessel Length (m)", "terminal_duration_hr": "Turnaround Time at Terminal (hrs)"},
        color_discrete_sequence=[bar_color]
    ).update_traces(marker=dict(size=9, opacity=0.6))
)

# KPI table shown at the bottom of the page (only the first 20 summary rows are loaded).
kpi_table = dash_table.DataTable(
    data=summary.head(20).to_dict('records'),
    columns=[{"name": i, "id": i} for i in summary.columns],
    page_size=20,
    filter_action="native",
    sort_action="native",
    style_table={'overflowX': 'auto', 'margin-top': '20px'},
    style_cell={'textAlign': 'left', 'padding':'4px', 'fontSize': 13},
    style_header={'backgroundColor': '#E8E8E8', 'fontWeight': 'bold'}
)

app.layout = dbc.Container(
    [
        html.H2("Oslo Port Vessel KPI Dashboard (Two Terminals Combined)", style={'color': '#22223b', 'marginBottom': 20}),
        html.P("All 'terminal' KPIs/plots refer to EITHER of the two selected terminal polygons in Oslo."),
        # Row 1: monthly unique-vessel counts (port area | terminal area).
        dbc.Row([
            dbc.Col([dcc.Graph(id="bar-unique-vessels-port", figure=fig_port_vessels)], md=6),
            dbc.Col([dcc.Graph(id="bar-unique-vessels-terminal", figure=fig_terminal_vessels)], md=6),
        ]),
        # Row 2: visits-per-vessel bars; the Div below is filled by the click callback.
        dbc.Row([
            dbc.Col(
                [
                    dcc.Graph(id="bar-visits-per-vessel", figure=fig_visits_per_vessel),
                    html.Div(id='bar-click-mmsi-table'),
                ],
                md=12,
            )
        ]),
        # Row 3: stops-per-visit histogram | turnaround box plot by length bin.
        dbc.Row([
            dbc.Col([dcc.Graph(id="hist-stops-per-visit", figure=fig_stops_per_visit)], md=6),
            dbc.Col([dcc.Graph(id="box-turnaround-vs-length", figure=fig_turnaround_box)], md=6),
        ]),
        # Row 4: turnaround scatter against raw vessel length.
        dbc.Row([
            dbc.Col([dcc.Graph(id="scatter-turnaround-vs-length", figure=fig_turnaround_scatter)], md=12)
        ]),
        html.H4("KPI Table (filterable, first 20 rows):", style={'marginTop': 30}),
        kpi_table,
    ],
    fluid=True,
)

@app.callback(
    Output('bar-click-mmsi-table', 'children'),
    Input('bar-visits-per-vessel', 'clickData')
)
def display_mmsis(clickData):
    """List the MMSIs of vessels whose visit total equals the clicked bar's x value.

    Returns an empty string when no bar has been clicked or no vessel matches;
    otherwise a Div holding a heading and a one-column MMSI table.
    """
    if not clickData:
        return ""

    # The x value of the first clicked point is the number of visits for that bar.
    num_visits = clickData['points'][0]['x']
    matching = visits_per_vessel['total_visits'] == num_visits
    vessels_list = visits_per_vessel.loc[matching, 'mmsi'].tolist()
    if not vessels_list:
        return ""

    heading = html.H6(f"Vessels with {num_visits} visits:")
    mmsi_table = dash_table.DataTable(
        data=[{'mmsi': m} for m in vessels_list],
        columns=[{"name": "mmsi", "id": "mmsi"}],
        page_size=10,
        style_table={'maxHeight': '180px', 'overflowY': 'auto'},
        style_cell={'fontSize': 12}
    )
    return html.Div([heading, mmsi_table], style={'marginTop': '15px'})

# Serve the dashboard inline in the notebook output. `dash.Dash.run` takes the
# notebook display mode as `jupyter_mode`; an unrecognised `mode=` keyword is
# treated as a Flask run option instead of selecting the display mode.
app.run(debug=True, jupyter_mode='inline', port=8070)


In [5]:
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon

# Define both Oslo terminal polygons.
# Each ring lists (x, y) vertices and repeats its first vertex at the end to
# close the outline; points tested against them are built as
# Point(longitude, latitude).
_terminal_A_ring = [
    (10.7198972, 59.9088206),
    (10.7098765, 59.9057866),
    (10.7111854, 59.9042049),
    (10.7217855, 59.9079599),
    (10.7198972, 59.9088206),
]
_terminal_B_ring = [
    (10.7448538, 59.8860217),
    (10.7451757, 59.8829641),
    (10.7670625, 59.8834271),
    (10.765775, 59.8860217),
    (10.7625349, 59.8877442),
    (10.7448538, 59.8860217),
]
terminal_A_poly = Polygon(_terminal_A_ring)
terminal_B_poly = Polygon(_terminal_B_ring)

# Load your Oslo data
# NOTE(review): `dayfirst=True` only affects columns parsed through
# `parse_dates`; none are requested here, so the timestamps are actually parsed
# by `pd.to_datetime` below with its default day/month handling. Confirm this
# matches the CSV's date format.
df = pd.read_csv('Oslo.csv', dayfirst=True, low_memory=False)
# Unparseable timestamps become NaT (errors='coerce'); those rows are dropped.
df['date_time_utc'] = pd.to_datetime(df['date_time_utc'], utc=True, errors='coerce')
df = df.dropna(subset=['date_time_utc'])

# Flag terminal A and B (and 'either')
# Walk the two coordinate columns directly instead of a row-wise
# DataFrame.apply(axis=1): the per-row result is the same, but no Series is
# built for every row, and an empty frame simply produces empty columns.
df['in_terminal_A'] = [
    terminal_A_poly.contains(Point(lon, lat))
    for lon, lat in zip(df['longitude'], df['latitude'])
]
df['in_terminal_B'] = [
    terminal_B_poly.contains(Point(lon, lat))
    for lon, lat in zip(df['longitude'], df['latitude'])
]
df['in_terminal'] = df['in_terminal_A'] | df['in_terminal_B']

df = df.sort_values(['mmsi', 'date_time_utc']).reset_index(drop=True)
# Accumulates one summary record (dict) per vessel visit.
summary_rows = []

for mmsi, vessel_df in df.groupby('mmsi'):
    # Re-index each vessel's track 0..n-1 in time order: the visit boundaries
    # below mix positional (.iloc) and label (.loc) access and need both to agree.
    vessel_df = vessel_df.sort_values('date_time_utc').reset_index(drop=True)
    # Static attributes are taken from the vessel's earliest record.
    vessel_name = vessel_df['ship_name'].iloc[0]
    vessel_type = vessel_df['ship_type'].iloc[0]
    vessel_length = vessel_df['length'].iloc[0]
    visit_num = 0

    # A gap of more than 10 hours between consecutive records starts a new visit.
    vessel_df['prev_time'] = vessel_df['date_time_utc'].shift(1)
    vessel_df['hour_gap'] = (vessel_df['date_time_utc'] - vessel_df['prev_time']).dt.total_seconds()/3600
    # The first record has no predecessor; a gap of 0 keeps it from opening a new visit.
    vessel_df.loc[vessel_df['hour_gap'].isnull(), 'hour_gap'] = 0
    visit_breaks = vessel_df.index[vessel_df['hour_gap'] > 10].tolist()
    # Bracket with 0 and len() so consecutive pairs form [start, end) slices.
    visit_breaks = [0] + visit_breaks + [len(vessel_df)]

    for i in range(len(visit_breaks)-1):
        visit_num += 1
        visit_start = visit_breaks[i]
        visit_end = visit_breaks[i+1]
        visit_df = vessel_df.iloc[visit_start:visit_end].copy()
        visit_start_time = visit_df['date_time_utc'].iloc[0]
        visit_end_time = visit_df['date_time_utc'].iloc[-1]
        visit_duration_hr = (visit_end_time - visit_start_time).total_seconds()/3600

        # Stops
        # NOTE(review): speed_over_ground is presumably in knots — confirm.
        visit_df['stopped'] = visit_df['speed_over_ground'] < 1
        # fill_value=False makes a visit that begins stationary register its
        # first record as a stop start, so every stop end has an earlier start
        # and the two lists always pair up one-to-one.
        visit_df['stopped_shift'] = visit_df['stopped'].shift(1, fill_value=False)
        stop_starts = visit_df.index[(visit_df['stopped']) & (~visit_df['stopped_shift'])].tolist()
        # A stop ends at the first record where the vessel is moving again ...
        stop_ends = visit_df.index[(~visit_df['stopped']) & (visit_df['stopped_shift'])].tolist()
        # ... or at the visit's last record if it is still stationary there.
        if visit_df['stopped'].iloc[-1]:
            stop_ends.append(visit_df.index[-1])

        stops = []
        stop_durations_total = 0
        stop_durations_A = 0
        stop_durations_B = 0
        stop_count = 0
        stop_count_A = 0
        stop_count_B = 0
        for s, e in zip(stop_starts, stop_ends):
            stop_start_time = visit_df.loc[s, 'date_time_utc']
            stop_end_time = visit_df.loc[e, 'date_time_utc']
            stop_duration = (stop_end_time - stop_start_time).total_seconds()/3600
            # A stop is attributed to a terminal if any record in s..e
            # (inclusive) lies inside that terminal's polygon; a single stop
            # can therefore count towards both A and B.
            in_A = visit_df.loc[s:e, 'in_terminal_A'].any()
            in_B = visit_df.loc[s:e, 'in_terminal_B'].any()
            stops.append({
                "stop_start": stop_start_time,
                "stop_end": stop_end_time,
                "duration_hr": stop_duration,
                "in_terminal_A": in_A,
                "in_terminal_B": in_B
            })
            stop_count += 1
            stop_durations_total += stop_duration
            if in_A:
                stop_count_A += 1
                stop_durations_A += stop_duration
            if in_B:
                stop_count_B += 1
                stop_durations_B += stop_duration

        # Terminal durations: span between the first and last record inside
        # the respective area during this visit (0 when never inside).
        terminal_A_times = visit_df[visit_df['in_terminal_A']]
        terminal_B_times = visit_df[visit_df['in_terminal_B']]
        terminal_times = visit_df[visit_df['in_terminal']]

        terminal_A_duration = (terminal_A_times['date_time_utc'].iloc[-1] - terminal_A_times['date_time_utc'].iloc[0]).total_seconds()/3600 if not terminal_A_times.empty else 0
        terminal_B_duration = (terminal_B_times['date_time_utc'].iloc[-1] - terminal_B_times['date_time_utc'].iloc[0]).total_seconds()/3600 if not terminal_B_times.empty else 0
        terminal_duration = (terminal_times['date_time_utc'].iloc[-1] - terminal_times['date_time_utc'].iloc[0]).total_seconds()/3600 if not terminal_times.empty else 0

        nonterminal_duration = visit_duration_hr - terminal_duration

        # Time since last visit: gap between the previous visit's final record
        # and this visit's first record; undefined (NaN) for the first visit.
        if i > 0:
            last_end = vessel_df.loc[visit_breaks[i]-1, 'date_time_utc']
            time_since_last_visit_hr = (visit_start_time - last_end).total_seconds()/3600
        else:
            time_since_last_visit_hr = np.nan

        summary_rows.append({
            "mmsi": mmsi,
            "ship_name": vessel_name,
            "ship_type": vessel_type,
            "length": vessel_length,
            "visit_num": visit_num,
            "visit_start_time": visit_start_time,
            "visit_end_time": visit_end_time,
            "visit_duration_hr": round(visit_duration_hr, 2),
            "stops_count": stop_count,
            "stops_total_duration_hr": round(stop_durations_total, 2),
            "stops_A_count": stop_count_A,
            "stops_A_duration_hr": round(stop_durations_A, 2),
            "stops_B_count": stop_count_B,
            "stops_B_duration_hr": round(stop_durations_B, 2),
            "terminal_A_duration_hr": round(terminal_A_duration, 2),
            "terminal_B_duration_hr": round(terminal_B_duration, 2),
            "terminal_duration_hr": round(terminal_duration, 2),
            "nonterminal_duration_hr": round(nonterminal_duration, 2),
            # Keep NaN (round() passes it through) rather than "" so the column
            # stays numeric; to_csv still writes NaN as an empty field.
            "time_since_last_visit_hr": round(time_since_last_visit_hr, 2),
            "stops_detail": str(stops)
        })

# Assemble the per-visit records into one table and write it to disk; the
# dashboard cell reloads this CSV.
summary_df = pd.DataFrame(data=summary_rows)
summary_df.to_csv("Oslo_kpi_summary.csv", index=False)
print("Summary saved as Oslo_kpi_summary.csv")
# Preview the first ten visits.
display(summary_df.iloc[:10])


Summary saved as Oslo_kpi_summary.csv


Unnamed: 0,mmsi,ship_name,ship_type,length,visit_num,visit_start_time,visit_end_time,visit_duration_hr,stops_count,stops_total_duration_hr,stops_A_count,stops_A_duration_hr,stops_B_count,stops_B_duration_hr,terminal_A_duration_hr,terminal_B_duration_hr,terminal_duration_hr,nonterminal_duration_hr,time_since_last_visit_hr,stops_detail
0,209207000,BF CARTAGENA,71.0,101.0,1,2024-03-12 13:13:00+00:00,2024-03-12 18:36:49+00:00,5.4,1,4.9,0,0.0,1,4.9,0.0,5.1,5.1,0.3,,[{'stop_start': Timestamp('2024-03-12 13:34:00...
1,209207000,BF CARTAGENA,71.0,101.0,2,2024-03-19 04:45:34+00:00,2024-03-19 11:26:33+00:00,6.68,2,6.33,0,0.0,2,6.33,0.0,6.43,6.43,0.26,154.15,[{'stop_start': Timestamp('2024-03-19 05:00:54...
2,209207000,BF CARTAGENA,71.0,101.0,3,2024-03-26 16:41:59+00:00,2024-03-27 00:19:25+00:00,7.62,1,7.22,0,0.0,1,7.22,0.0,7.27,7.27,0.35,173.26,[{'stop_start': Timestamp('2024-03-26 17:00:13...
3,209207000,BF CARTAGENA,71.0,101.0,4,2024-04-09 04:52:28+00:00,2024-04-09 11:49:45+00:00,6.95,1,6.5,0,0.0,1,6.5,0.0,6.65,6.65,0.3,316.55,[{'stop_start': Timestamp('2024-04-09 05:13:36...
4,209207000,BF CARTAGENA,71.0,101.0,5,2024-04-16 09:21:07+00:00,2024-04-16 13:16:07+00:00,3.92,1,3.43,0,0.0,1,3.43,0.0,3.57,3.57,0.35,165.52,[{'stop_start': Timestamp('2024-04-16 09:41:19...
5,209207000,BF CARTAGENA,71.0,101.0,6,2024-04-23 02:41:26+00:00,2024-04-23 09:11:45+00:00,6.51,1,6.0,0,0.0,1,6.0,0.0,6.16,6.16,0.35,157.42,[{'stop_start': Timestamp('2024-04-23 03:02:37...
6,209207000,BF CARTAGENA,71.0,101.0,7,2024-04-29 19:35:36+00:00,2024-04-30 06:28:02+00:00,10.87,2,10.57,0,0.0,2,10.57,0.0,10.62,10.62,0.25,154.4,[{'stop_start': Timestamp('2024-04-29 19:50:42...
7,209217000,OTELLO,80.0,104.0,1,2024-02-26 15:18:48+00:00,2024-02-27 20:16:49+00:00,28.97,3,28.19,0,0.0,1,11.14,0.0,11.19,11.19,17.77,,[{'stop_start': Timestamp('2024-02-26 15:41:01...
8,209332000,RIX PACIFIC,70.0,90.0,1,2024-02-23 00:01:19+00:00,2024-02-27 02:24:18+00:00,98.38,1,98.18,0,0.0,0,0.0,0.0,0.0,0.0,98.38,,[{'stop_start': Timestamp('2024-02-23 00:01:19...
9,209541000,BF PERCH,71.0,139.0,1,2024-03-16 20:45:54+00:00,2024-03-17 02:16:30+00:00,5.51,1,4.96,0,0.0,1,4.96,0.0,5.11,5.11,0.4,,[{'stop_start': Timestamp('2024-03-16 21:10:05...


In [7]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, dash_table
import dash_bootstrap_components as dbc

# Helper for vessel length binning
def add_length_bin(df):
    """Bin ``df['length']`` into labelled vessel-length classes.

    Bins are right-inclusive: (0, 100], (100, 150], (150, 200] and (200, inf).
    The top bin is open-ended so that it matches its "200m+" label; a finite
    upper edge would leave longer vessels without a bin.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``length`` column.

    Returns
    -------
    pandas.Series
        Categorical series aligned with ``df``; lengths that are missing or
        not greater than 0 are NaN.
    """
    return pd.cut(
        df['length'],
        bins=[0, 100, 150, 200, float('inf')],
        labels=["0-100m", "100-150m", "150-200m", "200m+"]
    )
    
# Load preprocessed summary (Oslo_kpi_summary.csv)
summary = pd.read_csv("Oslo_kpi_summary.csv", parse_dates=['visit_start_time', 'visit_end_time'])
summary['month'] = summary['visit_start_time'].dt.strftime('%Y-%m')
# Remove every vessel that has at least one visit with more than 5 stops.
too_many_stops = summary['stops_count'] > 5
vessels_to_exclude = summary.loc[too_many_stops, 'mmsi'].unique()
kept_rows = ~summary['mmsi'].isin(vessels_to_exclude)
summary = summary.loc[kept_rows].reset_index(drop=True)


def _visits_with_time_in(duration_col):
    """Copy of the visits whose ``duration_col`` is positive, with a length bin added."""
    visits = summary.loc[summary[duration_col] > 0].copy()
    visits['length_bin'] = add_length_bin(visits)
    return visits


def _unique_vessels_by_month(visits, count_name):
    """Count distinct MMSIs per month, returned as a two-column frame."""
    per_month = visits.groupby('month')['mmsi'].nunique()
    return per_month.reset_index(name=count_name)


# Data for each terminal
terminal_A = _visits_with_time_in('terminal_A_duration_hr')
terminal_B = _visits_with_time_in('terminal_B_duration_hr')
terminal_either = _visits_with_time_in('terminal_duration_hr')

# Monthly unique vessel counts
monthly_A = _unique_vessels_by_month(terminal_A, 'unique_terminal_A_vessels')
monthly_B = _unique_vessels_by_month(terminal_B, 'unique_terminal_B_vessels')
monthly_either = _unique_vessels_by_month(terminal_either, 'unique_terminal_vessels')

# Visuals
# One colour per series, reused across the bar, box and histogram of that series.
_COLOR_A = "#6B9080"
_COLOR_B = "#A4C3B2"
_COLOR_EITHER = "#489FB5"


def _monthly_bar(counts, y_col, title, colour):
    """Bar chart of a monthly unique-vessel count column."""
    return px.bar(counts, x="month", y=y_col,
                  title=title, color_discrete_sequence=[colour])


def _turnaround_box(visits, duration_col, title, colour):
    """Box plot of a terminal duration column grouped by vessel length bin."""
    return px.box(
        visits, x="length_bin", y=duration_col,
        title=title,
        labels={duration_col: "Turnaround Time (hrs)", "length_bin": "Length Bin"},
        color_discrete_sequence=[colour]
    )


def _stops_histogram(visits, stops_col, title, colour):
    """Histogram of a stops-per-visit column, with max+1 requested bins."""
    return px.histogram(visits, x=stops_col, nbins=visits[stops_col].max()+1,
                        title=title, color_discrete_sequence=[colour])


fig_bar_A = _monthly_bar(monthly_A, "unique_terminal_A_vessels",
                         "Unique Vessels Per Month (Terminal A)", _COLOR_A)
fig_bar_B = _monthly_bar(monthly_B, "unique_terminal_B_vessels",
                         "Unique Vessels Per Month (Terminal B)", _COLOR_B)
fig_bar_either = _monthly_bar(monthly_either, "unique_terminal_vessels",
                              "Unique Vessels Per Month (Either Terminal)", _COLOR_EITHER)

fig_box_A = _turnaround_box(terminal_A, "terminal_A_duration_hr",
                            "Turnaround at Terminal A vs. Vessel Length", _COLOR_A)
fig_box_B = _turnaround_box(terminal_B, "terminal_B_duration_hr",
                            "Turnaround at Terminal B vs. Vessel Length", _COLOR_B)
fig_box_either = _turnaround_box(terminal_either, "terminal_duration_hr",
                                 "Turnaround at Either Terminal vs. Vessel Length", _COLOR_EITHER)

fig_hist_A = _stops_histogram(terminal_A, 'stops_A_count',
                              "Stops Per Visit (Terminal A)", _COLOR_A)
fig_hist_B = _stops_histogram(terminal_B, 'stops_B_count',
                              "Stops Per Visit (Terminal B)", _COLOR_B)
fig_hist_either = _stops_histogram(terminal_either, 'stops_count',
                                   "Stops Per Visit (Either Terminal)", _COLOR_EITHER)

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _three_up(figures):
    """Lay the given figures out side by side, one third-width column each."""
    return dbc.Row([dbc.Col([dcc.Graph(figure=fig)], md=4) for fig in figures])


# KPI table shown under the charts (only the first 20 summary rows are loaded).
kpi_table = dash_table.DataTable(
    data=summary.head(20).to_dict('records'),
    columns=[{"name": i, "id": i} for i in summary.columns],
    page_size=20,
    filter_action="native",
    sort_action="native",
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'left', 'padding':'4px', 'fontSize': 13},
    style_header={'backgroundColor': '#E8E8E8', 'fontWeight': 'bold'}
)

# Each chart row shows Terminal A, Terminal B and "either" in that order.
app.layout = dbc.Container(
    [
        html.H2("Oslo Port Vessel KPI Dashboard — Terminals A, B, and Either", style={'color': '#22223b', 'marginBottom': 20}),
        _three_up([fig_bar_A, fig_bar_B, fig_bar_either]),
        _three_up([fig_box_A, fig_box_B, fig_box_either]),
        _three_up([fig_hist_A, fig_hist_B, fig_hist_either]),
        html.H4("KPI Table (first 20 rows, filterable):"),
        kpi_table,
    ],
    fluid=True,
)

# Serve the dashboard inline in the notebook output. `dash.Dash.run` takes the
# notebook display mode as `jupyter_mode`; an unrecognised `mode=` keyword is
# treated as a Flask run option instead of selecting the display mode.
app.run(debug=True, jupyter_mode='inline', port=8062)
