In [3]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon
import ipywidgets as widgets
from IPython.display import display, clear_output

# ── Port and berth polygons ──
# Vertices are (x, y) = (longitude, latitude) pairs, the same order used for the
# EPSG:4326 points built from the CSV's longitude/latitude columns. Every ring is
# closed explicitly: its first vertex is repeated as the last one.
#
# port_polygons: one outer port area per port name. A position inside this
# polygon is flagged as "in port"; the keys also decide which CSV files are
# offered in the port dropdown.
port_polygons = {
    "Bergen": Polygon([(5.3119966,60.4048807),(5.2689096,60.3941969),(5.3233262,60.3710367),(5.3526803,60.3832551),(5.3119966,60.4048807)]),
    "Stavanger": Polygon([(5.5596949,58.9367236),(5.5461337,58.9152819),(5.5955721,58.9039355),(5.6122233,58.9324718),(5.5596949,58.9367236)]),
    "Kristiansand": Polygon([(8.0021276,58.157283),(7.9581823,58.1337279),(8.0220403,58.1192246),(8.0388631,58.1449638),(8.0021276,58.157283)]),
    "Haugesund": Polygon([(5.2809529,59.351359),(5.2675634,59.3426946),(5.2979474,59.3231697),(5.3307347,59.3342031),(5.2809529,59.351359)]),
    "Larvik":    Polygon([(10.0060228,59.0470095),(10.0200991,59.0198933),(10.0786356,59.03129),(10.0568346,59.0541603),(10.0146059,59.0497464),(10.0060228,59.0470095)]),
    "Moss":      Polygon([(10.6414806,59.4396391),(10.6102382,59.4125713),(10.6557285,59.4072425),(10.673238,59.4375443),(10.6414806,59.4396391)]),
    "Drammen":   Polygon([(10.2470522,59.7538991),(10.1914339,59.7347841),(10.2879076,59.7134939),(10.3028421,59.7476728),(10.2470522,59.7538991)]),
    "Oslo":      Polygon([(10.7220254,59.9127971),(10.6775651,59.9016079),(10.7091508,59.8693961),(10.7781586,59.8841278),(10.7587609,59.9135716),(10.7220254,59.9127971)]),
}
# small_polygons: the berth area for each port, keyed by the same port names.
# A position inside it with a low speed over ground is flagged as "at berth".
# Oslo is a MultiPolygon because its berth area is given as two separate polygons.
small_polygons = {
    "Bergen":      Polygon([(5.3090064,60.3901893),(5.305144,60.3861391),(5.3155724,60.3830215),(5.317643,60.3840554),(5.3189305,60.3850946),(5.3141137,60.387984),(5.3090064,60.3901893)]),
    "Stavanger":   Polygon([(5.5832126,58.9213942),(5.582998,58.9181596),(5.5870321,58.9179602),(5.5879333,58.9214607),(5.5832126,58.9213942)]),
    "Kristiansand":Polygon([(7.9882631,58.1412228),(7.9940996,58.1396088),(7.9933915,58.142933),(7.9897759,58.1426895),(7.9882631,58.1412228)]),
    "Haugesund":   Polygon([(5.3064528,59.3392539),(5.3055086,59.3355661),(5.3117314,59.3362118),(5.3108516,59.3392867),(5.3064528,59.3392539)]),
    "Larvik":      Polygon([(10.044119,59.0404963),(10.0424238,59.0412138),(10.0354715,59.0382223),(10.0383683,59.036743),(10.044119,59.0404963)]),
    "Moss":        Polygon([(10.6538617,59.4303284),(10.6530463,59.4250242),(10.6580674,59.4250242),(10.6580245,59.4302192),(10.6538617,59.4303284)]),
    "Drammen":     Polygon([(10.2376171,59.7395407),(10.2351281,59.73581),(10.2404066,59.7355504),(10.2401921,59.7394758),(10.2376171,59.7395407)]),
    "Oslo":        MultiPolygon([
                       Polygon([(10.7198972,59.9088206),(10.7098765,59.9057866),(10.7111854,59.9042049),(10.7217855,59.9079599),(10.7198972,59.9088206)]),
                       Polygon([(10.7448538,59.8860217),(10.7451757,59.8829641),(10.7670625,59.8834271),(10.765775,59.8860217),(10.7625349,59.8877442),(10.7448538,59.8860217)])
                   ])
}
# Detect available ports: CSV files in the working directory whose base name
# is a key of port_polygons. os.listdir() returns entries in arbitrary order,
# so the names are sorted to keep the dropdown order (and therefore its default
# selection) the same on every run and platform.
csv_ports = sorted(
    stem
    for stem, ext in map(os.path.splitext, os.listdir('.'))
    if ext == '.csv' and stem in port_polygons
)

# Jupyter widgets: port selection, a trigger button, and an output area that
# the click handler prints into.
port_selector = widgets.Dropdown(
    options=csv_ports,
    # With no matching CSV there is nothing to preselect.
    value=csv_ports[0] if csv_ports else None,
    description='Port:'
)
button = widgets.Button(description="Calculate KPIs")
output = widgets.Output()
display(port_selector, button, output)

def parse_datetime(dt):
    """Convert *dt* with ``pd.to_datetime``, falling back to ``pd.NaT``.

    Args:
        dt: Any value accepted by ``pd.to_datetime`` (for example a date string).

    Returns:
        Whatever ``pd.to_datetime(dt)`` returns, or ``pd.NaT`` when the
        conversion raises a parsing/type/overflow error.
    """
    try:
        return pd.to_datetime(dt)
    # Only conversion failures map to NaT; a bare ``except`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated programming errors.
    except (ValueError, TypeError, OverflowError):
        return pd.NaT

def find_event_periods(series):
    """Locate the consecutive runs of truthy values in *series*.

    Args:
        series: A pandas Series of boolean flags.

    Returns:
        A list of ``(first_label, last_label)`` tuples, one per run, in order
        of appearance. The values are index *labels* of the series (not
        positions), and both ends are inclusive.
    """
    # A new run starts wherever a value differs from its predecessor; the
    # running count of those change points gives every run its own id.
    run_ids = (series != series.shift()).cumsum()
    return [
        (run.index[0], run.index[-1])
        for _, run in series.groupby(run_ids)
        if run.iloc[0]  # a run is homogeneous, so its first value decides
    ]

def on_button_clicked(b):
    """Compute and display per-visit KPIs for the port chosen in the dropdown.

    Reads ``<port>.csv`` from the working directory, flags every position
    record as inside the port polygon and/or at berth (inside the berth
    polygon with ``speed_over_ground`` below 0.5), splits each vessel's track
    into consecutive in-port runs ("visits") and, for every visit, reports
    turnaround time, total time at berth and waiting time before the first
    berthing, all in hours. The table and summary statistics are rendered
    into the ``output`` widget.

    Args:
        b: The clicked button, as passed by ``Button.on_click``; unused.
    """
    with output:
        clear_output()
        port = port_selector.value
        if not port:
            print("No port selected.")
            return
        file = f"{port}.csv"
        if not os.path.exists(file):
            print(f"File {file} not found.")
            return

        print(f"Processing {port}...")

        # Load data
        usecols = ['date_time_utc','mmsi','longitude','latitude','speed_over_ground']
        df = pd.read_csv(file, usecols=usecols)
        df['date_time_utc'] = pd.to_datetime(df['date_time_utc'], errors='coerce')
        # Rows whose timestamp could not be parsed (now NaT) cannot be placed
        # on the timeline; left in, they sort to the end of a vessel's track
        # and would turn the last visit's exit time into NaT.
        df = df.dropna(subset=['date_time_utc'])
        df = df.sort_values(['mmsi', 'date_time_utc'])
        gdf = gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df['longitude'], df['latitude']),
            crs='EPSG:4326',
        )

        # Vectorized spatial checks. `within` accepts a MultiPolygon as well
        # as a Polygon, so multi-part berth areas need no per-row loop.
        port_poly = port_polygons[port]
        berth_poly = small_polygons[port]
        gdf['in_port'] = gdf['geometry'].within(port_poly)
        gdf['in_berth'] = gdf['geometry'].within(berth_poly)
        gdf['at_berth'] = gdf['in_berth'] & (gdf['speed_over_ground'] < 0.5)

        # Event detection per vessel
        result_rows = []
        for mmsi, vessel_df in gdf.groupby('mmsi'):
            # Fresh 0..n-1 labels so period labels are easy to slice with .loc
            vessel_df = vessel_df.reset_index(drop=True)
            # Find visits (in_port blocks)
            visit_periods = find_event_periods(vessel_df['in_port'])
            for visit_num, (start_idx, end_idx) in enumerate(visit_periods, start=1):
                visit = vessel_df.loc[start_idx:end_idx]
                entry = visit['date_time_utc'].iloc[0]
                exit_time = visit['date_time_utc'].iloc[-1]
                turnaround = (exit_time - entry).total_seconds() / 3600

                # Find berthing within this visit
                berth_periods = find_event_periods(visit['at_berth'])
                total_berth_time = 0
                waiting_time = None  # stays None when the vessel never berthed
                for b_start, b_end in berth_periods:
                    # The periods are index *labels*, and `visit` keeps the
                    # labels of vessel_df, so slice by label (.loc); slicing
                    # by position would pick the wrong rows for any visit
                    # that does not start at row 0.
                    berth_times = visit.loc[b_start:b_end, 'date_time_utc']
                    total_berth_time += (
                        berth_times.iloc[-1] - berth_times.iloc[0]
                    ).total_seconds() / 3600
                if berth_periods:
                    first_berth_start = visit.loc[berth_periods[0][0], 'date_time_utc']
                    waiting_time = (first_berth_start - entry).total_seconds() / 3600

                result_rows.append({
                    'mmsi': mmsi,
                    'visit': visit_num,
                    'entry': entry,
                    'exit': exit_time,
                    'turnaround_hrs': turnaround,
                    'total_berth_hrs': total_berth_time,
                    'waiting_hrs': waiting_time,
                })
        result_df = pd.DataFrame(result_rows)
        if not result_df.empty:
            display(result_df)
            # Visits without berthing carry None; coerce so the median is
            # taken over the numeric waiting times only.
            waiting_hrs = pd.to_numeric(result_df['waiting_hrs'], errors='coerce')
            print(f"Total vessel visits: {result_df['visit'].count()}")
            print(f"Average turnaround time (hrs): {result_df['turnaround_hrs'].mean():.2f}")
            print(f"Median waiting time (hrs): {waiting_hrs.median():.2f}")
            print(f"Average berth occupancy per visit (hrs): {result_df['total_berth_hrs'].mean():.2f}")
        else:
            print("No visits found for this port.")

# Run the KPI calculation whenever the button is pressed.
button.on_click(on_button_clicked)


Dropdown(description='Port:', options=('Bergen', 'Drammen', 'Haugesund', 'Kristiansand', 'Larvik', 'Moss', 'Os…

Button(description='Calculate KPIs', style=ButtonStyle())

Output()