## Summaries files development
Here we define a second polygon for each selected port that tightly bounds the terminal locations. This makes it possible to filter turnaround times down to the terminal and to compare them with the vessel length distribution. A vessel is considered berthed if it enters the polygon given below and stays inside it for at least 60 minutes. A vessel may also make multiple visits, since we have 4 months of AIS data; a new visit is counted whenever there is a gap of more than 24 hours between two consecutive timestamps of the same vessel in the data.

In [5]:
from shapely.geometry import Polygon

In [13]:
import os
import pandas as pd
from shapely.geometry import Point, Polygon

# --- Input / output locations ---
# Folder holding the cleaned per-port CSV files; summaries go into a sub-folder of it.
cleaned_folder = r'D:\Thesis Work MLS\Norway Data Filtered\Port_Split_Result\Cleaned'
summary_folder = os.path.join(cleaned_folder, "Terminal_Summaries2")
os.makedirs(summary_folder, exist_ok=True)

# Every entry of the cleaned folder whose name ends in ".csv" (case-insensitive).
files = [
    entry
    for entry in os.listdir(cleaned_folder)
    if entry.lower().endswith('.csv')
]

# --- Terminal polygons ---
# Closed rings (first vertex repeated at the end), one per terminal. Vertices are
# (longitude, latitude) pairs, matching the Point(longitude, latitude) tested against them.
_TERMINAL_RINGS = {
    "Bergen_SecondPolygon": [
        (5.3090064, 60.3901893),
        (5.305144, 60.3861391),
        (5.3155724, 60.3830215),
        (5.317643, 60.3840554),
        (5.3189305, 60.3850946),
        (5.3141137, 60.387984),
        (5.3090064, 60.3901893),
    ],
    "Stavanger_2ndPolygon": [
        (5.5832126, 58.9213942),
        (5.582998, 58.9181596),
        (5.5870321, 58.9179602),
        (5.5879333, 58.9214607),
        (5.5832126, 58.9213942),
    ],
    "Kristiansand_2ndPolygon": [
        (7.9882631, 58.1412228),
        (7.9940996, 58.1396088),
        (7.9933915, 58.142933),
        (7.9897759, 58.1426895),
        (7.9882631, 58.1412228),
    ],
    "Drammen_2ndPolygon": [
        (10.2376171, 59.7395407),
        (10.2351281, 59.73581),
        (10.2404066, 59.7355504),
        (10.2401921, 59.7394758),
        (10.2376171, 59.7395407),
    ],
    "Oslo_2ndPolygon_Filipstad": [
        (10.7198972, 59.9088206),
        (10.7098765, 59.9057866),
        (10.7111854, 59.9042049),
        (10.7217855, 59.9079599),
        (10.7198972, 59.9088206),
    ],
    "Oslo_2ndPolygon_Sjuroya": [
        (10.7448538, 59.8860217),
        (10.7451757, 59.8829641),
        (10.7670625, 59.8834271),
        (10.765775, 59.8860217),
        (10.7625349, 59.8877442),
        (10.7448538, 59.8860217),
    ],
}

# Terminal name -> shapely Polygon. The text before the first "_" in each name is
# the port keyword that the main loop matches against the CSV file names.
TERMINAL_POLYGONS = {
    terminal: Polygon(ring) for terminal, ring in _TERMINAL_RINGS.items()
}

def vessel_category(ship_type):
    """Map a ship-type code to a coarse vessel category.

    Args:
        ship_type: The ship-type code as a number or numeric string
            (e.g. ``70``, ``"70"``, ``"70.0"``).

    Returns:
        ``'Cargo'`` for codes 70-79, ``'Tanker'`` for codes 80-89,
        ``'Other_<code>'`` for any other numeric code, and ``'Other'`` when
        the value cannot be converted to an integer (``None``, text, NaN,
        infinity).
    """
    try:
        # Go through float first so strings such as "70.0" are accepted.
        code = int(float(ship_type))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: non-numeric text or NaN;
        # OverflowError: +/- infinity. Anything else is a real error and propagates.
        return "Other"

    if 70 <= code <= 79:
        return 'Cargo'
    if 80 <= code <= 89:
        return 'Tanker'
    return f"Other_{code}"

def _berth_record(mmsi, length, ship_name, ship_type, entry_time, exit_time,
                  turnaround_minutes, waiting_time_minutes, visit_number):
    """Build one output row (a dict) of the terminal summary."""
    return {
        'mmsi': mmsi,
        'length': length,
        'ship_name': ship_name,
        'ship_type': ship_type,
        'berth_entry_time': entry_time,
        'berth_exit_time': exit_time,
        'turnaround_minutes': turnaround_minutes,
        'waiting_time_minutes': waiting_time_minutes,
        'visit_number': visit_number,
    }


def _waiting_minutes_before(visit_grp, entry_idx):
    """Return the waiting time in minutes that precedes a berth entry, or None.

    Walks backwards from the record just before ``entry_idx`` and looks for the
    most recent run of consecutive records that are outside the polygon with
    speed over ground below 0.5. Runs shorter than 30 minutes are skipped. The
    first run of at least 30 minutes decides the outcome: its duration is
    returned if the first and last position of the run differ by at most 0.0001
    in both longitude and latitude, otherwise None is returned.

    Args:
        visit_grp: Records of one visit with a 0..n-1 index and the columns
            'inside', 'speed_over_ground', 'date_time_utc', 'longitude',
            'latitude'.
        entry_idx: Index label of the first record of the berth stay.
    """
    inside = visit_grp['inside']
    sog = visit_grp['speed_over_ground']
    times = visit_grp['date_time_utc']

    stop_start = stop_end = None
    # j == -1 is a sentinel step past the first record: it closes a stationary
    # run that reaches the very start of the visit, so that run is evaluated too
    # instead of being silently dropped.
    for j in range(entry_idx - 1, -2, -1):
        if j >= 0 and (not inside[j]) and (sog[j] < 0.5):
            if stop_end is None:
                stop_end = j
            stop_start = j
            continue
        if stop_end is None:
            continue
        stop_duration = (times[stop_end] - times[stop_start]).total_seconds() / 60
        if stop_duration >= 30:
            lon1, lat1 = visit_grp.loc[stop_start, ['longitude', 'latitude']]
            lon2, lat2 = visit_grp.loc[stop_end, ['longitude', 'latitude']]
            moved = (abs(lon2 - lon1) > 0.0001) or (abs(lat2 - lat1) > 0.0001)
            return None if moved else stop_duration
        # Run too short to count as waiting: forget it and keep scanning.
        stop_end = stop_start = None
    return None


def vessel_visits_and_berths(df, terminal_poly):
    """Extract berth events per vessel and visit for one terminal polygon.

    For every vessel (``mmsi``) the records are split into visits: a gap of
    more than 24 hours between consecutive records starts a new visit. Within
    a visit, a berth stay starts at the first record that is inside
    ``terminal_poly`` with speed over ground below 0.5 and ends at the last
    record before the vessel is outside the polygon or its speed is 0.5 or
    more. A stay is reported only if it lasts at least 60 minutes and every
    record in it has a speed below 0.5 (a missing speed disqualifies it).

    Timestamps are parsed before sorting, so records are ordered
    chronologically even when ``date_time_utc`` arrives as text.

    Args:
        df: AIS records with the columns 'mmsi', 'date_time_utc', 'longitude',
            'latitude' and 'speed_over_ground'; 'ship_type', 'ship_name' and
            'length' are used when present. The frame is not modified.
        terminal_poly: Object with a ``contains(point)`` method (a shapely
            Polygon in this file) describing the terminal area.

    Returns:
        A list of dicts with the keys 'mmsi', 'length', 'ship_name',
        'ship_type', 'berth_entry_time', 'berth_exit_time',
        'turnaround_minutes', 'waiting_time_minutes' and 'visit_number'
        (1-based). A vessel without any qualifying stay gets one row with
        empty times, ``turnaround_minutes`` 0 and ``visit_number`` 1.
    """
    df = df.copy()
    # Parse first, then sort: sorting the raw strings would be lexicographic.
    df['date_time_utc'] = pd.to_datetime(df['date_time_utc'])
    df = df.sort_values(['mmsi', 'date_time_utc'])
    result = []

    for mmsi, group in df.groupby('mmsi'):
        group = group.reset_index(drop=True)
        for col in ('longitude', 'latitude', 'speed_over_ground'):
            group[col] = pd.to_numeric(group[col], errors='coerce')
        group['inside'] = [
            terminal_poly.contains(Point(lon, lat))
            for lon, lat in zip(group['longitude'], group['latitude'])
        ]
        # Segment visits (a gap of more than 24h starts a new visit).
        gaps = group['date_time_utc'].diff().dt.total_seconds()
        group['visit_id'] = (gaps > 24 * 3600).cumsum()

        # Vessel attributes, taken from the vessel's first record.
        ship_type_val = group['ship_type'].iloc[0] if 'ship_type' in group else ''
        ship_name = group['ship_name'].iloc[0] if 'ship_name' in group else ''
        length = group['length'].iloc[0] if 'length' in group else ''
        vessel_main_type = vessel_category(ship_type_val)

        any_berth = False
        for visit_num, visit_grp in group.groupby('visit_id'):
            visit_grp = visit_grp.reset_index(drop=True)
            times = visit_grp['date_time_utc']
            sog = visit_grp['speed_over_ground']

            in_berth = False
            entry_time = None
            entry_idx = None

            for i, (is_inside, speed) in enumerate(zip(visit_grp['inside'], sog)):
                if is_inside and not in_berth and speed < 0.5:
                    # Stay begins: inside the terminal and (nearly) stopped.
                    entry_idx = i
                    entry_time = times[i]
                    in_berth = True
                elif (not is_inside or speed >= 0.5) and in_berth:
                    # Stay ended on the previous record.
                    exit_idx = i - 1
                    exit_time = times[exit_idx]
                    duration = (exit_time - entry_time).total_seconds() / 60
                    # Only count stays of 1 hour+ with SOG < 0.5 throughout.
                    if duration >= 60 and (sog.loc[entry_idx:exit_idx] < 0.5).all():
                        result.append(_berth_record(
                            mmsi, length, ship_name, vessel_main_type,
                            entry_time, exit_time, duration,
                            _waiting_minutes_before(visit_grp, entry_idx),
                            visit_num + 1,
                        ))
                        any_berth = True
                    in_berth = False

            # Vessel still at berth when the visit's records end: close the stay
            # at the last record; no waiting time is derived for this case.
            if in_berth:
                exit_time = times.iloc[-1]
                duration = (exit_time - entry_time).total_seconds() / 60
                if duration >= 60 and (sog.loc[entry_idx:] < 0.5).all():
                    result.append(_berth_record(
                        mmsi, length, ship_name, vessel_main_type,
                        entry_time, exit_time, duration, None, visit_num + 1,
                    ))
                    any_berth = True

        # If the vessel never berthed, record its basic info with a 0 turnaround.
        if not any_berth:
            result.append(_berth_record(
                mmsi, length, ship_name, vessel_main_type,
                None, None, 0, None, 1,
            ))
    return result

# ---- MAIN: build one summary CSV per terminal from each cleaned port file ----
for fname in files:
    in_path = os.path.join(cleaned_folder, fname)
    print(f"\nProcessing {fname} for terminal summaries...")
    df = pd.read_csv(in_path, dtype=str)  # everything is read as text
    if df.empty:
        print(f"  {fname}: No records")
        continue

    # Keep only records that have every field the summary relies on.
    required_columns = ['mmsi', 'longitude', 'latitude', 'date_time_utc',
                        'length', 'ship_type', 'ship_name']
    df = df.dropna(subset=required_columns)

    fname_lower = fname.lower()
    for term_name, poly in TERMINAL_POLYGONS.items():
        # A terminal belongs to this file when the port keyword (the text before
        # the first "_" of the terminal name) occurs in the file name.
        port_keyword = term_name.split("_")[0].lower()
        if port_keyword not in fname_lower:
            continue

        result = vessel_visits_and_berths(df, poly)
        if not result:
            print(f"  {term_name}: No qualifying events found.")
            continue

        result_df = pd.DataFrame(result)
        out_path = os.path.join(summary_folder, f"{term_name}_summary.csv")
        result_df.to_csv(out_path, index=False)
        print(f"  {term_name}: {len(result_df)} berth events/info saved to {out_path}")

print("\nAll terminal summaries generated!")



Processing Bergen Terminal.csv for terminal summaries...
  Bergen_SecondPolygon: 381 berth events/info saved to D:\Thesis Work MLS\Norway Data Filtered\Port_Split_Result\Cleaned\Terminal_Summaries2\Bergen_SecondPolygon_summary.csv

Processing Drammen Port.csv for terminal summaries...
  Drammen_2ndPolygon: 152 berth events/info saved to D:\Thesis Work MLS\Norway Data Filtered\Port_Split_Result\Cleaned\Terminal_Summaries2\Drammen_2ndPolygon_summary.csv

Processing Kristiansand Terminal.csv for terminal summaries...
  Kristiansand_2ndPolygon: 199 berth events/info saved to D:\Thesis Work MLS\Norway Data Filtered\Port_Split_Result\Cleaned\Terminal_Summaries2\Kristiansand_2ndPolygon_summary.csv

Processing Oslo Port Area.csv for terminal summaries...
  Oslo_2ndPolygon_Filipstad: 157 berth events/info saved to D:\Thesis Work MLS\Norway Data Filtered\Port_Split_Result\Cleaned\Terminal_Summaries2\Oslo_2ndPolygon_Filipstad_summary.csv
  Oslo_2ndPolygon_Sjuroya: 349 berth events/info saved to 