AIS Data

In [2]:
# Read data.csv, data2.csv and data3.csv, extract tankers (VesselType == 80), compute the MMSI overlaps between the files and produce a map
import html

import folium
import numpy as np
import pandas as pd
from folium import IFrame, DivIcon, PolyLine
from folium.plugins import Fullscreen
from matplotlib import cm
from matplotlib.colors import Normalize, rgb2hex
# Helper: find a timestamp column in a dataframe
def find_timestamp_col(df):
    """Return the name of the first known timestamp column present in ``df``.

    The candidate names are tried in a fixed priority order, so when several
    of them exist the earliest one in the tuple below wins. Returns ``None``
    when ``df`` has none of them.
    """
    known_names = (
        'BaseDateTime', 'Timestamp', 'Date', 'Datetime', 'date', 'time',
        'BaseDateTimeUTC', 'DateTimeUTC', 'received_at', 'timestamp',
    )
    present = df.columns
    return next((name for name in known_names if name in present), None)
# Load CSVs (adjust paths if needed)
def _load_tankers(path, source_tag, tanker_type=80):
    """Read one AIS CSV and return ``(raw, tankers, ts_col)``.

    ``raw`` is the unfiltered frame. ``tankers`` is a copy of the rows whose
    ``VesselType`` equals ``tanker_type``, with the detected timestamp column
    parsed to datetimes (unparsable values become NaT) and two helper columns
    added: ``__source`` (the given tag) and ``_ts`` (a copy of the timestamp
    column, or NaT when the file has none). ``ts_col`` is the name of the
    detected timestamp column, or ``None``.

    Raises ``KeyError`` when the file has no ``VesselType`` column.
    """
    raw = pd.read_csv(path, low_memory=False)
    # Index the column directly: with `raw.get('VesselType')` a missing column
    # turned the mask into the scalar False and failed as `KeyError: False`.
    tankers = raw[raw['VesselType'] == tanker_type].copy()
    ts_col = find_timestamp_col(tankers)
    if ts_col is not None:
        tankers[ts_col] = pd.to_datetime(tankers[ts_col], errors='coerce')
    # Tag source
    tankers['__source'] = source_tag
    # Unified timestamp column for ordering across files
    tankers['_ts'] = tankers[ts_col] if ts_col is not None else pd.NaT
    return raw, tankers, ts_col


df1, t1, ts1 = _load_tankers('data.csv', 'data1')
df2, t2, ts2 = _load_tankers('data2.csv', 'data2')
df3, t3, ts3 = _load_tankers('data3.csv', 'data3')
# Unique counts
u1 = set(t1['MMSI'].dropna().unique())
u2 = set(t2['MMSI'].dropna().unique())
u3 = set(t3['MMSI'].dropna().unique())
print(f'Unique tankers in data.csv: {len(u1)}')
print(f'Unique tankers in data2.csv: {len(u2)}')
print(f'Unique tankers in data3.csv: {len(u3)}')
# Intersections
print(f'Common between data1 and data2: {len(u1 & u2)}')
print(f'Common between data1 and data3: {len(u1 & u3)}')
print(f'Common between data2 and data3: {len(u2 & u3)}')
print(f'Present in all three: {len(u1 & u2 & u3)}')
# Combine datasets preserving chronological info when available
t_all = pd.concat([t1, t2, t3], ignore_index=True, sort=False)
# Determine last position per MMSI: prefer data3 > data2 > data1
def _latest_row_per_vessel(frame, ts_col):
    """Return one row per MMSI from ``frame``: that vessel's latest report.

    When ``ts_col`` is given, rows are ordered by it with NaT values FIRST, so
    a row whose timestamp failed to parse is only chosen when the vessel has
    no valid timestamp at all. (The default ordering puts NaT last, which made
    an unparsable row win as the "latest" position.) The stable sort keeps
    file order among equal timestamps. Without ``ts_col`` the last row in file
    order is kept. Rows with a missing MMSI are ignored.
    """
    known = frame[frame['MMSI'].notna()]
    if ts_col is not None:
        known = known.sort_values(ts_col, na_position='first', kind='stable')
    return known.drop_duplicates(subset='MMSI', keep='last')


all_mmsis = sorted(set(t_all['MMSI'].dropna().unique()))
seen_mmsis = set()
latest_parts = []
# Walk the files from highest to lowest priority; a vessel already taken from
# a higher-priority file is skipped in the lower-priority ones.
for frame, ts_col in ((t3, ts3), (t2, ts2), (t1, ts1)):
    latest = _latest_row_per_vessel(frame, ts_col)
    latest = latest[~latest['MMSI'].isin(seen_mmsis)]
    seen_mmsis.update(latest['MMSI'])
    if len(latest):
        latest_parts.append(latest)
# Convert to DataFrame (one row per vessel, ordered by MMSI)
if latest_parts:
    last_df = pd.concat(latest_parts, sort=False).sort_values('MMSI', kind='stable')
else:
    last_df = pd.DataFrame(columns=t_all.columns)
# Compute overall latest timestamp across all data (for disappearance detection)
overall_latest = t_all['_ts'].max() if '_ts' in t_all.columns and t_all['_ts'].notna().any() else None
# Build a color map across all MMSIs: vessel i of n gets viridis(i / (n - 1)).
# The explicit division yields the same positions as min-max normalising
# 0..n-1, but is also well defined for zero or one vessel.
unique_vessels = list(all_mmsis)
n_vessels = len(unique_vessels)
if n_vessels:
    colors = cm.viridis(np.arange(n_vessels) / max(n_vessels - 1, 1))
else:
    colors = []
color_map = {v: rgb2hex(c) for v, c in zip(unique_vessels, colors)}
# Create the map centered on average last positions; fall back to [0, 0] when
# there are no usable coordinates (folium rejects NaN locations).
center = [0, 0]
if len(last_df):
    lat_mean = pd.to_numeric(last_df['LAT'], errors='coerce').mean()
    lon_mean = pd.to_numeric(last_df['LON'], errors='coerce').mean()
    if pd.notna(lat_mean) and pd.notna(lon_mean):
        center = [float(lat_mean), float(lon_mean)]
m = folium.Map(location=center, zoom_start=5, tiles='CartoDB dark_matter')
# The seamark tiles are meant to be drawn on top of a base map, so register
# them as an overlay (checkbox in the layer control) instead of a second,
# mutually exclusive base layer.
folium.TileLayer(
    'https://tiles.openseamap.org/seamark/{z}/{x}/{y}.png',
    name='SeaMarks',
    attr='OpenSeaMap - seamark',
    overlay=True,
).add_to(m)
Fullscreen().add_to(m)
# Plot trails for each MMSI using points from all files ordered by unified timestamp when available
# A single groupby replaces one full-frame boolean scan per vessel; rows with a
# missing MMSI are dropped by groupby, and groups come out in ascending MMSI
# order, matching iteration over the sorted MMSI list.
for mmsi, group in t_all.groupby('MMSI', sort=True):
    # `_ts` already mirrors each file's own timestamp column, so it is the only
    # ordering key needed. Rows without a timestamp sort to the end; the stable
    # sort keeps file order among equal timestamps.
    if group['_ts'].notna().any():
        group = group.sort_values('_ts', kind='stable')
    points = group[['LAT', 'LON']].dropna()
    if len(points) < 2:
        continue
    coords = list(zip(points['LAT'].astype(float).tolist(), points['LON'].astype(float).tolist()))
    trail_color = color_map.get(mmsi, '#00ffff')
    PolyLine(locations=coords, color=trail_color, weight=2, opacity=0.9, dash_array='6,6').add_to(m)
# Add last-position markers (SVG arrows) — data3 wins if present
SVG_SIZE = 44  # side of the square marker box, in pixels
STALE_AFTER = pd.Timedelta(days=1)  # silence longer than this flags a vessel as disappeared


def _valid_heading(value):
    """Return ``value`` as a float in [0, 360), or ``None`` if it is unusable.

    Rejects missing/non-numeric values, NaN (which ``float()`` accepts and
    which would otherwise render as ``rotate(nandeg)``) and out-of-range
    values such as 511, the AIS code for "heading not available".
    """
    try:
        heading = float(value)
    except (TypeError, ValueError):
        return None
    if np.isfinite(heading) and 0.0 <= heading < 360.0:
        return heading
    return None


def _popup_text(value):
    """Return ``value`` as HTML-escaped text; missing values become ''."""
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    return html.escape(str(value))


for _, row in last_df.iterrows():
    lat = row.get('LAT')
    lon = row.get('LON')
    if pd.isna(lat) or pd.isna(lon):
        continue
    mmsi = row.get('MMSI')
    color = color_map.get(mmsi, '#00ffff')
    heading = _valid_heading(row.get('Heading'))
    # Without a usable heading the arrow points north.
    rotation = heading if heading is not None else 0.0
    # `_ts` is a copy of the source file's own timestamp column, so it is the
    # vessel's last known report time.
    last_ts = row.get('_ts')
    disappeared = False
    if overall_latest is not None and pd.notna(last_ts):
        try:
            disappeared = bool((overall_latest - pd.to_datetime(last_ts)) > STALE_AFTER)
        except (TypeError, ValueError):
            # Best effort: incomparable timestamps (e.g. tz-aware vs naive)
            # simply leave the vessel unflagged.
            disappeared = False
    # if disappeared, add a small red exclamation overlay in top-right
    ex_html = '<span style="position:absolute;top:0;right:0;color:#ff3333;font-weight:bold;font-size:18px;">&#x2757;</span>' if disappeared else ''
    svg = f'''<div style="position:relative;width:{SVG_SIZE}px;height:{SVG_SIZE}px;display:flex;align-items:center;justify-content:center;">{ex_html}
    <svg xmlns="http://www.w3.org/2000/svg" width="{SVG_SIZE}" height="{SVG_SIZE}" viewBox="0 0 24 24" style="transform: rotate({rotation}deg); transform-origin: 12px 12px;">
      <path d="M12 2 L19 21 L12 17 L5 21 Z" fill="{color}" stroke="#ffffff" stroke-width="1" />
    </svg>
    </div>'''
    # Give the icon an explicit size and a centred anchor; otherwise the box's
    # top-left corner, not the arrow's centre, sits on the vessel position.
    icon = DivIcon(
        html=svg,
        icon_size=(SVG_SIZE, SVG_SIZE),
        icon_anchor=(SVG_SIZE // 2, SVG_SIZE // 2),
    )
    source = row.get('__source', '')
    heading_text = f'{heading}' if heading is not None else 'n/a'
    # Every data-derived field is escaped: vessel names come straight from the
    # CSV and must not be interpreted as markup inside the popup.
    popup_html = (
        f"MMSI: {_popup_text(mmsi)}<br>Vessel: {_popup_text(row.get('VesselName'))}"
        f"<br>Speed (SOG): {_popup_text(row.get('SOG'))}<br>Heading: {heading_text}"
        f"<br>Source: {_popup_text(source)}<br>Time: {_popup_text(last_ts)}"
    )
    iframe = IFrame(popup_html, width=320, height=150)
    popup = folium.Popup(iframe, max_width=360)
    folium.Marker(location=[float(lat), float(lon)], icon=icon, popup=popup).add_to(m)
# Attach the layer switcher last so it lists every layer added so far, then
# write the finished map to a standalone HTML file.
layer_switcher = folium.LayerControl()
layer_switcher.add_to(m)
output_html = 'tankers_last_positions_map.html'
m.save(output_html)



Unique tankers in data.csv: 463
Unique tankers in data2.csv: 446
Unique tankers in data3.csv: 441
Common between data1 and data2: 397
Common between data1 and data3: 378
Common between data2 and data3: 398
Present in all three: 361
