In [5]:
import requests
import pandas as pd
from io import BytesIO
from zipfile import ZipFile

In [3]:
# Day of AIS data to fetch; this is the only value that needs editing to pull a different day.
target_date = pd.to_datetime("2023-05-01").date()
# The year in the file name and the year directory in the URL are both taken
# from target_date so the three date components can never disagree.
# NOTE(review): assumes every year uses the <year>/AIS_<year>_<mm>_<dd>.zip
# layout seen for 2023 — confirm before pointing this at another year.
fname = f"AIS_{target_date.year}_{target_date.month:02d}_{target_date.day:02d}.zip"
url = f"https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{target_date.year}/{fname}"

In [None]:
# Download the daily archive into memory and load its CSV into a DataFrame.
r = requests.get(url, timeout=60)
# Fail here on a 4xx/5xx response instead of handing an error page to ZipFile,
# which would otherwise surface as an unrelated BadZipFile error.
r.raise_for_status()
with ZipFile(BytesIO(r.content)) as z:
    # Select the CSV member by extension rather than by its position in the archive.
    csv_name = next(
        (member for member in z.namelist() if member.lower().endswith(".csv")),
        None,
    )
    if csv_name is None:
        raise ValueError(f"no CSV member found in archive from {url}: {z.namelist()}")
    with z.open(csv_name) as f:
        raw_ais = pd.read_csv(f)

In [None]:
# Mapping from the column headers in the downloaded CSV to the lower-case
# names applied by the rename step.
column_renames = dict(
    MMSI="mmsi",
    BaseDateTime="utc",
    LAT="lat",
    LON="lon",
    SOG="sog",
    COG="cog",
    Heading="heading",
    VesselName="name",
    IMO="imo",
    CallSign="call_sign",
    VesselType="vessel_type",
    Status="status",
    Length="length",
    Width="width",
    Draft="draft",
    Cargo="cargo",
    TransceiverClass="transceiver_class",
)
# `columns` is a second name for the very same dict object (not a copy).
# NOTE(review): no visible cell reads `columns`; confirm whether it is still needed.
columns = column_renames

In [None]:
def _order_and_number_pings(frame):
    """Sort rows by vessel and time, then index them by (mmsi, ping).

    `ping` is a 0-based running counter of rows within each `mmsi` group,
    taken after sorting by `mmsi` and then `utc`.
    """
    # NOTE(review): `utc` is sorted with whatever dtype read_csv inferred;
    # nothing here parses it into datetimes.
    ordered = frame.sort_values(by=["mmsi", "utc"])
    ping_number = ordered.groupby("mmsi").cumcount()
    return ordered.assign(ping=ping_number).set_index(["mmsi", "ping"])


ais = _order_and_number_pings(raw_ais.rename(columns=column_renames))