In [None]:
import os
import requests
import pandas as pd
import numpy as np

In [None]:
# Remote AIS archive and the local file name it is stored under.
base_url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2023/AIS_2023_01_01.zip"
filename = "AIS_2023_01_01.zip"

# Skip the download when the archive is already on disk so that
# Restart & Run All does not re-fetch it every time.
if not os.path.exists(filename):
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated file that the existence check above would accept.
    partial_name = filename + ".part"
    with requests.get(base_url, stream=True, timeout=60) as r:
        # Fail loudly on HTTP errors instead of saving an error page as a .zip.
        r.raise_for_status()
        with open(partial_name, 'wb') as f:
            # Stream in 1 MiB chunks rather than holding the whole body in memory.
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_name, filename)

In [None]:
# Load the downloaded archive straight into a DataFrame (read_csv's default
# compression inference handles the .zip extension), then preview it.
df = pd.read_csv(filepath_or_buffer=filename)
df

In [None]:
# Keep rows whose TransceiverClass is 'A' and whose SOG and Length fall
# strictly inside the open ranges (1, 80) and (30, 400) respectively.
is_class_a = df["TransceiverClass"].eq("A")
speed_in_range = df["SOG"].gt(1) & df["SOG"].lt(80)
length_in_range = df["Length"].gt(30) & df["Length"].lt(400)

clean_df = df.loc[is_class_a & speed_in_range & length_in_range].reset_index(drop=True)


In [None]:
# Distribution of SOG after cleaning (100 bins)
clean_df["SOG"].hist(bins=100)

In [None]:
# Scatter of every cleaned position: longitude on x, latitude on y
clean_df.plot(kind="scatter", x="LON", y="LAT")

In [None]:
gulf_df

In [None]:
gulf_df = clean_df[
    (clean_df["LAT"] >= 21.0) & (clean_df["LAT"] <= 31.0) &
    (clean_df["LON"] >= -97.0) & (clean_df["LON"] <= -81.0)
].reset_index(drop=True)

In [None]:
# Group each MMSI's rows together, ordered by BaseDateTime within the group.
# BaseDateTime has not been parsed yet at this point, so this is a string sort;
# presumably the raw values are ISO-formatted so it matches time order (confirm).
sort_keys = ["MMSI", "BaseDateTime"]
gulf_df = gulf_df.sort_values(sort_keys)

In [None]:
# Number of rows per MMSI as a one-column frame indexed by MMSI.
# The column is named "count" explicitly: value_counts() only uses that name
# on pandas >= 2.0 (older versions reuse the source column name "MMSI"), and
# the following cell looks the column up as id_count['count'].
id_count = gulf_df["MMSI"].value_counts().rename("count").to_frame()

In [None]:
# MMSIs that appear in at least 5 rows; rows of every other MMSI are dropped.
has_enough_pings = id_count["count"] >= 5
active_ships = id_count.loc[has_enough_pings].index
gulf_df = gulf_df.loc[gulf_df["MMSI"].isin(active_ships)]

In [None]:
# Histogram of how many rows each remaining MMSI contributes (100 bins)
rows_per_mmsi = gulf_df["MMSI"].value_counts()
rows_per_mmsi.hist(bins=100)

In [None]:
# The sort and the MMSI filter above leave the old row labels in place;
# renumber the rows 0..n-1 and discard the previous index.
gulf_df = gulf_df.reset_index(drop=True)

In [None]:
# Summary statistics of the numeric columns, rounded to one decimal place
gulf_summary = gulf_df.describe()
gulf_summary.round(1)

In [None]:
# Treat Heading == 511 as missing.
# NOTE(review): 511 is presumably the "heading not available" sentinel —
# confirm against the AIS data dictionary.
gulf_df["Heading"] = gulf_df["Heading"].replace(to_replace=511, value=np.nan)

In [None]:
# Preview the Gulf subset after the Heading == 511 values were replaced with NaN
gulf_df

# Satellite visualization

In [None]:
# Realized there's way too much data.
# Pull in the satellite metadata and keep only what the satellite will end up seeing.

In [None]:
from pystac_client import Client
from shapely.geometry import shape, Point

import geopandas as gpd
import matplotlib.pyplot as plt


from datetime import timedelta

In [None]:


# Search the Planetary Computer STAC catalogue for Sentinel-2 L2A scenes that
# touch the Gulf bounding box, were acquired in the given date range, and
# report eo:cloud_cover below 80.
api = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

results = api.search(
    collections=["sentinel-2-l2a"],
    bbox=[-97, 21, -81, 31],  # Gulf bounding box
    datetime="2023-01-01/2023-03-01",
    query={"eo:cloud_cover": {"lt": 80}}
)

# One (footprint geometry, acquisition datetime) pair per returned scene.
# items() replaces the deprecated get_all_items() and yields scenes as the
# result pages arrive.
sentinel_passes = []
for item in results.items():
    geom = shape(item.geometry)
    dt = item.datetime
    if dt is None:
        # Later cells call dt.month / dt.date(); an item without a single
        # timestamp cannot be matched against AIS times, so leave it out.
        continue
    sentinel_passes.append((geom, dt))

In [None]:
# Passes acquired on 2023-01-01 only (a single day, despite the variable name)
jan_passes = [
    (footprint, acquired)
    for footprint, acquired in sentinel_passes
    if (acquired.year, acquired.month, acquired.day) == (2023, 1, 1)
]

In [None]:
# Split the selected passes into parallel lists of footprints and timestamps
geoms = [poly for poly, _ in jan_passes]
times = [dt for _, dt in jan_passes]

gdf = gpd.GeoDataFrame({'datetime': times}, geometry=geoms, crs='EPSG:4326')

# Plot footprints only, framed to the Gulf bounding box
fig, ax = plt.subplots(figsize=(10, 6))
gdf.plot(ax=ax, edgecolor='blue', facecolor='none', linewidth=0.5)
# jan_passes is filtered to the single day 2023-01-01, so the title names that
# day rather than the whole month.
ax.set_title('Sentinel-2 Overpasses (2023-01-01)')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_xlim(-97, -81)
ax.set_ylim(21, 31)
ax.grid(True)
plt.show()


In [None]:


# Convert gulf_df into a GeoDataFrame of ping locations.
# This cell used to read intersected_ais_df, which is only created in the
# "sat intersection" section further down, so it raised NameError on a fresh
# Restart & Run All. gulf_df is the frame available at this point.
ais_gdf = gpd.GeoDataFrame(
    gulf_df,
    geometry=gpd.points_from_xy(gulf_df["LON"], gulf_df["LAT"]),
    crs="EPSG:4326"
)

# Optional: filter AIS to a single day to avoid clutter
ais_gdf["BaseDateTime"] = pd.to_datetime(ais_gdf["BaseDateTime"])
ais_day = ais_gdf[
    (ais_gdf["BaseDateTime"].dt.date == pd.to_datetime("2023-01-01").date())
]

# Plot S2 footprints (blue outlines) + AIS pings (red dots) on one set of axes
fig, ax = plt.subplots(figsize=(10, 6))
gdf.plot(ax=ax, edgecolor='blue', facecolor='none', linewidth=0.5)
ais_day.plot(ax=ax, color='red', markersize=2, alpha=0.5)

# Frame the view to the Gulf bounding box used throughout the notebook
ax.set_xlim(-97, -81)
ax.set_ylim(21, 31)
ax.set_title('Sentinel-2 Overpasses + AIS Ship Positions (2023-01-01)')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)
plt.show()


# Satellite intersection

In [None]:


# Build the Sentinel-2 footprint GeoDataFrame for the AIS date under test.
target_date = pd.to_datetime("2023-01-01").date()

# Only the passes whose acquisition date equals the AIS date (e.g. Jan 1)
filtered_passes = [
    scene for scene in sentinel_passes
    if scene[1].date() == target_date
]

# Parallel lists: footprint geometries and their acquisition timestamps
poly_list = [footprint for footprint, _ in filtered_passes]
time_list = [acquired for _, acquired in filtered_passes]

sentinel_gdf = gpd.GeoDataFrame(
    {'datetime': time_list},
    geometry=poly_list,
    crs='EPSG:4326',
)


In [None]:
# Convert AIS df to GeoDataFrame with timestamp
# Parse the AIS timestamps in place, then attach a point geometry (LON as x,
# LAT as y) to every row to obtain a GeoDataFrame in EPSG:4326.
gulf_df["BaseDateTime"] = pd.to_datetime(gulf_df["BaseDateTime"])

ping_points = gpd.points_from_xy(gulf_df["LON"], gulf_df["LAT"])
ais_gdf = gpd.GeoDataFrame(gulf_df, geometry=ping_points, crs='EPSG:4326')


In [None]:
# Re-parse the AIS timestamps as UTC so they become timezone-aware.
# NOTE(review): presumably needed so they compare against the Sentinel-2
# acquisition datetimes — confirm those are tz-aware.
utc_ping_times = pd.to_datetime(ais_gdf["BaseDateTime"], utc=True)
ais_gdf["BaseDateTime"] = utc_ping_times

In [None]:
# We'll accumulate matching AIS rows into this list
matching_rows = []

# Define time window in seconds
TIME_WINDOW = timedelta(minutes=30)

# Iterate over sentinel polygons
for _, s2_row in sentinel_gdf.iterrows():
    poly = s2_row.geometry
    s2_time = s2_row.datetime

    # Filter AIS to time window first (fast)
    time_mask = (ais_gdf["BaseDateTime"] >= s2_time - TIME_WINDOW) & \
                (ais_gdf["BaseDateTime"] <= s2_time + TIME_WINDOW)
    candidate_ais = ais_gdf[time_mask]

    # Spatial filter: points inside this polygon
    inside_mask = candidate_ais.geometry.intersects(poly)
    intersecting = candidate_ais[inside_mask]

    if not intersecting.empty:
        matching_rows.append(intersecting)

# Concatenate results into one DataFrame
intersected_ais_df = pd.concat(matching_rows, ignore_index=True)


In [None]:
# Summary statistics of the matched pings, rounded to one decimal place
matched_summary = intersected_ais_df.describe()
matched_summary.round(1)

In [None]:
# Sanity check: show the first AIS timestamp
first_ping_time = ais_gdf["BaseDateTime"].iloc[0]
print(first_ping_time)


In [None]:
# Sanity check: show the first Sentinel-2 acquisition timestamp
first_pass_time = sentinel_gdf["datetime"].iloc[0]
print(first_pass_time)

In [None]:
# Compare the summed size of the per-footprint chunks with the final frame
raw_row_total = sum(map(len, matching_rows))
print(raw_row_total)  # total raw rows before concat
final_row_total = len(intersected_ais_df)
print(final_row_total)  # actual size after concat

In [None]:
# s2_time and intersecting are variables left over from the matching loop
# above, so this line reports only the last footprint that loop processed.
print(f"S2 tile at {s2_time}: {len(intersecting)} AIS hits")


In [None]:
# Number of matched pings per MMSI, most frequent first
intersected_ais_df["MMSI"].value_counts()