AIS Data

In [1]:
import pandas as pd

chunks = pd.read_csv("data.csv", chunksize=100_000)
for chunk in chunks:
    tankers = chunk[chunk["VesselType"] == 80]
    # Process or store tankers here


In [2]:
#Print how many Tankers are in the dataset
print(f"There are {len(tankers)} tankers in the dataset")


There are 1330 tankers in the dataset


In [5]:
#Print how many unique tankers are in the dataset
print(f"There are {tankers['MMSI'].nunique()} unique tankers in the dataset")


There are 292 unique tankers in the dataset


In [6]:
#print first 5 unique tankers names and data
print(tankers.drop_duplicates(subset=["MMSI"]).head())

              MMSI         BaseDateTime       LAT        LON  SOG    COG  \
7000081  240991000  2020-01-01T23:05:03  40.65215  -74.08700  0.0  173.0   
7000085  235076275  2020-01-01T23:05:58  27.40668  -96.69615  0.3  250.7   
7000122  367006790  2020-01-01T23:05:49  33.71950 -118.22519  5.8  240.4   
7000136  563164000  2020-01-01T23:04:43  30.19782  -91.10732  0.0  225.4   
7000180  538002784  2020-01-01T23:03:31  42.40297  -70.85053  8.0  227.1   

         Heading          VesselName         IMO CallSign  VesselType  Status  \
7000081    240.0              FOURNI  IMO9405564    SVAX9        80.0     5.0   
7000085    224.0  PARAMOUNT HELSINKI  IMO9453963    2CWB3        80.0     3.0   
7000122    511.0           VICKI ANN         NaN  WYE3508        80.0     5.0   
7000136    225.0       FSL HONG KONG  IMO9346732    9VLY8        80.0     1.0   
7000180    235.0     IVER PROSPERITY  IMO9351921    V7LP8        80.0     0.0   

         Length  Width  Draft  Cargo TransceiverClass  


In [35]:
#Print PARAMOUNT HELSINKI details
print(tankers[tankers["VesselName"] == "PARAMOUNT HELSINKI"])

              MMSI        BaseDateTime       LAT       LON  SOG    COG  \
7000085  235076275 2020-01-01 23:05:58  27.40668 -96.69615  0.3  250.7   
7008612  235076275 2020-01-01 23:17:57  27.40618 -96.69707  0.2  229.0   
7009568  235076275 2020-01-01 23:21:07  27.40605 -96.69730  0.3  231.7   
7019064  235076275 2020-01-01 23:32:57  27.40562 -96.69826  0.4  236.7   
7020862  235076275 2020-01-01 23:39:36  27.40538 -96.69884  0.2  234.2   
7029544  235076275 2020-01-01 23:54:37  27.40483 -96.69990  0.7  208.3   
7033036  235076275 2020-01-01 23:59:16  27.40458 -96.70011  0.3  199.5   

         Heading          VesselName         IMO CallSign  VesselType  Status  \
7000085    224.0  PARAMOUNT HELSINKI  IMO9453963    2CWB3        80.0     3.0   
7008612    230.0  PARAMOUNT HELSINKI  IMO9453963    2CWB3        80.0     3.0   
7009568    230.0  PARAMOUNT HELSINKI  IMO9453963    2CWB3        80.0     3.0   
7019064    224.0  PARAMOUNT HELSINKI  IMO9453963    2CWB3        80.0     3.0   
70

In [8]:
#Print names of top 5 tankers with most entries and their number of entries
top_5_tankers = tankers['VesselName'].value_counts().head(5)
print(top_5_tankers)

VesselName
RAPPAHANNOCK        434
ALTO ACRUX           19
CLEAN THRASHER        9
OCEAN BREEZE          9
OVERSEAS HOUSTON      9
Name: count, dtype: int64


In [34]:
#Print number of rows with a heading that is not a number, and where it isnt between 0 and 360
invalid_heading = tankers[~tankers["Heading"].apply(lambda x: isinstance(x, (int, float)) and 0 <= x <= 360)]
print(f"There are {len(invalid_heading)} rows with invalid heading")
#Print the invalid headings values
print(invalid_heading["Heading"])



There are 19 rows with invalid heading
7000122    511.0
7003462    511.0
7003489    511.0
7005295    511.0
7005348    511.0
7005529    511.0
7009696    511.0
7010802    511.0
7014597    511.0
7015258    511.0
7016867    511.0
7017724    511.0
7018642    511.0
7020163    511.0
7020243    511.0
7025247    511.0
7027201    511.0
7031224    511.0
7031465    511.0
Name: Heading, dtype: float64


In [33]:
# Make a folium map similar to MarineTraffic: dark basemap + seamark overlay
# Show only the last known position of each ship (arrow marker) and a dashed trail for its path
import folium
from folium import IFrame, DivIcon, PolyLine
from folium.plugins import Fullscreen
import numpy as np
from matplotlib import cm
from matplotlib.colors import Normalize, rgb2hex
import pandas as pd
# Center map on data (fallback to 0,0)
if len(tankers):
    center = [tankers['LAT'].mean(), tankers['LON'].mean()]
else:
    center = [0, 0]
# Use a dark basemap for MarineTraffic-like appearance
m = folium.Map(location=center, zoom_start=5, tiles='CartoDB dark_matter')
# Add OpenSeaMap seamark overlay (nautical symbols)
folium.TileLayer('https://tiles.openseamap.org/seamark/{z}/{x}/{y}.png', name='SeaMarks', attr='OpenSeaMap - seamark').add_to(m)
# Fullscreen control
Fullscreen().add_to(m)
# Prepare colors per vessel
unique_vessels = tankers['MMSI'].unique()
colors = cm.viridis(Normalize()(np.arange(len(unique_vessels))))
color_map = {vessel: rgb2hex(color) for vessel, color in zip(unique_vessels, colors)}
# Try to find a timestamp column to order positions. Use common candidate names.
timestamp_candidates = ['BaseDateTime','Timestamp','Date','Datetime','date','time','BaseDateTimeUTC','DateTimeUTC','received_at','timestamp']
timestamp_col = next((c for c in timestamp_candidates if c in tankers.columns), None)
if timestamp_col is not None:
    tankers = tankers.copy()
    tankers[timestamp_col] = pd.to_datetime(tankers[timestamp_col], errors='coerce')
    tankers_sorted = tankers.sort_values(timestamp_col)
else:
    tankers_sorted = tankers
# Build last known position per MMSI (last in time order)
last_positions = tankers_sorted.groupby('MMSI', sort=False).last().reset_index()
# Plot trails (dashed) and last position arrow for each ship
for mmsi, group in tankers_sorted.groupby('MMSI'):
    # sort group by timestamp if available
    if timestamp_col is not None and timestamp_col in group.columns:
        group = group.sort_values(timestamp_col)
    coords = [(r['LAT'], r['LON']) for _, r in group.iterrows() if pd.notna(r.get('LAT')) and pd.notna(r.get('LON'))]
    if len(coords) >= 2:
        # trail uses same color, dashed line
        trail_color = color_map.get(mmsi, '#00ffff')
        PolyLine(locations=coords, color=trail_color, weight=2, opacity=0.8, dash_array='6,6').add_to(m)
# Add last position markers with arrow SVGs on top
for _, row in last_positions.iterrows():
    lat = row.get('LAT')
    lon = row.get('LON')
    if pd.isna(lat) or pd.isna(lon):
        continue
    mmsi = row.get('MMSI')
    color = color_map.get(mmsi, '#00ffff')
    heading = row.get('Heading')
    try:
        heading = float(heading)
    except Exception:
        heading = 0.0
    svg_size = 36
    svg = f'''<div style="width:{svg_size}px;height:{svg_size}px;display:flex;align-items:center;justify-content:center;">
    <svg xmlns="http://www.w3.org/2000/svg" width="{svg_size}" height="{svg_size}" viewBox="0 0 24 24" style="transform: rotate({heading}deg); transform-origin: 12px 12px;">
      <path d="M12 2 L19 21 L12 17 L5 21 Z" fill="{color}" stroke="#ffffff" stroke-width="0.8" />
    </svg>
    </div>'''
    icon = DivIcon(html=svg)
    iframe = IFrame(f"Vessel Name: {row.get('VesselName','')}<br>Speed (SOG): {row.get('SOG','')}<br>Heading: {heading}", width=260, height=120)
    popup = folium.Popup(iframe, max_width=320)
    folium.Marker(location=[lat, lon], icon=icon, popup=popup).add_to(m)
# Layer control and save
folium.LayerControl().add_to(m)
m.save("tankers_detailed_map.html")

