## Understanding what the raw data files look like
Here I use a single file to plot the starting and ending positions of all unique vessels.

In [1]:
import pandas as pd
import folium
from pathlib import Path

# ── CONFIG ──
CHUNK_SIZE = 300_000
# NOTE: machine-specific absolute path — point this at the folder holding the processed AIS files.
PROCESSED_DIR = Path(r"D:\Thesis Work MLS\Denmark AIS data\processed_files")
FILENAME      = "aisdk-2025-01-01.csv"

TS_COL        = '# Timestamp'
POSITION_COLS = [TS_COL, 'MMSI', 'Latitude', 'Longitude']
# Map centre used only when no usable position was found (roughly central Denmark).
DEFAULT_CENTER = [56.0, 10.5]

# ── 1) STREAM THROUGH THE FILE IN CHUNKS ──
# Each chunk is reduced to its earliest and latest row per MMSI with a
# vectorised groupby instead of visiting every row in Python. The per-chunk
# candidates are then reduced once more after the whole file has been read.
first_candidates = []
last_candidates = []

for chunk in pd.read_csv(
        PROCESSED_DIR / FILENAME,
        usecols=POSITION_COLS,
        parse_dates=[TS_COL],
        chunksize=CHUNK_SIZE
    ):
    # Drop rows missing coords. Rows without a timestamp are dropped as well:
    # they cannot be ordered, and a NaT would otherwise pin a vessel's
    # first/last position because every comparison against NaT is False.
    chunk = chunk.dropna(subset=[TS_COL, 'Latitude', 'Longitude'])
    if chunk.empty:
        continue

    # sort=False keeps vessels in order of first appearance.
    ts_by_vessel = chunk.groupby('MMSI', sort=False)[TS_COL]
    # idxmin/idxmax return the FIRST row holding the extreme timestamp, which
    # matches the strict "<" / ">" update rule of a row-by-row scan.
    first_candidates.append(chunk.loc[ts_by_vessel.idxmin().to_numpy()])
    last_candidates.append(chunk.loc[ts_by_vessel.idxmax().to_numpy()])

# will map MMSI → {'first': (ts,lat,lon), 'last': (ts,lat,lon)}
first_last = {}
if first_candidates:
    # ignore_index gives a unique index while preserving chunk order, so ties
    # between chunks are still resolved in favour of the earlier chunk.
    firsts = pd.concat(first_candidates, ignore_index=True)
    lasts = pd.concat(last_candidates, ignore_index=True)
    firsts = firsts.loc[firsts.groupby('MMSI', sort=False)[TS_COL].idxmin().to_numpy()]
    lasts = lasts.loc[lasts.groupby('MMSI', sort=False)[TS_COL].idxmax().to_numpy()]

    # Select columns explicitly: read_csv keeps the file's column order, not
    # the order given in `usecols`, and the tuples below are unpacked by position.
    last_by_mmsi = {
        mmsi: (ts, lat, lon)
        for ts, mmsi, lat, lon in lasts[POSITION_COLS].itertuples(index=False, name=None)
    }
    first_last = {
        mmsi: {'first': (ts, lat, lon), 'last': last_by_mmsi[mmsi]}
        for ts, mmsi, lat, lon in firsts[POSITION_COLS].itertuples(index=False, name=None)
    }

# ── 2) REPORT UNIQUE VESSEL COUNT ──
unique_vessels = len(first_last)
print(f"Found {unique_vessels} unique vessels in {FILENAME!r}.")

# ── 3) BUILD A FOLIUM MAP CENTERED ON THE MEAN LOCATION ──
# collect all coords (start and end of every vessel)
all_lats = []
all_lons = []
for data in first_last.values():
    all_lats += [data['first'][1], data['last'][1]]
    all_lons += [data['first'][2], data['last'][2]]

# Guard against an empty result so the cell still renders a map instead of
# failing with ZeroDivisionError.
if all_lats:
    center = [sum(all_lats) / len(all_lats), sum(all_lons) / len(all_lons)]
else:
    center = DEFAULT_CENTER
m = folium.Map(location=center, zoom_start=6)

# feature groups (toggleable through the layer control)
fg_start = folium.FeatureGroup(name='Start Positions')
fg_end   = folium.FeatureGroup(name='End Positions')

for mmsi, data in first_last.items():
    # unpack
    start_ts, slat, slon = data['first']
    end_ts, elat, elon = data['last']
    # green start
    fg_start.add_child(
        folium.CircleMarker(
            location=[slat, slon],
            radius=3,
            color='green',
            fill=True, fill_opacity=0.7,
            # popups are rendered as HTML, so use <br> for the line break
            popup=f"MMSI {mmsi}<br>Start: {start_ts}"
        )
    )
    # red end
    fg_end.add_child(
        folium.CircleMarker(
            location=[elat, elon],
            radius=3,
            color='red',
            fill=True, fill_opacity=0.7,
            popup=f"MMSI {mmsi}<br>End: {end_ts}"
        )
    )

m.add_child(fg_start)
m.add_child(fg_end)
m.add_child(folium.LayerControl())

# ── 4) DISPLAY MAP ──
m


Found 2165 unique vessels in 'aisdk-2025-01-01.csv'.


## Density map
Used to better understand all the vessels found in a single AIS file.

In [9]:
import pandas as pd
import folium
from folium.plugins import FastMarkerCluster
from pathlib import Path

# ── CONFIG ──
CHUNK_SIZE    = 500_000
# NOTE: machine-specific absolute path — point this at the folder holding the processed AIS files.
PROCESSED_DIR = Path(r"D:\Thesis Work MLS\Denmark AIS data\processed_files")
FILENAME      = "aisdk-2025-01-01.csv"
# Map centre used only when no usable position was found (roughly central Denmark).
DEFAULT_CENTER = [56.0, 10.5]

# ── BUILD FIRST/LAST POSITIONS ──
# maps MMSI → {'first': (ts, lat, lon), 'last': (ts, lat, lon)}
first_last = {}
cols = ['# Timestamp','MMSI','Latitude','Longitude']

for chunk in pd.read_csv(
        PROCESSED_DIR / FILENAME,
        usecols=cols,
        parse_dates=['# Timestamp'],
        chunksize=CHUNK_SIZE
    ):
    # Drop rows missing coordinates. Rows without a timestamp are dropped too:
    # a NaT stored as a vessel's first/last could never be replaced, because
    # every comparison against NaT is False.
    chunk = chunk.dropna(subset=['# Timestamp', 'Latitude', 'Longitude'])
    # read_csv keeps the file's column order regardless of `usecols`, so
    # reorder explicitly before unpacking rows by position below.
    chunk = chunk[cols]

    # iterate as plain tuples: (timestamp, mmsi, lat, lon)
    for ts, mmsi, lat, lon in chunk.itertuples(index=False, name=None):
        record = first_last.get(mmsi)
        if record is None:
            # first sighting of this vessel: it is both the start and the end so far
            first_last[mmsi] = {'first': (ts, lat, lon), 'last': (ts, lat, lon)}
            continue
        # strict comparisons keep the earliest-read row on timestamp ties
        if ts < record['first'][0]:
            record['first'] = (ts, lat, lon)
        if ts > record['last'][0]:
            record['last'] = (ts, lat, lon)

print(f"Unique vessels: {len(first_last)}")

# ── PREPARE COORDINATES FOR CLUSTERING ──
start_coords = [[v['first'][1], v['first'][2]] for v in first_last.values()]
end_coords   = [[v['last'][1],  v['last'][2]]  for v in first_last.values()]

# ── CENTER MAP ──
all_lats = [c[0] for c in start_coords + end_coords]
all_lons = [c[1] for c in start_coords + end_coords]
# Guard against an empty result so the cell still renders a map instead of
# failing with ZeroDivisionError.
if all_lats:
    center = [sum(all_lats) / len(all_lats), sum(all_lons) / len(all_lons)]
else:
    center = DEFAULT_CENTER

m = folium.Map(location=center, zoom_start=6)

# ── FAST CLUSTER LAYERS ──
# one clustered layer per endpoint type, each toggleable via the layer control
FastMarkerCluster(start_coords, name='Start Positions').add_to(m)
FastMarkerCluster(end_coords,   name='End Positions').add_to(m)

folium.LayerControl().add_to(m)

# ── RENDER ──
m


Unique vessels: 2165
