In [None]:
#conda install geopandas

import geopandas as gpd

import matplotlib.pyplot as plt

# Load the shapefile

# Path to your shapefile

from pathlib import Path
import zipfile
import geopandas as gpd

# Unpack the zipped shapefile next to the archive, load it and draw it.
zip_path = Path("data/NWB_roads.zip")
extract_to = zip_path.with_suffix("")  # -> data/NWB_roads

with zipfile.ZipFile(zip_path) as z:
    z.extractall(extract_to)

# Prefer the expected file name; otherwise fall back to a .shp found anywhere
# in the extracted tree.
shp_path = extract_to / "wegen_in_out.shp"
if not shp_path.exists():
    # Sorting makes the fallback choice deterministic instead of depending on
    # filesystem traversal order.
    candidates = sorted(extract_to.rglob("*.shp"))
    if not candidates:
        # A bare next() on the empty rglob iterator would surface as an
        # uninformative StopIteration.
        raise FileNotFoundError(
            f"No .shp file found under {extract_to} (extracted from {zip_path})"
        )
    shp_path = candidates[0]

gdf = gpd.read_file(shp_path)


# Plot the shapefile
ax = gdf.plot(figsize=(10, 10), edgecolor="black")

ax.set_title("Shapefile Visualization")

# NOTE(review): these labels assume geographic (lon/lat) coordinates; if the
# layer uses a projected CRS the axes are really x/y in map units - confirm via gdf.crs.
ax.set_xlabel("Longitude")

ax.set_ylabel("Latitude")

plt.show()

In [None]:
import pandas as pd
# Read the raw vessel-position export and preview its first rows.
vessel_csv_path = "data/Vesselposition_data_20-24Aug2025.csv"
df_vessel = pd.read_csv(filepath_or_buffer=vessel_csv_path)
df_vessel.head(n=5)

In [None]:
import pandas as pd

# Load the vessel-position export and keep only the columns of interest.
df = pd.read_csv("data/Vesselposition_data_20-24Aug2025.csv")

cols = ["lon", "lat", "upload-timestamp", "id", "name", "port-role", "speed-in-centimeters-per-second", "identifier-sensor"]

# Most columns are optional (they are silently skipped when absent), but the
# coordinates are used unconditionally below, so fail early with a clear message.
missing_coords = [c for c in ("lon", "lat") if c not in df.columns]
if missing_coords:
    raise KeyError(
        f"Missing required coordinate column(s) {missing_coords}; found columns: {list(df.columns)}"
    )

keep = [c for c in cols if c in df.columns]

# Coerce coordinates to numbers (unparseable values become NaN) and drop rows
# without a usable position. Built as one chain so re-running the cell never
# mutates an earlier frame.
pos = (
    df[keep]
    .assign(
        lon=lambda frame: pd.to_numeric(frame["lon"], errors="coerce"),
        lat=lambda frame: pd.to_numeric(frame["lat"], errors="coerce"),
    )
    .dropna(subset=["lon", "lat"])
)

pos.head()

In [None]:
import pandas as pd

# Read the raw TomTom export and preview its first rows.
tomtom_csv_path = "data/TomTom_data_20-24Aug2025.csv"
df_tomtom = pd.read_csv(filepath_or_buffer=tomtom_csv_path)
df_tomtom.head(n=5)

In [None]:
import streamlit as st
import pandas as pd
import plotly.express as px
from Vessels_and_Car_Flow import load_carflow_flat, CARFLOW_SRC

# Streamlit page: line chart of the 3-minute mean traffic level for the five
# road ids with the highest mean level inside a user-selected time window.
st.set_page_config(page_title="Car Flow Line Graph", page_icon="🚗", layout="wide")
st.title("Car Flow — Top 5 roads")

LOCAL_TZ = "Europe/Amsterdam"


@st.cache_data
def get_data(src: str):
    """Load the flat car-flow table from ``src`` (wrapped in Streamlit's data cache)."""
    return load_carflow_flat(src)


def _as_local(ts) -> pd.Timestamp:
    """Return ``ts`` as a pandas Timestamp in LOCAL_TZ.

    Naive values are interpreted as LOCAL_TZ wall-clock time; values that
    already carry tzinfo are converted. ``pd.Timestamp(aware_value, tz=...)``
    raises ValueError, so the two cases must be handled separately.
    """
    if getattr(ts, "tzinfo", None) is None:
        return pd.Timestamp(ts, tz=LOCAL_TZ)
    return pd.Timestamp(ts).tz_convert(LOCAL_TZ)


df = get_data(CARFLOW_SRC)  # <-- force using the raw CSV; skips any corrupt carflow_flat.parquet
if df.empty:
    st.error("No data loaded.")
    st.stop()

# Keep only complete rows with a parseable timestamp.
df = df.dropna(subset=["time_utc","id","traffic_level"]).copy()
df["time_utc"] = pd.to_datetime(df["time_utc"], utc=True, errors="coerce")
df = df.dropna(subset=["time_utc"])
if df.empty:
    # Without this guard the min()/max() below would be NaT and the slider
    # bounds meaningless.
    st.error("No rows with a valid time, id and traffic level.")
    st.stop()

df["time_local"] = df["time_utc"].dt.tz_convert(LOCAL_TZ)
df["id_str"] = df["id"].astype("Int64").astype(str)

tmin, tmax = df["time_local"].min(), df["time_local"].max()
if tmin < tmax:
    start_dt, end_dt = st.sidebar.slider(
        "Time range",
        min_value=tmin.to_pydatetime(),
        max_value=tmax.to_pydatetime(),
        value=(tmin.to_pydatetime(), tmax.to_pydatetime()),
        format="YYYY-MM-DD HH:mm",
    )
else:
    # Only one distinct timestamp: there is no range to choose, so avoid
    # handing the slider a degenerate min == max interval.
    start_dt, end_dt = tmin.to_pydatetime(), tmax.to_pydatetime()
start = _as_local(start_dt)
end = _as_local(end_dt)

d = df[(df["time_local"] >= start) & (df["time_local"] <= end)]
if d.empty:
    st.info("No data in the selected time range.")
    st.stop()

# Mean traffic level per id in 3-minute bins; bins without observations are dropped.
# "3min" is the non-deprecated spelling of the old "3T" offset alias.
res = (
    d.set_index("time_local")
     .groupby("id_str")["traffic_level"]
     .resample("3min").mean()
     .reset_index()
     .dropna(subset=["traffic_level"])
)

if res.empty:
    st.info("No data after resampling.")
    st.stop()

# The five ids with the highest mean level over the selected window.
top5 = (
    res.groupby("id_str")["traffic_level"]
       .mean()
       .sort_values(ascending=False)
       .head(5)
       .index.tolist()
)

# Plot only the last 20 bins of each selected id.
plot_df = res[res["id_str"].isin(top5)].sort_values(["id_str","time_local"])
plot_df = plot_df.groupby("id_str", group_keys=False).tail(20)

fig = px.line(plot_df, x="time_local", y="traffic_level", color="id_str", title="3-minute mean")
# NOTE(review): the fixed axis assumes traffic_level lies in [0, 1] - confirm against the data.
fig.update_yaxes(range=[0, 1])
st.plotly_chart(fig, use_container_width=True)

st.caption(f"IDs shown: {len(top5)} • Points: {len(plot_df):,} • Window: {plot_df['time_local'].min()} → {plot_df['time_local'].max()}")

In [None]:
import pandas as pd

def parse_time_iso8601_utc(s: pd.Series) -> pd.Series:
    """Parse a Series of timestamps into timezone-aware UTC datetimes.

    A Series that already has a datetime dtype is handed directly to
    ``pd.to_datetime(..., utc=True)``. Anything else is cast to string,
    stripped of surrounding whitespace and parsed in up to three passes:
    ISO 8601 with fractional seconds and offset, ISO 8601 without fractional
    seconds, and finally pandas' own format inference. Each later pass only
    fills values the earlier passes left unparsed; values that no pass can
    parse come back as NaT.
    """
    if pd.api.types.is_datetime64_any_dtype(s):
        return pd.to_datetime(s, utc=True, errors="coerce")

    text = s.astype("string").str.strip()

    # Strictest format first; ``None`` means "let pandas infer the format".
    first_format, *fallback_formats = (
        "%Y-%m-%dT%H:%M:%S.%f%z",
        "%Y-%m-%dT%H:%M:%S%z",
        None,
    )

    parsed = pd.to_datetime(text, format=first_format, utc=True, errors="coerce")
    for fmt in fallback_formats:
        unparsed = parsed.isna()
        if not unparsed.any():
            # Everything is parsed already; later passes would be no-ops.
            break
        parsed.loc[unparsed] = pd.to_datetime(
            text[unparsed], format=fmt, utc=True, errors="coerce"
        )
    return parsed

import csv, sys
from io import StringIO
import pandas as pd

def _raise_csv_field_limit():
    lim = sys.maxsize
    while True:
        try:
            csv.field_size_limit(lim)
            break
        except OverflowError:
            lim //= 10

def _pack(recs):
    """Turn buffered ``(time_raw, id, traffic_level)`` records into a typed DataFrame.

    The raw time is parsed to UTC with parse_time_iso8601_utc and also exposed
    converted to Europe/Amsterdam; ``id`` and ``traffic_level`` are coerced to
    numbers. Rows where the time, id or traffic level could not be parsed are
    dropped. Returned columns: time_utc, time_ams, id, traffic_level.
    """
    raw = pd.DataFrame(recs, columns=["time_raw","id","traffic_level"])
    utc_times = parse_time_iso8601_utc(raw["time_raw"])

    # The dict's insertion order fixes the output column order.
    packed = pd.DataFrame(
        {
            "time_utc": utc_times,
            "time_ams": utc_times.dt.tz_convert("Europe/Amsterdam"),
            "id": pd.to_numeric(raw["id"], errors="coerce"),
            "traffic_level": pd.to_numeric(raw["traffic_level"], errors="coerce"),
        }
    )
    return packed.dropna(subset=["time_utc","id","traffic_level"])

def _inner_records(t, inner):
    """Yield ``(t, id, traffic_level)`` string tuples parsed from one embedded payload.

    ``inner`` is the text of a single ``data`` cell, itself parsed as a small
    CSV. ``t`` is the raw time value of the enclosing row and is attached
    unchanged to every record.
    """
    reader = csv.reader(StringIO(inner), delimiter=",", quotechar='"')
    hdr = next(reader, None)

    if not hdr:
        # The first line is blank: treat the whole payload (newlines removed)
        # as a single two-value pair split on ',' or, failing that, on ';'.
        txt = inner.replace("\n", "")
        parts = [p.strip() for p in (txt.split(",") if "," in txt else txt.split(";"))]
        if len(parts) == 2:
            yield (t, parts[0], parts[1])
        return

    if len(hdr) == 1 and ";" in hdr[0]:
        # Comma parsing left the first line in one field that contains ';',
        # so re-read the payload as ';'-delimited.
        reader = csv.reader(StringIO(inner), delimiter=";", quotechar='"')
        hdr = next(reader, None) or []

    hdr = [h.strip().lower() for h in hdr]
    if "id" in hdr and "traffic_level" in hdr:
        # Named header: pick the two columns by position, skipping short rows.
        id_i, tl_i = hdr.index("id"), hdr.index("traffic_level")
        last_needed = max(id_i, tl_i)
        for r in reader:
            if len(r) > last_needed:
                yield (t, r[id_i], r[tl_i])
    else:
        # No recognizable header: the first line is itself data when it has
        # exactly two fields; later rows contribute their first two fields.
        if len(hdr) == 2:
            yield (t, hdr[0], hdr[1])
        for r in reader:
            if len(r) >= 2:
                yield (t, r[0], r[1])


def carflow_flat_iter(path, batch_rows=250_000):
    """Yield chunks with columns: time_utc, time_ams, id, traffic_level.

    Reads the CSV at ``path``, whose header must contain ``time`` and ``data``
    columns (matched case-insensitively, surrounding whitespace ignored). Each
    ``data`` cell holds an embedded CSV payload that is flattened into
    ``(time, id, traffic_level)`` records; the records are buffered and packed
    into a DataFrame by ``_pack`` every ``batch_rows`` records, with a final
    partial chunk at the end.

    Args:
        path: Location of the CSV file (opened as UTF-8, BOM tolerated).
        batch_rows: Number of buffered records that triggers a yielded chunk.

    Yields:
        pandas.DataFrame chunks as produced by ``_pack``.

    Raises:
        ValueError: If the header lacks ``time`` or ``data`` - including when
            the file is empty.

    Note:
        Calls ``_raise_csv_field_limit``, which changes the csv module's
        field size limit for the whole process.
    """
    _raise_csv_field_limit()
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        outer = csv.reader(f, delimiter=",", quotechar='"')
        # Default to an empty header so an empty file reaches the ValueError
        # below; a bare next() would leak StopIteration, which a generator
        # converts into an opaque RuntimeError.
        header = next(outer, [])
        cols = {h.strip().lower(): i for i, h in enumerate(header)}
        t_idx, d_idx = cols.get("time"), cols.get("data")
        if t_idx is None or d_idx is None:
            raise ValueError(f"Expected 'time' and 'data' in header, got: {header}")

        # A row must be long enough for BOTH columns; checking only the data
        # index would raise IndexError when 'time' comes after 'data'.
        min_len = max(t_idx, d_idx) + 1

        buf = []
        for row in outer:
            if len(row) < min_len:
                continue
            inner = row[d_idx]
            if not inner:
                continue
            for rec in _inner_records(row[t_idx], inner):
                buf.append(rec)
                if len(buf) >= batch_rows:
                    yield _pack(buf)
                    buf = []
        if buf:
            yield _pack(buf)

def carflow_flat_all(path):
    """Read every chunk from carflow_flat_iter(path) and return them as one DataFrame.

    The chunks are concatenated with a fresh index. When no chunk is produced,
    an empty frame with the columns time_utc, time_ams, id and traffic_level
    is returned instead.
    """
    frames = [chunk for chunk in carflow_flat_iter(path)]
    if not frames:
        return pd.DataFrame(columns=["time_utc","time_ams","id","traffic_level"])
    return pd.concat(frames, ignore_index=True)

In [None]:
# Flatten the TomTom export and print a few sanity checks on the result.
cf = carflow_flat_all("data/TomTom_data_20-24Aug2025.csv")
print(cf.shape)
print(cf[["time_utc","time_ams"]].head(10))
print("distinct times:", cf["time_utc"].nunique())
# The arrow was stored as mis-decoded UTF-8 bytes; restored to the intended character.
print("range UTC:", cf["time_utc"].min(), "→", cf["time_utc"].max())

# Show the first observations of one example id, in time order.
if not cf.empty:
    example_id = cf["id"].iloc[0]
    print(cf[cf["id"] == example_id].sort_values("time_utc").head(10))