# Getting a feel for the data

In [None]:
import sys
import os
import sqlite3
import csv

In [None]:
# Import every CSV file in the data folder into a single SQLite database:
# one table per file (named after the file), every column stored as TEXT.
# NOTE: tables are created with IF NOT EXISTS and rows are appended, so
# re-running this cell against an existing database duplicates the rows.

# Path to Rejsekort CSV folder
csv_folder = r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data"

# Create SQLite database
db_path = os.path.join(csv_folder, "rejsekort_data.db")


def _quote_identifier(name):
    """Return name as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


con = sqlite3.connect(db_path)
try:
    cur = con.cursor()

    # Process all CSV files in the folder
    for file in os.listdir(csv_folder):
        if not file.endswith(".csv"):
            continue

        table_name = os.path.splitext(file)[0]  # Use filename as table name
        # Quoted so that file names containing spaces, dashes or a leading
        # digit still form a valid table name.
        quoted_table = _quote_identifier(table_name)
        file_path = os.path.join(csv_folder, file)

        # newline="" is what the csv module expects for its input files;
        # utf-8-sig drops a leading BOM so it does not become part of the
        # first column name.
        with open(file_path, "r", encoding="utf-8-sig", newline="") as fin:
            dr = csv.reader(fin)
            headers = next(dr, None)  # Read header row
            if not headers:
                continue  # Empty file: nothing to import

            # Create table if not exists
            columns = ", ".join(f"{_quote_identifier(col)} TEXT" for col in headers)
            cur.execute(f"CREATE TABLE IF NOT EXISTS {quoted_table} ({columns})")

            # Insert data, streaming rows straight from the reader instead of
            # materialising the whole file in memory first.
            placeholders = ", ".join("?" * len(headers))
            cur.executemany(f"INSERT INTO {quoted_table} VALUES ({placeholders})", dr)

    # Commit changes
    con.commit()
finally:
    # Always release the database file, also when an import fails half-way
    # (uncommitted inserts are then discarded).
    con.close()

print("GTFS data successfully imported into SQLite!")


In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from geopy.distance import geodesic
import pickle


# Load data
stop_times = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stop_times.txt",
    dtype={"trip_id": str, "arrival_time": str, "departure_time": str, "stop_id": str, "stop_sequence": int},
    low_memory=False
)

# Load stops. stop_id is forced to str so node IDs are guaranteed to have the
# same type as the string stop_ids in stop_times; with type inference they can
# be parsed as integers, and the edges would then create separate nodes that
# carry no coordinates.
stops = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stops.txt",
    dtype={"stop_id": str}
)

# Tip: to experiment on a small graph, restrict stop_times to a few trip_ids
# (e.g. stop_times["trip_id"].unique()[:50]) before building the edges.


def _gtfs_time_to_seconds(times):
    """Convert a Series of 'H:MM:SS' strings to seconds (float); NaN if malformed.

    Hours are not capped at 23, so times such as '25:30:00' are kept instead
    of being discarded as invalid clock times.
    """
    fields = times.str.extract(r"^\s*(\d+):(\d{2}):(\d{2})\s*$").astype(float)
    return fields[0] * 3600 + fields[1] * 60 + fields[2]


# Convert data types
stops["stop_lat"] = stops["stop_lat"].astype(float)
stops["stop_lon"] = stops["stop_lon"].astype(float)
stop_times["arrival_sec"] = _gtfs_time_to_seconds(stop_times["arrival_time"])
stop_times["departure_sec"] = _gtfs_time_to_seconds(stop_times["departure_time"])

# Create a directed graph
G = nx.DiGraph()

# Add stops as nodes
G.add_nodes_from(
    (stop_id, {"lat": lat, "lon": lon})
    for stop_id, lat, lon in zip(stops["stop_id"], stops["stop_lat"], stops["stop_lon"])
)

# Process edges: pair every stop_time with the next one of the same trip.
# The weight is the travel time in minutes (arrival at the next stop minus
# departure from this one). Pairs with an unknown or negative travel time are
# skipped. A DiGraph holds one edge per (u, v), so when several trips serve
# the same pair the last one processed determines the weight.
ordered = stop_times.sort_values(["trip_id", "stop_sequence"])
by_trip = ordered.groupby("trip_id")
next_stop = by_trip["stop_id"].shift(-1)
travel_min = (by_trip["arrival_sec"].shift(-1) - ordered["departure_sec"]) / 60
has_edge = next_stop.notna() & (travel_min >= 0)
G.add_weighted_edges_from(
    zip(ordered.loc[has_edge, "stop_id"], next_stop[has_edge], travel_min[has_edge])
)

# Identify transfer edges: at each stop, consecutive arrivals (ordered by
# arrival time) that belong to different trips and whose wait is below the
# transfer window.
# NOTE(review): both endpoints are the same stop_id, so these are self-loops
# and cannot shorten any path; modelling real transfers would need nodes per
# (stop, trip). Kept as-is to match the other graph builders in this notebook.
transfer_penalty = 5  # in minutes
max_transfer_wait = 30  # Max transfer window, in minutes
at_stop = stop_times.sort_values(["stop_id", "arrival_sec"])
by_stop = at_stop.groupby("stop_id")
wait_min = (by_stop["departure_sec"].shift(-1) - at_stop["arrival_sec"]) / 60
trip_changes = by_stop["trip_id"].shift(-1) != at_stop["trip_id"]
is_transfer = trip_changes & (wait_min >= 0) & (wait_min < max_transfer_wait)
transfers = pd.DataFrame(
    {"stop_id": at_stop["stop_id"], "weight": wait_min + transfer_penalty}
)[is_transfer]
# Only the last qualifying wait per stop survives on a single self-loop, so
# drop the earlier ones up front instead of overwriting the edge repeatedly.
transfers = transfers.drop_duplicates("stop_id", keep="last")
G.add_weighted_edges_from(
    zip(transfers["stop_id"], transfers["stop_id"], transfers["weight"])
)

# Compute pedestrian transfer edges (between stops within 200m of each other)
def haversine(lat1, lon1, lat2, lon2):
    """Return the distance in metres between two (lat, lon) points in degrees.

    Despite the name, the distance is computed with geopy's ``geodesic``.
    """
    return geodesic((lat1, lon1), (lat2, lon2)).meters


max_walk_m = 200  # If within 200m
walk_m_per_min = 1.5 * 60  # Walking speed ~1.5m/s
walk_penalty = 2  # Add small penalty (minutes)
# One degree of latitude is always longer than 110 km, so two stops further
# apart than this many degrees of latitude cannot be within max_walk_m.
max_lat_gap = max_walk_m / 110_000

# Sorting by latitude lets the inner loop stop as soon as the latitude gap
# alone rules out a match, instead of measuring every pair of stops.
# Stops without coordinates cannot be measured and are left out.
located = stops.dropna(subset=["stop_lat", "stop_lon"]).sort_values("stop_lat")
candidates = list(zip(located["stop_id"], located["stop_lat"], located["stop_lon"]))

for i, (id_a, lat_a, lon_a) in enumerate(candidates):
    for j in range(i + 1, len(candidates)):
        id_b, lat_b, lon_b = candidates[j]
        if lat_b - lat_a > max_lat_gap:
            break  # every later candidate is even further north
        if id_a == id_b:
            continue
        distance = haversine(lat_a, lon_a, lat_b, lon_b)
        if distance <= max_walk_m:
            weight = distance / walk_m_per_min + walk_penalty
            # Walking works both ways, so each pair is measured once and
            # added in both directions.
            G.add_edge(id_a, id_b, weight=weight)
            G.add_edge(id_b, id_a, weight=weight)

# Persist the finished graph for the later cells.
with open("cleaned_transport_graph.gpickle", "wb") as f:
    pickle.dump(G, f)


In [None]:
# Quick sanity check: graph size followed by a small sample of nodes and edges.
node_total = G.number_of_nodes()
edge_total = G.number_of_edges()
print(f"Number of nodes: {node_total}")
print(f"Number of edges: {edge_total}")

sample_nodes = list(G.nodes(data=True))[:5]
print("Nodes (first 5):", sample_nodes)
sample_edges = list(G.edges(data=True))[:5]
print("Edges (first 5):", sample_edges)


In [None]:
import folium
import pandas as pd
import networkx as nx

# Load stop names from stops.txt, ensuring stop_id is a string
stops_df = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stops.txt",
                       dtype={"stop_id": str}, usecols=["stop_id", "stop_name"])


def _strip_leading_zeros(stop_id):
    """Return stop_id as a string without leading zeros (an all-zero ID becomes "0")."""
    return str(stop_id).lstrip("0") or "0"


# Remove leading zeros from stop_id in stops_df. The same helper is used for
# the graph below so both sides are normalised identically; it also accepts
# IDs that are not purely numeric.
stops_df["stop_id"] = stops_df["stop_id"].map(_strip_leading_zeros, na_action="ignore")

# Convert to dictionary for fast lookup
stop_name_dict = stops_df.set_index("stop_id")["stop_name"].to_dict()

# Relabelling can merge several nodes into one (e.g. 123 and "000123").
# Remember the coordinates under the normalised ID first and re-apply them
# afterwards, so a merged node keeps them whichever duplicate's attributes
# end up on it.
node_coords = {
    _strip_leading_zeros(node): (data["lat"], data["lon"])
    for node, data in G.nodes(data=True)
    if "lat" in data and "lon" in data
}

# Ensure all stop_ids in the graph are also stripped of leading zeros
G = nx.relabel_nodes(G, _strip_leading_zeros)
for node, (lat, lon) in node_coords.items():
    G.nodes[node].update(lat=lat, lon=lon)

# DEBUG: Print first few stop IDs after removing leading zeros
print("Sample stop IDs from stops.txt after stripping zeros:", stops_df["stop_id"].head(10).tolist())
print("Sample keys in stop_name_dict:", list(stop_name_dict.keys())[:10])
print("Sample stop IDs in the graph:", list(G.nodes)[:10])

if not node_coords:
    raise ValueError("The graph has no nodes with coordinates to plot")

# Create a Folium map centered at the mean location of all stops that have
# coordinates (nodes without them must not dilute the average).
map_center = [sum(lat for lat, _ in node_coords.values()) / len(node_coords),
              sum(lon for _, lon in node_coords.values()) / len(node_coords)]
m = folium.Map(location=map_center, zoom_start=10)

# Add stop markers with names
for node in G.nodes:
    if node not in node_coords:
        continue  # nothing to place on the map
    lat, lon = node_coords[node]
    stop_name = stop_name_dict.get(str(node), "Unknown Stop")

    # DEBUG: Report stops that have no name in stops.txt
    if stop_name == "Unknown Stop":
        print(f"STOP MISMATCH: Node ID {node} not found in stop_name_dict")

    folium.CircleMarker(
        location=[lat, lon],
        radius=4,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.8,
        popup=f"<b>{stop_name}</b><br>Stop ID: {node}<br>Lat: {lat}, Lon: {lon}"
    ).add_to(m)

# Add edges (connections between stops)
for stop_a, stop_b, edge_data in G.edges(data=True):
    if stop_a not in node_coords or stop_b not in node_coords:
        continue  # an endpoint has no position, so the line cannot be drawn

    folium.PolyLine(
        [node_coords[stop_a], node_coords[stop_b]],
        color="red",
        weight=2,
        opacity=0.6,
        popup=f"Travel Time: {edge_data['weight']:.2f} min"
    ).add_to(m)

# Save as an HTML file for viewing
m.save("interactive_transport_map.html")

# Display in Jupyter Notebook (if applicable)
m


# Try with a bounding box

In [None]:
import folium
# Keep only the stops whose coordinates fall inside a rectangle around
# Zealand (bounds are inclusive).
lat_in_box = stops["stop_lat"].between(54.95, 56.2)
lon_in_box = stops["stop_lon"].between(11, 12.7)
zealand_stops = stops[lat_in_box & lon_in_box]

# Fixed map centre over Zealand
map_center = [55.5, 11.8]
m = folium.Map(location=map_center, zoom_start=9)

# One blue circle per stop; the popup shows its name, ID and position.
marker_style = {
    "radius": 4,
    "color": "blue",
    "fill": True,
    "fill_color": "blue",
    "fill_opacity": 0.8,
}
for _, row in zealand_stops.iterrows():
    popup_html = (
        f"<b>{row['stop_name']}</b><br>Stop ID: {row['stop_id']}"
        f"<br>Lat: {row['stop_lat']}, Lon: {row['stop_lon']}"
    )
    marker = folium.CircleMarker(
        location=[row["stop_lat"], row["stop_lon"]],
        popup=popup_html,
        **marker_style
    )
    marker.add_to(m)

# Write the map to disk so it can be opened in a browser
m.save("zealand_transport_map.html")

# Last expression of the cell: renders the map inline in a notebook
m

In [None]:
import pandas as pd
import numpy as np

# -----------------------------
# Read the full GTFS stops and stop_times tables
# -----------------------------
stops = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stops.txt",
    dtype={"stop_id": str},
)
stop_times = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stop_times.txt",
    dtype={"trip_id": str, "arrival_time": str, "departure_time": str, "stop_id": str, "stop_sequence": int},
    low_memory=False,
)

# -----------------------------
# Stops inside the Zealand bounding box (bounds inclusive)
# -----------------------------
lat_in_box = stops["stop_lat"].between(54.95, 56.2)
lon_in_box = stops["stop_lon"].between(11, 12.7)
zealand_stops = stops[lat_in_box & lon_in_box]

print(f"Zealand stops count: {len(zealand_stops)}")

# -----------------------------
# stop_times rows whose stop_id is one of those stops (exact match)
# -----------------------------
at_zealand_stop = stop_times["stop_id"].isin(zealand_stops["stop_id"])
zealand_trips = stop_times[at_zealand_stop]

print(f"Filtered stop_times shape: {zealand_trips.shape}")

# -----------------------------
# Write both subsets to CSV and show a preview of each
# -----------------------------
zealand_trips.to_csv("zealand_stop_times.csv", index=False)
print(zealand_trips.head())

# The stops are saved as well, for sanity checking
zealand_stops.to_csv("zealand_stops.csv", index=False)

print(f"Zealand stops saved. Total stops: {len(zealand_stops)}")
print(zealand_stops.head())



In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import pickle
from sklearn.neighbors import BallTree
from tqdm import tqdm  # For progress bars

# -----------------------------
# Helper Functions
# -----------------------------
def parse_time_to_seconds(t):
    """
    Parse an 'HH:MM:SS' time string into total seconds counted from 00:00:00.

    The hour field may exceed 23 (e.g. '25:30:00' gives 91800).

    Returns NaN when the value is not a string (e.g. a missing value), does
    not consist of exactly three integer fields, has a negative hour, or has
    minutes/seconds outside 0-59.
    """
    try:
        parts = t.split(":")
        if len(parts) != 3:
            return np.nan
        hours, minutes, seconds = map(int, parts)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a str; ValueError: non-integer field.
        return np.nan
    if hours < 0 or not 0 <= minutes < 60 or not 0 <= seconds < 60:
        return np.nan
    return hours * 3600 + minutes * 60 + seconds


def haversine(lat1, lon1, lat2, lon2, radius=6371000.0):
    """
    Calculate the great-circle distance between two points given in degrees.

    Works on scalars and on NumPy arrays (element-wise).

    radius is the sphere radius in meters. It defaults to the mean Earth
    radius, the same value the pedestrian search radius in this notebook is
    derived from, so distances and the search radius agree.

    Returns the distance in the unit of radius (meters by default).
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push a marginally outside [0, 1] (near-antipodal points),
    # which would turn the arcsin into NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * radius * np.arcsin(np.sqrt(a))

# -----------------------------
# Load Preprocessed Data (Zealand)
# -----------------------------
stops = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stops.csv",
                    dtype={"stop_id": str})
stop_times = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stop_times.csv",
                         dtype={"trip_id": str, "arrival_time": str, "departure_time": str, "stop_id": str, "stop_sequence": int},
                         low_memory=False)

print(f"Loaded {stops.shape[0]} Zealand stops and {stop_times.shape[0]} stop_times entries.")

# -----------------------------
# Preprocess Time Columns
# -----------------------------
# Seconds counted from 00:00:00; NaN where the time is missing or malformed.
stop_times["departure_sec"] = stop_times["departure_time"].apply(parse_time_to_seconds)
stop_times["arrival_sec"] = stop_times["arrival_time"].apply(parse_time_to_seconds)

# -----------------------------
# Build the Graph
# -----------------------------
G = nx.DiGraph()

# --- Add stops as nodes ---
for _, row in tqdm(stops.iterrows(), total=stops.shape[0], desc="Adding nodes"):
    G.add_node(row["stop_id"], lat=row["stop_lat"], lon=row["stop_lon"])
print(f"Added {len(G.nodes)} nodes to the graph.")

# --- Create Trip Edges ---
# Pair every stop_time with the next row of the same trip; the weight is the
# travel time in minutes (next arrival minus this departure).
# NOTE(review): if stop_times was cut down to a region beforehand, a trip that
# leaves and re-enters the region gets an edge across the gap - confirm
# whether that matters for the analysis.
stop_times.sort_values(by=["trip_id", "stop_sequence"], inplace=True)
stop_times["next_stop_id"] = stop_times.groupby("trip_id")["stop_id"].shift(-1)
stop_times["next_arrival_sec"] = stop_times.groupby("trip_id")["arrival_sec"].shift(-1)
stop_times["travel_time"] = (stop_times["next_arrival_sec"] - stop_times["departure_sec"]) / 60

trip_edges_df = stop_times.dropna(subset=["next_stop_id", "travel_time"])
trip_edges_df = trip_edges_df[trip_edges_df["travel_time"] >= 0]

# Each edge is a (u, v, attribute-dict) triple with the travel time stored
# under 'weight'. add_edges_from accepts that shape, whereas
# add_weighted_edges_from only takes plain (u, v, weight) triples and cannot
# carry the extra attributes. A DiGraph holds one edge per (u, v), so when
# several trips serve the same pair the last row determines the attributes.
trip_edges = [
    # Annotate as a bus route (trip) edge with blue color.
    (u, v, {'weight': travel_time, 'edge_type': 'trip', 'color': 'blue', 'label': f"{u} → {v}"})
    for u, v, travel_time in zip(
        trip_edges_df["stop_id"], trip_edges_df["next_stop_id"], trip_edges_df["travel_time"]
    )
]
G.add_edges_from(trip_edges)
print(f"Added {len(trip_edges)} trip edges to the graph.")

# --- Create Transfer Edges ---
# At each stop, consecutive arrivals (ordered by arrival time) that belong to
# different trips and are less than 30 minutes apart count as a transfer.
# NOTE(review): the edge runs from the stop to itself, so it is a self-loop
# and cannot shorten any path.
transfer_penalty = 5  # minutes
at_stop = stop_times.sort_values(["stop_id", "arrival_sec"])
per_stop = at_stop.groupby("stop_id")
wait_min = (per_stop["departure_sec"].shift(-1) - at_stop["arrival_sec"]) / 60
trip_changes = per_stop["trip_id"].shift(-1) != at_stop["trip_id"]
is_transfer = trip_changes & (wait_min >= 0) & (wait_min < 30)
# Only the last qualifying wait per stop ends up on its single self-loop, so
# keep just that one instead of overwriting the edge repeatedly.
transfer_waits = (
    at_stop.loc[is_transfer, ["stop_id"]]
    .assign(wait_min=wait_min[is_transfer])
    .drop_duplicates("stop_id", keep="last")
)
for stop_id, wait_time in tqdm(zip(transfer_waits["stop_id"], transfer_waits["wait_min"]),
                               total=len(transfer_waits), desc="Adding transfer edges"):
    # Annotate transfer edges with a red color.
    G.add_edge(stop_id, stop_id, weight=wait_time + transfer_penalty,
               edge_type='transfer', color='red', label=f"{stop_id} → {stop_id}")
print("Transfer edges added based on waiting times.")

# --- Create Pedestrian Transfer Edges ---
coords = np.radians(stops[["stop_lat", "stop_lon"]].values)
tree = BallTree(coords, metric="haversine")
radius = 200 / 6371000  # 200m radius in radians

indices = tree.query_radius(coords, r=radius)

# Plain arrays avoid a DataFrame row lookup for every neighbour pair.
stop_ids = stops["stop_id"].to_numpy()
stop_lats = stops["stop_lat"].to_numpy()
stop_lons = stops["stop_lon"].to_numpy()

pedestrian_edges = 0
for i, neighbors in tqdm(enumerate(indices), total=len(indices), desc="Adding pedestrian edges"):
    current_stop_id = stop_ids[i]
    for j in neighbors:
        if i == j:
            continue  # Skip self
        neighbor_stop_id = stop_ids[j]
        distance = haversine(stop_lats[i], stop_lons[i], stop_lats[j], stop_lons[j])
        walk_time = distance / 90  # walking speed ~90m/min
        # Annotate pedestrian edges with a green color; 2 minutes are added
        # on top of the walking time.
        G.add_edge(current_stop_id, neighbor_stop_id, weight=walk_time + 2,
                   edge_type='pedestrian', color='green', label=f"{current_stop_id} → {neighbor_stop_id}")
        pedestrian_edges += 1
print(f"Added {pedestrian_edges} pedestrian transfer edges.")

# -----------------------------
# Save the Graph
# -----------------------------
with open("zealand_cleaned_transport_graph.gpickle", "wb") as f:
    pickle.dump(G, f)
print(f"Graph saved with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


In [None]:
import folium
import pandas as pd
import networkx as nx
import pickle

# -----------------------------
# Read the pickled graph and the stop names
# -----------------------------
with open("zealand_cleaned_transport_graph.gpickle", "rb") as f:
    G = pickle.load(f)

# Only the ID and name columns are needed for the popups
stops_df = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stops.csv",
    dtype={"stop_id": str},
    usecols=["stop_id", "stop_name"]
)

# stop_id -> stop_name lookup table
stop_name_dict = dict(zip(stops_df["stop_id"], stops_df["stop_name"]))

# -----------------------------
# Base map, centred on the average stop position
# -----------------------------
lat_by_node = nx.get_node_attributes(G, "lat")
lon_by_node = nx.get_node_attributes(G, "lon")
node_count = len(G.nodes)
map_center = [
    sum(lat_by_node.values()) / node_count,
    sum(lon_by_node.values()) / node_count
]
m = folium.Map(location=map_center, zoom_start=10, control_scale=True)

# -----------------------------
# One blue circle per stop
# -----------------------------
for node, data in G.nodes(data=True):
    stop_name = stop_name_dict.get(str(node), "Unknown Stop")
    popup_html = f"<b>{stop_name}</b><br>Stop ID: {node}<br>Lat: {data['lat']}, Lon: {data['lon']}"

    marker = folium.CircleMarker(
        location=[data["lat"], data["lon"]],
        radius=4,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.8,
        popup=popup_html
    )
    marker.add_to(m)

# -----------------------------
# One red line per edge, labelled with its weight
# -----------------------------
for stop_a, stop_b, edge_data in G.edges(data=True):
    start, end = G.nodes[stop_a], G.nodes[stop_b]
    lat_a, lon_a = start["lat"], start["lon"]
    lat_b, lon_b = end["lat"], end["lon"]

    line = folium.PolyLine(
        [(lat_a, lon_a), (lat_b, lon_b)],
        color="red",
        weight=2,
        opacity=0.6,
        popup=f"Travel Time: {edge_data['weight']:.2f} min"
    )
    line.add_to(m)

# -----------------------------
# Write the HTML file and render inline
# -----------------------------
m.save("interactive_transport_map.html")
m  # Last expression of the cell: shows the map in a notebook


# With new colours?

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import pickle
from sklearn.neighbors import BallTree
from tqdm import tqdm  # For progress bars

# -----------------------------
# Helper Functions
# -----------------------------
def parse_time_to_seconds(t):
    """
    Parse an 'HH:MM:SS' time string into total seconds counted from 00:00:00.

    The hour field may exceed 23 (e.g. '25:30:00' gives 91800).

    Returns NaN when the value is not a string (e.g. a missing value), does
    not consist of exactly three integer fields, has a negative hour, or has
    minutes/seconds outside 0-59.
    """
    try:
        parts = t.split(":")
        if len(parts) != 3:
            return np.nan
        hours, minutes, seconds = map(int, parts)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a str; ValueError: non-integer field.
        return np.nan
    if hours < 0 or not 0 <= minutes < 60 or not 0 <= seconds < 60:
        return np.nan
    return hours * 3600 + minutes * 60 + seconds

def haversine(lat1, lon1, lat2, lon2, radius=6371000.0):
    """
    Calculate the great-circle distance between two points given in degrees.

    Works on scalars and on NumPy arrays (element-wise).

    radius is the sphere radius in meters. It defaults to the mean Earth
    radius, the same value the pedestrian search radius in this notebook is
    derived from, so distances and the search radius agree.

    Returns the distance in the unit of radius (meters by default).
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push a marginally outside [0, 1] (near-antipodal points),
    # which would turn the arcsin into NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * radius * np.arcsin(np.sqrt(a))

# -----------------------------
# Load Preprocessed Data (Zealand)
# -----------------------------
stops = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stops.csv",
                    dtype={"stop_id": str})
stop_times = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stop_times.csv",
                         dtype={"trip_id": str, "arrival_time": str, "departure_time": str, "stop_id": str, "stop_sequence": int},
                         low_memory=False)

print(f"Loaded {stops.shape[0]} Zealand stops and {stop_times.shape[0]} stop_times entries.")

# -----------------------------
# Preprocess Time Columns
# -----------------------------
# Seconds counted from 00:00:00; NaN where the time is missing or malformed.
stop_times["departure_sec"] = stop_times["departure_time"].apply(parse_time_to_seconds)
stop_times["arrival_sec"] = stop_times["arrival_time"].apply(parse_time_to_seconds)

# -----------------------------
# Build the Graph
# -----------------------------
G = nx.DiGraph()

# --- Add stops as nodes ---
for _, row in tqdm(stops.iterrows(), total=stops.shape[0], desc="Adding nodes"):
    G.add_node(row["stop_id"], lat=row["stop_lat"], lon=row["stop_lon"])
print(f"Added {len(G.nodes)} nodes to the graph.")

# --- Create Transport (Trip) Edges ---
# Pair every stop_time with the next row of the same trip; the weight is the
# travel time in minutes (next arrival minus this departure).
# NOTE(review): if stop_times was cut down to a region beforehand, a trip that
# leaves and re-enters the region gets an edge across the gap - confirm
# whether that matters for the analysis.
stop_times.sort_values(by=["trip_id", "stop_sequence"], inplace=True)
stop_times["next_stop_id"] = stop_times.groupby("trip_id")["stop_id"].shift(-1)
stop_times["next_arrival_sec"] = stop_times.groupby("trip_id")["arrival_sec"].shift(-1)
stop_times["travel_time"] = (stop_times["next_arrival_sec"] - stop_times["departure_sec"]) / 60

trip_edges_df = stop_times.dropna(subset=["next_stop_id", "travel_time"])
trip_edges_df = trip_edges_df[trip_edges_df["travel_time"] >= 0]

# Each edge is a (u, v, attribute-dict) triple, which is the shape
# add_edges_from accepts; the travel time is stored under 'weight'.
# A DiGraph holds one edge per (u, v), so when several trips serve the same
# pair the last row determines the attributes.
trip_edges = [
    # Annotate as a generic transport edge with blue color.
    (u, v, {
        'weight': travel_time,
        'edge_type': 'transport',
        'color': 'blue',
        'label': f"{u} → {v}"
    })
    for u, v, travel_time in zip(
        trip_edges_df["stop_id"], trip_edges_df["next_stop_id"], trip_edges_df["travel_time"]
    )
]
G.add_edges_from(trip_edges)
print(f"Added {len(trip_edges)} transport edges to the graph.")

# --- Create Transfer Edges ---
# At each stop, consecutive arrivals (ordered by arrival time) that belong to
# different trips and are less than 30 minutes apart count as a transfer.
# NOTE(review): the edge runs from the stop to itself, so it is a self-loop
# and cannot shorten any path.
transfer_penalty = 5  # minutes
at_stop = stop_times.sort_values(["stop_id", "arrival_sec"])
per_stop = at_stop.groupby("stop_id")
wait_min = (per_stop["departure_sec"].shift(-1) - at_stop["arrival_sec"]) / 60
trip_changes = per_stop["trip_id"].shift(-1) != at_stop["trip_id"]
is_transfer = trip_changes & (wait_min >= 0) & (wait_min < 30)
# Only the last qualifying wait per stop ends up on its single self-loop, so
# keep just that one instead of overwriting the edge repeatedly.
transfer_waits = (
    at_stop.loc[is_transfer, ["stop_id"]]
    .assign(wait_min=wait_min[is_transfer])
    .drop_duplicates("stop_id", keep="last")
)
for stop_id, wait_time in tqdm(zip(transfer_waits["stop_id"], transfer_waits["wait_min"]),
                               total=len(transfer_waits), desc="Adding transfer edges"):
    G.add_edge(stop_id, stop_id, weight=wait_time + transfer_penalty,
               edge_type='transfer', color='red', label=f"{stop_id} → {stop_id}")
print("Transfer edges added based on waiting times.")

# --- Create Pedestrian Transfer Edges ---
coords = np.radians(stops[["stop_lat", "stop_lon"]].values)
tree = BallTree(coords, metric="haversine")
radius = 200 / 6371000  # 200m radius in radians

indices = tree.query_radius(coords, r=radius)

# Plain arrays avoid a DataFrame row lookup for every neighbour pair.
stop_ids = stops["stop_id"].to_numpy()
stop_lats = stops["stop_lat"].to_numpy()
stop_lons = stops["stop_lon"].to_numpy()

pedestrian_edges = 0
for i, neighbors in tqdm(enumerate(indices), total=len(indices), desc="Adding pedestrian edges"):
    current_stop_id = stop_ids[i]
    for j in neighbors:
        if i == j:
            continue  # Skip self
        neighbor_stop_id = stop_ids[j]
        distance = haversine(stop_lats[i], stop_lons[i], stop_lats[j], stop_lons[j])
        walk_time = distance / 90  # walking speed ~90m/min
        # 2 minutes are added on top of the walking time.
        G.add_edge(current_stop_id, neighbor_stop_id, weight=walk_time + 2,
                   edge_type='pedestrian', color='green', label=f"{current_stop_id} → {neighbor_stop_id}")
        pedestrian_edges += 1
print(f"Added {pedestrian_edges} pedestrian transfer edges.")

# -----------------------------
# Save the Graph
# -----------------------------
with open("zealand_cleaned_transport_graph.gpickle", "wb") as f:
    pickle.dump(G, f)
print(f"Graph saved with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


In [None]:
import folium
import pandas as pd
import networkx as nx
import pickle

# -----------------------------
# Read the pickled graph and the stop names
# -----------------------------
with open("zealand_cleaned_transport_graph.gpickle", "rb") as f:
    G = pickle.load(f)

stops_df = pd.read_csv(
    r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Notebooks\zealand_stops.csv",
    dtype={"stop_id": str},
    usecols=["stop_id", "stop_name"]
)
# stop_id -> stop_name lookup table
stop_name_dict = dict(zip(stops_df["stop_id"], stops_df["stop_name"]))

# -----------------------------
# Base map, centred on the average stop position
# -----------------------------
node_attrs = nx.get_node_attributes(G, "lat")
node_lons = nx.get_node_attributes(G, "lon")
node_count = len(G.nodes)
map_center = [
    sum(node_attrs.values()) / node_count,
    sum(node_lons.values()) / node_count
]
m = folium.Map(location=map_center, zoom_start=10, control_scale=True)

# -----------------------------
# One blue circle per stop
# -----------------------------
for node, data in G.nodes(data=True):
    stop_name = stop_name_dict.get(str(node), "Unknown Stop")
    popup_html = f"<b>{stop_name}</b><br>Stop ID: {node}<br>Lat: {data['lat']}, Lon: {data['lon']}"
    marker = folium.CircleMarker(
        location=[data["lat"], data["lon"]],
        radius=4,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.8,
        popup=popup_html
    )
    marker.add_to(m)

# -----------------------------
# One line per edge, styled from the edge's own attributes
# -----------------------------
for stop_a, stop_b, edge_data in G.edges(data=True):
    start, end = G.nodes[stop_a], G.nodes[stop_b]
    lat_a, lon_a = start["lat"], start["lon"]
    lat_b, lon_b = end["lat"], end["lon"]

    # Fall back to neutral values when an edge carries no styling attributes.
    edge_color = edge_data.get("color", "black")
    edge_type = edge_data.get("edge_type", "unknown")
    edge_label = edge_data.get("label", f"{stop_a} → {stop_b}")

    popup_text = "<br>".join([
        f"<b>Edge: {edge_label}</b>",
        f"Type: {edge_type}",
        f"Travel Time: {edge_data['weight']:.2f} min",
    ])

    line = folium.PolyLine(
        [(lat_a, lon_a), (lat_b, lon_b)],
        color=edge_color,
        weight=2,
        opacity=0.6,
        popup=popup_text
    )
    line.add_to(m)

# -----------------------------
# Write the HTML file and render inline
# -----------------------------
m.save("interactive_transport_map2.html")
m  # Last expression of the cell: shows the map in a notebook


In [None]:
import networkx as nx
import pickle
import pandas as pd

def get_shortest_path(G, origin, destination):
    """
    Find the cheapest path between origin and destination using Dijkstra's algorithm.

    The graph must have an edge attribute 'weight'; the cost of a path is the
    sum of the weights along it.

    Returns a (path, cost) tuple, where path is the list of nodes from origin
    to destination. When there is no path, or either node is not in the
    graph, a message is printed and (None, None) is returned.
    """
    try:
        # A single search yields both the cost and the path.
        cost, path = nx.single_source_dijkstra(G, origin, target=destination, weight='weight')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        print("No path found between {} and {}".format(origin, destination))
        return None, None
    return path, cost

# Example usage: find the cheapest route between two stops and print it,
# first as stop IDs and then as stop names.
if __name__ == "__main__":
    # Update the path to your gpickle file accordingly.
    # NOTE: unpickling runs code embedded in the file, so only load graphs
    # that were written by this notebook.
    with open("zealand_cleaned_transport_graph.gpickle", "rb") as f:
        G = pickle.load(f)

    # Example origin and destination stop IDs
    origin = "000008600650"
    destination = "000008603305"

    path, cost = get_shortest_path(G, origin, destination)

    # Look-up table from stop_id to stop_name, built from stops.txt
    stops = pd.read_csv(r"C:\Users\Chris\Desktop\DTU\8. Semester\Bachelorprojekt\Data\GTFS_20230925\stops.txt",
                        dtype={"stop_id": str})
    stop_name_dict = stops.set_index("stop_id")["stop_name"].to_dict()

    # get_shortest_path returns (None, None) when no route exists, so only
    # report and translate the path when there is one.
    if path:
        print("Shortest path:", path)
        print("Total cost:", cost)

        # Convert IDs to names
        path_names = [stop_name_dict.get(node, "Unknown Stop") for node in path]
        print("Shortest path (stop names):", path_names)

