# Algorithm Comparison Notebook

Compare the Python implementations of the C++ shortest-path algorithms:
- `query_classic`: bidirectional Dijkstra with inside filtering
- `query_pruned`: `query_classic` plus H3 resolution-based pruning
- `dijkstra_general`: standard Dijkstra (baseline)
- `expand_path`: expands a shortcut path into its base edges

In [2]:
import os
import sys

sys.path.insert(0, '.')

from cpp_algorithms import (
    load_data, 
    query_classic, 
    query_pruned, 
    dijkstra_general,
    expand_path,
    compare_algorithms,
    run_comparison
)

## 1. Load Data

In [3]:
# Input locations. The defaults are the original developer-machine paths; set
# SOMERSET_SHORTCUTS_PATH / SOMERSET_EDGES_PATH in the environment to run the
# notebook on another machine without editing this cell.
SHORTCUTS_PATH = os.environ.get(
    "SOMERSET_SHORTCUTS_PATH",
    "/home/kaveh/projects/road-to-shortcut-duckdb/output/Somerset_shortcuts",
)
EDGES_PATH = os.environ.get(
    "SOMERSET_EDGES_PATH",
    "/home/kaveh/projects/osm-to-road/data/output/Somerset/Somerset_driving_simplified_edges_with_h3.csv",
)

# Load once; every later cell reuses `data`.
print("Loading data...")
data = load_data(SHORTCUTS_PATH, EDGES_PATH)

# Size summary of what was loaded.
print(f"Loaded {len(data.shortcuts):,} shortcuts")
print(f"Loaded {len(data.edge_meta):,} edges")
print(f"Forward adj: {len(data.fwd_adj)} entries")
print(f"Backward adj: {len(data.bwd_adj)} entries")

Loading data...
Loaded 481,812 shortcuts
Loaded 6,378 edges
Forward adj: 6378 entries
Backward adj: 6378 entries


## 2. Single Example Test

In [3]:
# Run all three algorithms on one hand-picked pair and report each cost/path,
# followed by the pairwise agreement flags.
source, target = 5099, 2011
r = compare_algorithms(source, target, data)

print(f"Query: {source} -> {target}")

# Labels carry their own trailing padding so the "cost=" columns line up.
for label, cost_key, path_key in (
    ("Dijkstra:", "dijkstra_cost", "dijkstra_path"),
    ("Classic: ", "classic_cost", "classic_path"),
    ("Pruned:  ", "pruned_cost", "pruned_path"),
):
    print(f"  {label} cost={r[cost_key]:.4f}, path={r[path_key]}")

# One check mark / cross per pair of algorithms.
for label, match_key in (
    ("Dijkstra==Classic:", "dijkstra_classic_match"),
    ("Dijkstra==Pruned: ", "dijkstra_pruned_match"),
    ("Classic==Pruned:  ", "classic_pruned_match"),
):
    if r[match_key]:
        mark = '✓'
    else:
        mark = '✗'
    print(f"  {label} {mark}")

Query: 5099 -> 2011
  Dijkstra: cost=100.0369, path=[5099, 5064, 1078, 1028, 3333, 204, 3549, 2011]
  Classic:  cost=100.0369, path=[5099, 5064, 204, 3549, 2011]
  Pruned:   cost=116.5764, path=[5099, 5064, 3137, 1691, 3549, 2011]
  Dijkstra==Classic: ✓
  Dijkstra==Pruned:  ✗
  Classic==Pruned:   ✗


## 3. Run 100 Sample Comparison

In [4]:
# Compare the three algorithms over 100 samples and summarise agreement.
results = run_comparison(data, n_samples=100)

total = results['total']
print(f"Total samples: {total}")
for label, count_key in (
    ("Dijkstra == Classic:", "dijkstra_classic_matches"),
    ("Dijkstra == Pruned: ", "dijkstra_pruned_matches"),
    ("Classic == Pruned:  ", "classic_pruned_matches"),
):
    print(f"{label} {results[count_key]}/{total}")

# Show at most five disagreeing queries with the cost from each algorithm
# (D = Dijkstra, C = Classic, P = Pruned).
failures = results['failures']
if failures:
    print("\nFirst 5 failures:")
    for failure in failures[:5]:
        pair = f"{failure['source']}->{failure['target']}"
        costs = " ".join(
            f"{tag}={failure[cost_key]:.2f}"
            for tag, cost_key in (
                ("D", "dijkstra_cost"),
                ("C", "classic_cost"),
                ("P", "pruned_cost"),
            )
        )
        print(f"  {pair}: {costs}")

Total samples: 100
Dijkstra == Classic: 100/100
Dijkstra == Pruned:  58/100
Classic == Pruned:   58/100

First 5 failures:
  1726->4939: D=101.14 C=101.14 P=130.29
  5099->2011: D=100.04 C=100.04 P=116.58
  1559->2309: D=57.39 C=57.39 P=66.06
  1280->171: D=168.11 C=168.11 P=174.14
  2795->4355: D=28.99 C=28.99 P=30.14


## 4. Test expand_path

In [5]:
# Run the pruned query and show the shortcut-level path it returns.
result = query_pruned(5099, 2011, data)
shortcut_path = result.path
print(f"Shortcut path: {shortcut_path} ({len(shortcut_path)} nodes)")

# Expand the shortcut path to base edges; only the first 15 entries are
# printed, with an ellipsis when the expansion is longer than that.
expanded = expand_path(shortcut_path, data)
preview = expanded[:15]
if len(expanded) > 15:
    suffix = '...'
else:
    suffix = ''
print(f"Expanded path: {preview}{suffix} ({len(expanded)} nodes)")

Shortcut path: [5099, 5064, 3137, 1691, 3549, 2011] (6 nodes)
Expanded path: [5099, 5075, 5068, 5099, 5064, 5096, 5085, 5067, 5069, 5099, 5079, 5067, 5077, 5090, 5099]... (285 nodes)


## 5. Compare with C++ Server (Optional)

In [6]:
import requests

def get_cpp_result(source, target):
    """Ask the local C++ routing server for the route between two edges.

    POSTs ``source`` and ``target`` as ``source_edge`` / ``target_edge`` for
    the ``somerset`` dataset to ``http://localhost:8082/route_by_edge`` with a
    5 second timeout.

    Returns:
        ``(shortcut_path, distance)`` read from the ``route`` object of the
        JSON reply when the server answers with HTTP 200; a missing field
        falls back to ``[]`` / ``-1``.
        ``(None, -1)`` when the request fails, the status is not 200, or the
        reply is not JSON of the expected shape.
    """
    try:
        resp = requests.post(
            "http://localhost:8082/route_by_edge",
            json={"dataset": "somerset", "source_edge": source, "target_edge": target},
            timeout=5
        )
        if resp.status_code != 200:
            return None, -1
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: an unreachable server, a timeout or a non-JSON
        # body all mean "no C++ answer". Catching only these (instead of a
        # bare `except:`) lets KeyboardInterrupt and programming errors
        # surface instead of being silently discarded.
        return None, -1

    # A reply of an unexpected shape is treated as "no answer" as well.
    route = payload.get('route', {}) if isinstance(payload, dict) else None
    if not isinstance(route, dict):
        return None, -1
    return route.get('shortcut_path', []), route.get('distance', -1)

# Compare Python vs C++
source, target = 5099, 2011

py_result = dijkstra_general(source, target, data)
cpp_path, cpp_cost = get_cpp_result(source, target)

print(f"Query: {source} -> {target}")
print(f"  Python Dijkstra: cost={py_result.distance:.4f}")
if cpp_path is None:
    # get_cpp_result returns (None, -1) when it got no usable answer. Report
    # that explicitly rather than printing the -1 sentinel as a cost and
    # flagging a mismatch that is really just a missing server.
    print("  C++ Server:      unavailable (request failed or returned a non-200 status)")
    print("  Match: n/a")
else:
    print(f"  C++ Server:      cost={cpp_cost:.4f}")
    # Costs are floats from two implementations; allow a 0.01 difference.
    print(f"  Match: {'✓' if abs(py_result.distance - cpp_cost) < 0.01 else '✗'}")

Query: 5099 -> 2011
  Python Dijkstra: cost=100.0369
  C++ Server:      cost=100.0369
  Match: ✓


## 6. Manual Algorithm Debugging

In [7]:
# Pick any source/target to debug
source, target = 5099, 2011

# Run each algorithm separately so the individual results stay available
# for inspection after the cell has run.
r_dij = dijkstra_general(source, target, data)
r_cls = query_classic(source, target, data)
r_pru = query_pruned(source, target, data)

print(f"Query: {source} -> {target}")

# Same cost/path report for every algorithm, baseline first.
for heading, res in (
    ("Dijkstra (baseline)", r_dij),
    ("Classic", r_cls),
    ("Pruned", r_pru),
):
    print(f"\n{heading}:")
    print(f"  Cost: {res.distance:.4f}")
    print(f"  Path: {res.path}")

Query: 5099 -> 2011

Dijkstra (baseline):
  Cost: 100.0369
  Path: [5099, 5064, 1078, 1028, 3333, 204, 3549, 2011]

Classic:
  Cost: 100.0369
  Path: [5099, 5064, 204, 3549, 2011]

Pruned:
  Cost: 116.5764
  Path: [5099, 5064, 3137, 1691, 3549, 2011]


In [4]:
import importlib
import cpp_algorithms

# Pick up edits made to cpp_algorithms since the kernel first imported it.
# reload() does not rebind names that were pulled in earlier with
# `from cpp_algorithms import ...`, so every function called below is
# re-imported from the refreshed module; otherwise query_classic would still
# be the pre-reload version.
# NOTE(review): `data` was built before the reload and is reused as-is.
importlib.reload(cpp_algorithms)
from cpp_algorithms import compare_with_cpp, expand_path, query_classic

# Test single query vs C++
r = compare_with_cpp(5099, 2011, data)
print(f"Dijkstra: {r['dijkstra_cost']:.4f}")
print(f"C++:      {r['cpp_cost']:.4f}")
print(f"Match:    {'✓' if r['dijkstra_cpp_match'] else '✗'}")

# Test expand_path
result = query_classic(5099, 2011, data)
expanded = expand_path(result.path, data)
print(f"Expanded: {len(expanded)} edges")

Dijkstra: 100.0369
C++:      100.0369
Match:    ✓


KeyboardInterrupt: 

In [5]:
# Re-run the classic query for 5099 -> 2011 and keep the result in `result`
# so it can be inspected in the next cell.
result = query_classic(5099, 2011, data)


In [6]:
# Bare expression: the notebook displays the repr of `result`.
result

QueryResult(distance=100.03691711768761, path=[5099, 5064, 204, 3549, 2011], reachable=True)