# Algorithm Comparison Notebook

Compare Python implementations of the C++ shortest-path algorithms:
- `query_classic`: Bidirectional Dijkstra with inside filtering
- `query_pruned`: `query_classic` plus H3 resolution-based pruning
- `dijkstra_general`: Standard Dijkstra (baseline)
- `expand_path`: Expand shortcut path to base edges

In [2]:
import os
import sys
sys.path.insert(0, '.')

from cpp_algorithms import (
    load_data,
    query_classic,
    query_pruned,
    dijkstra_general,
    expand_path,
    compare_algorithms,
    run_comparison
)

## 1. Load Data

In [4]:
# Root of the h3-routing-platform checkout. The default is the location the
# notebook was originally run from; set the H3_ROUTING_ROOT environment
# variable to run it against a checkout somewhere else.
PROJECT_ROOT = os.environ.get("H3_ROUTING_ROOT", "/home/kaveh/projects/h3-routing-platform")

# Both inputs are resolved relative to PROJECT_ROOT.
SHORTCUTS_PATH = os.path.join(
    PROJECT_ROOT,
    "tools/shortcut-generator/output/Somerset_shortcuts",
)
EDGES_PATH = os.path.join(
    PROJECT_ROOT,
    "tools/osm-importer/data/output/Somerset/Somerset_driving_simplified_edges_with_h3.csv",
)

print("Loading data...")
data = load_data(SHORTCUTS_PATH, EDGES_PATH)

# Size summary of the loaded structures as a quick sanity check.
print(f"Loaded {len(data.shortcuts):,} shortcuts")
print(f"Loaded {len(data.edge_meta):,} edges")
print(f"Forward adj: {len(data.fwd_adj)} entries")
print(f"Backward adj: {len(data.bwd_adj)} entries")

Loading data...
Loaded 481,812 shortcuts
Loaded 6,378 edges
Forward adj: 6378 entries
Backward adj: 6378 entries


## 2. Single Example Test

In [5]:
source, target = 1169, 390
r = compare_algorithms(source, target, data)

print(f"Query: {source} -> {target}")

# Cost and path reported by each algorithm; labels are padded to line up.
for label, cost_key, path_key in [
    ("Dijkstra:", 'dijkstra_cost', 'dijkstra_path'),
    ("Classic: ", 'classic_cost', 'classic_path'),
    ("Pruned:  ", 'pruned_cost', 'pruned_path'),
]:
    print(f"  {label} cost={r[cost_key]:.4f}, path={r[path_key]}")

# Pairwise agreement flags taken from the comparison result.
for label, key in [
    ("Dijkstra==Classic:", 'dijkstra_classic_match'),
    ("Dijkstra==Pruned: ", 'dijkstra_pruned_match'),
    ("Classic==Pruned:  ", 'classic_pruned_match'),
]:
    mark = '✓' if r[key] else '✗'
    print(f"  {label} {mark}")

Query: 1169 -> 390
  Dijkstra: cost=75.7865, path=[1169, 2090, 2816, 390]
  Classic:  cost=75.7865, path=[1169, 2090, 2816, 390]
  Pruned:   cost=75.7865, path=[1169, 2090, 2816, 390]
  Dijkstra==Classic: ✓
  Dijkstra==Pruned:  ✓
  Classic==Pruned:   ✓


## 3. Run a 100-Sample Comparison

In [7]:
results = run_comparison(data, n_samples=100)

# Agreement counts, each reported out of the total number of samples.
total = results['total']
print(f"Total samples: {total}")
print(f"Dijkstra == Classic: {results['dijkstra_classic_matches']}/{total}")
print(f"Dijkstra == Pruned:  {results['dijkstra_pruned_matches']}/{total}")
print(f"Classic == Pruned:   {results['classic_pruned_matches']}/{total}")

# Show at most five failing queries with the cost each algorithm reported
# (D = Dijkstra, C = Classic, P = Pruned).
failures = results['failures']
if failures:
    print("\nFirst 5 failures:")
    for failure in failures[:5]:
        query = f"{failure['source']}->{failure['target']}"
        costs = (
            f"D={failure['dijkstra_cost']:.2f} "
            f"C={failure['classic_cost']:.2f} "
            f"P={failure['pruned_cost']:.2f}"
        )
        print(f"  {query}: {costs}")

Total samples: 100
Dijkstra == Classic: 100/100
Dijkstra == Pruned:  58/100
Classic == Pruned:   58/100

First 5 failures:
  5666->3036: D=35.65 C=35.65 P=45.15
  1705->5879: D=68.01 C=68.01 P=77.48
  5256->4210: D=146.68 C=146.68 P=181.43
  6239->823: D=138.30 C=138.30 P=147.86
  5016->6320: D=45.56 C=45.56 P=52.51


## 4. Test expand_path

In [8]:
# Run the pruned query to obtain a path made of shortcuts.
source, target = 1169, 390
result = query_pruned(source, target, data)

shortcut_path = result.path
print(f"Shortcut path: {shortcut_path} ({len(shortcut_path)} nodes)")

# Expand the shortcut path to base edges; print only the first 15 entries,
# followed by an ellipsis when the expansion is longer than that.
expanded = expand_path(shortcut_path, data)
preview_len = 15
suffix = '...' if len(expanded) > preview_len else ''
print(f"Expanded path: {expanded[:preview_len]}{suffix} ({len(expanded)} nodes)")

Shortcut path: [1169, 2090, 2816, 390] (4 nodes)
Expanded path: [1169, 2091, 1170, 2090, 2215, 3390, 3281, 3261, 3259, 2146, 1587, 892, 2816, 390] (14 nodes)


## 5. Compare with C++ Server (Optional)

In [None]:
import requests

def get_cpp_result(source, target, dataset="somerset",
                   url="http://localhost:8082/route_by_edge", timeout=5):
    """Ask the C++ routing server for the route between two edges.

    Sends a POST request with the dataset name and the source/target edge
    ids, then reads the ``route`` object of the JSON reply.

    Args:
        source: Value sent as ``source_edge``.
        target: Value sent as ``target_edge``.
        dataset: Value sent as ``dataset``.
        url: Endpoint the request is posted to.
        timeout: Request timeout passed to ``requests.post``.

    Returns:
        ``(shortcut_path, distance)`` from the reply's ``route`` object,
        defaulting to ``[]`` and ``-1`` for missing fields. ``(None, -1)``
        when the request fails, the status is not 200, or the body is not
        the expected JSON object; the reason is printed in that case.
    """
    payload = {"dataset": dataset, "source_edge": source, "target_edge": target}
    try:
        resp = requests.post(url, json=payload, timeout=timeout)
        if resp.status_code != 200:
            print(f"C++ server returned HTTP {resp.status_code} for {source} -> {target}")
            return None, -1
        body = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Only transport errors and undecodable JSON are treated as "no
        # result"; KeyboardInterrupt and programming errors propagate.
        print(f"C++ server query failed for {source} -> {target}: {exc}")
        return None, -1

    # Guard against replies that are valid JSON but not shaped as expected.
    route = body.get('route', {}) if isinstance(body, dict) else None
    if not isinstance(route, dict):
        print(f"C++ server sent an unexpected response for {source} -> {target}")
        return None, -1
    return route.get('shortcut_path', []), route.get('distance', -1)

# Run the same query through the Python baseline and the C++ server.
source, target = 19, 1177
py_result = dijkstra_general(source, target, data)
cpp_path, cpp_cost = get_cpp_result(source, target)

py_cost = py_result.distance
print(f"Query: {source} -> {target}")
print(f"  Python Dijkstra: cost={py_cost:.4f}")
print(f"  C++ Server:      cost={cpp_cost:.4f}")

# The two costs count as matching when they differ by less than 0.01.
costs_agree = abs(py_cost - cpp_cost) < 0.01
print(f"  Match: {'✓' if costs_agree else '✗'}")

Query: 19 -> 1177
  Python Dijkstra: cost=89.5449
  C++ Server:      cost=89.5449
  Match: ✓


## 6. Manual Algorithm Debugging

In [14]:
# Change this pair to debug a different query.
source, target = 19, 1177

# Run all three algorithms on the same query.
r_dij = dijkstra_general(source, target, data)
r_cls = query_classic(source, target, data)
r_pru = query_pruned(source, target, data)

print(f"Query: {source} -> {target}")

# Print cost and path for each algorithm under its own heading.
for heading, outcome in (
    ("\nDijkstra (baseline):", r_dij),
    ("\nClassic:", r_cls),
    ("\nPruned:", r_pru),
):
    print(heading)
    print(f"  Cost: {outcome.distance:.4f}")
    print(f"  Path: {outcome.path}")

Query: 19 -> 1177

Dijkstra (baseline):
  Cost: 89.5449
  Path: [19, 2262, 2265, 1115, 1177]

Classic:
  Cost: 89.5449
  Path: [19, 2262, 2265, 1177]

Pruned:
  Cost: 101.6656
  Path: [19, 21, 993, 1177]


In [17]:
import importlib
import cpp_algorithms

# Re-execute the module so edits to it take effect without a kernel restart.
importlib.reload(cpp_algorithms)
# reload() does not rebind names that were imported earlier with
# `from cpp_algorithms import ...`, so re-import every function this cell
# calls; otherwise query_classic would still be the pre-reload version.
from cpp_algorithms import compare_with_cpp, expand_path, query_classic

# Test single query vs C++
# NOTE: `source`, `target` and `data` come from earlier cells.
r = compare_with_cpp(source, target, data)
print(f"Dijkstra: {r['dijkstra_cost']:.4f}")
print(f"C++:      {r['cpp_cost']:.4f}")
print(f"Match:    {'✓' if r['dijkstra_cpp_match'] else '✗'}")

# Test expand_path
result = query_classic(source, target, data)
expanded = expand_path(result.path, data)
print(f"Expanded: {len(expanded)} edges")

Dijkstra: 89.5449
C++:      89.5449
Match:    ✓
Expanded: 9 edges


In [18]:
# Display the full `expanded` list; the cell that built it printed only its length.
expanded

[19, 2067, 2262, 2265, 2267, 1118, 1115, 1111, 1177]

In [5]:
# Ad-hoc classic query for one more source/target pair; `result` is displayed in the following cell.
result = query_classic(5099, 2011, data)


In [6]:
# Echo the object currently bound to `result` (its repr is the cell output).
result

QueryResult(distance=100.03691711768761, path=[5099, 5064, 204, 3549, 2011], reachable=True)