# Read Shortcut Data

Read shortcuts from:
1. **Parquet output** - Final deduplicated shortcuts
2. **DuckDB database** - All tables

In [1]:
import duckdb
import pandas as pd
from pathlib import Path

In [2]:
# Configuration
# DISTRICT selects the dataset; both paths below are derived from it.
DISTRICT = "Burnaby"  # or "All_Vancouver"
PARQUET_FILE = Path("../output") / f"{DISTRICT}_shortcuts"
DB_FILE = Path("../persist") / f"{DISTRICT}.db"

## 1. Read from Parquet

In [3]:
# Scan the Parquet output with DuckDB, then materialize it as a pandas DataFrame.
shortcuts_rel = duckdb.read_parquet(str(PARQUET_FILE))
shortcuts_df = shortcuts_rel.df()

print(f"Loaded {len(shortcuts_df):,} shortcuts")

# Last expression: rich preview of the first rows.
shortcuts_df.head()

Loaded 4,173,086 shortcuts


Unnamed: 0,from_edge,to_edge,cost,via_edge
0,5825,10410,78.275022,18248
1,5825,33272,74.904332,10504
2,5829,1348,48.450907,5397
3,5829,10678,40.860713,5819
4,5829,25200,62.901467,17322


In [4]:
# Quick summary of the loaded shortcuts: cost spread and number of distinct edges.
costs = shortcuts_df["cost"]
print(f"Cost range: {costs.min():.2f} to {costs.max():.2f}")

for edge_col in ("from_edge", "to_edge"):
    print(f"Unique {edge_col}: {shortcuts_df[edge_col].nunique():,}")

Cost range: 0.01 to 377.59
Unique from_edge: 35,217
Unique to_edge: 35,217


## 2. Read from DuckDB

In [5]:
# Open the persisted DuckDB database read-only so this notebook cannot modify it.
con = duckdb.connect(str(DB_FILE), read_only=True)

# List every table with its row count.
# Table names cannot be bound as query parameters, so each one is interpolated
# as a double-quoted identifier (embedded quotes doubled). This keeps the query
# valid for names that are reserved words or contain special characters.
for (table_name,) in con.execute("SHOW TABLES").fetchall():
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    row_count = con.execute(f"SELECT count(*) FROM {quoted_name}").fetchone()[0]
    print(f"{table_name}: {row_count:,} rows")

edges: 35,217 rows
elementary_shortcuts: 99,497 rows
forward_deactivated: 3,601,310 rows
shortcuts: 4,173,086 rows


In [6]:
# Edges table: preview the first five rows as a DataFrame.
(
    con
    .execute("SELECT * FROM edges LIMIT 5")
    .df()
)

Unnamed: 0,id,from_cell,to_cell,lca_res
0,31852,644733726876424710,644733726560578696,5
1,31853,644733726876424710,644733726875755904,8
2,31854,644733694717892692,644733694717909313,10
3,31855,644733694719239020,644733694718810467,8
4,31856,644733726528031339,644733726527402229,8


In [7]:
# Shortcuts table: preview the first five rows as a DataFrame.
(
    con
    .execute("SELECT * FROM shortcuts LIMIT 5")
    .df()
)

Unnamed: 0,from_edge,to_edge,cost,via_edge
0,30147,5236,17.51095,5256
1,30148,30041,17.71491,5290
2,30193,25123,4.2829,30190
3,30454,22217,23.365647,8120
4,30454,32694,23.19715,2185


In [8]:
# Cost statistics: min / mean / median / max over every row of `shortcuts`.
cost_stats_query = """
    SELECT MIN(cost), AVG(cost), MEDIAN(cost), MAX(cost)
    FROM shortcuts
"""
con.execute(cost_stats_query).df()

Unnamed: 0,"min(""cost"")","avg(""cost"")","median(""cost"")","max(""cost"")"
0,0.01396,48.829908,33.618793,377.594275


In [9]:
# Top connected edges: the ten `from_edge` values with the most shortcut rows.
top_from_edges_query = """
    SELECT from_edge, COUNT(*) as destinations
    FROM shortcuts GROUP BY from_edge
    ORDER BY destinations DESC LIMIT 10
"""
con.execute(top_from_edges_query).df()

Unnamed: 0,from_edge,destinations
0,16422,1805
1,23718,1805
2,18852,1801
3,18853,1801
4,18848,1801
5,19746,1801
6,18343,1798
7,16421,1798
8,3051,1798
9,3115,1798


In [10]:
# Close the read-only DuckDB connection opened in section 2 now that all queries are done.
con.close()