# Read Shortcut Data

Read shortcuts from:
1. **Parquet output** - Final deduplicated shortcuts
2. **DuckDB database** - All tables

In [1]:
import duckdb
import pandas as pd
from pathlib import Path

In [2]:
# Configuration: choose the district; both input locations are derived from it.
DISTRICT = "Somerset"  # or "All_Vancouver"

# Parquet output holding the final shortcuts for the chosen district.
PARQUET_FILE = Path("../output") / f"{DISTRICT}_shortcuts"
# Persisted DuckDB database for the chosen district.
DB_FILE = Path("../persist") / f"{DISTRICT}.db"

## 1. Read from Parquet

In [3]:
# Read the Parquet output through DuckDB, then materialise it as a pandas DataFrame.
shortcuts_rel = duckdb.read_parquet(str(PARQUET_FILE))
shortcuts_df = shortcuts_rel.df()

# Report how many shortcut rows were loaded, then preview the first few.
n_shortcuts = len(shortcuts_df)
print(f"Loaded {n_shortcuts:,} shortcuts")
shortcuts_df.head()

Loaded 481,812 shortcuts


Unnamed: 0,from_edge,to_edge,cost,via_edge,inside,cell
0,3566,410,23.910975,6100,1,0
1,3566,2780,20.249545,0,-2,0
2,3566,5217,11.842502,3569,1,0
3,3569,727,9.708133,440,1,608661712568057855
4,3572,5523,9.837917,3586,0,613165312189136895


In [5]:
# Short alias for the full shortcut table; the lookup cells below rely on it.
df = shortcuts_df

# Show the shortcut row(s) that go from edge 204 to edge 2656.
starts_at_204 = df['from_edge'] == 204
ends_at_2656 = df['to_edge'] == 2656
df[starts_at_204 & ends_at_2656]

Unnamed: 0,from_edge,to_edge,cost,via_edge,inside,cell
49966,204,2656,4.882264,777,-1,608661713071374335


In [6]:
# Show the shortcut row(s) from edge 204 to edge 777
# (777 is the via_edge recorded for the 204 -> 2656 shortcut in the output above).
df[df['from_edge'].eq(204) & df['to_edge'].eq(777)]

Unnamed: 0,from_edge,to_edge,cost,via_edge,inside,cell
343127,204,777,17.00338,2652,-1,608661713071374335


In [7]:
# Show the shortcut row(s) from edge 204 to edge 2652
# (2652 is the via_edge recorded for the 204 -> 777 shortcut in the output above).
df.query("from_edge == 204 and to_edge == 2652")

Unnamed: 0,from_edge,to_edge,cost,via_edge,inside,cell
34523,204,2652,5.56278,2656,-1,608661713071374335


In [15]:
# Summarise the loaded shortcuts: cost span, then distinct edge ids per endpoint column.
cost = shortcuts_df['cost']
print(f"Cost range: {cost.min():.2f} to {cost.max():.2f}")

for endpoint in ('from_edge', 'to_edge'):
    print(f"Unique {endpoint}: {shortcuts_df[endpoint].nunique():,}")

Cost range: 0.05 to 475.35
Unique from_edge: 6,378
Unique to_edge: 6,378


## 2. Read from DuckDB

In [16]:
# Open the persisted database read-only so inspection cannot modify it.
con = duckdb.connect(str(DB_FILE), read_only=True)

# List every table with its row count.
# Table names are interpolated into SQL, so quote them as identifiers
# (doubling any embedded double quote); otherwise a table named with a reserved
# word or special characters would make the count query fail.
for row in con.execute("SHOW TABLES").fetchall():
    table_name = row[0]
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    count = con.execute(f"SELECT count(*) FROM {quoted_name}").fetchone()[0]
    print(f"{table_name}: {count:,} rows")

edges: 6,378 rows
elementary_shortcuts: 17,680 rows
forward_deactivated: 413,952 rows
shortcuts: 481,812 rows


In [17]:
# Preview the first five rows of the edges table as a DataFrame.
edges_preview_sql = "SELECT * FROM edges LIMIT 5"
con.execute(edges_preview_sql).df()

Unnamed: 0,id,from_cell,to_cell,lca_res
0,0,645224977383611141,645224977384028320,8
1,1,645224977383611141,645224977383614665,10
2,2,645224977383611141,645224977384658840,8
3,3,645224977383614665,645224977383653531,9
4,4,645224977383614665,645224977383600429,10


In [18]:
# Preview the first five rows of the shortcuts table as a DataFrame.
shortcuts_preview_sql = "SELECT * FROM shortcuts LIMIT 5"
con.execute(shortcuts_preview_sql).df()

Unnamed: 0,from_edge,to_edge,cost,via_edge,inside,cell
0,4497,4521,4.8823,4495,-2,622706979823190015
1,5280,3127,6.302217,3130,0,618203380197883903
2,4371,4243,4.5327,4376,-2,618203380197097471
3,6064,6065,2.5934,6063,-2,622706979823648767
4,4323,4464,2.44225,4460,-1,622706979822960639


In [19]:
# Aggregate cost statistics (min, mean, median, max) over all rows of the shortcuts table.
cost_stats_sql = """
    SELECT MIN(cost), AVG(cost), MEDIAN(cost), MAX(cost)
    FROM shortcuts
"""
cost_stats = con.execute(cost_stats_sql)
cost_stats.df()

Unnamed: 0,"min(""cost"")","avg(""cost"")","median(""cost"")","max(""cost"")"
0,0.045675,40.170969,27.732806,475.348886


In [20]:
# The ten from_edge values with the most shortcut rows, i.e. the most destinations.
top_sources_sql = """
    SELECT from_edge, COUNT(*) as destinations
    FROM shortcuts GROUP BY from_edge
    ORDER BY destinations DESC LIMIT 10
"""
top_sources = con.execute(top_sources_sql)
top_sources.df()

Unnamed: 0,from_edge,destinations
0,2342,704
1,5659,704
2,1073,704
3,3051,704
4,2695,704
5,1068,703
6,1072,703
7,4022,700
8,2639,681
9,2398,681


In [21]:
# Close the read-only DuckDB connection opened at the start of section 2.
con.close()