In [3]:
import geopandas as gpd
import json
import pandas as pd
import numpy as np
from pathlib import Path
from shapely.geometry import box
from shapely.geometry import shape

In [4]:
BASE_DIR = Path.cwd()
if BASE_DIR.name.lower() == "notebooks":
    BASE_DIR = BASE_DIR.parent

DATA_RAW = BASE_DIR / "data_raw"
OUTPUTS_DIR = BASE_DIR / "outputs"

PEDESTRIAN_DIR = DATA_RAW / "pedestrian_proxy"

GRID_FILE = OUTPUTS_DIR / "grid_250m_bins_clean.gpkg"
CRS_PROJECTED = "EPSG:26917"

print("BASE_DIR:", BASE_DIR)
print("DATA_RAW exists:", DATA_RAW.exists())
print("PEDESTRIAN_DIR exists:", PEDESTRIAN_DIR.exists())
print("OUTPUTS_DIR exists:", OUTPUTS_DIR.exists())
print("GRID_FILE exists:", GRID_FILE.exists())
print("GRID_FILE:", GRID_FILE)

BASE_DIR: c:\Projects\Toronto_Waste_Analytics
DATA_RAW exists: True
PEDESTRIAN_DIR exists: True
OUTPUTS_DIR exists: True
GRID_FILE exists: True
GRID_FILE: c:\Projects\Toronto_Waste_Analytics\outputs\grid_250m_bins_clean.gpkg


In [5]:
ped_csv = PEDESTRIAN_DIR / "Pedestrian Network Data - 4326.csv"

print("File exists:", ped_csv.exists())
print("Path:", ped_csv)

File exists: True
Path: c:\Projects\Toronto_Waste_Analytics\data_raw\pedestrian_proxy\Pedestrian Network Data - 4326.csv


In [6]:
ped_df = pd.read_csv(ped_csv)

print("Number of rows:", len(ped_df))
print("Columns:")
ped_df.columns.tolist()

Number of rows: 87105
Columns:


['_id',
 'OBJECTID',
 'ROAD_TYPE',
 'SIDEWALK_CODE',
 'SIDEWALK_DESCRIPTION',
 'CROSSWALK',
 'CROSSWALK_TYPE',
 'PX',
 'PX_TYPE',
 'LENGTH',
 'geometry']

In [7]:
ped_df.head()

Unnamed: 0,_id,OBJECTID,ROAD_TYPE,SIDEWALK_CODE,SIDEWALK_DESCRIPTION,CROSSWALK,CROSSWALK_TYPE,PX,PX_TYPE,LENGTH,geometry
0,1,1,Local,7.0,Sidewalk on both sides,,,,,93.86768,"{""coordinates"": [[[-79.5639645086874, 43.73782..."
1,2,2,Collector,7.0,Sidewalk on both sides,,,,,32.546284,"{""coordinates"": [[[-79.567921505888, 43.636179..."
2,3,3,,2.0,Sidewalk on north side only,,,,,117.669206,"{""coordinates"": [[[-79.3779147846482, 43.67845..."
3,4,4,,7.0,Sidewalk on both sides,,,,,223.26971,"{""coordinates"": [[[-79.5185205814575, 43.70501..."
4,5,5,Local,7.0,Sidewalk on both sides,,,,,201.335648,"{""coordinates"": [[[-79.3012214002186, 43.77483..."


In [8]:
ped_df["geometry"] = ped_df["geometry"].apply(lambda x: shape(json.loads(x)))
ped_gdf = gpd.GeoDataFrame(ped_df, geometry="geometry", crs="EPSG:4326")
print("GeoDataFrame created")
print("CRS:", ped_gdf.crs)
print("Total segments:", len(ped_gdf))

GeoDataFrame created
CRS: EPSG:4326
Total segments: 87105


In [9]:
ped_gdf = ped_gdf.to_crs(CRS_PROJECTED)

print("Reprojected CRS:", ped_gdf.crs)

Reprojected CRS: EPSG:26917


In [10]:
grid = gpd.read_file(GRID_FILE, layer="grid_bins")

print("Grid loaded")
print("Grid CRS:", grid.crs)
print("Number of cells:", len(grid))

Grid loaded
Grid CRS: EPSG:26917
Number of cells: 25024


In [11]:
ped_joined = gpd.sjoin(
    ped_gdf[["_id", "LENGTH", "geometry"]],
    grid[["cell_id", "geometry"]],
    how="left",
    predicate="intersects"
)

print("Joined rows:", len(ped_joined))
print("Segments without cell:", ped_joined["cell_id"].isna().sum())

Joined rows: 127651
Segments without cell: 0


In [12]:
ped_joined = (
    ped_joined
    .sort_values(["_id", "cell_id"])
    .drop_duplicates(subset=["_id"], keep="first")
)

In [13]:
ped_length = (
    ped_joined
    .groupby("cell_id")["LENGTH"]
    .sum()
    .rename("pedestrian_length_m")
    .reset_index()
)

ped_length.head()

Unnamed: 0,cell_id,pedestrian_length_m
0,1032,438.31046
1,1167,435.363377
2,1168,312.686099
3,1169,431.569534
4,1302,593.594198


In [14]:
grid_ped = grid.merge(ped_length, on="cell_id", how="left")
grid_ped["pedestrian_length_m"] = grid_ped["pedestrian_length_m"].fillna(0)

grid_ped.head()

Unnamed: 0,cell_id,bins_total,bins_park_assets,bins_street_furniture,geometry,pedestrian_length_m
0,0,0,0,0,"POLYGON ((607837.395 4824768.166, 607837.395 4...",0.0
1,1,0,0,0,"POLYGON ((607837.395 4825018.166, 607837.395 4...",0.0
2,2,0,0,0,"POLYGON ((607837.395 4825268.166, 607837.395 4...",0.0
3,3,0,0,0,"POLYGON ((607837.395 4825518.166, 607837.395 4...",0.0
4,4,0,0,0,"POLYGON ((607837.395 4825768.166, 607837.395 4...",0.0


In [15]:
(grid_ped["pedestrian_length_m"] > 0).sum()

9107

In [16]:
grid_ped.sort_values("pedestrian_length_m", ascending=False)[
    ["cell_id", "pedestrian_length_m"]
].head(10)

Unnamed: 0,cell_id,pedestrian_length_m
14320,14320,4010.15754
9700,9700,3457.43675
10789,10789,3209.72346
3456,3456,3097.853586
13778,13778,2994.277318
9833,9833,2987.949203
9569,9569,2971.295533
14461,14461,2881.388203
5774,5774,2833.522851
12163,12163,2799.755589


In [17]:
grid_ped["pedestrian_length_m"].describe()

count    25024.000000
mean       320.807280
std        518.559569
min          0.000000
25%          0.000000
50%          0.000000
75%        602.100060
max       4010.157540
Name: pedestrian_length_m, dtype: float64

In [18]:
out_ped_grid = OUTPUTS_DIR / "grid_250m_with_pedestrian_proxy.gpkg"

grid_ped.to_file(out_ped_grid, layer="grid_pedestrian", driver="GPKG")

print("Saved:", out_ped_grid)

Saved: c:\Projects\Toronto_Waste_Analytics\outputs\grid_250m_with_pedestrian_proxy.gpkg


In [19]:
## Notebook Summary (Pedestrian Proxy)

### Purpose
# This notebook created a pedestrian activity proxy from the Pedestrian Network dataset and integrated it into the 250m analysis grid.

### Steps Completed
# - Loaded the pedestrian network dataset (EPSG:4326) and converted GeoJSON geometry into valid line geometries.
# - Reprojected the dataset to EPSG:26917 (meters).
# - Spatially joined pedestrian segments to the 250m grid and aggregated total pedestrian network length per cell.
# - Merged the resulting proxy into the grid as `pedestrian_length_m`.

### Validation
# A descriptive summary was generated to confirm the proxy has meaningful variation across cells, including zeros in areas without pedestrian infrastructure and higher values in dense urban zones.

### Output
# - `outputs/grid_250m_with_pedestrian_proxy.gpkg` (layer: `grid_pedestrian`)
# - (optional) `outputs/grid_250m_with_pedestrian_proxy.csv`