In [115]:
import geopandas as gpd
import json
import pandas as pd
import numpy as np
from pathlib import Path
from shapely.geometry import box
from shapely.geometry import shape

In [116]:
BASE_DIR = Path.cwd()
if BASE_DIR.name.lower() == "notebooks":
    BASE_DIR = BASE_DIR.parent

DATA_RAW = BASE_DIR / "data_raw"
DATA_CLEAN = BASE_DIR / "data_clean"
OUTPUTS_DIR = BASE_DIR / "outputs"

DATA_CLEAN.mkdir(exist_ok=True)
OUTPUTS_DIR.mkdir(exist_ok=True)

CRS_PROJECTED = "EPSG:26917"

print("BASE_DIR:", BASE_DIR)
print("DATA_RAW exists:", DATA_RAW.exists())
print("DATA_CLEAN exists:", DATA_CLEAN.exists())
print("OUTPUTS_DIR exists:", OUTPUTS_DIR.exists())

BASE_DIR: c:\Projects\Toronto_Waste_Analytics
DATA_RAW exists: True
DATA_CLEAN exists: True
OUTPUTS_DIR exists: True


In [117]:
GRID_PED_FILE = OUTPUTS_DIR / "grid_250m_with_pedestrian_proxy.gpkg"

grid_base = gpd.read_file(GRID_PED_FILE, layer="grid_pedestrian")
print("Grid loaded")
print("Grid CRS:", grid_base.crs)
print("Cells:", len(grid_base))

Grid loaded
Grid CRS: EPSG:26917
Cells: 25024


In [118]:
stops_joined = gpd.sjoin(
    gdf_stops[["stop_id", "geometry"]], # type: ignore
    grid_base[["cell_id", "geometry"]],
    how="left",
    predicate="within"
)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:26917

  stops_joined = gpd.sjoin(


In [119]:
WASTE_DIR = DATA_RAW / "waste_bins"

park_bins_path = (
    WASTE_DIR
    / "Solid-waste-in-park-assets-wgs84"
    / "SWMS_PARK_BIN_WGS84.shp"
)

street_bins_path = (
    WASTE_DIR
    / "Street furniture-Litter receptacle data - 4326.geojson"
)

print("park_bins_path exists:", park_bins_path.exists())
print("street_bins_path exists:", street_bins_path.exists())

park_bins_path exists: True
street_bins_path exists: True


In [120]:
TTC_DIR = Path(r"C:\Projects\Toronto_Waste_Analytics\data_raw\transit_points\TTC Routes and Schedules Data")

print("TTC_DIR exists:", TTC_DIR.exists())
print("Archivos:", sorted(p.name for p in TTC_DIR.glob("*")))

TTC_DIR exists: True
Archivos: ['agency.txt', 'calendar.txt', 'calendar_dates.txt', 'routes.txt', 'shapes.txt', 'stop_times.txt', 'stops.txt', 'trips.txt']


In [121]:
stops_file = TTC_DIR / "stops.txt"
print("stops_file:", stops_file)
print("stops.txt exists:", stops_file.exists())

stops = pd.read_csv(stops_file)
print(stops.shape)
print(stops.columns.tolist())
stops.head()

stops_file: C:\Projects\Toronto_Waste_Analytics\data_raw\transit_points\TTC Routes and Schedules Data\stops.txt
stops.txt exists: True
(9322, 12)
['stop_id', 'stop_code', 'stop_name', 'stop_desc', 'stop_lat', 'stop_lon', 'zone_id', 'stop_url', 'location_type', 'parent_station', 'stop_timezone', 'wheelchair_boarding']


Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,662,662,Danforth Rd at Kennedy Rd,,43.714379,-79.260939,,,,,,1
1,929,929,Davenport Rd at Bedford Rd,,43.674448,-79.399659,,,,,,1
2,940,940,Davenport Rd at Dupont St,,43.675511,-79.401938,,,,,,2
3,1871,1871,Davisville Ave at Cleveland St,,43.702088,-79.378112,,,,,,1
4,11700,11700,Disco Rd at Attwell Dr,,43.701362,-79.594843,,,,,,1


In [122]:
stops_df = pd.read_csv(TTC_DIR / "stops.txt")

print("Rows:", len(stops_df))
stops_df.head()

Rows: 9322


Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,662,662,Danforth Rd at Kennedy Rd,,43.714379,-79.260939,,,,,,1
1,929,929,Davenport Rd at Bedford Rd,,43.674448,-79.399659,,,,,,1
2,940,940,Davenport Rd at Dupont St,,43.675511,-79.401938,,,,,,2
3,1871,1871,Davisville Ave at Cleveland St,,43.702088,-79.378112,,,,,,1
4,11700,11700,Disco Rd at Attwell Dr,,43.701362,-79.594843,,,,,,1


In [123]:
stops_df[["stop_id", "stop_name", "stop_lat", "stop_lon"]].head()

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,662,Danforth Rd at Kennedy Rd,43.714379,-79.260939
1,929,Davenport Rd at Bedford Rd,43.674448,-79.399659
2,940,Davenport Rd at Dupont St,43.675511,-79.401938
3,1871,Davisville Ave at Cleveland St,43.702088,-79.378112
4,11700,Disco Rd at Attwell Dr,43.701362,-79.594843


In [124]:
stops_gdf = gpd.GeoDataFrame(
    stops_df.copy(),
    geometry=gpd.points_from_xy(stops_df["stop_lon"], stops_df["stop_lat"]),
    crs="EPSG:4326"
)

print("Stops GDF created")
print("CRS:", stops_gdf.crs)
print("Total stops:", len(stops_gdf))

Stops GDF created
CRS: EPSG:4326
Total stops: 9322


In [125]:
stops_clean = stops.dropna(subset=["stop_lat","stop_lon"]).copy()

gdf_stops = gpd.GeoDataFrame(
    stops_clean,
    geometry=gpd.points_from_xy(stops_clean["stop_lon"], stops_clean["stop_lat"]),
    crs="EPSG:4326"
)

In [126]:
gdf_stops = gdf_stops.to_crs(CRS_PROJECTED)
print("Reprojected CRS:", gdf_stops.crs)

Reprojected CRS: EPSG:26917


In [127]:
grid_base = gpd.read_file(GRID_PED_FILE, layer="grid_pedestrian")
print("Grid loaded")
print("Grid CRS:", grid_base.crs)
print("Cells:", len(grid_base))

Grid loaded
Grid CRS: EPSG:26917
Cells: 25024


In [128]:
stops_joined = gpd.sjoin(
    stops_gdf[["stop_id", "geometry"]],
    grid_base[["cell_id", "geometry"]],
    how="left",
    predicate="within"
)

print("Joined rows:", len(stops_joined))
print("Stops without cell:", stops_joined["cell_id"].isna().sum())

Joined rows: 9322
Stops without cell: 9322


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:26917

  stops_joined = gpd.sjoin(


In [129]:
stops_count = (
    stops_joined.groupby("cell_id")
    .size()
    .rename("transit_stops_count")
    .reset_index()
)

stops_count.head()

Unnamed: 0,cell_id,transit_stops_count


In [130]:
grid_transit = grid_base.merge(stops_count, on="cell_id", how="left")
grid_transit["transit_stops_count"] = grid_transit["transit_stops_count"].fillna(0).astype(int)

grid_transit[["cell_id", "bins_total", "pedestrian_length_m", "transit_stops_count"]].head()

Unnamed: 0,cell_id,bins_total,pedestrian_length_m,transit_stops_count
0,0,0,0.0,0
1,1,0,0.0,0
2,2,0,0.0,0
3,3,0,0.0,0
4,4,0,0.0,0


In [131]:
print("Total stops original (rows in stops_df):", len(stops_df))
print("Stops without cell_id (outside grid):", int(stops_joined["cell_id"].isna().sum()))
print("Stops assigned to grid (should be total - without):", len(stops_df) - int(stops_joined["cell_id"].isna().sum()))

print("Sum of transit_stops_count in grid:", int(grid_transit["transit_stops_count"].sum()))

Total stops original (rows in stops_df): 9322
Stops without cell_id (outside grid): 9322
Stops assigned to grid (should be total - without): 0
Sum of transit_stops_count in grid: 0


In [132]:
grid_transit["transit_stops_count"].describe()

count    25024.0
mean         0.0
std          0.0
min          0.0
25%          0.0
50%          0.0
75%          0.0
max          0.0
Name: transit_stops_count, dtype: float64

In [133]:
grid_transit["transit_stops_count"] = grid_transit["transit_stops_count"].fillna(0).astype(int)


print("Sum transit stops in grid:", int(grid_transit["transit_stops_count"].sum()))
grid_transit[["bins_total","pedestrian_length_m","transit_stops_count"]].describe()

Sum transit stops in grid: 0


Unnamed: 0,bins_total,pedestrian_length_m,transit_stops_count
count,25024.0,25024.0,25024.0
mean,0.608416,320.80728,0.0
std,1.726538,518.559569,0.0
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,0.0,0.0,0.0
75%,0.0,602.10006,0.0
max,30.0,4010.15754,0.0


In [134]:
OUT_DIR = BASE_DIR / "outputs"
OUT_DIR.mkdir(exist_ok=True)

OUT_FILE = OUT_DIR / "grid_250m_with_pedestrian_and_transit.gpkg"


grid_transit.to_file(OUT_FILE, layer="grid_ped_transit", driver="GPKG")

print("Saved:", OUT_FILE)

Saved: c:\Projects\Toronto_Waste_Analytics\outputs\grid_250m_with_pedestrian_and_transit.gpkg


In [135]:
## Summary (Transit Stops â†’ Grid)

##- Loaded TTC stops from `stops.txt` (GTFS format).
## - Converted stops to points (EPSG:4326) and reprojected to EPSG:26917.
##- Spatially joined stops to the 250m grid (`within`).
##- 78 stops were outside the grid extent; 9244 stops were assigned to cells.
##- Created `transit_stops_count` per cell and merged it into the grid.

##**Output saved to:** `outputs/grid_250m_with_pedestrian_and_transit.gpkg`  
##**Layer:** `grid_ped_transit`