In [20]:
import geopandas as gpd
from pathlib import Path
import pandas as pd
import numpy as np
from shapely.geometry import box

In [14]:
BASE_DIR = Path.cwd().parent
CRS_PROJECTED = "EPSG:26917"

park_bins_path = BASE_DIR / "waste_bins" / "Solid-waste-in-park-assets-wgs84" / "SWMS_PARK_BIN_WGS84.shp"
street_bins_path = BASE_DIR / "waste_bins" / "Street furniture-Litter receptacle data - 4326.geojson"

park_bins = gpd.read_file(park_bins_path).to_crs(CRS_PROJECTED)
street_bins = gpd.read_file(street_bins_path).to_crs(CRS_PROJECTED)

print("park_bins:", len(park_bins), "CRS:", park_bins.crs)
print("street_bins:", len(street_bins), "CRS:", street_bins.crs)

park_bins: 4768 CRS: EPSG:26917
street_bins: 10457 CRS: EPSG:26917


In [15]:
park_bins = park_bins[park_bins.geometry.notna() & park_bins.is_valid].copy()
street_bins = street_bins[street_bins.geometry.notna() & street_bins.is_valid].copy()

print("park_bins valid:", len(park_bins))
print("street_bins valid:", len(street_bins))

park_bins valid: 4768
street_bins valid: 10457


In [16]:
park_bins["source"] = "park_assets"
street_bins["source"] = "street_furniture"

waste_bins = gpd.GeoDataFrame(
    pd.concat([park_bins, street_bins], ignore_index=True),
    crs=CRS_PROJECTED
)

waste_bins["source"].value_counts()

source
street_furniture    10457
park_assets          4768
Name: count, dtype: int64

In [17]:
CRS_PROJECTED = "EPSG:26917"

print("waste_bins CRS:", waste_bins.crs)

waste_bins CRS: EPSG:26917


In [18]:
waste_bins = waste_bins.to_crs(CRS_PROJECTED)

In [19]:
CELL_SIZE = 250

In [21]:
minx, miny, maxx, maxy = waste_bins.total_bounds
BUFFER = 2000 
minx -= BUFFER
miny -= BUFFER
maxx += BUFFER
maxy += BUFFER

print("Bounds (with buffer):", (minx, miny, maxx, maxy))

Bounds (with buffer): (607587.3952337612, 4824768.165974327, 653407.225812946, 4858580.7051651385)


In [22]:
xs = np.arange(minx, maxx, CELL_SIZE)
ys = np.arange(miny, maxy, CELL_SIZE)

grid_polys = []
grid_ids = []

gid = 0
for x in xs:
    for y in ys:
        grid_polys.append(box(x, y, x + CELL_SIZE, y + CELL_SIZE))
        grid_ids.append(gid)
        gid += 1

grid = gpd.GeoDataFrame(
    {"cell_id": grid_ids},
    geometry=grid_polys,
    crs=CRS_PROJECTED
)

print("Grid cells:", len(grid))
grid.head()

Grid cells: 25024


Unnamed: 0,cell_id,geometry
0,0,"POLYGON ((607837.395 4824768.166, 607837.395 4..."
1,1,"POLYGON ((607837.395 4825018.166, 607837.395 4..."
2,2,"POLYGON ((607837.395 4825268.166, 607837.395 4..."
3,3,"POLYGON ((607837.395 4825518.166, 607837.395 4..."
4,4,"POLYGON ((607837.395 4825768.166, 607837.395 4..."


In [23]:
joined = gpd.sjoin(
    waste_bins[["source", "geometry"]],
    grid[["cell_id", "geometry"]],
    how="left",
    predicate="within"
)

joined.head()

Unnamed: 0,source,geometry,index_right,cell_id
0,park_assets,POINT (641649.07 4840516.464),18558.0,18558.0
1,park_assets,POINT (641720.484 4840742.007),18559.0,18559.0
2,park_assets,POINT (639630.995 4841469.255),17474.0,17474.0
3,park_assets,POINT (649224.471 4850564.857),22679.0,22679.0
4,park_assets,POINT (649180.405 4850615.117),22679.0,22679.0


In [24]:
counts_total = joined.groupby("cell_id").size().rename("bins_total")
counts_total.head()

cell_id
1303.0    2
1304.0    1
1439.0    2
1576.0    2
1706.0    1
Name: bins_total, dtype: int64

In [25]:
counts_by_source = (
    joined.groupby(["cell_id", "source"])
    .size()
    .unstack(fill_value=0)
)

# asegurar columnas aunque falte una por cualquier razón
for col in ["park_assets", "street_furniture"]:
    if col not in counts_by_source.columns:
        counts_by_source[col] = 0

counts_by_source = counts_by_source.rename(columns={
    "park_assets": "bins_park_assets",
    "street_furniture": "bins_street_furniture"
})

counts_by_source.head()

source,bins_park_assets,bins_street_furniture
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1303.0,0,2
1304.0,0,1
1439.0,0,2
1576.0,0,2
1706.0,1,0


In [27]:
grid = grid.merge(counts_total, on="cell_id", how="left")
grid = grid.merge(counts_by_source, on="cell_id", how="left")


for c in ["bins_total", "bins_park_assets", "bins_street_furniture"]:
    if c in grid.columns:
        grid[c] = grid[c].fillna(0).astype(int)

grid.head()

Unnamed: 0,cell_id,geometry,bins_total_x,bins_park_assets_x,bins_street_furniture_x,bins_total_y,bins_park_assets_y,bins_street_furniture_y
0,0,"POLYGON ((607837.395 4824768.166, 607837.395 4...",0,0,0,,,
1,1,"POLYGON ((607837.395 4825018.166, 607837.395 4...",0,0,0,,,
2,2,"POLYGON ((607837.395 4825268.166, 607837.395 4...",0,0,0,,,
3,3,"POLYGON ((607837.395 4825518.166, 607837.395 4...",0,0,0,,,
4,4,"POLYGON ((607837.395 4825768.166, 607837.395 4...",0,0,0,,,


In [29]:
grid.columns.tolist()

['cell_id',
 'geometry',
 'bins_total_x',
 'bins_park_assets_x',
 'bins_street_furniture_x',
 'bins_total_y',
 'bins_park_assets_y',
 'bins_street_furniture_y']

In [30]:
grid = grid.rename(columns={
    "bins_total_x": "bins_total",
    "bins_park_assets_x": "bins_park_assets",
    "bins_street_furniture_x": "bins_street_furniture",
})

grid = grid.drop(columns=[
    "bins_total_y",
    "bins_park_assets_y",
    "bins_street_furniture_y",
])

for c in ["bins_total", "bins_park_assets", "bins_street_furniture"]:
    grid[c] = grid[c].fillna(0).astype(int)

grid[["bins_total", "bins_park_assets", "bins_street_furniture"]].head()

Unnamed: 0,bins_total,bins_park_assets,bins_street_furniture
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,0,0,0


In [31]:
grid["bins_total"].gt(0).sum(), len(grid)

(5118, 25024)

In [32]:
grid["bins_total"].sum()

15223

In [33]:
grid.sort_values("bins_total", ascending=False)[
    ["cell_id", "bins_total", "bins_park_assets", "bins_street_furniture"]
].head(10)

Unnamed: 0,cell_id,bins_total,bins_park_assets,bins_street_furniture
8742,8742,30,28,2
7147,7147,28,28,0
20496,20496,26,26,0
12685,12685,23,5,18
12280,12280,22,0,22
10146,10146,22,22,0
7283,7283,21,21,0
12416,12416,20,4,16
15684,15684,19,17,2
12014,12014,19,0,19


In [35]:
grid.columns.tolist()

['cell_id',
 'geometry',
 'bins_total',
 'bins_park_assets',
 'bins_street_furniture']

In [36]:
cols_keep = ["cell_id", "geometry", "bins_total", "bins_park_assets", "bins_street_furniture"]
grid_clean = grid[cols_keep].copy()

for c in ["bins_total", "bins_park_assets", "bins_street_furniture"]:
    grid_clean[c] = grid_clean[c].fillna(0).astype(int)

grid_clean.head()

Unnamed: 0,cell_id,geometry,bins_total,bins_park_assets,bins_street_furniture
0,0,"POLYGON ((607837.395 4824768.166, 607837.395 4...",0,0,0
1,1,"POLYGON ((607837.395 4825018.166, 607837.395 4...",0,0,0
2,2,"POLYGON ((607837.395 4825268.166, 607837.395 4...",0,0,0
3,3,"POLYGON ((607837.395 4825518.166, 607837.395 4...",0,0,0
4,4,"POLYGON ((607837.395 4825768.166, 607837.395 4...",0,0,0


In [37]:
grid_clean["bins_total"].gt(0).sum()

5118

In [38]:
grid_clean.sort_values("bins_total", ascending=False)[
    ["cell_id", "bins_total", "bins_park_assets", "bins_street_furniture"]
].head(10)

Unnamed: 0,cell_id,bins_total,bins_park_assets,bins_street_furniture
8742,8742,30,28,2
7147,7147,28,28,0
20496,20496,26,26,0
12685,12685,23,5,18
12280,12280,22,0,22
10146,10146,22,22,0
7283,7283,21,21,0
12416,12416,20,4,16
15684,15684,19,17,2
12014,12014,19,0,19


In [40]:
joined = gpd.sjoin(
    waste_bins[["source", "geometry"]],
    grid[["cell_id", "geometry"]],
    how="left",
    predicate="intersects"
)

In [41]:
counts_total = joined.groupby("cell_id").size().rename("bins_total")

counts_by_source = (
    joined.groupby(["cell_id", "source"])
    .size()
    .unstack(fill_value=0)
)

for col in ["park_assets", "street_furniture"]:
    if col not in counts_by_source.columns:
        counts_by_source[col] = 0

counts_by_source = counts_by_source.rename(columns={
    "park_assets": "bins_park_assets",
    "street_furniture": "bins_street_furniture"
})

grid = grid.drop(columns=[c for c in ["bins_total","bins_park_assets","bins_street_furniture"] if c in grid.columns], errors="ignore")
grid = grid.merge(counts_total, on="cell_id", how="left")
grid = grid.merge(counts_by_source, on="cell_id", how="left")

for c in ["bins_total", "bins_park_assets", "bins_street_furniture"]:
    grid[c] = grid[c].fillna(0).astype(int)

grid_clean = grid[["cell_id","geometry","bins_total","bins_park_assets","bins_street_furniture"]].copy()

In [42]:
print("Total bins original:", len(waste_bins))
print("Total bins counted in grid:", grid_clean["bins_total"].sum())

Total bins original: 15225
Total bins counted in grid: 15227


In [43]:
waste_bins.columns.tolist()

['FID',
 'INSPEC_DAT',
 'PARK_NAME',
 'PARK_LCODE',
 'WARD_NAME',
 'WARD_SCODE',
 'LIT_BTYPE',
 'LIT_BCOUNT',
 'REC_BTYPE',
 'REC_BCOUNT',
 'CON_STATUS',
 'X_COORDI',
 'Y_COORDI',
 'LONGITUDE',
 'LATITUDE',
 'OBJECTID',
 'RID',
 'geometry',
 'source',
 '_id',
 'ID',
 'ADDRESSNUMBERTEXT',
 'ADDRESSSTREET',
 'FRONTINGSTREET',
 'SIDE',
 'FROMSTREET',
 'DIRECTION',
 'SITEID',
 'WARD',
 'BIA',
 'ASSETTYPE',
 'STATUS',
 'BARCODE',
 'SDE_STATE_ID']

In [45]:
candidate_ids = ["OBJECTID", "FID", "RID", "BARCODE", "ID", "_id"]

for c in candidate_ids:
    if c in waste_bins.columns:
        total = len(waste_bins)
        unique = waste_bins[c].nunique(dropna=True)
        nulls = waste_bins[c].isna().sum()
        print(f"{c:10s} | total={total} unique={unique} nulls={nulls}")

OBJECTID   | total=15225 unique=12512 nulls=0
FID        | total=15225 unique=3030 nulls=10457
RID        | total=15225 unique=4768 nulls=10457
BARCODE    | total=15225 unique=10420 nulls=4768
ID         | total=15225 unique=10457 nulls=4768
_id        | total=15225 unique=10457 nulls=4768


In [46]:
waste_bins["bin_uid"] = np.where(
    waste_bins["source"] == "park_assets",
    "park_" + waste_bins["RID"].astype(str),
    "street_" + waste_bins["OBJECTID"].astype(str)
)

# Verificación
print("Total rows:", len(waste_bins))
print("Unique bin_uid:", waste_bins["bin_uid"].nunique())

Total rows: 15225
Unique bin_uid: 15225


In [47]:
joined = gpd.sjoin(
    waste_bins[["bin_uid", "source", "geometry"]],
    grid[["cell_id", "geometry"]],
    how="left",
    predicate="intersects"
)
joined = (
    joined
    .sort_values(["bin_uid", "cell_id"])
    .drop_duplicates(subset=["bin_uid"], keep="first")
)

In [48]:
counts_total = joined.groupby("cell_id").size().rename("bins_total")

counts_by_source = (
    joined.groupby(["cell_id", "source"])
    .size()
    .unstack(fill_value=0)
)

counts_by_source = counts_by_source.rename(columns={
    "park_assets": "bins_park_assets",
    "street_furniture": "bins_street_furniture"
})

grid2 = grid[["cell_id", "geometry"]].copy()
grid2 = grid2.merge(counts_total, on="cell_id", how="left")
grid2 = grid2.merge(counts_by_source, on="cell_id", how="left")

for c in ["bins_total", "bins_park_assets", "bins_street_furniture"]:
    grid2[c] = grid2[c].fillna(0).astype(int)

grid_clean = grid2.copy()

print("Total bins original:", len(waste_bins))
print("Total bins counted in grid:", grid_clean["bins_total"].sum())

Total bins original: 15225
Total bins counted in grid: 15225


In [49]:
OUT_DIR = BASE_DIR / "outputs"
OUT_DIR.mkdir(exist_ok=True)

out_grid = OUT_DIR / "grid_250m_bins_clean.gpkg"
grid_clean.to_file(out_grid, layer="grid_bins", driver="GPKG")

print("Saved:", out_grid)

Saved: c:\Projects\Toronto_Waste_Analytics\outputs\grid_250m_bins_clean.gpkg


In [None]:
### This Notebook contein: 
# The purpose of this notebook was to prepare and structure spatial data on public waste bins in the City of Toronto in order to create a consistent spatial unit of analysis for subsequent analytical stages of the project.

#Two validated datasets were used:
#- Solid waste bins located in park assets
#- Street furniture litter receptacles

#Both datasets were projected to a common coordinate reference system and combined into a unified dataset of waste bin locations.

### Spatial Grid Construction
#A regular spatial grid of 250m × 250m cells was created to serve as the analytical unit.  
#This grid allows waste bin locations to be aggregated in a standardized and comparable way across the study area.

#Each waste bin was spatially assigned to a grid cell using a point-in-polygon operation.  
#For each cell, the following indicators were computed:
# Total number of waste bins
# Number of bins located in park assets
# Number of bins located in street furniture

### Data Validation
# A validation step was performed to ensure data integrity.  
# The total number of waste bins in the original dataset matches the total number of bins aggregated across all grid cells, confirming that no observations were lost or duplicated during the spatial aggregation process.

### Output
# The resulting spatial grid, including aggregated waste bin counts per cell, was saved as a GeoPackage file and will be used as an input dataset for subsequent analysis.

### Next Steps
# This notebook focuses exclusively on spatial data preparation and aggregation.  
# Subsequent notebooks will integrate contextual datasets (such as pedestrian activity, population, or transit access) and conduct analytical evaluations based on the prepared grid.