In [None]:
import os
import requests
import geopandas as gpd
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
import osmnx as ox
from shapely.geometry import shape


# --- CONFIG ---
PLACE_BASE_URL = "https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_{statefp}_place.zip"
CACHE_DIR = "cache/place"
os.makedirs(CACHE_DIR, exist_ok=True)

STATE_FIPS = {
    "01": "Alabama", "02": "Alaska", "04": "Arizona", "05": "Arkansas",
    "06": "California", "08": "Colorado", "09": "Connecticut", "10": "Delaware",
    "12": "Florida", "13": "Georgia", "16": "Idaho", "17": "Illinois",
    "18": "Indiana", "19": "Iowa", "20": "Kansas", "21": "Kentucky",
    "22": "Louisiana", "23": "Maine", "24": "Maryland", "25": "Massachusetts",
    "26": "Michigan", "27": "Minnesota", "28": "Mississippi", "29": "Missouri",
    "30": "Montana", "31": "Nebraska", "32": "Nevada", "33": "New Hampshire",
    "34": "New Jersey", "35": "New Mexico", "36": "New York", "37": "North Carolina",
    "38": "North Dakota", "39": "Ohio", "40": "Oklahoma", "41": "Oregon",
    "42": "Pennsylvania", "44": "Rhode Island", "45": "South Carolina",
    "46": "South Dakota", "47": "Tennessee", "48": "Texas", "49": "Utah",
    "50": "Vermont", "51": "Virginia", "53": "Washington", "54": "West Virginia",
    "55": "Wisconsin", "56": "Wyoming"
}

def load_state_places(statefp):
    """Load (or download + cache) a state's PLACE shapefile as GeoDataFrame."""
    zip_path = os.path.join(CACHE_DIR, f"tl_2025_{statefp}_place.zip")

    # Download if not already cached
    if not os.path.exists(zip_path):
        url = PLACE_BASE_URL.format(statefp=statefp)
        print(f"Downloading {STATE_FIPS[statefp]} PLACE file...")
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        with open(zip_path, "wb") as f:
            f.write(r.content)
    else:
        print(f"Using cached file for {STATE_FIPS[statefp]}")

    # Load from local zip
    return gpd.read_file(f"zip://{zip_path}")

 
def get_city_boundary(city_name, state):
    """
    Find and return the boundary GeoDataFrame for a given city within a known U.S. state.
    `state` can be the full state name (e.g., "Colorado") or FIPS (e.g., "08").
    """
    city_name = city_name.lower()

    # Resolve statefp from either state name or FIPS code
    state_lookup = {v.lower(): k for k, v in STATE_FIPS.items()}
    if state.isdigit():
        statefp = state.zfill(2)
        state_name = STATE_FIPS.get(statefp, f"FIPS {statefp}")
    else:
        statefp = state_lookup.get(state.lower())
        if not statefp:
            raise ValueError(f"Unknown state: {state}")
        state_name = state.title()

    # Load the stateâ€™s PLACE shapefile (cached if available)
    gdf = load_state_places(statefp)

    # Case-insensitive match for city name
    city_rows = gdf[gdf["NAME"].str.lower() == city_name]

    if city_rows.empty:
        raise ValueError(f"City '{city_name.title()}' not found in {state_name}.")

    city_rows = city_rows.copy()
    city_rows["state_name"] = state_name
    return city_rows





In [1]:
import os

# macOS R framework path
os.environ["R_HOME"] = "/Library/Frameworks/R.framework/Resources"
os.environ["PATH"] = os.environ["R_HOME"] + "/bin:" + os.environ["PATH"]




In [2]:

%load_ext rpy2.ipython


In [7]:
%%R
library(sf)
library(spsurvey)

# city_sf: your sf polygon from get_city_boundary()
# fraction: fraction of polygons to sample (e.g., 0.2)
# seed: for reproducibility
generate_grts_sample <- function(city_sf, fraction = 0.2, seed = 123) {
  set.seed(seed)
  
  # Ensure unique ID for merging later
  city_sf$UID <- 1:nrow(city_sf)
  
  # Convert polygons to GRTS spatial frame
  sframe <- grts::grts_frame_area(city_sf, stratum_var = NULL)
  
  # Determine number of base samples
  n_base <- ceiling(nrow(city_sf) * fraction)
  
  # Run GRTS
  samples <- grts(
    sframe = sframe,
    n_base = n_base,
    DesignID = "CityGRTS"
  )
  
  # Merge samples with original polygons to get geometry
  sampled_polygons <- merge(city_sf, samples, by.x = "UID", by.y = "siteID")
  
  return(list(
    polygons = city_sf,
    samples = sampled_polygons
  ))
}


In [12]:
%%R
library(sf)
library(spsurvey)
library(ggplot2)

# Read Missoula polygon
missoula <- st_read("/private/tmp/missoula.gpkg", layer = "missoula")

# Assign unique ID for merge later
missoula$UID <- 1:nrow(missoula)

# Generate 20% spatially balanced GRTS sample
set.seed(42)
n_samples <- ceiling(nrow(missoula) * 0.2)   # 20% of polygons
result <- grts(
  sframe = missoula,
  n_base = n_samples,
  DesignID = "CityGRTS"
)

# Merge sampled polygons
sampled_polygons <- merge(missoula, result, by.x = "UID", by.y = "siteID")

# Plot
ggplot() +
  geom_sf(data = missoula, fill = "lightblue", color = "black") +
  geom_sf(data = sampled_polygons, color = "red", size = 2) +
  ggtitle("GRTS Sample: 20% of Missoula polygons")




Reading layer `missoula' from data source `/private/tmp/missoula.gpkg' using driver `GPKG'
Simple feature collection with 1 feature and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -114.1273 ymin: 46.79154 xmax: -113.9104 ymax: 46.94996
Geodetic CRS:  NAD83
During the check of the input to grtspts, one or more errors were identified.

Enter the following command to view all input error messages: stopprnt()

To view a subset of the errors (e.g., errors 1 and 5) enter stopprnt(m=c(1,5))
Error in dsgn_check(sframe = sframe, sf_type = sf_type, legacy_sites = legacy_sites,  :


RInterpreterError: Failed to parse and evaluate line 'library(sf)\nlibrary(spsurvey)\nlibrary(ggplot2)\n\n# Read Missoula polygon\nmissoula <- st_read("/private/tmp/missoula.gpkg", layer = "missoula")\n\n# Assign unique ID for merge later\nmissoula$UID <- 1:nrow(missoula)\n\n# Generate 20% spatially balanced GRTS sample\nset.seed(42)\nn_samples <- ceiling(nrow(missoula) * 0.2)   # 20% of polygons\nresult <- grts(\n  sframe = missoula,\n  n_base = n_samples,\n  DesignID = "CityGRTS"\n)\n\n# Merge sampled polygons\nsampled_polygons <- merge(missoula, result, by.x = "UID", by.y = "siteID")\n\n# Plot\nggplot() +\n  geom_sf(data = missoula, fill = "lightblue", color = "black") +\n  geom_sf(data = sampled_polygons, color = "red", size = 2) +\n  ggtitle("GRTS Sample: 20% of Missoula polygons")\n\n\n'.
R error message: 'Error in dsgn_check(sframe = sframe, sf_type = sf_type, legacy_sites = legacy_sites,  :'
R stdout:
During the check of the input to grtspts, one or more errors were identified.

Enter the following command to view all input error messages: stopprnt()

To view a subset of the errors (e.g., errors 1 and 5) enter stopprnt(m=c(1,5))