In [1]:
# Quick fetch: Manhattan slices (ACE violations, bus lanes, NTAs)
import math
import requests
import pandas as pd
from pathlib import Path

# Manhattan bounding box; element order is north, west, south, east.
MN_BBOX = (
    40.8820,   # north
    -74.0479,  # west
    40.7000,   # south
    -73.9067,  # east
)

# Raw downloads are written to <parent of the working directory>/data/raw.
RAW = Path(Path.cwd().parent, "data", "raw")
RAW.mkdir(exist_ok=True, parents=True)

def fetch_paged(base_url, where=None, order=None, limit=50000, max_pages=50):
    """Download rows from a Socrata (SODA) JSON endpoint using $limit/$offset paging.

    Parameters
    ----------
    base_url : str
        Resource endpoint returning a JSON list of row objects.
    where : str, optional
        SoQL filter sent as ``$where``.
    order : str, optional
        SoQL sort sent as ``$order``. When omitted, ``:id`` is used so that
        consecutive pages come from one stable ordering; paging an unordered
        query can repeat or skip rows between requests.
    limit : int
        Page size sent as ``$limit``.
    max_pages : int
        Maximum number of requests to issue.

    Returns
    -------
    pandas.DataFrame
        All fetched pages concatenated with a fresh index, or an empty frame
        when the first page has no rows.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` on an error response.
    """
    import warnings  # local import: only needed for the truncation notice

    base_params = {"$limit": limit, "$order": order or ":id"}
    if where:
        base_params["$where"] = where

    frames = []
    for page in range(max_pages):
        # Build a fresh dict per request so earlier requests' params are never mutated.
        query = {**base_params, "$offset": page * limit}
        r = requests.get(base_url, params=query, timeout=60)
        r.raise_for_status()
        data = r.json()
        if not data:
            break
        frames.append(pd.DataFrame(data))
        if len(data) < limit:
            # A short page means the server has no more rows.
            break
    else:
        # Every requested page came back full: more rows may exist upstream.
        if frames:
            warnings.warn(
                f"fetch_paged stopped after max_pages={max_pages} full pages "
                f"for {base_url}; the result may be truncated.",
                stacklevel=2,
            )
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# 1) ACE violations (bbox)
# within_box takes (field, NW latitude, NW longitude, SE latitude, SE longitude),
# which is exactly the (north, west, south, east) order stored in MN_BBOX.
ace_where = f"within_box(violation_georeference,{MN_BBOX[0]},{MN_BBOX[1]},{MN_BBOX[2]},{MN_BBOX[3]})"
ace_df = fetch_paged(
    "https://data.ny.gov/resource/kh8p-hcbm.json",
    where=ace_where,
    order="first_occurrence"
)
ace_out = RAW / "ace_violations_manhattan.csv"
ace_df.to_csv(ace_out, index=False)
print(f"ACE Manhattan: {len(ace_df):,} → {ace_out}")

# 2) Bus lanes (Manhattan)
lanes_df = fetch_paged(
    "https://data.cityofnewyork.us/resource/ycrg-ses3.json",
    where="upper(boro) like '%MAN%'"
)
if lanes_df.empty:
    # Fallback to bbox filter if borough code format changes.
    # Reuse MN_BBOX so the arguments are latitude-first, north-west corner first,
    # the same order the ACE query uses (a lon/lat, min/max order selects the wrong box).
    lanes_df = fetch_paged(
        "https://data.cityofnewyork.us/resource/ycrg-ses3.json",
        where=f"within_box(the_geom,{MN_BBOX[0]},{MN_BBOX[1]},{MN_BBOX[2]},{MN_BBOX[3]})"
    )
lanes_out = RAW / "bus_lanes_manhattan.csv"
lanes_df.to_csv(lanes_out, index=False)
print(f"Bus lanes Manhattan: {len(lanes_df):,} → {lanes_out}")

# 3) NTAs (borocode=1)
ntas_df = fetch_paged(
    "https://data.cityofnewyork.us/resource/9nt8-h7nd.json",
    where="borocode=1"
)
ntas_out = RAW / "ntas_2020_manhattan.csv"
ntas_df.to_csv(ntas_out, index=False)
print(f"NTAs Manhattan: {len(ntas_df):,} → {ntas_out}")


ACE Manhattan: 1,687,191 → /Users/mohamedhiba/Fall 2025/datathon/data/raw/ace_violations_manhattan.csv
Bus lanes Manhattan: 1,241 → /Users/mohamedhiba/Fall 2025/datathon/data/raw/bus_lanes_manhattan.csv
NTAs Manhattan: 38 → /Users/mohamedhiba/Fall 2025/datathon/data/raw/ntas_2020_manhattan.csv


: 

In [2]:
# Fetch schools + hospitals (NYC-wide) and save Manhattan subsets
import requests, pandas as pd
from pathlib import Path

# Manhattan bounding box, ordered north, west, south, east.
MN_BBOX = (40.8820, -74.0479, 40.7000, -73.9067)

# Destination folder for the CSVs: <parent of the working directory>/data/raw,
# created (with any missing parents) if it does not exist yet.
RAW = Path.cwd().parent.joinpath("data", "raw")
RAW.mkdir(exist_ok=True, parents=True)

def fetch_to_csv(url, out, params=None):
    """Fetch one page of JSON rows from ``url`` and save it as ``RAW / out``.

    Parameters
    ----------
    url : str
        Endpoint returning a JSON list of row objects.
    out : str
        File name of the CSV to write inside ``RAW``.
    params : dict, optional
        Extra query parameters merged over the default ``{"$limit": 50000}``;
        a ``"$limit"`` key here overrides the default.

    Prints ``out`` and the number of rows written. Returns ``None``.

    Only a single request is made (no paging), so a warning is issued when
    the row count reaches ``$limit``, since the saved file may be truncated.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` on an error response.
    """
    import warnings  # local import: only needed for the truncation notice

    query = {"$limit": 50000}
    if params:
        query.update(params)

    r = requests.get(url, params=query, timeout=60)
    r.raise_for_status()

    df = pd.DataFrame(r.json())
    df.to_csv(RAW / out, index=False)
    print(out, len(df))

    # Callers may override $limit with a non-numeric value; skip the check then.
    try:
        page_cap = int(query["$limit"])
    except (TypeError, ValueError):
        page_cap = None
    if page_cap and len(df) >= page_cap:
        warnings.warn(
            f"{out}: received {len(df)} rows, which reaches $limit={page_cap}; "
            "the saved file may be truncated.",
            stacklevel=2,
        )

# Schools (DOE): save the NYC-wide extract, then the Manhattan subset chosen by
# the borough column (upper-cased and matched against 'M' / 'MANHATTAN').
schools_url = "https://data.cityofnewyork.us/resource/s3k6-pzi2.json"
manhattan_schools = {"$where": "upper(borough) in ('M','MANHATTAN')"}
fetch_to_csv(schools_url, "schools_nyc.csv")
fetch_to_csv(schools_url, "schools_manhattan.csv", params=manhattan_schools)

# Hospitals (dataset q6fj-vxf8): save the NYC-wide extract, then the rows whose
# borough field equals "Manhattan" (simple equality filter, no $where needed).
hospitals_url = "https://data.cityofnewyork.us/resource/q6fj-vxf8.json"
manhattan_hospitals = {"borough": "Manhattan"}
fetch_to_csv(hospitals_url, "hospitals_nyc.csv")
fetch_to_csv(hospitals_url, "hospitals_manhattan.csv", params=manhattan_hospitals)


schools_nyc.csv 440
schools_manhattan.csv 107
hospitals_nyc.csv 78
hospitals_manhattan.csv 24
