In [37]:
import requests
import pandas as pd

# NYC Open Data API endpoint
base_url = "https://data.cityofnewyork.us/resource/pvqr-7yc4.json"

# Settings
limit = 50000         # Rows per request
offset = 0            # Starting index
max_records = 200000 # Total you want

# Container for all records
data = []

while offset < max_records:
    params = {
        "$limit": limit,
        "$offset": offset
    }

    response = requests.get(base_url, params=params)

    if response.status_code != 200:
        print(f"Request failed at offset {offset} with status code {response.status_code}")
        break

    batch = response.json()

    if not batch:
        print("No more data returned.")
        break

    data.extend(batch)
    offset += limit

    print(f"✅ Retrieved {len(batch)} records (Total so far: {len(data)})")

# Convert to DataFrame
df = pd.DataFrame(data)

df = df.dropna(subset=['house_number', 'street_name', 'violation_county'])

df.to_csv("nyc_parking_violations_sample.csv", index=False)


✅ Retrieved 50000 records (Total so far: 50000)
✅ Retrieved 50000 records (Total so far: 100000)
✅ Retrieved 50000 records (Total so far: 150000)
✅ Retrieved 50000 records (Total so far: 200000)


In [35]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# NYC Open Data API endpoint
base_url = "https://data.cityofnewyork.us/resource/pvqr-7yc4.json"
csv_path = "data/nyc_parking_violations_sample.csv"

# Load existing data
existing_df = pd.read_csv(csv_path, low_memory=False)
existing_ids = set(existing_df['summons_number'])

# Target months: 2022 and 2023 only
target_months = [
    (year, f"{month:02d}")
    for year in [2022, 2023]
    for month in range(1, 13)
]

n_per_month = 3000
total_added = 0

for year, month in target_months:
    start_date = f"{year}-{month}-01T00:00:00.000"
    end_dt = (datetime(int(year), int(month), 1) + timedelta(days=32)).replace(day=1) - timedelta(days=1)
    end_date = end_dt.strftime("%Y-%m-%dT23:59:59.999")

    print(f"\n📅 Fetching up to {n_per_month} fines from {year}-{month}")

    params = {
        "$limit": n_per_month,
        "$order": "issue_date DESC",
        "$where": f"issue_date >= '{start_date}' AND issue_date <= '{end_date}'"
    }

    response = requests.get(base_url, params=params)
    if response.status_code != 200:
        print(f"❌ Request failed for {year}-{month}: {response.status_code}")
        continue

    batch = response.json()
    if not batch:
        print("🚫 No data found for this month.")
        continue

    new_df = pd.DataFrame(batch)
    new_df = new_df[~new_df['summons_number'].isin(existing_ids)]

    if new_df.empty:
        print("⚠️ All rows already in dataset.")
        continue

    existing_df = pd.concat([existing_df, new_df], ignore_index=True)
    existing_ids.update(new_df['summons_number'])
    total_added += len(new_df)

    print(f"✅ Added {len(new_df):,} new rows")

# Save updated dataset
existing_df.to_csv(csv_path, index=False)
print(f"\n💾 Final dataset saved with {len(existing_df):,} rows")
print(f"🆕 {total_added:,} new rows added from 2022–2023.")



📅 Fetching up to 3000 fines from 2022-01
✅ Added 6 new rows

📅 Fetching up to 3000 fines from 2022-02
✅ Added 7 new rows

📅 Fetching up to 3000 fines from 2022-03
✅ Added 1 new rows

📅 Fetching up to 3000 fines from 2022-04
✅ Added 9 new rows

📅 Fetching up to 3000 fines from 2022-05
✅ Added 8 new rows

📅 Fetching up to 3000 fines from 2022-06
✅ Added 52 new rows

📅 Fetching up to 3000 fines from 2022-07
✅ Added 62 new rows

📅 Fetching up to 3000 fines from 2022-08
✅ Added 42 new rows

📅 Fetching up to 3000 fines from 2022-09
✅ Added 49 new rows

📅 Fetching up to 3000 fines from 2022-10
✅ Added 40 new rows

📅 Fetching up to 3000 fines from 2022-11
✅ Added 39 new rows

📅 Fetching up to 3000 fines from 2022-12
✅ Added 35 new rows

📅 Fetching up to 3000 fines from 2023-01
✅ Added 685 new rows

📅 Fetching up to 3000 fines from 2023-02
✅ Added 156 new rows

📅 Fetching up to 3000 fines from 2023-03
✅ Added 181 new rows

📅 Fetching up to 3000 fines from 2023-04
✅ Added 215 new rows

📅 Fetchi

# Downlading NTA population data from Census API

In [41]:
import requests
import pandas as pd

# Replace with your actual Census API key
API_KEY = 'ee801ef1fbfc79c0d75038457cafb797d4f8add4'

# NYC counties (FIPS codes)
counties = {
    'New York': '061',
    'Kings': '047',
    'Queens': '081',
    'Bronx': '005',
    'Richmond': '085'
}

data_frames = []

for county_name, county_fips in counties.items():
    url = f'https://api.census.gov/data/2020/dec/pl?get=P1_001N,NAME&for=tract:*&in=state:36+county:{county_fips}&key={API_KEY}'
    
    response = requests.get(url)
    data = response.json()
    
    print(f"Status code: {response.status_code}")
    print(response.text)



    df = pd.DataFrame(data[1:], columns=data[0])
    df['county_name'] = county_name
    data_frames.append(df)

# Combine and save
final_df = pd.concat(data_frames, ignore_index=True)
final_df.rename(columns={'P1_001N': 'Population'}, inplace=True)
final_df.to_csv('data/nyc_census_tract_pop_2020.csv', index=False)
print(final_df.head())


Status code: 200
[["P1_001N","NAME","state","county","tract"],
["0","Census Tract 1, New York County, New York","36","061","000100"],
["2012","Census Tract 2.01, New York County, New York","36","061","000201"],
["7266","Census Tract 2.02, New York County, New York","36","061","000202"],
["5","Census Tract 5, New York County, New York","36","061","000500"],
["11616","Census Tract 6, New York County, New York","36","061","000600"],
["10542","Census Tract 7, New York County, New York","36","061","000700"],
["10871","Census Tract 8, New York County, New York","36","061","000800"],
["2016","Census Tract 9, New York County, New York","36","061","000900"],
["1767","Census Tract 10.01, New York County, New York","36","061","001001"],
["6300","Census Tract 10.02, New York County, New York","36","061","001002"],
["3776","Census Tract 12, New York County, New York","36","061","001200"],
["5402","Census Tract 13, New York County, New York","36","061","001300"],
["3543","Census Tract 14.01, New Yor