# Step 1: Pull Air Quality Data from OpenAQ



# In [None]:
import requests
import pandas as pd
import time

# Settings
# NOTE(review): OpenAQ has been moving clients from the v2 API to v3 (which
# requires an API key) -- confirm this endpoint still responds before relying
# on it.
BASE_URL = "https://api.openaq.org/v2/measurements"
CITY = "San Francisco"
PARAMETER = "pm25"
DATE_FROM = "2024-07-01"
DATE_TO = "2024-07-07"
LIMIT = 100  # records requested per page
PAGES = 5    # maximum number of pages to fetch; adjust as needed
REQUEST_TIMEOUT = 30  # seconds to wait on the server before giving up
OUTPUT_CSV = "openaq_sf_pm25.csv"

# Collect records page by page. Any failure stops the loop but keeps whatever
# was already downloaded, so a partial result can still be inspected/saved.
all_results = []

for page in range(1, PAGES + 1):
    params = {
        "country": "US",
        "city": CITY,
        "parameter": PARAMETER,
        "date_from": DATE_FROM,
        "date_to": DATE_TO,
        "limit": LIMIT,
        "page": page,
        "sort": "desc",
        "order_by": "datetime",
    }

    # Without a timeout a stalled connection would hang the cell forever.
    try:
        response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request failed on page {page}: {exc}")
        break

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        break

    try:
        results = response.json().get("results", [])
    except ValueError as exc:
        # A 200 response whose body is not valid JSON.
        print(f"Invalid JSON on page {page}: {exc}")
        break

    # An empty page means there is nothing further to fetch.
    if not results:
        break

    all_results.extend(results)

    # Be gentle with the server, but only when another request will follow.
    if page < PAGES:
        time.sleep(1)

# Convert to DataFrame (nested JSON fields are flattened into dotted columns)
df = pd.json_normalize(all_results)
print(df.head())

# Save for BigQuery. An empty frame would produce a header-less, empty CSV,
# so skip the export in that case instead of writing an unusable file.
if df.empty:
    print("No records retrieved; skipping CSV export.")
else:
    df.to_csv(OUTPUT_CSV, index=False)
