# --- 1. Import Libraries ---

In [12]:
import requests
import pandas as pd
import json
from dotenv import load_dotenv
import os

# --- 2. Fetch Air Quality Data from OpenAQ ---
# Example: Get PM2.5 yearly averages for last 10 years, globally

In [13]:
import requests
import pandas as pd
import time

# --- API setup ---

In [28]:
load_dotenv()

url = "https://api.openaq.org/v3/measurements?parameter=pm25&limit=5"

API_KEY = os.getenv("OPEN_AQ_API_KEY")

headers = {
    "X-API-Key": API_KEY
}

# --- Parameters ---

In [29]:
params = {
    "parameter": "pm25",            # pollutant
    "country_id": "IN",             # example: India (use "US", "CN", etc. for others)
    "date_from": "2015-01-01",
    "date_to": "2024-12-31",
    "limit": 100,                   # number of results per page (max 100)
    "page": 1
}

# --- Pagination Loop ---

In [26]:
all_results = []
while True:
    print(f"Fetching page {params['page']} ...")
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        break
    
    data = response.json()
    results = data.get("results", [])
    
    if not results:
        break  # no more results
    
    all_results.extend(results)
    
    # pagination info
    meta = data.get("meta", {})
    total_pages = meta.get("pages", 1)
    
    if params["page"] >= total_pages:
        break
    
    params["page"] += 1
    time.sleep(1)  # prevent rate-limit hits

print(f"Total records fetched: {len(all_results)}")

Fetching page 1 ...
Error 404: {"detail":"Not Found"}
Total records fetched: 0


# --- 3. Convert JSON to DataFrame ---

In [13]:
aq_df = pd.DataFrame(data["results"])
print("Air Quality Data (Raw):")
display(aq_df.head())

KeyError: 'results'

# --- 4. Select Important Columns ---

In [None]:
aq_df = aq_df[["country", "city", "year", "average", "parameter"]]
aq_df.rename(columns={"average": "pm25_value"}, inplace=True)

In [None]:
# --- 5. Fetch GDP Data from World Bank ---
# GDP per capita (constant USD)

In [None]:
wb_url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.CD?format=json&per_page=20000"
gdp_response = requests.get(wb_url)
gdp_data = gdp_response.json()

# World Bank data comes in 2 parts: [metadata, actual data]

In [None]:
gdp_df = pd.DataFrame(gdp_data[1])
gdp_df = gdp_df[["countryiso3code", "country", "date", "value"]]
gdp_df.rename(columns={"countryiso3code": "iso3", "date": "year", "value": "gdp_per_capita"}, inplace=True)
gdp_df["year"] = gdp_df["year"].astype(int)
print("GDP Data (Raw):")
display(gdp_df.head())

# --- 6. Save Raw Data Locally ---

In [None]:
aq_df.to_csv("../data/raw/openaq_pm25_raw.csv", index=False)
gdp_df.to_csv("../data/raw/worldbank_gdp_raw.csv", index=False)
print("Data saved to data/raw/")