In [5]:
def fetch_openaq_chennai(limit=1000):
    url = "https://api.openaq.org/v2/measurements"
    
    params = {
        "city": "Chennai",
        "limit": limit,
        "sort": "desc",
        "order_by": "datetime"
    }

    response = requests.get(url, params=params)
    json_data = response.json()

    records = []
    for d in json_data.get("data", []):
        records.append({
            "datetime": d["date"]["utc"],
            "parameter": d["parameter"],
            "value": d["value"]
        })

    return pd.DataFrame(records)


In [6]:
df = fetch_openaq_chennai()
print(df.head())

Empty DataFrame
Columns: []
Index: []


In [8]:
import requests
import pandas as pd

TOKEN = "6b13e30815928570eb416eaed5b63faa296a0b6e"

def fetch_waqi_chennai():
    url = f"https://api.waqi.info/feed/chennai/?token={TOKEN}"
    response = requests.get(url)
    data = response.json()

    if data["status"] != "ok":
        raise Exception(data)

    iaqi = data["data"]["iaqi"]
    time = data["data"]["time"]["s"]

    record = {
        "datetime": time,
        "pm25": iaqi.get("pm25", {}).get("v"),
        "pm10": iaqi.get("pm10", {}).get("v"),
        "no2": iaqi.get("no2", {}).get("v"),
        "so2": iaqi.get("so2", {}).get("v"),
        "co": iaqi.get("co", {}).get("v"),
        "o3": iaqi.get("o3", {}).get("v"),
        'pressure': iaqi.get("p", {}).get("v")
    }

    return pd.DataFrame([record])

df = fetch_waqi_chennai()
print(df)


              datetime  pm25  pm10  no2  so2  co  o3    pressure
0  2025-12-18 20:00:00    66    43  8.5  2.6  10   7  731.436667


In [9]:

# -----------------------------
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

# -----------------------------
# Step 3: Define function to fetch WAQI data for Chennai
# -----------------------------
def fetch_waqi_chennai(token, start_date, end_date, sleep_time=1):
    """
    Fetch historical AQI data for Chennai from WAQI API.
    
    Parameters:
    - token: WAQI API token (string)
    - start_date: datetime object, start of data collection
    - end_date: datetime object, end of data collection
    - sleep_time: seconds to wait between API calls (to avoid rate limit)
    
    Returns:
    - DataFrame with pollutants, weather features, and timestamp
    """
    df_list = []
    
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime('%Y-%m-%d')
        url = f"https://api.waqi.info/feed/chennai/?token={token}"
        
        try:
            response = requests.get(url)
            data = response.json()
            
            if data["status"] == "ok":
                iaqi = data["data"]["iaqi"]
                record = {
                    "datetime": datetime.now(),  # real-time timestamp
                    "pm25": iaqi.get("pm25", {}).get("v"),
                    "pm10": iaqi.get("pm10", {}).get("v"),
                    "no2": iaqi.get("no2", {}).get("v"),
                    "so2": iaqi.get("so2", {}).get("v"),
                    "co": iaqi.get("co", {}).get("v"),
                    "o3": iaqi.get("o3", {}).get("v"),
                    "temperature": iaqi.get("t", {}).get("v"),
                    "humidity": iaqi.get("h", {}).get("v"),
                    "pressure": iaqi.get("p", {}).get("v"),
                    "wind_speed": iaqi.get("w", {}).get("v"),
                }
                df_list.append(record)
            else:
                print(f"Error fetching data for {date_str}: {data}")
        
        except Exception as e:
            print(f"Exception for {date_str}: {e}")
        
        # Wait before next API call
        time.sleep(sleep_time)
        current_date += timedelta(days=1)
    
    df = pd.DataFrame(df_list)
    return df

# -----------------------------
# Step 4: Call function to fetch data
# -----------------------------
# Replace with your WAQI API token
WAQI_TOKEN = "6b13e30815928570eb416eaed5b63faa296a0b6e"

# Example: Fetch data for last 7 days (for demonstration)
start_date = datetime.now() - timedelta(days=7)
end_date = datetime.now()

df_chennai = fetch_waqi_chennai(WAQI_TOKEN, start_date, end_date)
print(df_chennai.head())

# -----------------------------
# Step 5: Save to CSV (for ETL / ingestion)
# -----------------------------
df_chennai.to_csv("chennai_aqi_data.csv", index=False)
print("Data saved to chennai_aqi_data.csv")


                    datetime  pm25  pm10  no2  so2   co  o3  temperature  \
0 2025-12-18 20:32:54.007239    66    43  8.9  2.5  9.9   7         24.8   
1 2025-12-18 20:32:55.778325    66    43  8.9  2.5  9.9   7         24.8   
2 2025-12-18 20:32:57.717590    66    43  8.9  2.5  9.9   7         24.8   
3 2025-12-18 20:32:59.456744    66    43  8.9  2.5  9.9   7         24.8   
4 2025-12-18 20:33:01.254027    66    43  8.9  2.5  9.9   7         24.8   

   humidity  pressure  wind_speed  
0     74.24    731.09        0.46  
1     74.24    731.09        0.46  
2     74.24    731.09        0.46  
3     74.24    731.09        0.46  
4     74.24    731.09        0.46  
Data saved to chennai_aqi_data.csv
