In [1]:
import json
import os
import time
from datetime import datetime, timedelta, timezone
from urllib.parse import quote

import requests

# Configuration
# NOTE(security): an API key committed to source control should be treated as
# leaked. Prefer supplying it through the OPENWEATHER_API_KEY environment
# variable; the literal below is kept only as a backward-compatible fallback
# and should be rotated.
AQI_API_KEY = os.environ.get("OPENWEATHER_API_KEY", "274f03e61fad9fd40610f133518aea20")
CITY = "Karachi"
OUTPUT_FILE = "historical_aqi_and_weather_data.jsonl"  # Using JSON Lines format

# Geocoding URL
# The city name is percent-encoded so names containing spaces or non-ASCII
# characters still produce a valid query string, and HTTPS is used so the
# API key is not sent in clear text.
GEO_URL = (
    "https://api.openweathermap.org/geo/1.0/direct"
    f"?q={quote(CITY)}&limit=1&appid={AQI_API_KEY}"
)

def fetch_coordinates():
    """Look up the latitude and longitude of CITY via the geocoding URL.

    The request is repeated for as long as the service answers with
    HTTP 429, sleeping between attempts for the number of seconds given
    in the ``Retry-After`` header (60 seconds when the header is missing
    or is not a plain integer).

    Returns:
        A ``(lat, lon)`` tuple taken from the first geocoding result.

    Raises:
        Exception: if the result list is empty ("City not found.") or the
            service answers with any status other than 200 or 429.
            Network errors and timeouts raised by ``requests`` propagate
            unchanged.
    """
    default_wait = 60

    while True:
        # A timeout prevents a stalled connection from hanging the script.
        response = requests.get(GEO_URL, timeout=30)
        status = response.status_code

        if status == 429:
            # Retry-After may also be an HTTP-date; fall back to the default
            # wait instead of crashing on int().
            try:
                retry_after = max(0, int(response.headers.get("Retry-After", default_wait)))
            except (TypeError, ValueError):
                retry_after = default_wait
            print(f"Rate limit hit. Retrying after {retry_after} seconds...")
            time.sleep(retry_after)
            continue

        if status != 200:
            raise Exception(f"Failed to fetch coordinates: {status}")

        data = response.json()
        if not data:
            raise Exception("City not found.")
        return data[0]['lat'], data[0]['lon']

# Hourly variables requested from the Open-Meteo archive. The same tuple
# builds the query string and the per-hour record, so the two cannot drift.
_WEATHER_FIELDS = (
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "wind_speed_10m",
    "wind_direction_10m",
    "surface_pressure",
    "cloudcover",
    "visibility",
    "dew_point_2m",
    "apparent_temperature",
    "shortwave_radiation",
    "et0_fao_evapotranspiration",
    "soil_temperature_0cm",
    "soil_moisture_0_to_1cm",
)

# Seconds to wait when a 429 response carries no usable Retry-After header.
_DEFAULT_RETRY_AFTER = 60
# Per-request timeout in seconds, so a stalled connection cannot hang forever.
_REQUEST_TIMEOUT = 30


def _retry_after_seconds(*responses):
    """Return the longest wait requested by any rate-limited (429) response."""
    waits = []
    for response in responses:
        if response.status_code != 429:
            continue
        try:
            waits.append(max(0, int(response.headers.get("Retry-After", _DEFAULT_RETRY_AFTER))))
        except (TypeError, ValueError):
            # Retry-After may be an HTTP-date rather than a number of seconds.
            waits.append(_DEFAULT_RETRY_AFTER)
    return max(waits, default=_DEFAULT_RETRY_AFTER)


def _extract_hourly_weather(weather_data_raw, hour_index, timestamp):
    """Pick the values at hour_index out of an Open-Meteo hourly payload."""
    try:
        hourly = weather_data_raw["hourly"]
        weather_data = {name: hourly[name][hour_index] for name in _WEATHER_FIELDS}
    except (IndexError, KeyError):
        print(f"Weather data not available for timestamp {timestamp}")
        return {"timestamp": timestamp}
    weather_data["timestamp"] = timestamp
    return weather_data


def fetch_historical_data(lat, lon, timestamp):
    """Fetch air-pollution and weather data for the hour starting at timestamp.

    Two services are queried: the OpenWeatherMap air-pollution history
    endpoint for the window ``[timestamp, timestamp + 3600]`` and the
    Open-Meteo archive for the UTC calendar day containing ``timestamp``.
    From the archive response only the entry whose index equals the UTC
    hour of ``timestamp`` is kept.

    While either service answers with HTTP 429 the function sleeps and
    retries. Only responses that have not yet succeeded are requested
    again, and the wait is taken from whichever response was rate limited.

    Args:
        lat: Latitude passed to both services.
        lon: Longitude passed to both services.
        timestamp: Unix time in seconds (integer).

    Returns:
        ``{"aqi": <decoded AQI JSON>, "weather": <dict>}`` on success, where
        the weather dict holds one value per requested variable plus
        ``"timestamp"`` (or only ``"timestamp"`` when the hourly data is
        missing). ``None`` when either service fails with a status other
        than 200 or 429.
    """
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp.
    moment = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    date_str = moment.strftime('%Y-%m-%d')
    hour_index = moment.hour

    # HTTPS keeps the API key in the query string off the wire in clear text.
    aqi_url = (
        "https://api.openweathermap.org/data/2.5/air_pollution/history"
        f"?lat={lat}&lon={lon}&start={timestamp}&end={timestamp+3600}&appid={AQI_API_KEY}"
    )
    weather_url = (
        "https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}&start_date={date_str}&end_date={date_str}"
        f"&hourly={','.join(_WEATHER_FIELDS)}"
        "&timezone=UTC"
    )

    aqi_response = None
    weather_response = None

    while True:
        # Do not re-download a response that already succeeded on an
        # earlier pass through the retry loop.
        if aqi_response is None or aqi_response.status_code != 200:
            aqi_response = requests.get(aqi_url, timeout=_REQUEST_TIMEOUT)
        if weather_response is None or weather_response.status_code != 200:
            weather_response = requests.get(weather_url, timeout=_REQUEST_TIMEOUT)

        statuses = (aqi_response.status_code, weather_response.status_code)

        if statuses == (200, 200):
            return {
                "aqi": aqi_response.json(),
                "weather": _extract_hourly_weather(weather_response.json(), hour_index, timestamp),
            }

        if 429 in statuses:
            retry_after = _retry_after_seconds(aqi_response, weather_response)
            print(f"Rate limit hit for timestamp {timestamp}. Retrying after {retry_after} seconds...")
            time.sleep(retry_after)
            continue

        print(f"Failed to fetch data for timestamp {timestamp}: AQI - {aqi_response.status_code}, Weather - {weather_response.status_code}")
        return None

# Get coordinates
# Resolve the city once up front; nothing else can run without a location.
try:
    latitude, longitude = fetch_coordinates()
except Exception as e:
    print(f"Error fetching coordinates: {e}")
    # `exit()` is injected by the `site` module for interactive use and is
    # not guaranteed to exist; raising SystemExit is the portable equivalent.
    raise SystemExit(1) from None
else:
    print(f"Coordinates for {CITY}: Latitude {latitude}, Longitude {longitude}")

# Backfill window: from 180 days ago up to now, walked in one-hour steps.
# NOTE(review): the steps inherit the minute/second offset of datetime.now(),
# so the timestamps are not aligned to hour boundaries - confirm this is
# intended.
end_date = datetime.now()
start_date = end_date - timedelta(days=180)
current_date = start_date
hour_step = timedelta(hours=1)

# Make sure the output file exists (empty) before the first append.
if not os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, "w") as f:
        pass

# Fetch one record per hour and append it as a single JSON line.
while current_date <= end_date:
    timestamp = int(current_date.timestamp())
    try:
        historical_data = fetch_historical_data(latitude, longitude, timestamp)
        if not historical_data:
            print(f"⚠️ Skipped data for {current_date}")
        else:
            json_line = json.dumps(historical_data) + "\n"
            with open(OUTPUT_FILE, "a") as f:
                f.write(json_line)
            print(f"✅ Fetched and saved data for {current_date}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next hour.
        print(f"❌ Error at {current_date}: {e}")

    current_date += hour_step
    time.sleep(1)  # pause between consecutive API calls

print("✅ Finished fetching and saving historical data.")


Coordinates for Karachi: Latitude 24.8546842, Longitude 67.0207055
✅ Fetched and saved data for 2024-10-22 18:29:19.071848
✅ Fetched and saved data for 2024-10-22 19:29:19.071848
✅ Fetched and saved data for 2024-10-22 20:29:19.071848
✅ Fetched and saved data for 2024-10-22 21:29:19.071848
✅ Fetched and saved data for 2024-10-22 22:29:19.071848
✅ Fetched and saved data for 2024-10-22 23:29:19.071848
✅ Fetched and saved data for 2024-10-23 00:29:19.071848
✅ Fetched and saved data for 2024-10-23 01:29:19.071848
✅ Fetched and saved data for 2024-10-23 02:29:19.071848
✅ Fetched and saved data for 2024-10-23 03:29:19.071848
✅ Fetched and saved data for 2024-10-23 04:29:19.071848
✅ Fetched and saved data for 2024-10-23 05:29:19.071848
✅ Fetched and saved data for 2024-10-23 06:29:19.071848
✅ Fetched and saved data for 2024-10-23 07:29:19.071848
✅ Fetched and saved data for 2024-10-23 08:29:19.071848
✅ Fetched and saved data for 2024-10-23 09:29:19.071848
✅ Fetched and saved data for 2024-10-

KeyboardInterrupt: 