<a href="https://colab.research.google.com/github/Nasif17/Bangladesh_Air_Quality/blob/main/Bangladesh_Air_Quality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task
Modify the provided Python code to fetch historical air quality and weather data using the Open-Meteo API instead of the OpenWeatherMap API.

## Understand the Open-Meteo API

### Subtask:
Research the Open-Meteo API documentation to understand how to request historical air quality and weather data, including the required parameters (like latitude, longitude, start date, end date, and specific variables) and the response format.


## Modify data fetching functions

### Subtask:
Rewrite the `get_air_data` and `get_weather_data` functions (or create new functions) to make requests to the Open-Meteo API endpoints for historical air quality and weather data. This will involve changing the API endpoint URLs and adapting the parameters passed in the requests.


**Reasoning**:
The previous code failed because the OpenWeatherMap API rejected the requests as unauthorized. The subtask is to replace the OpenWeatherMap API calls with Open-Meteo API calls by rewriting the data-fetching functions. This requires defining a new function that handles the Open-Meteo API requests for both weather and air quality data, removing the old functions, and updating the main loop to call the new function and process its different response structure.



In [None]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta, date

# One sampling point per administrative division of Bangladesh. Each point
# is the division's namesake city, so "division" and "city" carry the same
# name; "lat"/"lon" are the coordinates sent to the API.
divisions = [
    {"division": name, "city": name, "lat": lat, "lon": lon}
    for name, lat, lon in (
        ("Dhaka", 23.8103, 90.4125),
        ("Chattogram", 22.3569, 91.7832),
        ("Khulna", 22.8456, 89.5403),
        ("Rajshahi", 24.3745, 88.6042),
        ("Sylhet", 24.8949, 91.8687),
        ("Barisal", 22.7010, 90.3535),
        ("Rangpur", 25.7439, 89.2752),
        ("Mymensingh", 24.7471, 90.4203),
    )
]

# Collection window: from the fixed start date up to midnight at the start
# of today (fromordinal() yields a datetime with a 00:00:00 time part).
end_date = datetime.fromordinal(date.today().toordinal())
start_date = datetime(year=2024, month=1, day=1)  # example start of the history
delta = timedelta(hours=24)  # chunk size per request: one day (kept small for testing)

# Output column -> Open-Meteo hourly variable, in the order the columns
# should appear in each result row.
_WEATHER_COLUMNS = {
    "temp_c": "temperature_2m",
    "humidity_pct": "relative_humidity_2m",
    "pressure_hpa": "surface_pressure",
    "wind_speed_mps": "wind_speed_10m",
}
_AIR_QUALITY_COLUMNS = {
    "aqi": "european_aqi",
    "pm25": "pm2_5",
    "pm10": "pm10",
    "no2": "nitrogen_dioxide",
    "o3": "ozone",
    "co": "carbon_monoxide",
    "so2": "sulphur_dioxide",
}


def _fetch_hourly(http, url, params, timeout):
    """GET *url* and return the ``hourly`` section of its JSON body (or {})."""
    response = http.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get("hourly") or {}


def _value_at(series, index):
    """Return ``series[index]``, or None when the series is absent or too short."""
    if series is None or index >= len(series):
        return None
    return series[index]


def get_open_meteo_data(lat, lon, start_date_str, end_date_str, session=None, timeout=30):
    """Fetch hourly weather and air-quality data from Open-Meteo and merge them.

    Two requests are made (historical weather archive and air-quality API)
    for the same location and inclusive date range, both in UTC. Rows are
    produced for every weather timestamp that also exists in the air-quality
    response; weather hours without an air-quality match are skipped.

    Args:
        lat: Latitude of the location in decimal degrees.
        lon: Longitude of the location in decimal degrees.
        start_date_str: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date_str: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.
        session: Optional object with a requests-compatible
            ``get(url, params=..., timeout=...)`` method, e.g. a
            ``requests.Session`` for connection reuse. Defaults to the
            ``requests`` module itself.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts, one per matched hour, with the keys
        ``timestamp_utc``, ``temp_c``, ``humidity_pct``, ``pressure_hpa``,
        ``wind_speed_mps``, ``aqi``, ``pm25``, ``pm10``, ``no2``, ``o3``,
        ``co`` and ``so2``. A value is None when the API did not supply it.
        An empty list is returned if either request fails (the error is
        printed, not raised).
    """
    weather_url = "https://archive-api.open-meteo.com/v1/archive"
    air_quality_url = "https://air-quality-api.open-meteo.com/v1/air-quality"

    http = requests if session is None else session

    shared_params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start_date_str,
        "end_date": end_date_str,
        "timezone": "UTC",
    }
    weather_params = {
        **shared_params,
        # A list value is sent as repeated "hourly=" query parameters.
        "hourly": list(_WEATHER_COLUMNS.values()),
        # Open-Meteo reports wind speed in km/h unless told otherwise; the
        # output column is wind_speed_mps, so request metres per second.
        "wind_speed_unit": "ms",
    }
    air_quality_params = {
        **shared_params,
        "hourly": list(_AIR_QUALITY_COLUMNS.values()),
    }

    try:
        weather_data = _fetch_hourly(http, weather_url, weather_params, timeout)
        air_quality_data = _fetch_hourly(http, air_quality_url, air_quality_params, timeout)
    except requests.exceptions.RequestException as e:
        print(f"API request failed: {e}")
        return []

    # Index air-quality timestamps once so each weather hour is matched in
    # O(1). setdefault keeps the first occurrence of a duplicated timestamp.
    aq_index_by_time = {}
    for position, stamp in enumerate(air_quality_data.get("time") or []):
        aq_index_by_time.setdefault(stamp, position)

    results = []
    for i, timestamp in enumerate(weather_data.get("time") or []):
        aq_index = aq_index_by_time.get(timestamp)
        if aq_index is None:
            # No air-quality reading for this hour: skip the entry.
            continue

        # Timestamps come back without an offset; the request asked for UTC,
        # so make that explicit.
        row = {"timestamp_utc": timestamp + "+00:00"}
        for column, variable in _WEATHER_COLUMNS.items():
            row[column] = _value_at(weather_data.get(variable), i)
        for column, variable in _AIR_QUALITY_COLUMNS.items():
            row[column] = _value_at(air_quality_data.get(variable), aq_index)
        results.append(row)

    return results

# Collect every division's history into one flat list of row dicts.
# The date range is walked in consecutive windows of length `delta`; each
# window is fetched with a single get_open_meteo_data() call.
final_results = []

for loc in divisions:
    print(f"Collecting for {loc['division']}...")
    current_start = start_date
    while current_start <= end_date:
        # The API treats end_date as inclusive, so the window stops one hour
        # short of the next window's start (and never runs past end_date).
        # Only the calendar date of each bound is sent to the API.
        current_end = min(current_start + delta - timedelta(hours=1), end_date)
        start_date_str = f"{current_start:%Y-%m-%d}"
        end_date_str = f"{current_end:%Y-%m-%d}"

        # Defensive stop: never request a window whose start is after its end.
        if current_end < current_start:
            break

        data_list = get_open_meteo_data(
            loc['lat'], loc['lon'], start_date_str, end_date_str
        )

        # Tag each fetched row with the location it belongs to.
        for row in data_list or []:
            row.update(division=loc["division"], city=loc["city"])
            final_results.append(row)

        print(f"  {start_date_str} to {end_date_str}: {len(data_list)} records.")
        current_start += delta  # advance to the next window
        time.sleep(0.1)  # brief pause between requests


# Export the collected rows: fix the column layout, write the CSV, and
# show a preview.

# Column layout of the output file: identifiers first, then weather
# readings, then air-quality readings.
desired_column_order = (
    ["timestamp_utc", "division", "city"]
    + ["temp_c", "humidity_pct", "pressure_hpa", "wind_speed_mps"]
    + ["aqi", "pm25", "pm10", "no2", "o3", "co", "so2"]
)

# Build the frame and force the layout in one step; reindex() also creates
# any listed column that is missing from the data.
df = pd.DataFrame(final_results).reindex(columns=desired_column_order)

df.to_csv("bangladesh_air_weather_2025.csv", index=False)
print("Saved as bangladesh_air_weather_2025.csv")
display(df.head())