### Import libraries

In [2]:
import requests
import pandas as pd
import sys, os
# Put the parent of the current working directory on the import path so the
# db_connection module imported on the next line can be resolved from there
# (presumably it lives one level above this notebook - confirm).
sys.path.append(os.path.dirname(os.getcwd()))
from db_connection import start_engine
from datetime import date

# Database handle that the upload cells pass to DataFrame.to_sql(con=...).
engine = start_engine()

### Function to fetch air pollution data for a given year


In [None]:
def air_pollution(year):
    """Download one calendar year of hourly air-quality values.

    The request goes to the module-level ``API_URL`` for the coordinates held
    in the module-level ``LAT`` and ``LON``; all three must be defined before
    this function is called.

    Parameters
    ----------
    year : int
        Year to download. The range starts on 1 January and ends on
        31 December, or on today's date when ``year`` is the current year.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp`` (converted with ``pd.to_datetime``), ``pm10``,
        ``pm2_5``, ``no2``, ``o3``, ``co`` and ``so2``; one row per entry of
        the hourly series in the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    KeyError
        If the JSON payload does not contain the expected ``hourly`` series.
    """
    today = date.today()
    # For the running year stop at today instead of 31 December.
    if year == today.year:
        end_date = today.strftime("%Y-%m-%d")
    else:
        end_date = f"{year}-12-31"

    params = {
        "latitude": LAT,
        "longitude": LON,
        "hourly": "pm10,pm2_5,nitrogen_dioxide,ozone,carbon_monoxide,sulphur_dioxide",
        "start_date": f"{year}-01-01",
        "end_date": end_date,
    }

    # A timeout keeps a stalled connection from blocking the kernel forever,
    # and raise_for_status() turns an error reply into an explicit HTTPError
    # instead of a confusing KeyError further down.
    response = requests.get(API_URL, params=params, timeout=60)
    response.raise_for_status()
    hourly = response.json()["hourly"]

    # Map the API's variable names onto the shorter column names used in the
    # database table.
    df = pd.DataFrame({
        "timestamp": hourly["time"],
        "pm10": hourly["pm10"],
        "pm2_5": hourly["pm2_5"],
        "no2": hourly["nitrogen_dioxide"],
        "o3": hourly["ozone"],
        "co": hourly["carbon_monoxide"],
        "so2": hourly["sulphur_dioxide"],
    })

    df["timestamp"] = pd.to_datetime(df["timestamp"])
    return df


### Fetch Hamburg air-quality data from the API for 2015-2025 and push it to the database

In [None]:
# Settings read by the fetch functions at call time.
LAT = 53.5753
LON = 10.0153 # Hamburg center coordinates
years = range(2015, 2026)  # end is exclusive: 2015 through 2025
API_URL = "https://air-quality-api.open-meteo.com/v1/air-quality"

years_data = [] # list to hold data for all years

for n in years:
    print(n)  # progress marker, one line per downloaded year
    # air_pollution() takes the year only; passing a second argument
    # raises TypeError.
    new_data = air_pollution(n)
    years_data.append(new_data) # append yearly data to the list

# Concatenate all yearly data into a single DataFrame, then overwrite the
# target table with it.
all_years_data = pd.concat(years_data, ignore_index=True)
all_years_data.to_sql(name="air_quality_hamburg_2015_2025", con=engine,
                       if_exists="replace", index=False) # push to SQL


2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025


  all_years_data = pd.concat(years_data, ignore_index=True)


### Fetch air pollution data for a specific year and district

In [None]:
def air_pollution_by_bezirk(year, bezirk, lat, lon):
    """Download one calendar year of hourly air-quality values for a district.

    The request goes to the module-level ``API_URL``, which must be defined
    before this function is called.

    Parameters
    ----------
    year : int
        Year to download. The range starts on 1 January and ends on
        31 December, or on today's date when ``year`` is the current year.
    bezirk : str
        District label written unchanged into the ``bezirk`` column.
    lat, lon : float
        Coordinates sent to the API as ``latitude`` / ``longitude``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp`` (converted with ``pd.to_datetime``), ``pm10``,
        ``pm2_5``, ``no2``, ``o3``, ``co``, ``so2``, ``year`` and ``bezirk``;
        one row per entry of the hourly series in the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    KeyError
        If the JSON payload does not contain the expected ``hourly`` series.
    """
    today = date.today()

    # For the running year stop at today instead of 31 December.
    if year == today.year:
        end_date = today.strftime("%Y-%m-%d")
    else:
        end_date = f"{year}-12-31"

    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "pm10,pm2_5,nitrogen_dioxide,ozone,carbon_monoxide,sulphur_dioxide",
        "start_date": f"{year}-01-01",
        "end_date": end_date,
    }

    # A timeout keeps a stalled connection from blocking the kernel forever,
    # and raise_for_status() turns an error reply into an explicit HTTPError
    # instead of a confusing KeyError further down.
    response = requests.get(API_URL, params=params, timeout=60)
    response.raise_for_status()
    hourly = response.json()["hourly"]

    df = pd.DataFrame({
        "timestamp": hourly["time"],
        "pm10": hourly["pm10"],
        "pm2_5": hourly["pm2_5"],
        "no2": hourly["nitrogen_dioxide"],
        "o3": hourly["ozone"],
        "co": hourly["carbon_monoxide"],
        "so2": hourly["sulphur_dioxide"],
    })

    # Parse the timestamps the same way air_pollution() does, so both tables
    # store a real datetime column rather than text.
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    # Tag every row so frames for different years/districts can be stacked.
    df["year"] = year
    df["bezirk"] = bezirk

    return df

### Fetch air-quality data for Hamburg's districts (2023-2025) and push it to the database

In [None]:
# Approximate coordinates of air quality stations in each Bezirk,
# stored as name -> (latitude, longitude).
stations = {
    "Hamburg-Mitte": (53.5450, 10.0150),
    "Altona": (53.5560, 9.8810),
    "Eimsbüttel": (53.5890, 9.9560),
    "Hamburg-Nord": (53.6050, 10.0250),
    "Wandsbek": (53.6050, 10.1200),
    "Bergedorf": (53.4860, 10.2160),
    "Harburg": (53.4600, 9.9830),
}

years = range(2023, 2026)
all_data = []

# Walk the districts in dictionary order; for each one announce it and then
# collect one frame per year.
for bezirk, coordinates in stations.items():
    lat, lon = coordinates
    print(f"Downloading {bezirk}")
    all_data.extend(
        air_pollution_by_bezirk(year, bezirk, lat, lon) for year in years
    )

# Stack every district/year frame and overwrite the target table with it.
final_df = pd.concat(all_data, ignore_index=True)
final_df.to_sql(
    name="air_quality_bezirk_2023_2025",
    con=engine,
    if_exists="replace",
    index=False,
)


Downloading Hamburg-Mitte
Downloading Altona
Downloading Eimsbüttel
Downloading Hamburg-Nord
Downloading Wandsbek
Downloading Bergedorf
Downloading Harburg
