In [None]:
import os
import requests
import json
import pandas as pd
from datetime import date, datetime, timedelta
import sys
import hopsworks

In [None]:
os.makedirs("logs", exist_ok=True)
log_filename = f"logs/pipeline_log_{datetime.now():%Y-%m-%d}.txt"
sys.stdout = open(log_filename, "a")   # append mode so logs accumulate
sys.stderr = sys.stdout                # capture errors too

In [None]:
# Load the per-city configuration that drives the rest of the pipeline.
# JSON is UTF-8 by specification; stating the encoding avoids mis-decoding non-ASCII
# city/station names on platforms whose default locale encoding is something else.
with open("../city_config/gothenburg_femman.json", encoding="utf-8") as f:
    city_config = json.load(f)

CITY_NAME = city_config["city_name"]
LAT = city_config["city_lat"]
LON = city_config["city_lon"]
SENSOR = city_config["sensors"][0]  # only one station
FG_VERSIONS = city_config["fg_versions"]

# The pipeline ingests exactly one day: the day before the run date (local clock).
YESTERDAY = date.today() - timedelta(days=1)
START_DATE = END_DATE = YESTERDAY.isoformat()

In [None]:
# Hourly PM2.5 for yesterday only.
# Bounded with start_date/end_date (same as the weather request below) instead of
# past_days=1: past_days only prepends a day and still returns today plus the default
# forecast horizon, which would be ingested as if it were yesterday's data.
yesterday_quality_url = (
    "https://air-quality-api.open-meteo.com/v1/air-quality?"
    f"latitude={LAT}&longitude={LON}"
    f"&start_date={START_DATE}"
    f"&end_date={END_DATE}"
    "&hourly=pm2_5"
)

# Hourly weather for yesterday from the historical archive endpoint.
# NOTE(review): the archive may lag for very recent dates -- confirm that yesterday's
# rows are populated rather than null.
yesterday_weather_url = (
    "https://archive-api.open-meteo.com/v1/archive?"
    f"latitude={LAT}&longitude={LON}"
    f"&start_date={START_DATE}"
    f"&end_date={END_DATE}"
    "&hourly=wind_speed_100m,wind_direction_100m,wind_gusts_10m,wind_direction_10m,wind_speed_10m,temperature_2m"
)

# Hourly weather forecast; no date bounds are passed, so the API's default horizon applies.
weather_prediction_url = (
    "https://api.open-meteo.com/v1/forecast?"
    f"latitude={LAT}&longitude={LON}"
    "&hourly=temperature_2m,wind_speed_10m,wind_direction_10m,wind_gusts_10m"
)

In [None]:
print("Fetching yesterday's air quality data...")
# requests has no default timeout; without one a stalled connection would block the
# pipeline forever. 30 s is a generous bound for a single small JSON response.
aq_response = requests.get(yesterday_quality_url, timeout=30)
aq_response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
aq_data = aq_response.json()

# The API returns parallel arrays under "hourly"; pair timestamps with PM2.5 readings.
air_df = pd.DataFrame({
    "datetime": aq_data["hourly"]["time"],
    "pm2_5": aq_data["hourly"]["pm2_5"]
})
air_df["datetime"] = pd.to_datetime(air_df["datetime"])
print(f"Air quality data shape: {air_df.shape}")

In [None]:
print("Downloading yesterday's weather data...")
# requests has no default timeout; bound the call so a stalled connection cannot hang the run.
w_response = requests.get(yesterday_weather_url, timeout=30)
w_response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
w_data = w_response.json()

weather_df = pd.DataFrame(w_data["hourly"])
# Expose the timestamp as "datetime": the "weather" feature group declares "datetime" as
# both primary key and event time, so a column named "time" would not match its schema.
weather_df["datetime"] = pd.to_datetime(weather_df["time"])
weather_df = weather_df.drop(columns=["time"])
print(f"Weather data shape: {weather_df.shape}")

In [None]:
print("Downloading 7 day weather forecast...")
# requests has no default timeout; bound the call so a stalled connection cannot hang the run.
f_response = requests.get(weather_prediction_url, timeout=30)
f_response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
f_data = f_response.json()

forecast_raw = pd.DataFrame(f_data["hourly"])
print(f"Forecast data shape: {forecast_raw.shape}")

# Standardize the timestamp column name: parse "time" once into "datetime" and drop the
# original. Building a new frame (rather than mutating in place) keeps the cell safe to re-run.
forecast_df = (
    forecast_raw
    .assign(datetime=pd.to_datetime(forecast_raw["time"]))
    .drop(columns=["time"])
)

In [None]:
print("Connecting to Hopsworks...")
# The API key comes from the environment so no credential is stored in the notebook.
# When the variable is unset this is None, which is handed to login() unchanged.
hopsworks_key = os.environ.get("HOPSWORKS_API_KEY")
project = hopsworks.login(api_key_value=hopsworks_key)
# Feature store handle used by the feature-group cell that follows.
fs = project.get_feature_store()

In [None]:
print("Updating feature groups...")

# Every insert below passes wait_for_job=False, so the calls return without waiting for
# the ingestion jobs to finish.

# Hourly PM2.5 air quality data, keyed and time-indexed on "datetime".
aqi_fg = fs.get_or_create_feature_group(
    name="air_quality",
    description=f"Air Quality characteristics of each day for {CITY_NAME} ({SENSOR['display_name']})",
    version=FG_VERSIONS["air_quality"],
    primary_key=["datetime"],
    event_time="datetime",
)
aqi_fg.insert(air_df, write_options={"wait_for_job": False})

# Weather data, containing wind_speed_100m, wind_direction_100m, wind_gusts_10m,
# wind_direction_10m, wind_speed_10m, temperature_2m.
weather_fg = fs.get_or_create_feature_group(
    name="weather",
    version=FG_VERSIONS["weather"],
    description=f"Historical weather data for {CITY_NAME}",
    primary_key=["datetime"],
    event_time="datetime",
)
weather_fg.insert(weather_df, write_options={"wait_for_job": False})

# Weather prediction for the next 7 days (default), containing temperature_2m,
# wind_speed_10m, wind_direction_10m, wind_gusts_10m.
# The forecast frame carries its timestamp in "datetime" (its "time" column is dropped
# when the forecast is downloaded), so the key must name "datetime"; "time" would refer
# to a column that no longer exists in the frame being inserted.
forecast_fg = fs.get_or_create_feature_group(
    name="weather_forecast_features",
    version=1,
    primary_key=["datetime"],
    description=f"7-day weather forecast (hourly) for {CITY_NAME}",
)
forecast_fg.insert(forecast_df, write_options={"wait_for_job": False})

print("All feature groups updated successfully!")