In [1]:
import sys
from pathlib import Path
import warnings

warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    if "google.colab" in str(get_ipython()):
        return True
    return False

def clone_repository() -> None:
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

if is_google_colab():
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    if root_dir.parts[-1:] == ("src",):
        root_dir = Path(*root_dir.parts[:-1])
    if root_dir.parts[-1:] == ("airquality",):
        root_dir = Path(*root_dir.parts[:-1])
    if root_dir.parts[-1:] == ("notebooks",):
        root_dir = Path(*root_dir.parts[:-1])
    root_dir = str(root_dir)
    print("Local environment")

print(f"Root dir: {root_dir}")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

from utils import config

settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Root dir: /Users/max/Repos/KTH/pm25-forecast-openmeteo-aqicn
Added the following directory to the PYTHONPATH: /Users/max/Repos/KTH/pm25-forecast-openmeteo-aqicn
HopsworksSettings initialized!


In [None]:
import datetime
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
import hopsworks
import json
from utils import airquality
import os

warnings.filterwarnings("ignore")

In [3]:
project = hopsworks.login(engine="python")
fs = project.get_feature_store()

secrets = hopsworks.get_secrets_api()
AQICN_API_KEY = secrets.get_secret("AQICN_API_KEY").value

# Retrieve feature groups
air_quality_fg = fs.get_feature_group(
    name="air_quality_all",
    version=1,
)
weather_fg = fs.get_feature_group(
    name="weather_all",
    version=1,
)

2025-11-17 11:39:26,020 INFO: Initializing external client
2025-11-17 11:39:26,020 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-11-17 11:39:27,756 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279179


Set SENSOR_CSV_FILE in .env with the relative path to a sensor to process it, or leave it unset to process all sensors in the `data` folder

In [4]:
sensor_csv_file = getattr(settings, 'SENSOR_CSV_FILE', None)

if sensor_csv_file:
    # Read one secret for single sensor mode
    _, _, _, _, _, sensor_id = airquality.read_sensor_data(sensor_csv_file)
    secret_name = f"SENSOR_LOCATION_JSON_{sensor_id}"
    location_str = secrets.get_secret(secret_name).value
    locations = {sensor_id: json.loads(location_str)}
else:
    # Read all individual secrets in batch mode
    all_secrets = secrets.get_secrets()
    locations = {}
    for secret in all_secrets:
        if secret.name.startswith("SENSOR_LOCATION_JSON_"):
            sensor_id = secret.name.replace("SENSOR_LOCATION_JSON_", "")
            location_str = secrets.get_secret(secret.name).value
            if location_str:
                locations[sensor_id] = json.loads(location_str)


## Helper Methods

In [None]:
def predict_for_sensor(sensor_weather, sensor_historical, location, model, model_feature_names):
    combined = pd.concat([
        sensor_historical[["date", "sensor_id", "pm25"]],
        sensor_weather[["date", "sensor_id"]].assign(pm25=None)
    ], ignore_index=True).sort_values(["sensor_id", "date"])
    
    combined = airquality.add_rolling_window_feature(combined, window_days=3, column="pm25", new_column="pm25_rolling_3d")
    combined = airquality.add_lagged_features(combined, column="pm25", lags=[1, 2, 3])
    combined = airquality.add_nearby_sensor_feature(combined, locations)
    feature_engineering_cols = [
        "pm25_rolling_3d",
        "pm25_lag_1d",
        "pm25_lag_2d",
        "pm25_lag_3d",
        "pm25_nearby_avg",
    ]
    available_feature_engineering_cols = [
        col for col in feature_engineering_cols if col in combined.columns
    ]
    if available_feature_engineering_cols:
        sensor_weather = sensor_weather.merge(
            combined[["date", *available_feature_engineering_cols]],
            on="date",
            how="left",
        )
    
    available_features = [col for col in model_feature_names if col in sensor_weather.columns]
    sensor_weather['predicted_pm25'] = model.predict(sensor_weather[available_features])
    sensor_weather['street'] = location['street']
    sensor_weather['city'] = location['city']
    sensor_weather['country'] = location['country']
    return sensor_weather


In [10]:
# Retrieve feature groups
air_quality_fg = fs.get_feature_group(
    name="air_quality_all",
    version=1,
)
weather_fg = fs.get_feature_group(
    name="weather_all",
    version=1,
)

today = datetime.datetime.now().replace(tzinfo=None)
batch_data = weather_fg.filter(weather_fg.date >= today).read()
batch_data["date"] = pd.to_datetime(batch_data["date"]).dt.tz_localize(None)
print(batch_data.info())


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.14s) 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 9 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   date                         75 non-null     datetime64[us]
 1   temperature_2m_mean          75 non-null     float32       
 2   precipitation_sum            75 non-null     float32       
 3   wind_speed_10m_max           75 non-null     float32       
 4   wind_direction_10m_dominant  75 non-null     float32       
 5   city                         75 non-null     object        
 6   sensor_id                    75 non-null     object        
 7   latitude                     75 non-null     float64       
 8   longitude                    75 non-null     float64       
dtypes: datetime64[us](1), float32(4), float64(2), object(2)
memory usage: 4.2+ KB
None


In [11]:
mr = project.get_model_registry()

MODEL_NAME_TEMPLATE = "air_quality_xgboost_model_{sensor_id}"

retrieved_models = {}
saved_model_dirs = {}
skipped_sensors = []

for sensor_id in locations.keys():
    model_name = MODEL_NAME_TEMPLATE.format(sensor_id=sensor_id)
    retrieved_model = None
    resolved_version = None

    available_models = mr.get_models(name=model_name)
    if available_models:
        retrieved_model = max(available_models, key=lambda model: model.version)
        resolved_version = retrieved_model.version


    saved_model_dir = retrieved_model.download()

    retrieved_models[sensor_id] = retrieved_model
    saved_model_dirs[sensor_id] = saved_model_dir

fv = next(iter(retrieved_models.values())).get_feature_view()

Downloading: 0.000%|          | 0/539562 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/21691 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/129890 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/559312 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/21775 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/121844 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/552835 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/21757 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/90655 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/516859 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/21920 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/113796 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/583212 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/31925 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/121687 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/588455 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/22099 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/120107 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/568234 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/31111 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/114076 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/583709 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/30789 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/121219 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/567283 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/30830 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/118847 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/560503 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/31976 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/60369 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/466146 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/20922 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/123091 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/267692 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/31549 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/128281 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/519235 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/23889 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/49599 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/589068 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/21937 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/111872 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... DONE

Downloading: 0.000%|          | 0/534513 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/31151 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/112842 elapsed<00:00 remaining<?

2025-11-17 11:44:06,996 INFO: There is no parent information


In [20]:
historical_start = today - datetime.timedelta(days=4)

try:
    historical_pm25 = air_quality_fg.read()
    historical_pm25["date"] = pd.to_datetime(historical_pm25["date"]).dt.tz_localize(None)
    historical_pm25 = historical_pm25[(historical_pm25["date"] >= historical_start) & (historical_pm25["date"] < today)][["date", "sensor_id", "pm25"]]
except:
    historical_pm25 = pd.DataFrame()

all_predictions = []
pred_file_paths = []

for sensor_id, location in locations.items():
    saved_model_dir = saved_model_dirs.get(sensor_id)
    model_path = f"{saved_model_dir}/model.json"

    sensor_weather = batch_data[batch_data["sensor_id"] == sensor_id].copy()
    sensor_historical = historical_pm25[historical_pm25["sensor_id"] == sensor_id] if not historical_pm25.empty else pd.DataFrame()

    retrieved_xgboost_model = XGBRegressor()
    retrieved_xgboost_model.load_model(model_path)
    booster = retrieved_xgboost_model.get_booster()
    model_feature_names = booster.feature_names if booster is not None else sensor_weather.columns.tolist()

    sensor_predictions = predict_for_sensor(
        sensor_weather,
        sensor_historical,
        location,
        retrieved_xgboost_model,
        model_feature_names,
    )
    sensor_predictions["model_source"] = saved_model_dir
    all_predictions.append(sensor_predictions)

    pred_file_path = f"{saved_model_dir}/forecast.png"
    plt = airquality.plot_air_quality_forecast(location['city'], location['street'], sensor_predictions.copy(), pred_file_path)
    plt.close()
    pred_file_paths.append(pred_file_path)

batch_data = pd.concat(all_predictions, ignore_index=True).sort_values(['sensor_id', 'date']) if all_predictions else pd.DataFrame()
if not batch_data.empty:
    batch_data['days_before_forecast_day'] = batch_data.groupby('sensor_id').cumcount() + 1
else:
    batch_data['days_before_forecast_day'] = []        

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.91s) 


In [21]:
monitor_fg = fs.get_or_create_feature_group(
    name='aq_predictions',
    description='Air Quality prediction monitoring',
    version=1,
    primary_key=['sensor_id','date','days_before_forecast_day'],
    event_time="date"
)

In [22]:
monitor_fg.insert(batch_data, wait=True)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1279179/fs/1265797/fg/1730901


Uploading Dataframe: 100.00% |██████████| Rows 75/75 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: aq_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1279179/jobs/named/aq_predictions_1_offline_fg_materialization/executions
2025-11-17 11:46:40,575 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-11-17 11:46:43,768 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-11-17 11:48:13,358 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-11-17 11:48:13,534 INFO: Waiting for log aggregation to finish.
2025-11-17 11:48:22,177 INFO: Execution finished successfully.


(Job('aq_predictions_1_offline_fg_materialization', 'SPARK'), None)

In [23]:
monitoring_df = monitor_fg.filter(monitor_fg.days_before_forecast_day == 1).read()
monitoring_df

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.11s) 


Unnamed: 0,date,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,city,sensor_id,latitude,longitude,pm25_rolling_3d,pm25_lag_1d,pm25_lag_2d,pm25_lag_3d,pm25_nearby_avg,predicted_pm25,street,country,model_source,days_before_forecast_day
0,2025-11-18 00:00:00+00:00,2.55,1.2,5.692099,145.304779,Majorna-Linné,60541,57.696,11.95,17.0,17.0,,,,21.518194,Prinsgatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
1,2025-11-18 00:00:00+00:00,2.15,1.2,5.692099,145.304779,Centrum,69628,57.681718,11.970109,4.0,4.0,,,,5.876044,yster Estrids Gata,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
2,2025-11-18 00:00:00+00:00,4.45,0.0,4.072935,315.000092,Lindome,404209,57.601655,12.100873,6.0,6.0,,,,3.217386,Högkullevägen,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
3,2025-11-18 00:00:00+00:00,2.6,1.2,5.692099,145.304779,Gothenburg,88372,57.648728,12.008352,14.0,14.0,,,,13.551297,Ridlärargatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
4,2025-11-18 00:00:00+00:00,2.35,1.2,5.692099,145.304779,Norra Hisingen,61714,57.75,11.97,5.0,5.0,,,,8.531895,Nyhemsgatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
5,2025-11-18 00:00:00+00:00,2.5,1.2,5.692099,145.304779,Majorna-Linné,60535,57.692,11.958,8.0,8.0,,,,2.576761,Annedal,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
6,2025-11-18 00:00:00+00:00,2.55,1.2,5.692099,145.304779,Örgryte-Härlanda,65146,57.722,12.012,6.0,6.0,,,,8.077211,Landerigatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
7,2025-11-18 00:00:00+00:00,2.55,1.2,5.692099,145.304779,Mölndal,79750,57.67,12.006,2.0,2.0,,,,3.396012,Berghemsgatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
8,2025-11-18 00:00:00+00:00,2.2,1.2,5.692099,145.304779,Västra Göteborg,154549,57.678,11.91,8.0,8.0,,,,12.57253,Järnbrottsgatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1
9,2025-11-18 00:00:00+00:00,2.55,1.2,5.692099,145.304779,Gothenburg,112672,57.66,12.0,51.0,51.0,,,,20.58746,Bågskyttegatan,Sweden,/var/folders/23/k2s63wtx6qjbl44f5xk8h4h80000gn...,1


In [24]:
air_quality_df = air_quality_fg.read()
air_quality_df

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.99s) 


Unnamed: 0,date,pm25,sensor_id,street,city,country,feed_url,pm25_rolling_3d,pm25_lag_1d,pm25_lag_2d,pm25_lag_3d,pm25_nearby_avg
0,2022-03-05 00:00:00+00:00,2.38,194215,Norra Sävviksvägen,Torslanda,Sweden,https://api.waqi.info/feed/A194215/,1.613333,0.90,1.40,2.54,1.126667
1,2024-04-28 00:00:00+00:00,1.83,59095,Eklanda Slätt,Mölndal,Sweden,https://api.waqi.info/feed/A59095/,2.286667,2.13,1.83,2.90,1.696667
2,2022-10-12 00:00:00+00:00,0.10,59893,Londongatan,Lundby,Sweden,https://api.waqi.info/feed/A59893/,0.280000,0.30,0.30,0.24,1.440000
3,2025-09-03 00:00:00+00:00,1.30,88372,Ridlärargatan,Gothenburg,Sweden,https://api.waqi.info/feed/A88372/,2.533333,2.70,2.20,2.70,2.953333
4,2020-12-07 00:00:00+00:00,13.75,65146,Landerigatan,Örgryte-Härlanda,Sweden,https://api.waqi.info/feed/A65146/,22.680000,24.07,24.97,19.00,21.713333
...,...,...,...,...,...,...,...,...,...,...,...,...
28210,2025-11-16 00:00:00+00:00,5.00,61714,Nyhemsgatan,Norra Hisingen,Sweden,https://api.waqi.info/feed/A61714/,,,,,
28211,2025-11-16 00:00:00+00:00,6.00,404209,Högkullevägen,Lindome,Sweden,https://api.waqi.info/feed/A404209/,,,,,
28212,2025-11-16 00:00:00+00:00,3.00,194215,Norra Sävviksvägen,Torslanda,Sweden,https://api.waqi.info/feed/A194215/,,,,,
28213,2025-11-16 00:00:00+00:00,4.00,69628,yster Estrids Gata,Centrum,Sweden,https://api.waqi.info/feed/A69628/,,,,,


In [27]:
outcome_df = air_quality_df[['date', 'sensor_id', 'pm25']].copy()
preds_df = monitoring_df[['date', 'sensor_id', 'predicted_pm25']].copy()

hindcast_df = pd.merge(preds_df, outcome_df, on=["date", "sensor_id"]).sort_values(['sensor_id', 'date'])

if len(hindcast_df) == 0:
    air_quality_df_copy = air_quality_df.copy()
    air_quality_df_copy['date'] = pd.to_datetime(air_quality_df_copy['date']).dt.tz_localize(None)
    dates_with_pm25 = air_quality_df_copy[air_quality_df_copy['pm25'].notna()].sort_values('date').tail(10)
    
    date_start = dates_with_pm25['date'].min() - datetime.timedelta(days=4)
    weather_df = weather_fg.filter((weather_fg.date >= date_start) & (weather_fg.date <= dates_with_pm25['date'].max())).read()
    weather_df['date'] = pd.to_datetime(weather_df['date']).dt.tz_localize(None)
    historical_pm25 = air_quality_df_copy[(air_quality_df_copy['date'] >= date_start) & 
                                          (air_quality_df_copy['date'] <= dates_with_pm25['date'].max())][['date', 'sensor_id', 'pm25']]
    
    all_features = []
    for sensor_id, location in locations.items():
        sensor_weather = weather_df[(weather_df['sensor_id'] == sensor_id) & 
                                    (weather_df['date'].isin(dates_with_pm25[dates_with_pm25['sensor_id'] == sensor_id]['date']))].copy()
        sensor_pm25 = historical_pm25[historical_pm25['sensor_id'] == sensor_id] if not historical_pm25.empty else pd.DataFrame()
        if not sensor_weather.empty:
            model_feature_names = sensor_weather.columns.tolist()
            sensor_weather = predict_for_sensor(sensor_weather, sensor_pm25, location, retrieved_xgboost_model, model_feature_names)
            sensor_weather['days_before_forecast_day'] = 1
            sensor_weather = sensor_weather.merge(
                dates_with_pm25[dates_with_pm25['sensor_id'] == sensor_id][['date', 'pm25']],
                on='date', how='left'
            )
            all_features.append(sensor_weather)
    
    features_df = pd.concat(all_features, ignore_index=True) if all_features else pd.DataFrame()
    hindcast_df = features_df[features_df['pm25'].notna()][['date', 'sensor_id', 'predicted_pm25', 'pm25', 'street', 'country', 'days_before_forecast_day']].copy()
    if not features_df.empty:
        monitor_fg.insert(features_df.drop('pm25', axis=1, errors='ignore'), write_options={"wait_for_job": True})
hindcast_df

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.18s) 


ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, The experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:date: datetime64[us], city: object, sensor_id: object

### Plot the Hindcast comparing predicted with forecasted values (1-day prior forecast)

In [None]:
# Generate hindcast plots for each sensor
hindcast_file_paths = []
for sensor_id, location in locations.items():
    sensor_hindcast = hindcast_df[hindcast_df["sensor_id"] == sensor_id].copy()
    if sensor_hindcast.empty:
        continue
    
    city = location['city']
    street = location['street']
    hindcast_file_path = saved_model_dir / "hindcast.png"
    plt = airquality.plot_air_quality_forecast(city, street, sensor_hindcast, hindcast_file_path, hindcast=True)
    plt.close()
    hindcast_file_paths.append(hindcast_file_path)

### Upload the prediction and hindcast plots to Hopsworks


In [None]:
dataset_api = project.get_dataset_api()
str_today = today.strftime("%Y-%m-%d")
if dataset_api.exists("Resources/airquality") == False:
    dataset_api.mkdir("Resources/airquality")

# Upload all prediction and hindcast images
for sensor_id, location in locations.items():
    city = location['city']
    street = location['street']
    
    pred_path = saved_model_dir / "forecast.png"
    hindcast_path = saved_model_dir / "hindcast.png"
    
    if os.path.exists(pred_path):
        dataset_api.upload(pred_path, f"Resources/airquality/{sensor_id}_{str_today}_forecast", overwrite=True)
    if os.path.exists(hindcast_path):
        dataset_api.upload(hindcast_path, f"Resources/airquality/{sensor_id}_{str_today}_hindcast", overwrite=True)

proj_url = project.get_url()
print(f"See images in Hopsworks here: {proj_url}/settings/fb/path/Resources/airquality")