# 1. Backfill Pipeline

## 1.1. Setup

In [1]:
# Standard imports
import os
import sys
import json
import time
from datetime import date, timedelta
import warnings
from pathlib import Path
warnings.filterwarnings("ignore", module="IPython")

#  Establish project root directory
def find_project_root(start: Path):
    """Locate the closest directory at or above ``start`` holding a ``pyproject.toml``.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first candidate (``start`` itself, then each successive parent)
        that contains a ``pyproject.toml`` file, or ``start`` unchanged when
        no candidate contains one.
    """
    candidates = (start, *start.parents)
    return next(
        (folder for folder in candidates if (folder / "pyproject.toml").exists()),
        start,
    )

# Anchor the notebook to the project root, searching upward from the current
# working directory.
root_dir = find_project_root(Path.cwd())
print("Project root dir:", root_dir)

# Make the project's packages importable. The membership check keeps re-runs
# of this cell from piling up duplicate sys.path entries.
if sys.path.count(str(root_dir)) == 0:
    sys.path.append(str(root_dir))

# Third-party imports
import requests
import pandas as pd
import great_expectations as gx
import hopsworks
from urllib3.exceptions import ProtocolError
from requests.exceptions import ConnectionError, Timeout, RequestException

#  Project imports
from utils import cleaning, config, feature_engineering, fetchers, hopsworks_admin, incremental, metadata

# Read the project settings from the .env file at the project root
settings = config.HopsworksSettings(_env_file=str(root_dir) + "/.env")
GITHUB_USERNAME = settings.GH_USERNAME.get_secret_value()
HOPSWORKS_API_KEY = settings.HOPSWORKS_API_KEY.get_secret_value()

# Authenticate against Hopsworks and take a handle to the project's feature store
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()

Project root dir: c:\Users\krist\Documents\GitHub\pm25
HopsworksSettings initialized!
2026-01-15 07:48:35,636 INFO: Initializing external client
2026-01-15 07:48:35,638 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-15 07:48:45,717 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279184


Repository management

In [2]:
# Obtain the local repository directory for the configured GitHub user and
# make it the working directory.
# NOTE: os.chdir() changes the process-wide working directory, so relative
# paths in every later cell resolve against repo_dir.
repo_dir = hopsworks_admin.clone_or_update_repo(GITHUB_USERNAME)
os.chdir(repo_dir)

Repository exists at c:\Users\krist\Documents\GitHub\pm25\notebooks\pm25-forecast-openmeteo-aqicn


In [3]:
# Reference date for this run (not used further in this cell).
today = date.today()

# The AQICN token is required by the backfill; stop here if it is not configured.
if settings.AQICN_API_KEY is None:
    print("AQICN_API_KEY missing.")
    sys.exit(1)

AQICN_API_KEY = settings.AQICN_API_KEY.get_secret_value()

# Replace any previously stored copy of the key in the Hopsworks secret store:
# delete the old secret (if one can be fetched), then create it again from the
# locally configured value.
secrets = hopsworks.get_secrets_api()
try:
    secret = secrets.get_secret("AQICN_API_KEY")
    if secret is not None:
        secret.delete()
except Exception as secret_err:
    # Best-effort cleanup: say why the old secret was not removed instead of
    # hiding the failure, then still attempt the create below. Only the
    # exception text is printed, never the key itself.
    print(
        "No existing AQICN_API_KEY secret removed "
        f"({type(secret_err).__name__}: {str(secret_err)[:100]})"
    )

# Last expression of the cell, so the created Secret is displayed as output.
secrets.create_secret("AQICN_API_KEY", AQICN_API_KEY)

Secret created successfully, explore it at https://c.app.hopsworks.ai:443/account/secrets


Secret('AQICN_API_KEY', 'PRIVATE')

## 1.2. Create Feature Groups

In [4]:
# Obtain the air-quality and weather feature-group handles from the feature store.
# NOTE(review): presumably get-or-create semantics, so re-running is safe — confirm in hopsworks_admin.
air_quality_fg, weather_fg = hopsworks_admin.create_feature_groups(fs)

## 1.3. Check and Backfill
The first run can take a long time when many sensors have been added, because every sensor that is not yet in the feature store is backfilled.

In [5]:
# Backfill: for every sensor CSV under <root_dir>/data that is not yet in the
# air-quality feature group, build the air-quality features, fetch historical
# weather, and insert both frames into their feature groups.
data_dir = os.path.join(root_dir, "data")
dir_list = os.listdir(data_dir)

# os.listdir() returns entries in arbitrary order. Sort the CSVs so sensors are
# processed in the same order on every run; the order matters because
# metadata_df (the input to the nearby-sensor feature) grows as sensors finish.
csv_files = sorted(f for f in dir_list if f.endswith(".csv"))

# Get already processed sensors from feature group
existing_sensors = set()
metadata_df = pd.DataFrame()

# Only try to read if feature group has commits (not freshly created)
try:
    # Check commit metadata to see if any data was ever written
    commits = air_quality_fg.commit_details()

    if commits is not None and len(commits) > 0:
        # Feature group has data, safe to read
        existing_aq_data = air_quality_fg.read()
        existing_sensors = set(existing_aq_data["sensor_id"].unique())
        print(f"üìã Found {len(existing_sensors)} sensors already in feature store")

        # Get unique sensor metadata (take first occurrence of each sensor)
        # Keep sensor_id as a column, not as index
        metadata_df = existing_aq_data[
            ["sensor_id", "latitude", "longitude", "city", "street", "country"]
        ].drop_duplicates(subset=["sensor_id"])
        print(f"üìç Loaded metadata for {len(metadata_df)} existing sensors")
    else:
        # No commits yet, feature group is empty
        print("üìã No existing sensors found, starting fresh")
        print("üìç No existing sensors found")

except Exception as e:
    # Feature group is brand new or the lookup itself failed. Either way the
    # run starts fresh, but the reason is reported so a transient read error
    # is not mistaken for an empty feature store.
    print("üìã No existing sensors found, starting fresh")
    print("üìç No existing sensors found")
    print(f"   (feature group lookup failed: {type(e).__name__}: {str(e)[:100]})")

# Count total sensors to process
# NOTE(review): this compares counts, not sensor IDs; a sensor that is in the
# feature store but has no CSV would skew `remaining` — confirm that cannot happen.
total_sensors = len(csv_files)
remaining = total_sensors - len(existing_sensors)
print(f"üìä Total sensors: {total_sensors}, Already processed: {len(existing_sensors)}, Remaining: {remaining}")

if total_sensors != len(existing_sensors):
    print("\nüöÄ Starting backfill process...\n")
    # Track processing stats
    successful = 0
    failed = 0
    skipped = 0
    failed_sensors = []  # (sensor id or file name, reason) for every failure

    for file in csv_files:
        file_path = os.path.join(data_dir, file)
        # Reset per file so the error handler below never reports the previous
        # iteration's sensor, and never hits a NameError when the very first
        # file fails before a sensor id has been read.
        sensor_id = None

        try:
            aq_df_raw, street, city, country, feed_url, sensor_id = metadata.read_sensor_data(
                file_path, AQICN_API_KEY
            )

            sensor_id = int(sensor_id)

            # Skip if already processed
            if sensor_id in existing_sensors:
                skipped += 1
                continue

            # Get working feed URL using sensor ID and API token
            try:
                working_feed_url = fetchers.get_working_feed_url(sensor_id, AQICN_API_KEY)
            except Exception as url_err:
                print(f"‚ö†Ô∏è Sensor {sensor_id}: Could not resolve feed URL - {url_err}")
                working_feed_url = feed_url  # Fallback to CSV feed_url if resolution fails

            # Get coordinates for this sensor
            lat, lon = metadata.get_coordinates(city, street, country)

            if lat is None or lon is None:
                print(f"‚ö†Ô∏è Sensor {sensor_id}: cannot geocode location")
                failed += 1
                failed_sensors.append((sensor_id, "Geocoding failed"))
                continue

            # Clean and prepare air quality data
            aq_df = cleaning.clean_and_append_data(
                aq_df_raw, sensor_id,
                city=city, street=street, country=country,
                latitude=lat, longitude=lon, aqicn_url=working_feed_url
            )
            # One row per date, keeping the first occurrence after sorting
            aq_df = aq_df.sort_values("date").drop_duplicates(subset=["date"], keep="first")

            # Add features
            aq_df = feature_engineering.add_lagged_features(aq_df, "pm25", lags=[1, 2, 3])
            aq_df = feature_engineering.add_rolling_window_feature(
                aq_df, window_days=3, column="pm25", new_column="pm25_rolling_3d"
            )

            if len(metadata_df) > 0:
                aq_df = feature_engineering.add_nearby_sensor_feature(aq_df, metadata_df, n_closest=3)
            else:
                # No other sensor is known yet, so there is nothing to average
                aq_df["pm25_nearby_avg"] = 0.0

            # Date range for weather: the 3 x 365 days ending at the sensor's
            # latest air-quality date
            end_date = aq_df["date"].max().date()
            start_date = end_date - timedelta(days=365 * 3)

            # Fetch weather
            weather_df = fetchers.get_historical_weather(
                sensor_id, start_date, end_date, lat, lon
            )

            if weather_df is None or len(weather_df) == 0:
                print(f"‚ö†Ô∏è No weather data for sensor {sensor_id}")
                failed += 1
                failed_sensors.append((sensor_id, "No weather data"))
                continue

            # Prepare weather data: drop the timezone from "date" and pin the dtypes
            weather_df["date"] = weather_df["date"].dt.tz_localize(None)
            weather_df["sensor_id"] = sensor_id
            weather_df = weather_df.astype({
                "sensor_id": "int32",
                "temperature_2m_mean": "float64",
                "precipitation_sum": "float64",
                "wind_speed_10m_max": "float64",
                "wind_direction_10m_dominant": "float64",
            })
            # Insert without triggering materialization
            weather_fg.insert(weather_df, write_options={"start_offline_materialization": False})

            # Prepare air quality data: keep only the feature group's columns
            # and pin the dtypes
            aq_columns = [f.name for f in air_quality_fg.features]
            aq_df = aq_df[aq_columns].astype({
                "sensor_id": "int32",
                "pm25": "float64",
                "pm25_lag_1d": "float64",
                "pm25_lag_2d": "float64",
                "pm25_lag_3d": "float64",
                "pm25_rolling_3d": "float64",
                "pm25_nearby_avg": "float64",
                "city": "string",
                "street": "string",
                "country": "string",
                "aqicn_url": "string",
                "latitude": "float64",
                "longitude": "float64",
            })
            # Insert without triggering materialization
            air_quality_fg.insert(aq_df, write_options={"start_offline_materialization": False})

            existing_sensors.add(sensor_id)

            # Add this sensor to metadata_df for subsequent nearby calculations
            # Keep as DataFrame without setting index
            new_meta = pd.DataFrame({
                "sensor_id": [sensor_id],
                "latitude": [lat],
                "longitude": [lon],
                "city": [city],
                "street": [street],
                "country": [country]
            })
            metadata_df = pd.concat([metadata_df, new_meta], ignore_index=True)

            successful += 1
            print(f"‚úÖ Sensor {sensor_id} ({successful}/{remaining} complete)")

        except Exception as e:
            # Keep going with the next file; identify the failure by sensor id
            # when one was read, otherwise by the CSV file name.
            failed += 1
            sensor_label = sensor_id if sensor_id is not None else file
            failed_sensors.append((sensor_label, f"{type(e).__name__}: {str(e)[:100]}"))
            print(f"‚ùå Sensor {sensor_label}: {type(e).__name__}: {str(e)}")
            continue

    print("\nüéâ Backfill complete!")
    print("üìä Final Summary:")
    print(f"   ‚úÖ Successfully processed: {successful}")
    print(f"   ‚ùå Failed: {failed}")
    print(f"   ‚è© Skipped (already processed): {skipped}")
    print(f"   üìà Total in feature store: {len(existing_sensors)}/{total_sensors}")

    if len(failed_sensors) > 0:
        print("\n‚ö†Ô∏è  Failed Sensors Detail:")
        for sid, reason in failed_sensors:
            print(f"   ‚Ä¢ Sensor {sid}: {reason}")

else:
    print("\n‚úÖ All sensors already processed. No backfill needed.")

üìã No existing sensors found, starting fresh
üìç No existing sensors found
üìä Total sensors: 103, Already processed: 0, Remaining: 103

üöÄ Starting backfill process...

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1279184/fs/1265800/fg/1945998


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1119/1119 | Elapsed Time: 00:01 | Remaining Time: 00:00


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1279184/fs/1265800/fg/1952082


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1686/1686 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 105325 (1/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1510/1510 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 107110 (2/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2004/2004 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 112672 (3/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2006/2006 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 112993 (4/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1391/1391 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 113539 (5/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 410/410 | Elapsed Time: 00:00 | Remaining Time: 00:00


‚úÖ Sensor 113542 (6/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1872/1872 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 121810 (7/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1980/1980 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 122302 (8/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1804/1804 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 128095 (9/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1127/1127 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1961/1961 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 129124 (10/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1703/1703 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 149242 (11/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1872/1872 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 154549 (12/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1872/1872 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 163156 (13/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1127/1127 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1464/1464 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 180187 (14/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1657/1657 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 191047 (15/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1717/1717 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 192520 (16/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1639/1639 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 194215 (17/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1680/1680 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 196735 (18/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1446/1446 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 198559 (19/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1454/1454 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 208483 (20/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1103/1103 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1366/1366 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 249862 (21/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1123/1123 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 838/838 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 250030 (22/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1127/1127 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1309/1309 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 252352 (23/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1069/1069 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 345007 (24/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1131/1131 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 555/555 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 351115 (25/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 930/930 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 376954 (26/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 851/851 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 401314 (27/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 861/861 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 404209 (28/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 843/843 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 407335 (29/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 756/756 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 409513 (30/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 787/787 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 415030 (31/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 629/629 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 417595 (32/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 754/754 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 420664 (33/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 703/703 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 462457 (34/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1126/1126 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 586/586 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 474841 (35/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 596/596 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 476353 (36/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 497/497 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 494275 (37/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1103/1103 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 411/411 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 497266 (38/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 246/246 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 533086 (39/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 111/111 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 556792 (40/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 86/86 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 562600 (41/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2058/2058 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 57421 (42/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1267/1267 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 58666 (43/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2067/2067 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 58909 (44/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1107/1107 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1275/1275 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 58912 (45/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2074/2074 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 58921 (46/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2164/2164 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 59095 (47/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 698/698 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 59356 (48/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2182/2182 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 59410 (49/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1598/1598 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 59497 (50/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 921/921 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 59593 (51/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2177/2177 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 59650 (52/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2171/2171 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 59656 (53/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2164/2164 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 59887 (54/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2084/2084 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 59893 (55/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2052/2052 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 59899 (56/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1119/1119 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1013/1013 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 60073 (57/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2174/2174 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 60076 (58/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1972/1972 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 60535 (59/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1982/1982 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 60541 (60/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1696/1696 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 60838 (61/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2131/2131 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 60853 (62/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2058/2058 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 60859 (63/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1959/1959 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 60886 (64/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1124/1124 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 801/801 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 60889 (65/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2174/2174 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 61045 (66/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2169/2169 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 61420 (67/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2155/2155 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 61714 (68/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2006/2006 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 61861 (69/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2172/2172 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 61867 (70/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2022/2022 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 62566 (71/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2059/2059 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 62848 (72/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1111/1111 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 915/915 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 62968 (73/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1126/1126 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2089/2089 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 63637 (74/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2166/2166 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 63646 (75/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2153/2153 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 65104 (76/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2146/2146 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 65146 (77/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1127/1127 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2129/2129 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 65272 (78/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2182/2182 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 65284 (79/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2178/2178 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 65290 (80/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2160/2160 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 65707 (81/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1781/1781 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 68167 (82/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1128/1128 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1910/1910 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 69628 (83/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:00 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1780/1780 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 69724 (84/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1126/1126 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1941/1941 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 70564 (85/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1835/1835 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 76915 (86/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1970/1970 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 77446 (87/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2178/2178 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 77488 (88/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1126/1126 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2173/2173 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 78529 (89/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1009/1009 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 78532 (90/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1672/1672 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 79750 (91/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2071/2071 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 79999 (92/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:02 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2184/2184 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 80773 (93/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1118/1118 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1810/1810 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 81505 (94/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2181/2181 | Elapsed Time: 00:02 | Remaining Time: 00:00


‚úÖ Sensor 82384 (95/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1506/1506 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 82942 (96/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1799/1799 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 84085 (97/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1103/1103 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2146/2146 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 87319 (98/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1992/1992 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 88372 (99/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1129/1129 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2159/2159 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 88876 (100/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 2131/2131 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 89584 (101/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1104/1104 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1941/1941 | Elapsed Time: 00:01 | Remaining Time: 00:00


‚úÖ Sensor 90676 (102/103 complete)


Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1103/1103 | Elapsed Time: 00:01 | Remaining Time: 00:00
Uploading Dataframe: 100.00% |‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| Rows 1970/1970 | Elapsed Time: 00:01 | Remaining Time: 00:00

‚úÖ Sensor 92683 (103/103 complete)

🎉 Backfill complete!
📊 Final Summary:
   ✅ Successfully processed: 103
   ❌ Failed: 0
   ⏩ Skipped (already processed): 0
   📈 Total in feature store: 103/103





## 1.4. Update Descriptions

In [6]:
# Call the project's hopsworks_admin helpers that update the descriptions of the
# air-quality and weather feature groups (both handles come from earlier cells).
hopsworks_admin.update_air_quality_description(air_quality_fg)
hopsworks_admin.update_weather_description(weather_fg)

## 1.5. Validation Setup
Creates Great Expectations validation suites for air quality and weather data with column value constraints.

In [7]:
# Great Expectations suite attached to the air-quality feature group.
aq_expectation_suite = gx.core.ExpectationSuite(
    expectation_suite_name="aq_expectation_suite"
)

aq_numeric_types = ["float64", "Float64", "Int64", "Null"]
aq_derived_columns = ["pm25_rolling_3d", "pm25_lag_1d", "pm25_lag_2d", "pm25_lag_3d"]

aq_expectations = [
    # The column minimum of pm25 must be >= 0 (no upper bound on that minimum).
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_min_to_be_between",
        kwargs={
            "column": "pm25",
            "min_value": 0.0,
            "max_value": None,
            "strict_min": False,
        },
    ),
    # date must be one of the listed datetime types.
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": "date",
            "type_list": ["datetime64", "Datetime", "Null"],
        },
    ),
    # (sensor_id, date) acts as the primary key, so the pair must be unique.
    gx.core.ExpectationConfiguration(
        expectation_type="expect_compound_columns_to_be_unique",
        kwargs={"column_list": ["sensor_id", "date"]},
    ),
]

# Rolling and lag features must be numeric (float or int).
aq_expectations.extend(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": derived_column,
            "type_list": list(aq_numeric_types),
        },
    )
    for derived_column in aq_derived_columns
)

# The validated table must contain at least one row.
aq_expectations.append(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_table_row_count_to_be_between",
        kwargs={"min_value": 1, "max_value": None},
    )
)

for aq_expectation in aq_expectations:
    aq_expectation_suite.add_expectation(aq_expectation)

hopsworks_admin.save_or_replace_expectation_suite(air_quality_fg, aq_expectation_suite)


# Great Expectations suite attached to the weather feature group.
weather_expectation_suite = gx.core.ExpectationSuite(
    expectation_suite_name="weather_expectation_suite"
)

# date must be one of the listed datetime types.
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": "date",
            "type_list": ["datetime64", "Datetime", "Null"],
        },
    )
)

# Temperature column - must stay within a physically plausible range.
# mostly=1.0 means every evaluated value has to pass.
# NOTE(review): Great Expectations column-map expectations are understood to skip
# nulls by default, which is what lets null readings through - confirm for the
# installed version.
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "temperature_2m_mean",
            "min_value": -80,
            "max_value": 60,
            "mostly": 1.0,
        },
    )
)
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": "temperature_2m_mean",
            "type_list": ["float64", "Float64", "Int64", "Null"],
        },
    )
)

# Precipitation column - no upper bound; the lower bound is -0.1 rather than 0.
# NOTE(review): presumably a small tolerance below zero - confirm it is intended.
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "precipitation_sum",
            "min_value": -0.1,
            "max_value": None,
            "mostly": 1.0,          # every evaluated value must pass
        },
    )
)
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": "precipitation_sum",
            "type_list": ["float64", "Float64", "Int64", "Null"],
        },
    )
)

# Wind column - should be >= 0, no upper bound.
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "wind_speed_10m_max",
            "min_value": 0,
            "max_value": None,
            "mostly": 1.0,          # every evaluated value must pass
        },
    )
)
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_type_list",
        kwargs={
            "column": "wind_speed_10m_max",
            "type_list": ["float64", "Float64", "Int64", "Null"],
        },
    )
)

# The validated table must contain at least one row. This configuration was
# previously constructed but never added to the suite, so it had no effect.
weather_expectation_suite.add_expectation(
    gx.core.ExpectationConfiguration(
        expectation_type="expect_table_row_count_to_be_between",
        kwargs={"min_value": 1, "max_value": None},
    )
)

hopsworks_admin.save_or_replace_expectation_suite(weather_fg, weather_expectation_suite)

Deleted existing expectation suite for FG 'air_quality'.
Attached expectation suite to Feature Group, edit it at https://c.app.hopsworks.ai:443/p/1279184/fs/1265800/fg/1952082
Saved expectation suite for FG 'air_quality'.
Deleted existing expectation suite for FG 'weather'.
Attached expectation suite to Feature Group, edit it at https://c.app.hopsworks.ai:443/p/1279184/fs/1265800/fg/1945998
Saved expectation suite for FG 'weather'.


## 1.6. Create Feature View

In [8]:
def create_feature_view(fs, air_quality_fg, weather_fg):
    """Get or create version 1 of the ``air_quality_complete_fv`` feature view.

    The view's query selects every feature of ``air_quality_fg`` and joins every
    feature of ``weather_fg`` onto it using ``sensor_id`` and ``date``; ``pm25``
    is declared as the label.

    Args:
        fs: Object exposing ``get_or_create_feature_view`` (the feature store handle).
        air_quality_fg: Feature group on the left side of the join.
        weather_fg: Feature group joined onto the air-quality selection.

    Returns:
        Whatever ``fs.get_or_create_feature_view`` returns.
    """
    air_quality_features = air_quality_fg.select_all()
    weather_features = weather_fg.select_all()
    joined_query = air_quality_features.join(
        weather_features, on=["sensor_id", "date"]
    )

    return fs.get_or_create_feature_view(
        name="air_quality_complete_fv",
        version=1,
        query=joined_query,
        labels=["pm25"],
    )


# Fetch the existing feature view or create it; later cells read from `air_quality_fv`.
air_quality_fv = create_feature_view(fs, air_quality_fg, weather_fg)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1279184/fs/1265800/fv/air_quality_complete_fv/version/1


In [9]:
# df = air_quality_fv.get_batch_data(read_options={"use_spark": True})

In [10]:
# air_quality_fg.materialization()
# weather_fg.materialization()


**TODO:** Work out how to trigger materialization from code. For now, the materialization jobs were started manually.

In [12]:
# td = air_quality_fv.create_training_data(
#     description="Initial materialization",
#     data_format="parquet",
#     write_options={"use_spark": True}
# )

In [13]:
# # Trigger materialization job to populate offline feature store
# try:
#     materialization_job = air_quality_fv.create_training_data(
#         description="Initial materialization after backfill",
#         data_format="parquet"
#     )
#     print("‚úÖ Materialization job started")
# except Exception as e:
#     print(f"‚ÑπÔ∏è Materialization will occur automatically when feature view is used: {e}")

## 1.7. Load Historical Data

In [14]:
# Look up version 1 of the "air_quality" feature group and print its time-travel format.
fg = fs.get_feature_group("air_quality", version=1)
print(fg.time_travel_format)

HUDI


In [15]:
# One line per feature in the view: its name, the feature group it comes from, and that group's version.
for feature in air_quality_fv.features:
    origin_fg = feature.feature_group
    print(f"{feature.name}  |  {origin_fg.name}  v{origin_fg.version}")

sensor_id  |  air_quality  v1
date  |  air_quality  v1
pm25  |  air_quality  v1
pm25_lag_1d  |  air_quality  v1
pm25_lag_2d  |  air_quality  v1
pm25_lag_3d  |  air_quality  v1
pm25_rolling_3d  |  air_quality  v1
pm25_nearby_avg  |  air_quality  v1
city  |  air_quality  v1
street  |  air_quality  v1
country  |  air_quality  v1
aqicn_url  |  air_quality  v1
latitude  |  air_quality  v1
longitude  |  air_quality  v1
weather_temperature_2m_mean  |  weather  v1
weather_precipitation_sum  |  weather  v1
weather_wind_speed_10m_max  |  weather  v1
weather_wind_direction_10m_dominant  |  weather  v1


In [16]:
# Load historical data from feature view
air_quality_df = air_quality_fv.get_batch_data()
print(f"üìä Loaded {len(air_quality_df)} records from feature view")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (12.50s) 
üìä Loaded 165664 records from feature view


## 1.8. Data Exploration

In [17]:
# Read both feature groups into DataFrames for the exploration cells below.
air_quality_df = air_quality_fg.read()
weather_df = weather_fg.read()

# Sensor metadata: the descriptive columns of the first row seen for each sensor_id.
sensor_columns = ["sensor_id", "city", "street", "country", "latitude", "longitude"]
sensor_rows = air_quality_df[sensor_columns]
metadata_df = sensor_rows.drop_duplicates(subset=["sensor_id"])
sensor_total = len(metadata_df)
print(f"üìç Extracted metadata for {sensor_total} unique sensors")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (9.40s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.68s) 
üìç Extracted metadata for 103 unique sensors


In [18]:
# Headline facts about the air-quality frame: size, date span, sensors and locations.
print("üîç AIR QUALITY DATA EXPLORATION")
print("=" * 40)

aq_dates = air_quality_df['date']
pm25_values = air_quality_df['pm25']
first_day = aq_dates.min().date()
last_day = aq_dates.max().date()

print(f"Shape: {air_quality_df.shape}")
print(f"Date range: {first_day} to {last_day}")
print(f"Number of unique sensors: {air_quality_df['sensor_id'].nunique()}")
print(f"Countries: {metadata_df['country'].unique()}")
print(f"Cities: {metadata_df['city'].nunique()} unique cities")

# Distribution of the pm25 column and its null count.
print("\nüìä PM2.5 Statistics:")
print(pm25_values.describe())
print(f"Missing values: {pm25_values.isna().sum()}")

# Null counts (absolute and as a share of all rows) for each engineered column that is present.
print("\nüìà Engineered Features Statistics:")
engineered_columns = ['pm25_rolling_3d', 'pm25_lag_1d', 'pm25_lag_2d', 'pm25_lag_3d', 'pm25_nearby_avg']
for col in engineered_columns:
    if col not in air_quality_df.columns:
        continue
    missing = air_quality_df[col].isna().sum()
    missing_pct = missing/len(air_quality_df)*100
    print(f"{col}: {missing} missing values ({missing_pct:.1f}%)")


üîç AIR QUALITY DATA EXPLORATION
Shape: (165664, 14)
Date range: 2019-12-09 to 2025-12-19
Number of unique sensors: 103
Countries: ['Sweden']
Cities: 85 unique cities

üìä PM2.5 Statistics:
count    165664.000000
mean          3.202695
std          11.877057
min           0.000000
25%           0.900000
50%           1.800000
75%           3.500000
max         999.900000
Name: pm25, dtype: float64
Missing values: 0

üìà Engineered Features Statistics:
pm25_rolling_3d: 103 missing values (0.1%)
pm25_lag_1d: 103 missing values (0.1%)
pm25_lag_2d: 206 missing values (0.1%)
pm25_lag_3d: 309 missing values (0.2%)
pm25_nearby_avg: 163978 missing values (99.0%)


In [19]:
# Headline facts about the weather frame: size, date span and sensor count.
print("üå§Ô∏è WEATHER DATA EXPLORATION")
print("="*40)

print(f"Shape: {weather_df.shape}")
print(f"Date range: {weather_df['date'].min().date()} to {weather_df['date'].max().date()}")
# Count sensors in the weather frame itself. The previous version read metadata_df,
# which is derived from the air-quality frame and so reported the air-quality sensor
# count here. weather_df is expected to carry sensor_id because the feature view
# joins the two feature groups on it.
print(f"Number of unique sensors: {weather_df['sensor_id'].nunique()}")

# Range, mean and null count for each weather column that is present.
print("\nüå°Ô∏è Weather Statistics:")
for col in ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']:
    if col in weather_df.columns:
        weather_values = weather_df[col]
        print(f"{col}:")
        print(f"  Range: {weather_values.min():.2f} to {weather_values.max():.2f}, Mean: {weather_values.mean():.2f}, Missing: {weather_values.isna().sum()}")

# Coordinates come from the sensor metadata extracted from the air-quality frame.
print("\nüìç Geographic Coverage:")
print(f"Latitude range: {metadata_df['latitude'].min():.3f} to {metadata_df['latitude'].max():.3f}, Longitude range: {metadata_df['longitude'].min():.3f} to {metadata_df['longitude'].max():.3f}")

üå§Ô∏è WEATHER DATA EXPLORATION
Shape: (111503, 6)
Date range: 2018-06-01 to 2025-12-19
Number of unique sensors: 103

üå°Ô∏è Weather Statistics:
temperature_2m_mean:
  Range: -26.83 to 26.34, Mean: 6.47, Missing: 0
precipitation_sum:
  Range: 0.00 to 105.10, Mean: 2.26, Missing: 0
wind_speed_10m_max:
  Range: 3.05 to 63.46, Mean: 17.75, Missing: 0
wind_direction_10m_dominant:
  Range: 0.00 to 360.00, Mean: 203.84, Missing: 0

üìç Geographic Coverage:
Latitude range: 55.474 to 64.751, Longitude range: 11.171 to 20.953


In [20]:
print("üîó DATA QUALITY & RELATIONSHIPS")
print("=" * 40)

pm25_series = air_quality_df['pm25']

# Rows per sensor, total rows, and the percentage of rows with a non-null pm25.
sensor_day_counts = air_quality_df.groupby('sensor_id')['date'].count()
total_records = len(air_quality_df)
missing_share = pm25_series.isna().sum() / total_records
data_completeness = (1 - missing_share) * 100

fewest_days = sensor_day_counts.min()
median_days = sensor_day_counts.median()
most_days = sensor_day_counts.max()
short_history = (sensor_day_counts < 30).sum()
long_history = (sensor_day_counts > 365).sum()

print("üìä Overall Data Quality:")
print(f"Total records: {total_records:,}")
print(f"Data completeness: {data_completeness:.1f}%")
print(f"Days per sensor - Min: {fewest_days}, Median: {median_days:.0f}, Max: {most_days}")
print(f"Sensors with <30 days: {short_history}, >365 days: {long_history}")

# How many readings exceed the 100 and 50 thresholds, with their share of all rows.
extreme_count = (pm25_series > 100).sum()
very_high_count = (pm25_series > 50).sum()
extreme_pct = extreme_count/total_records*100
very_high_pct = very_high_count/total_records*100
print("\n‚ö†Ô∏è Air Quality Levels:")
print(f"Extreme readings (>100 Œºg/m¬≥): {extreme_count} ({extreme_pct:.1f}%)")
print(f"Very high readings (>50 Œºg/m¬≥): {very_high_count} ({very_high_pct:.1f}%)")

# Seasonal figure = mean of the per-month pm25 means for the months in that season.
if total_records > 0:
    # Month numbers are kept in a separate Series so air_quality_df is not modified.
    temp_months = pd.to_datetime(air_quality_df['date']).dt.month
    monthly_pm25 = air_quality_df.groupby(temp_months)['pm25'].mean()
    print("\nüóìÔ∏è Seasonal Patterns (PM2.5 Œºg/m¬≥):")
    season_months = [
        ("Winter", (12, 1, 2)),
        ("Spring", (3, 4, 5)),
        ("Summer", (6, 7, 8)),
        ("Autumn", (9, 10, 11)),
    ]
    for season, months in season_months:
        in_season = monthly_pm25.index.isin(months)
        season_avg = monthly_pm25[in_season].mean()
        print(f"  {season}: {season_avg:.1f}")

üîó DATA QUALITY & RELATIONSHIPS
üìä Overall Data Quality:
Total records: 165,664
Data completeness: 100.0%
Days per sensor - Min: 86, Median: 1872, Max: 2184
Sensors with <30 days: 0, >365 days: 100

‚ö†Ô∏è Air Quality Levels:
Extreme readings (>100 Œºg/m¬≥): 38 (0.0%)
Very high readings (>50 Œºg/m¬≥): 142 (0.1%)

üóìÔ∏è Seasonal Patterns (PM2.5 Œºg/m¬≥):
  Winter: 3.8
  Spring: 2.7
  Summer: 2.9
  Autumn: 3.5
