In [1]:
# IMPORT LIBRARIES
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import hopsworks


In [2]:
# LATITUDE & LONGITUDE OF ISLAMABAD 
LAT = 33.6844
LON = 73.0479
API_KEY = "2d50baa095da8ae975a0d6b839e794ca"


In [3]:
# FETCHING HISTORICAL AIR POLLUTION DATA 
def fetch_air_pollution_history(lat, lon, api_key, start_date, end_date):
    rows = []
    current = start_date

    while current < end_date:
        chunk_end = min(current + timedelta(days=5), end_date)

        url = (
            f"http://api.openweathermap.org/data/2.5/air_pollution/history"
            f"?lat={lat}&lon={lon}"
            f"&start={int(current.timestamp())}"
            f"&end={int(chunk_end.timestamp())}"
            f"&appid={api_key}"
        )

        res = requests.get(url, timeout=10)
        res.raise_for_status()
        data = res.json()

        for item in data.get("list", []):
            rows.append({
                "timestamp": pd.to_datetime(item["dt"], unit="s"),
                "pm25": item["components"]["pm2_5"],
                "pm10": item["components"]["pm10"],
                "co": item["components"]["co"],
                "no2": item["components"]["no2"],
                "o3": item["components"]["o3"],
                "so2": item["components"]["so2"],
                "nh3": item["components"]["nh3"],
                "aqi": item["main"]["aqi"]
            })

        current = chunk_end
        time.sleep(1)

    return pd.DataFrame(rows)


In [4]:
# FEATURE ENGINEERING
def compute_features(df):
    df = df.sort_values("timestamp").reset_index(drop=True)

    # ---- Time-based features ----
    df["hour"] = df["timestamp"].dt.hour
    df["day"] = df["timestamp"].dt.day
    df["month"] = df["timestamp"].dt.month
    df["day_of_week"] = df["timestamp"].dt.dayofweek
    df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)

    # ---- Lag & trend features ----
    df["aqi_lag_1"] = df["aqi"].shift(1)
    df["aqi_change_rate"] = df["aqi"] - df["aqi_lag_1"]

    # ---- Rolling averages ----
    df["pm25_roll_3h"] = df["pm25"].rolling(window=3).mean()
    df["pm10_roll_6h"] = df["pm10"].rolling(window=6).mean()

    # ---- Clean ----
    df = df.dropna()

    return df
    

In [9]:
# FEATURE STORE AND GET LAST HOUR POLLUTANT DATA
import hopsworks
from datetime import datetime, timedelta

# Login
project = hopsworks.login()
fs = project.get_feature_store()

# Get feature group
aqi_fg = fs.get_feature_group(
    name="aqi_features",
    version=1
)

# FETCH LAST 1 HOUR DATA
end_date = datetime.utcnow()
start_date = end_date - timedelta(hours=1)

df_raw = fetch_air_pollution_history(
    LAT,
    LON,
    API_KEY,
    start_date,
    end_date
)

# Feature engineering
df_features = compute_features(df_raw)

# Safety check
if df_features.empty:
    print("No new data to insert.")
else:
    aqi_fg.insert(df_features)
    print(f"Inserted {len(df_features)} new rows into Feature Group")

2026-02-05 17:21:38,992 INFO: Closing external client and cleaning up certificates.
Connection closed.
2026-02-05 17:21:38,997 INFO: Initializing external client
2026-02-05 17:21:38,998 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-02-05 17:21:41,903 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1338518
No new data to insert.


In [7]:
BACKFILL = False 

if BACKFILL:
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=120)

    df_raw = fetch_air_pollution_history(
        LAT, LON, API_KEY, start_date, end_date
    )

    df_features = compute_features(df_raw)

    aqi_fg.insert(df_features)


In [8]:
# Get existing feature group
#aqi_fg = fs.get_feature_group(
 #   name="aqi_features",
  #  version=1
#)

# Create feature view (ONCE)
#aqi_fv = fs.get_or_create_feature_view(
 #   name="aqi_feature_view",
  #  version=1,
   # description="Feature view for AQI prediction",
    #labels=["aqi"],
    #query=aqi_fg.select_all()
#)

#print("Feature View created:", aqi_fv.name, aqi_fv.version)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1338518/fs/1327173/fv/aqi_feature_view/version/1
Feature View created: aqi_feature_view 1
