In [1]:
import pandas as pd

In [2]:
# Load the merged daily and hourly tables from CSV.
# Each file's timestamp column ("date" / "time") is parsed to datetime on read.
daily = pd.read_csv(
    '../data/mergeddata/merged_daily.csv',
    parse_dates=["date"],
)
hourly = pd.read_csv(
    '../data/mergeddata/merged_hourly.csv',
    parse_dates=["time"],
)

In [3]:
# Extracting Time Features (without seasons)
def add_temporal_features(df):
    """Add calendar columns derived from the frame's timestamp column.

    The timestamp source is the ``time`` column when the frame has one,
    otherwise the ``date`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-typed ``time`` or ``date`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (columns are added in place),
        with ``year``, ``month``, ``day`` and ``weekday`` columns, plus
        ``hour`` when the frame has a ``time`` column.

    Raises
    ------
    KeyError
        If the frame has neither a ``time`` nor a ``date`` column.
    AttributeError
        If the timestamp column is not datetime-like (``.dt`` is unavailable).
    """
    has_time = "time" in df.columns
    stamps = df["time"] if has_time else df["date"]

    df["year"] = stamps.dt.year
    df["month"] = stamps.dt.month
    df["day"] = stamps.dt.day
    df["weekday"] = stamps.dt.weekday  # Monday == 0 ... Sunday == 6

    # Hour is only meaningful for frames with a "time" column. The guard must
    # test for "time", not for a pre-existing "hour" column; otherwise an
    # hourly frame without "hour" would silently never receive the feature.
    if has_time:
        df["hour"] = stamps.dt.hour
    return df

# Derive the calendar columns for both tables (daily first, then hourly).
daily, hourly = add_temporal_features(daily), add_temporal_features(hourly)

In [4]:
# Save processed files
# Both frames are written as CSV under ../data/featureddata/; index=False keeps
# the DataFrame index out of the output files.
daily.to_csv("../data/featureddata/featured_daily.csv", index=False)
hourly.to_csv("../data/featureddata/featured_hourly.csv", index=False)
# Status message only; reached once both writes have returned.
print("✅ Temporal Features Added!")

✅ Temporal Features Added!


In [5]:
# Status message only: this cell performs no computation and does not modify
# `daily` or `hourly`.
# NOTE(review): presumably the location and rural/urban columns already exist in the merged inputs — confirm.
print("✅ Spatial Features already Added! - location and rural/urban classification")

✅ Spatial Features already Added! - location and rural/urban classification


In [6]:
# Adding Lag Features
def add_lag_features(df, lags=(1, 7, 14, 30), group_col=None):
    """Add lagged copies of the ``AQI`` column as ``AQI_lag_<n>`` columns.

    Lags are row offsets (``shift``), not calendar offsets.
    NOTE(review): this assumes rows are already in chronological order with
    one row per period; confirm against the upstream merge step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``AQI`` column.
    lags : iterable of int, optional
        Row offsets to generate. The default is an immutable tuple so the
        default value cannot be shared and mutated between calls.
    group_col : label or list of labels, optional
        When given, the shift is applied within each group of this column
        (or columns), so the first rows of one group are not filled with
        values from the end of the previous group. When ``None`` (default)
        the whole column is shifted as a single sequence.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with one new column per lag.

    Raises
    ------
    KeyError
        If ``AQI`` (or ``group_col``, when given) is not a column of ``df``.
    """
    # Select the source once, before any new column is attached to the frame.
    if group_col is None:
        source = df["AQI"]
    else:
        source = df.groupby(group_col)["AQI"]

    for lag in lags:
        df[f"AQI_lag_{lag}"] = source.shift(lag)
    return df

# Lag features are added to the daily frame only, using the function's default lags.
# NOTE(review): the lags are row shifts — presumably `daily` is in chronological
# order for a single location at this point; confirm.
daily = add_lag_features(daily)

# Save processed file
# This writes to the same path as the temporal-features save, so the earlier
# featured_daily.csv is overwritten.
daily.to_csv("../data/featureddata/featured_daily.csv", index=False)
print("✅ Lag Features Added!")


✅ Lag Features Added!


In [7]:
# Add rolling averages for AQI (7-row and 30-row windows; no other pollutant columns are averaged here)
def add_rolling_features(df):
    """Attach trailing rolling means of ``AQI`` to the frame.

    Two columns are added in place: ``AQI_7day_avg`` (window of 7 rows) and
    ``AQI_30day_avg`` (window of 30 rows). ``min_periods=1`` means the first
    rows are averaged over however many rows are available rather than
    producing NaN.

    NOTE(review): the windows count rows, not calendar days — presumably the
    frame has one row per day in chronological order; confirm.

    Returns the same DataFrame object that was passed in.
    """
    aqi = df["AQI"]
    window_by_column = {
        "AQI_7day_avg": 7,
        "AQI_30day_avg": 30,
    }
    for column, window in window_by_column.items():
        df[column] = aqi.rolling(window, min_periods=1).mean()
    return df

# Rolling averages are added to the daily frame only.
daily = add_rolling_features(daily)

# Save processed file
# This writes to the same featured_daily.csv path as the earlier saves, so the
# file on disk is replaced with the frame that now includes the rolling columns.
daily.to_csv("../data/featureddata/featured_daily.csv", index=False)
print("✅ Rolling Averages Added!")


✅ Rolling Averages Added!
