In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Input CSVs, one per data split.
TRAIN_IN, VAL_IN, TEST_IN = (
    "airq_train.csv",
    "airq_val.csv",
    "airq_test.csv",
)

# Output CSVs that receive the feature-engineered version of each split.
TRAIN_OUT, VAL_OUT, TEST_OUT = (
    "airq_train_fe.csv",
    "airq_val_fe.csv",
    "airq_test_fe.csv",
)

# Columns for which add_time_features() derives lag and moving-average
# features. Columns missing from a given frame are skipped there.
BASE_COLS = [
    # "(GT)" columns
    "CO(GT)",
    "NMHC(GT)",
    "C6H6(GT)",
    "NOx(GT)",
    "NO2(GT)",
    # "PT08.*" columns
    "PT08.S1(CO)",
    "PT08.S2(NMHC)",
    "PT08.S3(NOx)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    # remaining columns
    "T",
    "RH",
    "AH",
]

def add_time_features(df: pd.DataFrame,
                      lags=(1, 3, 6, 12, 24),
                      mas=(3, 6, 12, 24)) -> pd.DataFrame:
    """Return a time-sorted copy of ``df`` with calendar, lag and rolling-mean columns.

    The input frame is not modified. The copy is sorted by ``timestamp``
    (parsed with ``pd.to_datetime``) and re-indexed from 0, then extended with:

    * ``hour`` and ``dayofweek`` (0 = Monday) taken from ``timestamp``;
    * ``<col>_lag<L>`` -- ``col`` shifted down by ``L`` rows, for every ``L``
      in ``lags``;
    * ``<col>_MA<W>`` -- trailing rolling mean of ``col`` over ``W`` rows
      (current row included, ``min_periods=1``), for every ``W`` in ``mas``.

    ``col`` ranges over the entries of ``BASE_COLS`` that are present in
    ``df``; absent ones are skipped silently. Lags and windows count rows,
    not elapsed time.

    Args:
        df: Frame containing a ``timestamp`` column.
        lags: Row offsets for the lag features.
        mas: Window sizes (in rows) for the moving-average features.

    Returns:
        A new DataFrame: original columns, then ``hour``, ``dayofweek``,
        all lag columns, then all moving-average columns.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
    """
    df = df.copy()

    # make sure timestamp is in time order
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df = df.sort_values("timestamp").reset_index(drop=True)

    # basic time features
    df["hour"] = df["timestamp"].dt.hour
    df["dayofweek"] = df["timestamp"].dt.dayofweek  # 0=Monday

    present = [col for col in BASE_COLS if col in df.columns]

    # Compute every derived column first and attach them in one step.
    # Inserting them one at a time (df[name] = ...) fragments the frame and
    # makes pandas emit a PerformanceWarning once ~100 columns were added.
    derived = {}

    # lag features
    for col in present:
        for L in lags:
            derived[f"{col}_lag{L}"] = df[col].shift(L)

    # moving average features
    for col in present:
        for W in mas:
            derived[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()

    # A derived name that already exists in the frame is overwritten in place
    # (keeps its position and avoids duplicate column labels); the rest are
    # appended together, in the order they were generated.
    appended = {}
    for name, values in derived.items():
        if name in df.columns:
            df[name] = values
        else:
            appended[name] = values

    if appended:
        df = pd.concat([df, pd.DataFrame(appended, index=df.index)], axis=1)

    return df

def main():
    """Feature-engineer the train, validation and test CSVs.

    For each (input, output) pair: read the CSV, replace every -200 value
    in the frame with NaN, run add_time_features() on it, create the output
    file's parent directory if needed, and write the result without the index.
    Each split is processed on its own.
    """
    sources = (TRAIN_IN, VAL_IN, TEST_IN)
    targets = (TRAIN_OUT, VAL_OUT, TEST_OUT)

    for in_path, out_path in zip(sources, targets):
        print(f"Processing {in_path} → {out_path}")

        # consider -200 as nan
        raw = pd.read_csv(in_path).replace(-200, np.nan)
        df_fe = add_time_features(raw)

        destination = Path(out_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        df_fe.to_csv(destination, index=False)
        print("  Done. Shape:", df_fe.shape)


if __name__ == "__main__":
    main()


Processing airq_train.csv → airq_train_fe.csv


  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W

  Done. Shape: (6550, 133)
Processing airq_val.csv → airq_val_fe.csv
  Done. Shape: (1404, 133)
Processing airq_test.csv → airq_test_fe.csv


  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W, min_periods=1).mean()
  df[f"{col}_MA{W}"] = df[col].rolling(W

  Done. Shape: (1403, 133)
