# Feature Engineering for Economic Indicators

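This notebook derives lagged features and rolling statistics (mean and standard deviation) from the cleaned economic indicator data to enhance predictive power. Rows left incomplete by the lag and rolling windows are dropped, and the enhanced dataset is saved for downstream modeling.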

In [None]:
# 1. Import Libraries and Load Cleaned Data
import pandas as pd
import numpy as np

# Read the cleaned indicators (adjust the path for your environment).
# The first CSV column becomes the index and pandas attempts to parse it as dates.
# NOTE(review): the rendered preview shows that first column is `realtime_start`
# (a single repeated value) while observation dates live in `date` -- confirm
# this is the intended index before relying on it for time-based operations.
df = pd.read_csv(
    '../data/processed/cleaned_economic_indicators.csv',
    parse_dates=True,
    index_col=0,
)
print(f"Loaded data shape: {df.shape}")
display(df.head())

# 2. Create Lag Features
def create_lag_features(df, columns, lags=(1, 3, 6, 12)):
    """Return a copy of ``df`` with lagged versions of ``columns`` appended.

    For every column ``col`` and every lag ``k`` a column named
    ``{col}_lag{k}`` is added, holding ``df[col].shift(k)`` -- the value found
    ``k`` rows earlier. Shifting is positional, so rows are expected to already
    be in chronological order. For a positive ``k`` the first ``k`` rows of the
    new column are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It is not modified.
    columns : iterable of column labels
        Columns of ``df`` to lag.
    lags : iterable of int, default (1, 3, 6, 12)
        Row offsets passed to ``Series.shift``.

    Returns
    -------
    pandas.DataFrame
        New frame containing the original columns followed by the lag columns
        (ordered by column, then by lag).

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``df``.
    """
    lagged = {
        f'{col}_lag{lag}': df[col].shift(lag)
        for col in columns
        for lag in lags
    }
    # assign() operates on a copy, so the caller's frame stays untouched and
    # re-running a cell cannot silently accumulate state; a pre-existing
    # column with the same name is overwritten, matching plain item assignment.
    return df.assign(**lagged)

# Lag/rolling inputs are the numeric columns, minus every target column
target_cols = [
    '1_month_recession_probability',
    '3_month_recession_probability',
    '6_month_recession_probability',
]
indicator_cols = [
    name
    for name in df.select_dtypes(include=[np.number]).columns
    if name not in target_cols
]
df = create_lag_features(df, indicator_cols)
print("Lag features created.")

# 3. Create Rolling Statistics (mean, std)
def create_rolling_features(df, columns, windows=(3, 6, 12)):
    """Return a copy of ``df`` with rolling mean/std columns appended.

    For every column ``col`` and every window size ``w`` two columns are added:
    ``{col}_rollmean{w}`` and ``{col}_rollstd{w}``, computed with
    ``df[col].rolling(window=w)``. The window covers the current row and the
    rows before it; rows that do not yet have a full window are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It is not modified.
    columns : iterable of column labels
        Columns of ``df`` to summarise.
    windows : iterable of int, default (3, 6, 12)
        Window sizes passed to ``Series.rolling``.

    Returns
    -------
    pandas.DataFrame
        New frame containing the original columns followed by the rolling
        columns (ordered by column, then window, mean before std).

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``df``.
    """
    rolled = {}
    for col in columns:
        for win in windows:
            # Build the window once and reuse it for both statistics.
            window = df[col].rolling(window=win)
            rolled[f'{col}_rollmean{win}'] = window.mean()
            rolled[f'{col}_rollstd{win}'] = window.std()
    # assign() operates on a copy, leaving the caller's frame untouched; a
    # pre-existing column with the same name is overwritten.
    return df.assign(**rolled)

# Append rolling mean/std columns for the same indicator columns
df = create_rolling_features(df, indicator_cols)
print("Rolling statistics created.")

# 4. Keep only complete rows
# The leading rows have NaN in the lag/rolling columns; note that a NaN in any
# other column removes the row as well (how='any' is the dropna default).
df_fe = df.dropna(axis=0, how='any').copy()
print(f"Feature engineered data shape: {df_fe.shape}")

# 5. Persist the enhanced dataset (index is written as the first CSV column)
df_fe.to_csv(
    '../data/processed/feature_engineered_economic_indicators.csv'
)
print("Feature engineered dataset saved to ../data/processed/feature_engineered_economic_indicators.csv")

Loaded data shape: (670, 3)


Unnamed: 0_level_0,value,realtime_end,date
realtime_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-07-29,-1.204969,2025-07-29,1967-02-01
2025-07-29,-1.19158,2025-07-29,1967-03-01
2025-07-29,-1.075902,2025-07-29,1967-04-01
2025-07-29,-0.869049,2025-07-29,1967-05-01
2025-07-29,-0.610453,2025-07-29,1967-06-01


Lag features created.
Rolling statistics created.
Feature engineered data shape: (658, 13)
Feature engineered dataset saved to ../data/processed/feature_engineered_economic_indicators.csv


In [4]:
# Summary statistics of the engineered dataset; for a mixed-dtype frame
# describe() reports the numeric columns only by default.
df_fe.describe()

Unnamed: 0,value,value_lag1,value_lag3,value_lag6,value_lag12,value_rollmean3,value_rollstd3,value_rollmean6,value_rollstd6,value_rollmean12,value_rollstd12
count,658.0,658.0,658.0,658.0,658.0,658.0,658.0,658.0,658.0,658.0,658.0
mean,0.004871,0.005275,0.006149,0.005667,-0.002029,0.005298,0.130668,0.005756,0.238441,0.004278,0.424502
std,1.005061,1.005244,1.005569,1.00554,1.009384,0.995031,0.116404,0.965703,0.192358,0.876764,0.29137
min,-2.831585,-2.831585,-2.831585,-2.831585,-2.831585,-2.705803,0.002218,-2.499055,0.005633,-2.048974,0.024081
25%,-0.652454,-0.652454,-0.652454,-0.652454,-0.678018,-0.645155,0.043993,-0.649531,0.089969,-0.637354,0.205386
50%,0.058895,0.058895,0.058895,0.058564,0.047172,0.054001,0.100512,0.036311,0.181009,0.031218,0.336499
75%,0.698949,0.698949,0.698949,0.698949,0.698949,0.701837,0.18441,0.666676,0.346265,0.584347,0.613012
max,2.759544,2.759544,2.759544,2.759544,2.759544,2.624998,0.738213,2.462322,1.109365,2.157861,1.589229
