In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# ------------------------------------------------------------
# LOAD & CLEAN DATA
# ------------------------------------------------------------

df = pd.read_csv("FRED-QD_2025m01.csv")

# Drop the metadata rows
df = df.drop(index=[0, 1]).reset_index(drop=True)

# Convert sasdate to datetime
df["sasdate"] = pd.to_datetime(df["sasdate"])

# Convert numeric columns
for col in df.columns[1:]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Remove rows with missing values
df = df.dropna()

# ------------------------------------------------------------
# SELECT VARIABLES
# ------------------------------------------------------------

features = [
    "PCECC96",     # Consumption
    "GPDIC1",      # Investment
    "FPIx",        # Price Index
    "S&P 500",     # Stock Index
    "S&P div yield",
    "S&P PE ratio"
]

target = "GDPC1"   # GDP

X = df[features]
y = df[target]

# ------------------------------------------------------------
# TRAIN/TEST SPLIT
# ------------------------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# ------------------------------------------------------------
# RANDOM FOREST MODEL
# ------------------------------------------------------------

rf = RandomForestRegressor(
    n_estimators=500,
    max_depth=10,
    random_state=42
)

rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# ------------------------------------------------------------
# METRICS
# ------------------------------------------------------------

print("=== RANDOM FOREST RESULTS ===")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"R² Score: {r2_score(y_test, y_pred):.3f}")

print("\nFeature Importances:")
for feat, imp in sorted(zip(features, rf.feature_importances_), key=lambda x: -x[1]):
    print(f"{feat}: {imp:.3f}")


=== RANDOM FOREST RESULTS ===
MAE: 1440.18
R² Score: -4.615

Feature Importances:
PCECC96: 0.333
FPIx: 0.252
GPDIC1: 0.224
S&P 500: 0.183
S&P PE ratio: 0.005
S&P div yield: 0.003
