In [None]:
import pandas as pd

# Read the two FRED-QD CSV files into separate frames.
vintage_paths = ("FRED-QD_2025m01.csv", "FRED-QD_2025m02.csv")
df1, df2 = (pd.read_csv(csv_path) for csv_path in vintage_paths)

# NOTE: concatenating with axis=1 lines rows up by index label (here the
# default positional index produced by read_csv), not by any date column.
# If the two files could be ordered differently, sort both frames on their
# date column before this step.
combined_df = (
    pd.concat([df1, df2], axis=1)
    # Keep only float64/int64 columns; anything else is discarded before
    # modelling.
    .select_dtypes(include=["float64", "int64"])
    # Discard every row that still contains a missing value.
    .dropna()
)

print("Combined dataset shape:", combined_df.shape)

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# --------- SELECT TARGET ----------
# The target is the first column of the combined (numeric-only) frame.
target = combined_df.columns[0]

# combined_df is a side-by-side concatenation of two frames, so the same
# column label may occur more than once. Label-based selection
# (combined_df[target]) would then return a DataFrame with one column per
# duplicate and silently turn this into a multi-output regression. Selecting
# by position guarantees y is a single Series.
y = combined_df.iloc[:, 0]

# drop() removes every column carrying the target's label, so no copy of the
# target remains among the features.
X = combined_df.drop(columns=[target])

# 80% training, 20% testing.
# NOTE(review): train_test_split shuffles rows by default. If the rows are
# time-ordered observations (the file names suggest so -- confirm), a random
# split lets the model train on periods later than the ones it is tested on;
# consider shuffle=False or a time-series splitter instead.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random Forest model (fixed seed for reproducible results)
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = rf.predict(X_test)

# Model error: mean squared error on the test set (lower is better)
mse = mean_squared_error(y_test, y_pred)
print("Random Forest MSE:", mse)


=== RANDOM FOREST RESULTS ===
MAE: 1440.18
R² Score: -4.615

Feature Importances:
PCECC96: 0.333
FPIx: 0.252
GPDIC1: 0.224
S&P 500: 0.183
S&P PE ratio: 0.005
S&P div yield: 0.003
