## Step 1: Data Collection & Preparation

In [2]:
import pandas as pd

# Load datasets
lags_df = pd.read_csv("lags.csv")
pred_df = pd.read_csv("Prediction_Indicators.csv")
labels_df = pd.read_csv("Regime_Lables.csv")

# Format datetime
for df in [lags_df, pred_df, labels_df]:
    df['observation_date'] = pd.to_datetime(df['observation_date'])

# Merge datasets
merged_df = lags_df.merge(labels_df, on='observation_date', how='inner')

# Drop fully empty columns and fill NaNs with mean
merged_df = merged_df.dropna(axis=1, how='all')
merged_df = merged_df.fillna(merged_df.mean(numeric_only=True))

merged_df

Unnamed: 0,observation_date,Interest_Rates_lag1,Interest_Rates_lag2,Interest_Rates_lag3,Interest_Rates_lag4,Interest_Rates_roll_mean,Interest_Rates_roll_std,Interest_Rates_pct_change,10Y_minus_2Y_Treasury_Constant_Maturity_lag1,10Y_minus_2Y_Treasury_Constant_Maturity_lag2,...,Macro_Stress_Flag_Index_lag3,Macro_Stress_Flag_Index_lag4,Macro_Stress_Flag_Index_roll_mean,Macro_Stress_Flag_Index_roll_std,Macro_Stress_Flag_Index_pct_change,GDP_Growth_Rate,Unemployment_Rate_Change,Jobs_Added,Industrial_Production_Growth,Regime_Label
0,1977-01-01,4.87,4.659162,4.655632,4.652063,4.658355,0.558058,0.747798,1.43,0.887225,...,1.189474,1.185185,1.194737,0.641907,inf,1.186233,-0.3,242.0,-0.689685,mild
1,1977-04-01,4.66,4.870000,4.655632,4.652063,4.658355,0.558058,0.747798,1.31,1.430000,...,1.189474,1.185185,1.194737,0.641907,inf,1.941924,-0.2,337.0,0.976329,Boom
2,1977-07-01,5.16,4.660000,4.870000,4.652063,5.127500,0.505132,0.747798,1.41,1.310000,...,2.000000,1.185185,0.750000,0.957427,inf,1.803165,-0.3,346.0,0.135890,Boom
3,1977-10-01,5.82,5.160000,4.660000,4.870000,5.537500,0.803757,0.336756,1.06,1.410000,...,1.000000,2.000000,0.500000,0.577350,-0.5,0.001993,0.0,268.0,0.153911,Stable
4,1978-01-01,6.51,5.820000,5.160000,4.660000,6.062500,0.721128,0.450644,0.42,1.060000,...,0.000000,1.000000,0.750000,0.957427,1.0,0.319842,0.0,184.0,-1.457159,Recession
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,2023-10-01,5.26,4.990000,4.510000,3.650000,5.022500,0.371786,0.460274,-0.93,-0.560000,...,1.000000,2.000000,1.000000,0.816497,0.0,0.788673,0.1,186.0,-0.706624,mild
188,2024-01-01,5.33,5.260000,4.990000,4.510000,5.227500,0.161735,0.181818,-0.27,-0.930000,...,1.000000,1.000000,1.250000,0.957427,1.0,0.404802,-0.1,119.0,-1.118474,Recession
189,2024-04-01,5.33,5.330000,5.260000,4.990000,5.312500,0.035000,0.068136,-0.26,-0.270000,...,0.000000,1.000000,1.500000,1.000000,1.0,0.738980,0.0,118.0,-0.157825,Stable
190,2024-07-01,5.33,5.330000,5.330000,5.260000,5.312500,0.035000,0.000000,-0.33,-0.260000,...,2.000000,0.000000,2.000000,0.000000,inf,0.759510,0.1,88.0,-0.711066,mild


## Step 4: Forecasting Indicators (Step 8.1)

In [8]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
from prophet import Prophet
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Merge GDP and Unemployment into lags_df
from sklearn.preprocessing import StandardScaler

indicator_cols = ['observation_date', 'GDP_Growth_Rate', 'Unemployment_Rate']
gdp_unemp_df = prediction_df[indicator_cols].dropna()

# Merge into lags_df
lags_df = pd.merge(lags_df, gdp_unemp_df, on='observation_date', how='left')


# Load data
lags_df = pd.read_csv("lags.csv")
lags_df['observation_date'] = pd.to_datetime(lags_df['observation_date'])
lags_df = lags_df.set_index('observation_date')

# Fill missing values
lags_df = lags_df.fillna(lags_df.mean(numeric_only=True))

# Split indicators
var_cols = ['GDP_Growth_Rate', 'Unemployment_Rate']
prophet_cols = ['Jobs_Added', 'Retail_Sales']
xgb_cols = [col for col in lags_df.columns if col not in var_cols + prophet_cols]

# Forecast output container
future_forecasts = pd.DataFrame()

# Step 1: VAR for GDP + Unemployment
var_df = lags_df[var_cols].dropna()
var_model = VAR(var_df)
var_results = var_model.fit(maxlags=4, ic='aic')
var_forecast = var_results.forecast(var_df.values[-4:], steps=4)
var_index = pd.date_range(start=lags_df.index[-1] + pd.DateOffset(months=3), periods=4, freq='Q')
future_forecasts = pd.DataFrame(var_forecast, columns=var_cols, index=var_index)

# Step 2: Prophet for Jobs and Retail Sales
for col in prophet_cols:
    prophet_df = lags_df[[col]].reset_index()
    prophet_df.columns = ['ds', 'y']
    prophet_model = Prophet()
    prophet_model.fit(prophet_df)
    future = prophet_model.make_future_dataframe(periods=4, freq='Q')
    forecast = prophet_model.predict(future)
    future_values = forecast[['ds', 'yhat']].set_index('ds').tail(4)
    future_forecasts[col] = future_values['yhat']

# Step 3: XGBoostRegressor for all other indicators
for col in xgb_cols:
    series = lags_df[col].dropna()
    if len(series) < 20:
        continue  # skip short series
    X = np.arange(len(series)).reshape(-1, 1)
    y = series.values
    model = XGBRegressor(n_estimators=100)
    model.fit(X, y)
    X_future = np.arange(len(series), len(series) + 4).reshape(-1, 1)
    y_future = model.predict(X_future)
    future_forecasts[col] = y_future

# Display the resulting forecasted indicators
future_forecasts



NameError: name 'prediction_df' is not defined

### Step 5: Regime Classification on Forecasted Indicators (Step 8.2)