In [2]:
import pickle

# ARIMA models
with open("../../fix/arima_models/1_year_rate_arima_model.pkl", "rb") as f:
    one_year_rate_arima_model = pickle.load(f)

with open("../../fix/arima_models/3_months_rate_arima_model.pkl", "rb") as f:
    three_months_rate_arima_model = pickle.load(f)

with open("../../fix/arima_models/6_months_rate_arima_model.pkl", "rb") as f:
    six_months_rate_arima_model = pickle.load(f)

with open("../../fix/arima_models/10_year_rate_arima_model.pkl", "rb") as f:
    ten_year_rate_arima_model = pickle.load(f)

with open("../../fix/arima_models/CPI_arima_model.pkl", "rb") as f:
    cpi_arima_model = pickle.load(f)

with open("../../fix/arima_models/CSI_index_arima_model.pkl", "rb") as f:
    csi_index_arima_model = pickle.load(f)

with open("../../fix/arima_models/gdp_per_capita_arima_model.pkl", "rb") as f:
    gdp_per_capita_arima_model = pickle.load(f)

with open("../../fix/arima_models/INDPRO_arima_model.pkl", "rb") as f:
    indpro_arima_model = pickle.load(f)

with open("../../fix/arima_models/OECD_CLI_index_arima_model.pkl", "rb") as f:
    oecd_cli_index_arima_model = pickle.load(f)

with open("../../fix/arima_models/PPI_arima_model.pkl", "rb") as f:
    ppi_arima_model = pickle.load(f)

with open("../../fix/arima_models/share_price_arima_model.pkl", "rb") as f:
    share_price_arima_model = pickle.load(f)

with open("../../fix/arima_models/unemployment_rate_arima_model.pkl", "rb") as f:
    unemployment_rate_arima_model = pickle.load(f)


In [3]:
import pickle

# Prophet models
with open("../../fix/prophet_models/1_year_rate_prophet_model.pkl", "rb") as f:
    one_year_rate_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/3_months_rate_prophet_model.pkl", "rb") as f:
    three_months_rate_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/6_months_rate_prophet_model.pkl", "rb") as f:
    six_months_rate_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/10_year_rate_prophet_model.pkl", "rb") as f:
    ten_year_rate_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/CPI_prophet_model.pkl", "rb") as f:
    cpi_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/CSI_index_prophet_model.pkl", "rb") as f:
    csi_index_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/gdp_per_capita_prophet_model.pkl", "rb") as f:
    gdp_per_capita_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/INDPRO_prophet_model.pkl", "rb") as f:
    indpro_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/OECD_CLI_index_prophet_model.pkl", "rb") as f:
    oecd_cli_index_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/PPI_prophet_model.pkl", "rb") as f:
    ppi_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/share_price_prophet_model.pkl", "rb") as f:
    share_price_prophet_model = pickle.load(f)

with open("../../fix/prophet_models/unemployment_rate_prophet_model.pkl", "rb") as f:
    unemployment_rate_prophet_model = pickle.load(f)


In [5]:
models_dict = {
    'CSI_index': csi_index_prophet_model, 
    '10_year_rate': ten_year_rate_prophet_model, 
    '3_months_rate': three_months_rate_arima_model, 
    '1_year_rate': one_year_rate_prophet_model,
    'unemployment_rate': unemployment_rate_arima_model, 
    '6_months_rate': six_months_rate_arima_model, 
    'PPI': ppi_prophet_model, 
    'CPI':cpi_prophet_model, 
    'gdp_per_capita':gdp_per_capita_arima_model, 
    'OECD_CLI_index': oecd_cli_index_prophet_model, 
    'INDPRO': indpro_prophet_model, 
    'share_price':share_price_prophet_model
}

In [22]:
import pandas as pd
import numpy as np
import pickle
from statsmodels.tsa.seasonal import STL
import warnings
warnings.filterwarnings('ignore')

def production_forecasting_pipeline(input_data, models_dict, forecast_steps, date_col='date', freq='M'):
    """
    Production forecasting pipeline that matches your training process
    
    Key insight: Your models were trained with specific feature sets:
    - ARIMA models: Use ALL features except recession targets as exogenous variables
    - Prophet models: Use ALL features except recession targets as regressors
    
    Strategy: Use iterative forecasting approach similar to your training
    """
    
    print(f"Production Forecasting Pipeline - {forecast_steps} steps ahead")
    print("="*60)
    
    # Recession targets to exclude (from your training code)
    recession_targets = [
        'recession_probability', '1_month_recession_probability',
        '3_month_recession_probability', '6_month_recession_probability'
    ]
    
    # Financial indicators (your target variables)
    financial_indicators = [
        '1_year_rate', '3_months_rate', '6_months_rate', 'CPI', 'INDPRO',
        '10_year_rate', 'share_price', 'unemployment_rate', 'PPI',
        'OECD_CLI_index', 'CSI_index', 'gdp_per_capita'
    ]
    
    # 1. Create future date range
    if date_col in input_data.columns:
        last_date = pd.to_datetime(input_data[date_col].max())
        if freq == 'M':
            future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), 
                                       periods=forecast_steps, freq='M')
        elif freq == 'Q':
            future_dates = pd.date_range(start=last_date + pd.DateOffset(months=3), 
                                       periods=forecast_steps, freq='Q')
        else:
            future_dates = pd.date_range(start=last_date + pd.Timedelta(days=30), 
                                       periods=forecast_steps, freq='M')
        
        result_data = pd.DataFrame({date_col: future_dates})
    else:
        result_data = pd.DataFrame(index=range(forecast_steps))
    
    # 2. Separate models by type
    arima_models = {}
    prophet_models = {}
    
    for indicator, model in models_dict.items():
        model_type = str(type(model)).lower()
        if 'arima' in model_type:
            arima_models[indicator] = model
        elif 'prophet' in model_type:
            prophet_models[indicator] = model
        else:
            arima_models[indicator] = model  # Default to ARIMA treatment
    
    print(f"ARIMA models: {list(arima_models.keys())}")
    print(f"Prophet models: {list(prophet_models.keys())}")
    
    # 3. Forecast ARIMA models first (they need exogenous variables)
    print(f"\nStep 1: Forecasting ARIMA models...")
    
    for indicator, model in arima_models.items():
        try:
            print(f"  Processing ARIMA: {indicator}...")
            
            # Prepare exogenous variables for ARIMA (same as training)
            features_to_exclude = [date_col] + recession_targets + [indicator]
            available_features = [c for c in input_data.columns if c not in features_to_exclude]
            
            print(f"    Available exog features: {len(available_features)}")
            
            if len(available_features) == 0:
                # No exogenous variables - simple ARIMA
                predictions = model.forecast(steps=forecast_steps)
                if hasattr(predictions, 'values'):
                    predictions = predictions.values
            else:
                # ARIMA with exogenous variables
                exog_data = input_data[available_features].copy()
                
                # Clean exogenous data (same as your training)
                exog_data = exog_data.fillna(method='ffill').fillna(method='bfill')
                exog_data = exog_data.replace([np.inf, -np.inf], np.nan).fillna(method='ffill').fillna(method='bfill')
                
                # Remove non-varying columns
                varying_cols = [c for c in exog_data.columns if exog_data[c].nunique() > 1]
                exog_data = exog_data[varying_cols]
                
                print(f"    Using {len(varying_cols)} exog variables")
                
                if len(varying_cols) == 0:
                    # No varying exogenous variables
                    predictions = model.forecast(steps=forecast_steps)
                else:
                    # Create future exogenous variables (forward fill last values)
                    last_exog_values = exog_data.iloc[-1:].copy()
                    future_exog = pd.concat([last_exog_values] * forecast_steps, ignore_index=True)
                    
                    # Check if model expects more exogenous variables than we have
                    expected_exog_count = getattr(model.model, 'k_exog', 0)
                    
                    if expected_exog_count > 0 and len(varying_cols) != expected_exog_count:
                        print(f"    Warning: Model expects {expected_exog_count} exog vars, but have {len(varying_cols)}")
                        
                        if len(varying_cols) < expected_exog_count:
                            # Pad with zeros to match expected shape
                            missing_cols = expected_exog_count - len(varying_cols)
                            for i in range(missing_cols):
                                future_exog[f'missing_exog_{i}'] = 0.0
                            print(f"    Padded with {missing_cols} zero columns")
                        else:
                            # Take only the first expected_exog_count columns
                            future_exog = future_exog.iloc[:, :expected_exog_count]
                            print(f"    Truncated to {expected_exog_count} columns")
                    
                    # Make forecast with exogenous variables
                    predictions = model.forecast(steps=forecast_steps, exog=future_exog)
                
                if hasattr(predictions, 'values'):
                    predictions = predictions.values
            
            # Ensure correct length
            if len(predictions) != forecast_steps:
                if len(predictions) > forecast_steps:
                    predictions = predictions[:forecast_steps]
                else:
                    last_val = predictions[-1] if len(predictions) > 0 else 0
                    predictions = list(predictions) + [last_val] * (forecast_steps - len(predictions))
            
            result_data[indicator] = predictions
            print(f"    ✓ ARIMA forecast: {indicator} ({len(predictions)} values)")
            
        except Exception as e:
            print(f"    ✗ ARIMA failed for {indicator}: {str(e)}")
            # Use trend-based fallback
            predictions = trend_based_forecast(input_data, indicator, forecast_steps)
            result_data[indicator] = predictions
            print(f"    ✓ Fallback forecast: {indicator}")
    
    # 4. Now forecast Prophet models (they need ALL features as regressors)
    print(f"\nStep 2: Forecasting Prophet models...")
    
    for indicator, model in prophet_models.items():
        try:
            print(f"  Processing Prophet: {indicator}...")
            
            # Create future dataframe for Prophet
            future_df = model.make_future_dataframe(periods=forecast_steps, freq=freq)
            
            # Get regressor names from the model
            regressors = []
            if hasattr(model, 'extra_regressors'):
                regressors = list(model.extra_regressors.keys())
            
            print(f"    Model needs {len(regressors)} regressors")
            print(f"    Sample regressors: {regressors[:5]}..." if len(regressors) > 5 else f"    Regressors: {regressors}")
            
            # Prepare regressor values for the entire future_df
            historical_length = len(future_df) - forecast_steps
            
            for regressor in regressors:
                try:
                    if regressor in result_data.columns:
                        # Use ARIMA predictions for this regressor
                        if regressor in input_data.columns:
                            hist_values = input_data[regressor].fillna(method='ffill').fillna(method='bfill').tolist()
                        else:
                            hist_values = [0.0] * historical_length
                        
                        future_values = result_data[regressor].tolist()
                        all_values = hist_values + future_values
                        
                        # Adjust length to match future_df
                        if len(all_values) > len(future_df):
                            all_values = all_values[:len(future_df)]
                        elif len(all_values) < len(future_df):
                            last_val = all_values[-1] if all_values else 0
                            all_values.extend([last_val] * (len(future_df) - len(all_values)))
                        
                        future_df[regressor] = all_values
                        
                    elif regressor in input_data.columns:
                        # Use historical data + forward fill
                        hist_values = input_data[regressor].fillna(method='ffill').fillna(method='bfill').tolist()
                        
                        # Forward fill for future
                        last_val = hist_values[-1] if hist_values else 0
                        future_values = [last_val] * forecast_steps
                        all_values = hist_values + future_values
                        
                        # Adjust length
                        if len(all_values) > len(future_df):
                            all_values = all_values[:len(future_df)]
                        elif len(all_values) < len(future_df):
                            last_val = all_values[-1] if all_values else 0
                            all_values.extend([last_val] * (len(future_df) - len(all_values)))
                        
                        future_df[regressor] = all_values
                        
                    else:
                        # Missing regressor - use zeros
                        future_df[regressor] = [0.0] * len(future_df)
                        
                except Exception as reg_error:
                    print(f"      Warning: Error with regressor {regressor}: {str(reg_error)}")
                    future_df[regressor] = [0.0] * len(future_df)
            
            # Make Prophet prediction
            forecast_result = model.predict(future_df)
            predictions = forecast_result['yhat'].tail(forecast_steps).values
            
            result_data[indicator] = predictions
            print(f"    ✓ Prophet forecast: {indicator} ({len(predictions)} values)")
            
        except Exception as e:
            print(f"    ✗ Prophet failed for {indicator}: {str(e)}")
            # Use trend-based fallback
            predictions = trend_based_forecast(input_data, indicator, forecast_steps)
            result_data[indicator] = predictions
            print(f"    ✓ Fallback forecast: {indicator}")
    
    # 5. Apply STL decomposition (same as your training)
    print(f"\nStep 3: Applying STL decomposition...")
    
    indicators = list(models_dict.keys())
    for indicator in indicators:
        if indicator in result_data.columns:
            try:
                series = pd.Series(result_data[indicator])
                
                # Combine with historical data for better STL
                if indicator in input_data.columns:
                    historical_series = input_data[indicator].fillna(method='ffill').fillna(method='bfill')
                    combined_series = pd.concat([historical_series, series], ignore_index=True)
                else:
                    combined_series = series
                
                if len(combined_series.dropna()) >= 24:
                    # Use STL with same parameters as your training
                    stl = STL(combined_series, seasonal=13, period=12)
                    decomposition = stl.fit()
                    
                    # Extract forecast portion
                    trend = decomposition.trend.iloc[-forecast_steps:].values
                    residual = decomposition.resid.iloc[-forecast_steps:].values
                    
                    result_data[f'{indicator}_trend'] = trend
                    result_data[f'{indicator}_residual'] = residual
                else:
                    # Short series fallback
                    mean_val = series.mean()
                    result_data[f'{indicator}_trend'] = [mean_val] * forecast_steps
                    result_data[f'{indicator}_residual'] = (series - mean_val).values
                
                print(f"  ✓ STL: {indicator}")
                
            except Exception as e:
                print(f"  ✗ STL failed for {indicator}: {str(e)}")
                # Simple fallback
                result_data[f'{indicator}_trend'] = result_data[indicator].values
                result_data[f'{indicator}_residual'] = [0.0] * forecast_steps
    
    # 6. Extract required features AND financial indicators
    required_features = [
        'CSI_index_trend', '10_year_rate_trend', '3_months_rate_trend', 
        '1_year_rate_trend', 'unemployment_rate_trend', '6_months_rate_trend',
        'PPI_trend', 'CPI_trend', 'gdp_per_capita_trend', 'gdp_per_capita_residual',
        'OECD_CLI_index_trend', 'OECD_CLI_index_residual', '3_months_rate_residual',
        'INDPRO_trend', 'share_price_trend', '6_months_rate_residual',
        '1_year_rate_residual', '10_year_rate_residual'
    ]
    
    # Financial indicators (raw predictions)
    financial_indicators = [
        '1_year_rate', '3_months_rate', '6_months_rate', 'CPI', 'INDPRO',
        '10_year_rate', 'share_price', 'unemployment_rate', 'PPI',
        'OECD_CLI_index', 'CSI_index', 'gdp_per_capita'
    ]
    
    # Combine date + financial indicators + required features
    all_output_columns = [date_col] + financial_indicators + required_features
    
    # Keep available columns from result_data
    final_columns = [col for col in all_output_columns if col in result_data.columns]
    final_data = result_data[final_columns].copy()
    
    # Fill missing financial indicators with 0
    for indicator in financial_indicators:
        if indicator not in final_data.columns:
            final_data[indicator] = [0.0] * forecast_steps
    
    # Fill missing features with 0
    for feature in required_features:
        if feature not in final_data.columns:
            final_data[feature] = [0.0] * forecast_steps
    
    print(f"\n" + "="*60)
    print(f"PRODUCTION FORECASTING COMPLETE")
    print(f"="*60)
    print(f"Final shape: {final_data.shape}")
    if date_col in final_data.columns:
        print(f"Forecast period: {final_data[date_col].min()} to {final_data[date_col].max()}")
    print(f"Generated {len(financial_indicators)} financial indicators + {len(required_features)} engineered features")
    print(f"Financial indicators: {financial_indicators}")
    print(f"Sample features: {required_features[:5]}...")
    
    return final_data

def trend_based_forecast(input_data, indicator, forecast_steps):
    """
    Trend-based forecasting fallback (matches your training approach)
    """
    if indicator not in input_data.columns or input_data[indicator].isna().all():
        return [0.0] * forecast_steps
    
    series = input_data[indicator].fillna(method='ffill').fillna(method='bfill').dropna()
    
    if len(series) < 3:
        return [series.iloc[-1] if len(series) > 0 else 0.0] * forecast_steps
    
    # Simple linear trend
    x = np.arange(len(series))
    y = series.values
    
    try:
        slope, intercept = np.polyfit(x, y, 1)
        last_index = len(series) - 1
        
        predictions = []
        for step in range(1, forecast_steps + 1):
            pred = slope * (last_index + step) + intercept
            predictions.append(pred)
        
        return predictions
        
    except:
        # Ultimate fallback - last value
        return [series.iloc[-1]] * forecast_steps

"""
USAGE:

This pipeline matches your exact training process:

1. Load your models:
models_dict = {
    'CSI_index': csi_index_prophet_model,
    '10_year_rate': ten_year_rate_prophet_model,
    '3_months_rate': three_months_rate_arima_model,
    # ... etc
}

2. Run production forecasting:
production_features = production_forecasting_pipeline(
    input_data=your_historical_data,  # Should have same features as training
    models_dict=models_dict,
    forecast_steps=4,
    date_col='date',
    freq='M'
)

3. The output will have exactly your 18 required features for recession prediction

KEY DIFFERENCES FROM PREVIOUS ATTEMPTS:
- Matches your training's exogenous variable preparation exactly
- Uses iterative forecasting (ARIMA first, then Prophet with ARIMA results)
- Handles regressor preparation the same way as your training code
- Applies STL decomposition with same parameters as training
"""

"\nUSAGE:\n\nThis pipeline matches your exact training process:\n\n1. Load your models:\nmodels_dict = {\n    'CSI_index': csi_index_prophet_model,\n    '10_year_rate': ten_year_rate_prophet_model,\n    '3_months_rate': three_months_rate_arima_model,\n    # ... etc\n}\n\n2. Run production forecasting:\nproduction_features = production_forecasting_pipeline(\n    input_data=your_historical_data,  # Should have same features as training\n    models_dict=models_dict,\n    forecast_steps=4,\n    date_col='date',\n    freq='M'\n)\n\n3. The output will have exactly your 18 required features for recession prediction\n\nKEY DIFFERENCES FROM PREVIOUS ATTEMPTS:\n- Matches your training's exogenous variable preparation exactly\n- Uses iterative forecasting (ARIMA first, then Prophet with ARIMA results)\n- Handles regressor preparation the same way as your training code\n- Applies STL decomposition with same parameters as training\n"

In [23]:
historical_data = pd.read_csv('../../data/fix/feature_selected_recession_test.csv')

In [24]:
# Use the fixed function
production_features = production_forecasting_pipeline(
    input_data=historical_data,
    models_dict=models_dict,
    forecast_steps=4,
    date_col='date',
    freq='M'
)

Production Forecasting Pipeline - 4 steps ahead
ARIMA models: ['3_months_rate', 'unemployment_rate', '6_months_rate', 'gdp_per_capita']
Prophet models: ['CSI_index', '10_year_rate', '1_year_rate', 'PPI', 'CPI', 'OECD_CLI_index', 'INDPRO', 'share_price']

Step 1: Forecasting ARIMA models...
  Processing ARIMA: 3_months_rate...
    Available exog features: 29
    Using 17 exog variables
    Padded with 12 zero columns
    ✓ ARIMA forecast: 3_months_rate (4 values)
  Processing ARIMA: unemployment_rate...
    Available exog features: 29
    Using 17 exog variables
    Padded with 12 zero columns
    ✓ ARIMA forecast: unemployment_rate (4 values)
  Processing ARIMA: 6_months_rate...
    Available exog features: 29
    Using 17 exog variables
    Padded with 12 zero columns
    ✓ ARIMA forecast: 6_months_rate (4 values)
  Processing ARIMA: gdp_per_capita...
    Available exog features: 29
    Using 17 exog variables
    Padded with 12 zero columns
    ✓ ARIMA forecast: gdp_per_capita (4 val

In [25]:
production_features.columns

Index(['date', '1_year_rate', '3_months_rate', '6_months_rate', 'CPI',
       'INDPRO', '10_year_rate', 'share_price', 'unemployment_rate', 'PPI',
       'OECD_CLI_index', 'CSI_index', 'gdp_per_capita', 'CSI_index_trend',
       '10_year_rate_trend', '3_months_rate_trend', '1_year_rate_trend',
       'unemployment_rate_trend', '6_months_rate_trend', 'PPI_trend',
       'CPI_trend', 'gdp_per_capita_trend', 'gdp_per_capita_residual',
       'OECD_CLI_index_trend', 'OECD_CLI_index_residual',
       '3_months_rate_residual', 'INDPRO_trend', 'share_price_trend',
       '6_months_rate_residual', '1_year_rate_residual',
       '10_year_rate_residual'],
      dtype='object')