In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error

# 1. Load M2 Data
# Read the M2SL file with 'observation_date' parsed as the datetime index,
# then give the value column the shorter name 'M2'.
df_m2 = pd.read_csv(
    '../data/raw/M2SL.csv',
    index_col='observation_date',
    parse_dates=['observation_date'],
).rename(columns={'M2SL': 'M2'})

# 2. Load Interest Rate Data (FEDFUNDS)
# Read with the same options; the column keeps whatever name the CSV gives it.
df_rate = pd.read_csv(
    '../data/raw/FEDFUNDS.csv',
    index_col='observation_date',
    parse_dates=['observation_date'],
)

# 3. Merge Datasets
# Place both frames side by side, aligned on the date index, then drop every
# row that contains a NaN. Dates missing from either file (or carrying a
# missing value) are therefore discarded.
df_multi = pd.concat([df_m2, df_rate], axis='columns').dropna(how='any')

print("Merged Dataset Head:", df_multi.head(), sep="\n")

FileNotFoundError: [Errno 2] No such file or directory: '../data/raw/FEDFUNDS.csv'

In [None]:
# 4. Data Transformations for Stationarity
# M2 -> monthly growth rate in percent: 100 * (log M2_t - log M2_{t-1})
df_multi['M2_Growth'] = np.log(df_multi['M2']).diff().mul(100)

# FEDFUNDS -> first difference (change in the rate from one row to the next)
# We assume interest rates are non-stationary in levels
df_multi['Rate_Diff'] = df_multi['FEDFUNDS'].diff(periods=1)

# Keep only the two transformed columns and drop the NaN rows that
# differencing leaves behind
df_final = df_multi.loc[:, ['M2_Growth', 'Rate_Diff']].dropna()

# 5. Train-Test Split (Chronological 80/20)
# The test set is the final 20% of rows (rounded down); everything before it
# is the training set, so row order is preserved.
n_obs = df_final.shape[0]
n_test = int(0.2 * n_obs)
n_train = n_obs - n_test

train_var, test_var = df_final.iloc[:n_train], df_final.iloc[n_train:]

print(f"VAR Training samples: {len(train_var)}")
print(f"VAR Test samples:     {len(test_var)}")

# Plot Transformed Variables
# One panel per training series: (column, line colour, panel title)
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
panel_specs = (
    ('M2_Growth', 'green', 'M2 Growth Rate (Transformed)'),
    ('Rate_Diff', 'red', 'Interest Rate Change (Transformed)'),
)
for panel_ax, (series_name, line_color, panel_title) in zip(axes, panel_specs):
    panel_ax.plot(train_var[series_name], color=line_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

In [None]:
# 6. Select Optimal Lag Order (AIC)
# Build the (unfitted) VAR on the training frame only; the test rows are not
# passed to lag selection.
model = VAR(train_var)

# Check lags up to 15 months
lag_order_results = model.select_order(maxlags=15)
print(lag_order_results.summary())

# Extract best lag based on AIC.
# The candidates scored by select_order include lag 0, so the AIC choice can
# be 0. A zero lag is unusable for the forecasting step: there would be no
# lagged observations to condition on. Fall back to one lag in that case and
# say so, instead of handing an unusable value to later cells.
aic_lag = int(lag_order_results.aic)
optimal_lag = max(aic_lag, 1)
if optimal_lag != aic_lag:
    print(f"\nAIC selected {aic_lag} lags; using p = {optimal_lag} so the VAR has at least one lag.")
print(f"\nSelected Optimal Lag (p): {optimal_lag}")

In [None]:
print("Starting VAR Rolling Forecast (1-step ahead)...")

# Validate the lag order once, before any model is fitted. With a lag of 0 the
# window slice below would be `[-0:]`, which returns the ENTIRE history rather
# than an empty window, so fail early with a clear message instead.
lag_p = int(optimal_lag)
if lag_p < 1:
    raise ValueError(
        f"VAR rolling forecast needs a lag order >= 1, got {optimal_lag}"
    )

# Take the column layout from the training frame rather than hard-coding it,
# and locate the target column by name instead of assuming it is column 0.
target_col = 'M2_Growth'
var_columns = list(train_var.columns)
target_pos = var_columns.index(target_col)

# Initialize history with training data (list of rows, one list per period)
history = train_var.values.tolist()
predictions = []

# Loop through the test set: forecast period i, then reveal its actual values
for i in range(len(test_var)):

    # Create a DataFrame for the current history
    curr_data = pd.DataFrame(history, columns=var_columns)

    # Fit VAR model on current history
    # Note: We re-estimate the model at each step to incorporate new information
    model_fit = VAR(curr_data).fit(lag_p)

    # Forecast the next step (1 step ahead)
    # The last 'p' rows of the current history are the lag window; they are
    # read from curr_data so the nested list is not converted a second time.
    last_obs = curr_data.values[-lag_p:]
    fc = model_fit.forecast(y=last_obs, steps=1)

    # fc has one row (steps=1) and one column per variable, in var_columns order
    pred_m2 = fc[0, target_pos]
    predictions.append(pred_m2)

    # Add actual observation (all variables) to history for the next iteration
    history.append(test_var.iloc[i].values.tolist())

# Create DataFrame for comparison (actual vs. forecast, indexed like the test set)
actual_m2 = test_var[target_col]
var_forecast_df = pd.DataFrame({
    'Actual': actual_m2,
    'VAR_Forecast': predictions
}, index=test_var.index)

# Calculate Evaluation Metrics
# RMSFE is the square root of the mean squared forecast error; MAE is the mean
# absolute forecast error. Both are in the units of the target series.
mse = mean_squared_error(actual_m2, predictions)
rmsfe = np.sqrt(mse)
mae = np.mean(np.abs(actual_m2 - predictions))

print("\nVAR Model Results:")
print(f"RMSFE: {rmsfe:.4f}")
print(f"MAE:   {mae:.4f}")

# Plotting Forecast vs Actuals
# Only the last 50 training rows are drawn, to give context without dwarfing
# the test window; .iloc makes the positional slice explicit.
plt.figure(figsize=(12, 6))
plt.plot(train_var.index[-50:], train_var[target_col].iloc[-50:], label='Training Data (Last 50)', color='green', alpha=0.5)
plt.plot(test_var.index, actual_m2, label='Actual Data', color='blue')
plt.plot(var_forecast_df.index, var_forecast_df['VAR_Forecast'], label='VAR Forecast', color='brown', linestyle='--')

plt.title('Multivariate VAR Model: Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('M2 Growth Rate (%)')
plt.legend()
plt.grid(True)
plt.show()