# Mixed Data Sampling (MIDAS) Tutorial

+ We work with two macroeconomic variables: `GDP growth` (quarterly) and a `leading economic indicator` (LEI) (monthly). A third series, `ip_yoy`, is added as an extra high-frequency regressor in the model-comparison section.
+ The goal is to train a model that uses higher-frequency monthly data to update lower-frequency quarterly forecasts.
+ We’ll use the **MIDAS Python package**, available [here](https://github.com/RenatoVassallo/MIDAS.git).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the macro dataset, parse DATE as timestamps, and use it as the row index
df_macro = pd.read_csv('macro_dataset.csv', parse_dates=['DATE']).set_index('DATE')
df_macro

In [None]:
# Draw both year-over-year series on a single set of axes
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(
    df_macro.index, df_macro['gdp_yoy'],
    label='GDP YoY', marker='o', linestyle='-', color='navy',
)
ax.plot(
    df_macro.index, df_macro['lei_yoy'],
    label='LEI YoY', linestyle='-.', color='darkred',
)

# Title, axis labels, legend and layout
ax.set_title('Macroeconomic indicators in different frequencies', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Year-over-Year Change (%)')
ax.legend()
plt.xticks(rotation=0)
fig.tight_layout()
plt.show()

## 🧠 MIDAS Model Specification

$$ y_t = c + \beta_1 \cdot \sum_{k=1}^{3} w_k(\theta_{11}, \theta_{12}) \cdot x_{t - \frac{k}{3}} + \lambda \cdot y_{t-1} + \varepsilon_t $$

Where:
+ $y_t$: low-frequency target (quarterly GDP YoY, `gdp_yoy`)
+ $x_{t - \frac{k}{3}}$: high-frequency regressor (monthly LEI YoY, `lei_yoy`) at monthly lag $k$
+ $w_k(\theta_{11}, \theta_{12})$: exponential Almon weights over 3 monthly lags
+ $c$: intercept
+ $\beta_1$: coefficient on the weighted sum of high-frequency lags
+ $\lambda$: AR(1) coefficient
+ $\varepsilon_t$: error term

In [None]:
from MIDAS import MIDAS

# Target (GDP YoY) and the single high-frequency regressor (LEI YoY)
gdp_target = df_macro["gdp_yoy"]
lei_regressor = df_macro["lei_yoy"]

# One "3m" lag specification per high-frequency series, plus one lag of the target
midas_model = MIDAS(
    low_freq_series=gdp_target,
    high_freq_series_list=[lei_regressor],
    hf_lags_list=["3m"],
    lf_lags=1,
)

+ Suppose `GDP` is quarterly (e.g., the observation dated March 2019) and `LEI` is monthly.
+ Then `"3m"` means the model includes the 3 most recent monthly values of `LEI` available before that quarterly observation's date:

+ Example (forecasting 2019Q1 = March 2019):
    + You would use: `LEI[Feb 2019]`, `LEI[Jan 2019]`, `LEI[Dec 2018]`.
    + These are stacked and passed through a polynomial weighting function (like Beta or ExpAlmon), to produce a single regressor per high-frequency series.

In [None]:
from datetime import datetime

# Build the train/forecast data dictionary; it is printed here for inspection
# and is also the `data_dict` passed to fit() and predict() in later cells
result = midas_model.prepare_data(
    forecast_horizon=1,
    start_date=datetime(2011, 3, 1),
    end_date=datetime(2014, 12, 1),
)

# (header, key in `result`, True when the entry is a list and its first item is shown)
preview_sections = [
    ("Y Data (Training):\n",    "y_train",           False),
    ("\nY Lags (Training):\n",  "y_train_lags",      False),
    ("\nHF Data (Training):\n", "x_train_list",      True),
    ("\nY Data (Test):\n",      "y_forecast_target", False),
    ("\nY Lags (Test):\n",      "y_forecast_lags",   False),
    ("\nHF Data (Test):\n",     "x_forecast_list",   True),
]

# Show the first rows of every piece, in the order listed above
for header, key, take_first in preview_sections:
    entry = result[key][0] if take_first else result[key]
    print(header, entry.head())

### 📐 Exponential Almon Weights:

$$ w_k(\theta_{11}, \theta_{12}) =
\frac{
\exp(\theta_{11} \cdot k + \theta_{12} \cdot k^2)
}{
\sum_{j=1}^{3} \exp(\theta_{11} \cdot j + \theta_{12} \cdot j^2)
}
\quad \text{for } k = 1, 2, 3 $$

### 🔧 Parameter Vector:

$$ \Phi = \begin{bmatrix}
c & \beta_1 & \theta_{11} & \theta_{12} & \lambda
\end{bmatrix}$$

In [None]:
# Estimate the model on the prepared data, using exponential Almon weights
# for the single high-frequency regressor
opt_res = midas_model.fit(
    data_dict=result,
    poly_list=["expalmon"],
)

# Print the estimated parameter vector stored on the fit result's `.x` attribute
print(opt_res.x)

In [None]:
# Generate forecasts for the low-frequency series from the prepared data
pred = midas_model.predict(data_dict=result)

# Put realized targets and forecasts side by side, aligned on the index
y_target_df = result["y_forecast_target"].to_frame(name="y_target")
forecast_df = y_target_df.join(pred).set_axis(['y_target', 'y_forecast'], axis=1)
forecast_df

## Rolling forecast

In [None]:
# Rolling horizon-1 forecasts over the evaluation window; the call returns
# the per-date results table together with the RMSE
df_rolling, rmse = midas_model.rolling_forecast(
    start_date=datetime(2011, 3, 1),
    end_date=datetime(2014, 12, 1),
    forecast_horizon=1,
    poly_list=["expalmon"],
    verbose=True,
)
df_rolling

In [None]:
# Compare the rolling forecasts with the realized values
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(
    df_rolling.index, df_rolling["targets"],
    label="Target", color='navy', marker='o',
)
ax.plot(
    df_rolling.index, df_rolling["preds"],
    label="Forecast", color='darkred', marker='x', linestyle='-.',
)
ax.set_title("MIDAS rolling forecast")
ax.set_xlabel("Date")
ax.set_ylabel("GDP YoY")
ax.legend()
ax.grid()
plt.show()

## Model comparison

In [None]:
from MIDAS import MIDAS

# Model 1: two high-frequency regressors (IP and LEI), each with a "3m" lag spec
mod1 = MIDAS(
    low_freq_series=df_macro["gdp_yoy"],
    high_freq_series_list=[df_macro["ip_yoy"], df_macro["lei_yoy"]],
    hf_lags_list=["3m", "3m"],
    lf_lags=1,
)

# Rolling-forecast settings: one polynomial name per high-frequency series
mod1_settings = {
    "start_date": datetime(2011, 3, 1),
    "end_date": datetime(2014, 12, 1),
    "forecast_horizon": 1,
    "poly_list": ["expalmon", "expalmon"],
    "verbose": False,
}
df1, rmse1 = mod1.rolling_forecast(**mod1_settings)

In [None]:
# Model 2: LEI as the only high-frequency regressor
mod2 = MIDAS(
    low_freq_series=df_macro["gdp_yoy"],
    high_freq_series_list=[df_macro["lei_yoy"]],
    hf_lags_list=["3m"],
    lf_lags=1,
)

# Same evaluation window and horizon as model 1, so the RMSEs are comparable
mod2_settings = {
    "start_date": datetime(2011, 3, 1),
    "end_date": datetime(2014, 12, 1),
    "forecast_horizon": 1,
    "poly_list": ["expalmon"],
    "verbose": False,
}
df2, rmse2 = mod2.rolling_forecast(**mod2_settings)

In [None]:
# Collect both RMSEs in a small table indexed by model label
rmse_df = pd.DataFrame(
    {
        'Model': ['IP + LEI', 'LEI'],
        'RMSE': [rmse1, rmse2],
    }
).set_index('Model')

# Bar chart of the two RMSEs; x tick labels are set horizontal
ax = rmse_df.plot(kind='bar', figsize=(6, 4), legend=False)
ax.set_title('RMSE Comparison of MIDAS Models')
ax.set_ylabel('RMSE')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

## The `midas_compare` function

In [None]:
from MIDAS import midas_compare

# High-frequency series shared by the candidate specifications
ip_series = df_macro["ip_yoy"]
lei_series = df_macro["lei_yoy"]

# Each spec names a model and lists its regressors with one polynomial per regressor
models = [
    {"name": "IP", "high_freq_series": [ip_series], "polys": ["expalmon"]},
    {"name": "LEI", "high_freq_series": [lei_series], "polys": ["expalmon"]},
    {
        "name": "IP + LEI",
        "high_freq_series": [ip_series, lei_series],
        "polys": ["expalmon", "expalmon"],
    },
]

# NOTE(review): hf_lags has two entries while the first two specs carry a single
# series each — presumably only as many entries as needed are used; confirm
# against the MIDAS package.
results = midas_compare(
    low_freq_series=df_macro["gdp_yoy"],
    model_specs=models,
    hf_lags=["3m", "3m"],
    lf_lags=1,
    forecast_horizon=1,
    start_date=datetime(2011, 3, 1),
    end_date=datetime(2014, 12, 1),
    plot_forecasts=True,
)