# Align Macroeconomic Indicators to daily frequency

## Import Packages

In [150]:
import os
from pathlib import Path
import logging
import warnings

import pandas as pd
pd.set_option('future.no_silent_downcasting', True)

import matplotlib.pyplot as plt
import numpy as np
import ruptures as rpt

from datetime import datetime
import exchange_calendars as xc

# for Parquet I/O
import pyarrow as pa
import pyarrow.parquet as pq

## Read in Files

In [151]:
# Each manifest lists one Parquet file name per line. Entries are stripped and
# blank lines are skipped: an empty name would make `Path(directory) / ""`
# resolve to the directory itself instead of to a single indicator file.
ffill_files = [
    entry.strip()
    for entry in Path(
        "../../../02_Data_Processed/02_Macroeconomic_Indicators/01_Forward_Filled/_fileNames.txt"
    ).read_text(encoding="utf-8").splitlines()
    if entry.strip()
]
interp_files = [
    entry.strip()
    for entry in Path(
        "../../../02_Data_Processed/02_Macroeconomic_Indicators/02_Interpolated/_fileNames.txt"
    ).read_text(encoding="utf-8").splitlines()
    if entry.strip()
]

## Align to daily frequency with forward fill

In [152]:

# Input: forward-filled indicator files. Output: their daily-aligned copies.
src_dir = "../../../02_Data_Processed/02_Macroeconomic_Indicators/01_Forward_Filled"
des_dir = "../../../02_Data_Processed/02_Macroeconomic_Indicators/03_Forward_Filled_Daily"
# Create the output directory together with any missing parents; an already
# existing directory is accepted.
os.makedirs(des_dir, exist_ok=True)

def forward_fill_align_daily(relative_path, filename):
    file_path = Path(relative_path) / filename
    df = pd.read_parquet(file_path)
    df = df.set_index('date')
    df.index = pd.DatetimeIndex(df.index)
    start, end = df.index.min(), df.index.max()
    all_days = pd.date_range(start, end, freq="D")

    original_dates = set(df.index)
    df_daily = df.reindex(all_days).ffill()
    df_daily = df_daily.infer_objects(copy=False)
    
    df_daily['is_filled_daily'] = ~df_daily.index.isin(original_dates)
    df_daily = df_daily.reset_index().rename(columns={'index': 'date'})
    return df_daily

In [153]:
# Expand every forward-filled indicator file to daily rows and store the
# result in des_dir, with "_daily" inserted before the ".parquet" extension.
for f in ffill_files:
    outname = f.replace('.parquet', '_daily.parquet')
    df_daily = forward_fill_align_daily(src_dir, f)
    df_daily.to_parquet(Path(des_dir, outname), index=False)


## Align to daily frequency with interpolation

In [154]:
# Input: interpolated indicator files. Output: their daily-aligned copies.
src_dir = "../../../02_Data_Processed/02_Macroeconomic_Indicators/02_Interpolated"
des_dir = "../../../02_Data_Processed/02_Macroeconomic_Indicators/04_Interpolated_Daily"
# Create the output directory together with any missing parents; an already
# existing directory is accepted.
os.makedirs(des_dir, exist_ok=True)


def interpolate_align_daily(relative_path, filename):
    file_path = Path(relative_path) / filename
    df = pd.read_parquet(file_path)
    df = df.set_index('date')
    df.index = pd.DatetimeIndex(df.index)
    start, end = df.index.min(), df.index.max()
    all_days = pd.date_range(start, end, freq="D")

    original_dates = set(df.index)

    df_daily = df.reindex(all_days)
    df_daily = df_daily.infer_objects(copy=False)
    # Interpolate only the value column(s)
    df_daily['value'] = df_daily['value'].interpolate(method='linear', limit_direction='both').round(1)
    df_daily['value_filled'] = df_daily['value_filled'].ffill()
    # Add indicator for interpolated rows
    df_daily['is_interpolated_daily'] = ~df_daily.index.isin(original_dates)
    df_daily = df_daily.reset_index().rename(columns={'index': 'date'})
    return df_daily

In [155]:
# Expand every interpolated indicator file to daily rows, show a preview of
# each result, and store it in des_dir with "_daily" inserted before the
# ".parquet" extension.
for f in interp_files:
    outname = f.replace('.parquet', '_daily.parquet')
    df_daily = interpolate_align_daily(src_dir, f)
    print(df_daily.head())
    df_daily.to_parquet(Path(des_dir, outname), index=False)

        date  value value_filled  is_interpolated_daily
0 2012-01-31   21.3        False                  False
1 2012-02-01   21.3        False                   True
2 2012-02-02   21.3        False                   True
3 2012-02-03   21.3        False                   True
4 2012-02-04   21.3        False                   True
        date  value value_filled  is_interpolated_daily
0 2012-01-31   27.0        False                  False
1 2012-02-01   27.0        False                   True
2 2012-02-02   27.0        False                   True
3 2012-02-03   27.0        False                   True
4 2012-02-04   27.0        False                   True
        date         value value_filled  is_interpolated_daily
0 2012-01-31  3.970000e+07        False                  False
1 2012-02-01  3.960345e+07        False                   True
2 2012-02-02  3.950690e+07        False                   True
3 2012-02-03  3.941034e+07        False                   True
4 2012-02-04 