In [7]:
import pandas as pd
import numpy as np

# --- 1. Load Data ---
# First sheet column becomes the row index; '#N/A' cells are read as NaN on top of
# pandas' default missing-value markers.
data = pd.read_excel("pcpiMvMd.xlsx", index_col = 0, na_values = ['#N/A'], keep_default_na = True)

# Number of rows/periods separating the two levels compared in each growth rate
# (12 monthly periods = year over year).
lag = 12

# --- 2. Index & Column Cleanup ---
data.index.name = 'Date'
# Strip the ':' separator from every index label and turn each label into a
# monthly pandas Period (freq = 'M'), so the index supports period arithmetic.
data.index = (
    data.index
    .astype(str)
    .str.replace(':', '')
    .map(lambda label: pd.Period(label, freq = 'M'))
)

# --- 3. Real-Time Calculation Function ---
## Calculates the real-time YoY growth rate by pulling both the first reported CPI value
## and the CPI value from `lag` periods earlier (as it stood at that time) from the same vintage column.
def calculate_real_time_CPI_yoy(df_levels, lag):
    """Compute the growth rate of each observation as seen in its first vintage.

    For every row (observation date) the first column holding a non-missing
    value is treated as the vintage in which that observation was first
    reported. The growth rate is then computed from two levels taken from that
    SAME column: the level at the observation date and the level at
    ``obs_date - lag``.

    Parameters
    ----------
    df_levels : pandas.DataFrame
        Levels with one row per observation date and one column per vintage.
        Index labels must support ``label - lag`` (e.g. ``pandas.Period``).
        Presumably columns are ordered oldest vintage first -- verify against
        the input workbook.
    lag : int
        Number of periods between numerator and denominator (12 for a
        year-over-year rate on monthly data).

    Returns
    -------
    pandas.Series
        Percent growth rates indexed like ``df_levels``. Entries stay NaN for
        the first ``lag`` rows, for rows with no valid value in any column,
        when the lagged date is absent from the index, and when either level
        is missing in the chosen vintage.
    """
    # For each row: label of the first column with a non-missing value (None if none).
    first_vintage_by_date = df_levels.apply(pd.Series.first_valid_index, axis = 1)
    real_time_yoy = pd.Series(index = df_levels.index, dtype = float)

    # The first `lag` rows are skipped: they cannot have a lagged observation.
    for obs_date in df_levels.index[lag:]:

        vintage_col_name = first_vintage_by_date.loc[obs_date]

        # Row has no data in any vintage.
        if pd.isna(vintage_col_name):
            continue

        # A gap in the index would otherwise make the .loc lookup below raise
        # KeyError and abort the whole calculation; leave this date as NaN instead.
        base_date = obs_date - lag
        if base_date not in df_levels.index:
            continue

        ## Full time series of levels as published in that vintage.
        vintage_series = df_levels[vintage_col_name]

        ## Extract numerator (t) and denominator (t - lag) from the SAME vintage column.
        numerator_level = vintage_series.loc[obs_date]
        denominator_level = vintage_series.loc[base_date]
        if pd.notna(numerator_level) and pd.notna(denominator_level):
            real_time_yoy.loc[obs_date] = ((numerator_level / denominator_level) - 1) * 100

    return real_time_yoy

# --- 4. Execute All Necessary Calculations ---
# The right-most column is taken as the final (latest) vintage of the level series.
final_revised_CPI = data.iloc[:, -1]
# Percent change versus the value `lag` rows earlier within that final vintage.
final_revised_CPI_yoy = ((final_revised_CPI / final_revised_CPI.shift(lag)) - 1) * 100
# Same growth rate, but measured inside the vintage where each date first appears.
real_time_CPI_yoy = calculate_real_time_CPI_yoy(data, lag)

# --- 5. Create Final Output DataFrame ---
# All three Series are passed with their index intact so the columns are aligned
# on the date labels rather than by position.
final_output_df = pd.DataFrame({
    'Final_Revised_CPI': final_revised_CPI,
    'Final_Revised_CPI_YoY_Growth': final_revised_CPI_yoy,
    'Real_Time_CPI_YoY_Growth': real_time_CPI_yoy,
})

final_output_df.index.name = 'Date'
# Keep only dates for which the level and both growth rates are available.
final_output_df = final_output_df.dropna()

display(final_output_df)
final_output_df.to_excel("Final_CPI_YoY_Output(M).xlsx")

Unnamed: 0_level_0,Final_Revised_CPI,Final_Revised_CPI_YoY_Growth,Real_Time_CPI_YoY_Growth
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1949-01,24.010,1.393581,1.393581
1949-02,23.910,1.013942,1.013942
1949-03,23.910,1.744681,1.744681
1949-04,23.920,0.419815,0.419815
1949-05,23.910,-0.416493,-0.416493
...,...,...,...
2025-05,320.580,2.375934,2.375934
2025-06,321.500,2.672683,2.672683
2025-07,322.132,2.731801,2.731801
2025-08,323.364,2.939220,2.939220
