In [2]:
import pandas as pd
import numpy as np

# --- 1. Load Data ---
# header=3 makes the fourth spreadsheet row the column header; '#N/A' and '.'
# cells are read as missing values in addition to pandas' default NA markers.
data = pd.read_excel(
    "aulrealGDP.xlsx",
    index_col=0,
    na_values=['#N/A', '.'],
    keep_default_na=True,
    header=3,
)

# Number of periods between the two observations of a growth rate
# (4 quarters -> year-over-year on quarterly data).
lag = 4

# --- 2. Index & Column Cleanup ---

# Name the two date-part columns by position so they can be combined below.
# NOTE(review): assumes the 2nd and 3rd data columns hold quarter and year —
# confirm against the workbook layout.
data = data.rename(columns={
    data.columns[1]: 'Quarter',
    data.columns[2]: 'Year',
})

# Combine 'Year' and 'Quarter' columns (e.g., '1960' + 'Q' + '1' -> '1960Q1')
date_strings = data['Year'].astype(str) + 'Q' + data['Quarter'].astype(str)

# Set the new quarterly PeriodIndex
data = data.set_index(pd.PeriodIndex(date_strings, freq='Q'))
data.index.name = 'Date'

# Drop the original date/quarter columns
data = data.drop(columns=[data.columns[0], 'Quarter', 'Year'], errors='ignore')

# The index is already a quarterly PeriodIndex at this point, so it is not
# re-parsed: pd.to_datetime() rejects Period data with a TypeError.

# Parse the remaining column labels as datetimes. If the labels cannot be
# parsed as-is, keep only the first whitespace-separated token of each label
# and coerce anything still unparseable to NaT.
try:
    data.columns = pd.to_datetime(data.columns)
except (ValueError, TypeError):
    data.columns = pd.to_datetime(data.columns.astype(str).str.split().str[0], errors='coerce')
display(data)

# --- 3. Real-Time Calculation Function ---
## Calculates the real-time YoY growth rate for each observation date using only the
## vintage in which that date was first reported: the level at t and the level at
## t - lag are both read from that same vintage column.
def calculate_real_time_RGDP_yoy(df_levels, lag):
    """Compute real-time growth rates from a table of data vintages.

    For each observation date (row), the first column holding a non-missing
    value in that row is used as that observation's vintage. Growth is then
    measured entirely within that column: the level at ``obs_date`` against
    the level at ``obs_date - lag``.

    Parameters
    ----------
    df_levels : pandas.DataFrame
        Levels with one row per observation date and one column per vintage.
        Index labels must support ``label - lag`` arithmetic (for example a
        ``PeriodIndex``).
    lag : int
        Number of periods between numerator and denominator (4 on quarterly
        data gives year-over-year growth).

    Returns
    -------
    pandas.Series
        Percentage growth indexed like ``df_levels``. Entries are NaN where
        the row has no data at all, where ``obs_date - lag`` is not an index
        label, or where either level is missing in the selected vintage.
    """
    # Column label of the first non-missing value in each row (None for empty rows).
    first_release_cols = df_levels.apply(pd.Series.first_valid_index, axis=1)
    real_time_yoy = pd.Series(index=df_levels.index, dtype=float)

    for obs_date in df_levels.index:
        base_date = obs_date - lag

        # Test label membership rather than skipping the first `lag` rows by
        # position: with a gap in the index, a positional skip can point at a
        # base period that does not exist (KeyError) or skip one that does.
        if base_date not in df_levels.index:
            continue

        vintage_col_name = first_release_cols.loc[obs_date]
        if pd.isna(vintage_col_name):
            continue

        # Full series of levels as they appear in that vintage column.
        vintage_series = df_levels[vintage_col_name]

        # Extract numerator (t) and denominator (t - lag) from the SAME vintage column.
        numerator_level = vintage_series.loc[obs_date]
        denominator_level = vintage_series.loc[base_date]
        if pd.isna(numerator_level) or pd.isna(denominator_level):
            continue

        real_time_yoy.loc[obs_date] = (numerator_level / denominator_level - 1) * 100

    return real_time_yoy

# --- 4. Execute All Necessary Calculations ---
# The last column of `data` is used as the final revised series.
final_revised_RGDP = data.iloc[:, -1]
final_revised_RGDP_yoy = (final_revised_RGDP / final_revised_RGDP.shift(lag) - 1) * 100
real_time_RGDP_yoy = calculate_real_time_RGDP_yoy(data, lag)
# Change in real-time growth versus `lag` periods earlier. Uses `lag` rather
# than a literal 4 so the shift stays in step with the growth horizon.
acceleration_signal = real_time_RGDP_yoy - real_time_RGDP_yoy.shift(lag)

# --- 5. Create Final Output DataFrame ---
# Pass Series (not .values) so every column is aligned on the shared index
# by label instead of by position.
final_output_df = pd.DataFrame({
    'Final_Revised_RGDP': final_revised_RGDP,
    'Final_Revised_RGDP_YoY_Growth': final_revised_RGDP_yoy,
    'Real_Time_YoY_Growth': real_time_RGDP_yoy,
    'Acceleration Signal': acceleration_signal,
})

final_output_df.index.name = 'Date'
# Keep only dates where every column is available.
final_output_df = final_output_df.dropna()

display(final_output_df)

FileNotFoundError: [Errno 2] No such file or directory: 'aulrealGDP.xlsx'