In [1]:
import pandas as pd
from fredapi import Fred
import os
from openpyxl import load_workbook
from typing import List, Tuple, Union
from pathlib import Path
import fredpy as fp

In [2]:
# Read the FRED API key from the environment instead of embedding it in the
# notebook, so the credential is not stored in (or shared with) this file.
# Set FRED_API_KEY before starting the kernel.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated.
api = os.environ.get("FRED_API_KEY")
if not api:
    raise RuntimeError("Set the FRED_API_KEY environment variable to your FRED API key.")
# The same key is used by both client libraries.
fred = Fred(api_key=api)
fp.api_key = api

In [3]:
# Ticker lists, one per frequency bucket. Order is preserved because it
# determines the column order of any table built from these lists.
monthly_tickers = [
    "RECPROUSM156N", "PCEDGC96", "PCENDC96", "PCESC96", "RSAFS", "DSPIC96",
    "PSAVERT", "TOTALSA", "REVOLSL", "NONREVSL", "DGORDER", "ADXDNO",
    "INDPRO", "TCU", "HOUST", "PERMIT", "HSN1F", "EXHOSLUSM495S",
    "PAYEMS", "ADPMNUSNERSA", "UNRATE", "U6RATE", "CIVPART", "EMRATIO",
    "JTSJOR", "JTSHIR", "JTSTSR", "AWHAETP", "CPIAUCSL", "CPILFESL",
    "PPIFIS", "PCEPI", "PCEPILFE", "MICH", "CES0500000003", "AHETPI",
    "CSUSHPINSA", "IQ", "IR", "BOPTEXP", "BOPTIMP", "BOPSTB",
]

quarterly_tickers = [
    "GDPC1", "NGDPSAXDCUSQ", "GDPNOW", "PCECC96", "PNFIC1", "PRFIC1",
    "GCE", "ECIWAG", "PRS85006092",
]

weekly_tickers = ["ICSA", "CCSA", "MORTGAGE30US"]

daily_tickers = [
    "T5YIFR", "T10YIE", "DTWEXBGS", "DFF", "DGS2", "DGS5", "DGS10", "DBAA",
]

In [4]:
# Scratch check: load one series through fredpy and show its `last_updated` attribute.
a=fp.series('EXHOSLUSM495S')
a.last_updated

'2025-12-19 09:17:48-06'

In [5]:
# Fetch data and align the index.
# Daily series and series listed in `warning_list` are fetched without release
# history (fred.get_series), so no raw release table is produced for them.
def _period_start(stamps, period_code):
    """Map each timestamp in `stamps` to the start of the period that contains it."""
    if period_code == "D":
        return stamps.dt.floor("D")
    return stamps.dt.to_period(period_code).dt.to_timestamp()


def _fetch_latest_only(ticker, fred, period_code):
    """Fetch `ticker` via fred.get_series and return (values_df, dates_df), or None on failure.

    No release history is requested on this path, so the "date" frame holds the
    last observation date that falls inside each period.
    """
    try:
        s = fred.get_series(ticker)
    except Exception as e:
        print(f"Warning: couldn't fetch {ticker}: {e}")
        return None

    s.index = pd.to_datetime(s.index)
    df = s.reset_index()
    df.columns = ["release_date", ticker]
    df["Time"] = _period_start(df["release_date"], period_code)

    grouped = df.groupby("Time").agg({ticker: "last", "release_date": "last"})
    values = grouped[[ticker]]
    dates = grouped[["release_date"]].rename(columns={"release_date": ticker})
    return values, dates


def Fetch(ticker_list, fred, freq, warning_list=None):
    """Download series and build per-period value / date tables.

    Parameters
    ----------
    ticker_list : iterable of str
        Series identifiers to download.
    fred : object
        Client exposing `get_series_all_releases(ticker)` and `get_series(ticker)`.
    freq : str
        Frequency name or code (e.g. "MONTHLY"/"M", "Q", "W", "A"/"Y", "D");
        case and surrounding whitespace are ignored.
    warning_list : list of str, optional
        Tickers fetched with `get_series` instead of the release history.
        Ignored for daily frequency, where every ticker uses `get_series`.

    Returns
    -------
    (values, dates, raws)
        `values`: one column per ticker, indexed by period start ("Time").
        `dates`:  same shape; the release date (or, without release history,
                  the observation date) that goes with each value.
        `raws`:   all releases side by side (three columns per ticker, aligned
                  by row position, not by period); `None` for daily frequency.
        Three empty DataFrames are returned when nothing could be fetched.

    Raises
    ------
    ValueError
        If `freq` is not a supported frequency.
    """
    if warning_list is None:
        warning_list = []

    # Set frequency
    aliases = {
        "MONTHLY": "M", "M": "M",
        "QUARTERLY": "Q", "Q": "Q",
        "WEEKLY": "W", "W": "W",
        "ANNUAL": "A", "A": "A", "Y": "A",
        "DAILY": "D", "D": "D",
    }
    period_code = aliases.get(str(freq).strip().upper())
    if period_code is None:
        raise ValueError(f"Unsupported frequency: {freq}")

    values_dfs = []
    dates_dfs = []
    raw_dfs = []

    if period_code == "D":
        release_tickers = []
        latest_only_tickers = list(ticker_list)
    else:
        release_tickers = [t for t in ticker_list if t not in warning_list]
        latest_only_tickers = list(warning_list)

    # Series with release history. A failed fetch is reported and skipped.
    for ticker in release_tickers:
        try:
            s = fred.get_series_all_releases(ticker)
        except Exception as e:
            print(f"Warning: couldn't fetch {ticker}: {e}")
            continue
        s["date"] = pd.to_datetime(s["date"])
        s["realtime_start"] = pd.to_datetime(s["realtime_start"])
        s["Time"] = _period_start(s["date"], period_code)

        # Pick ONE row per period: the most recent release that carries a
        # value (or the most recent release at all if every value is missing).
        # groupby(...).agg("last") skips nulls column by column, which could
        # pair a value from one release with the date of a later, empty one.
        ordered = s.assign(_has_value=s["value"].notna())
        ordered = ordered[ordered["Time"].notna()]
        ordered = ordered.sort_values(
            ["Time", "_has_value", "realtime_start"], na_position="first"
        )
        grouped = ordered.drop_duplicates(subset="Time", keep="last").set_index("Time")

        values_dfs.append(grouped[["value"]].rename(columns={"value": ticker}))
        dates_dfs.append(grouped[["realtime_start"]].rename(columns={"realtime_start": ticker}))

        raw = s[["realtime_start", "value", "Time"]].rename(
            columns={
                "realtime_start": f"realtime_start_{ticker}",
                "Time": f"Time_{ticker}",
                "value": f"value_{ticker}",
            }
        )
        raw_dfs.append(raw)

    # Series without release history (warning list, or everything when daily).
    for ticker in latest_only_tickers:
        fetched = _fetch_latest_only(ticker, fred, period_code)
        if fetched is None:
            continue
        values_dfs.append(fetched[0])
        dates_dfs.append(fetched[1])

    if not values_dfs:
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # concat the series of each economic data into a table
    values = pd.concat(values_dfs, axis=1, sort=True)
    dates = pd.concat(dates_dfs, axis=1, sort=True)
    values.index.name = "Time"
    dates.index.name = "Time"
    if period_code == "D":
        return values, dates, None

    # pd.concat raises on an empty list, which happens when every fetched
    # ticker came from the warning list.
    raws = pd.concat(raw_dfs, axis=1, sort=True) if raw_dfs else pd.DataFrame()
    raws.index.name = "Time"
    return values, dates, raws


In [6]:
# Locate the folder used for the CSV tables.
# The project root is taken to be two levels above the current working
# directory (not the notebook file's own location), so the result depends on
# where the kernel was started.
base_dir = Path.cwd().parent.parent
print(base_dir)
# "Raw Data" folder under the project root; created if it does not exist yet.
raw_location = base_dir / "Raw Data"
raw_location.mkdir(parents=True, exist_ok=True)

C:\Users\hp\Desktop\Duke Research Opportunity\Prof. Aguilar Macro Dashboard\MacroDashBoard


In [48]:
# Scratch run: fetch the quarterly tickers ("Q") and keep all three returned tables.
test_values, test_dates, test_raws = Fetch(quarterly_tickers, fred, "Q")

In [55]:
# Display the raw release table returned by the scratch Fetch call.
test_raws

Unnamed: 0_level_0,realtime_start_GDPC1,value_GDPC1,Time_GDPC1,realtime_start_NGDPSAXDCUSQ,value_NGDPSAXDCUSQ,Time_NGDPSAXDCUSQ,realtime_start_GDPNOW,value_GDPNOW,Time_GDPNOW,realtime_start_PCECC96,...,Time_PRFIC1,realtime_start_GCE,value_GCE,Time_GCE,realtime_start_ECIWAG,value_ECIWAG,Time_ECIWAG,realtime_start_PRS85006092,value_PRS85006092,Time_PRS85006092
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1992-12-22,1239.5,1947-01-01,2021-06-14,275200.0,1950-01-01,2016-05-17,3.2445,2011-07-01,1959-02-19,...,1947-01-01,1947-07-20,13.2,1939-01-01,1996-10-29,63.1,1980-01-01,2016-06-07,9.6,1947-04-01
1,1996-01-19,NaT,1947-01-01,2022-02-07,70207.0,1950-01-01,2016-05-17,5.1684,2011-10-01,1965-08-19,...,1947-01-01,1954-07-13,13.5,1939-01-01,2006-04-26,NaT,1980-01-01,2018-08-15,9.5,1947-04-01
2,1997-05-07,1402.5,1947-01-01,2021-06-14,284500.0,1950-04-01,2016-05-17,3.0151,2012-01-01,1976-01-16,...,1947-01-01,1958-07-16,NaT,1939-01-01,1996-10-29,64.5,1980-04-01,2019-03-07,9.4,1947-04-01
3,1999-10-28,NaT,1947-01-01,2022-02-07,72595.8,1950-04-01,2016-05-17,0.2243,2012-04-01,1980-12-23,...,1947-01-01,1947-07-20,13.2,1939-04-01,2006-04-26,NaT,1980-04-01,2021-08-10,9.5,1947-04-01
4,2000-04-27,1481.7,1947-01-01,2024-07-29,72595.796875,1950-04-01,2016-05-17,1.849,2012-07-01,1985-12-20,...,1947-01-01,1954-07-13,13.4,1939-04-01,1996-10-29,65.9,1980-07-01,2022-11-03,9.4,1947-04-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5333,NaT,,NaT,NaT,,NaT,NaT,,NaT,2025-06-26,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
5334,NaT,,NaT,NaT,,NaT,NaT,,NaT,2025-09-25,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
5335,NaT,,NaT,NaT,,NaT,NaT,,NaT,2025-07-30,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
5336,NaT,,NaT,NaT,,NaT,NaT,,NaT,2025-08-28,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT


In [59]:
# Count the Python types present in the column. The result is not displayed
# because this is not the last expression of the cell.
test_raws["value_GDPC1"].apply(type).value_counts()
# Coerce the column to numeric; entries that cannot be parsed become NaN.
test_raws["value_GDPC1"] = pd.to_numeric(test_raws["value_GDPC1"], errors="coerce")

In [39]:
# Load M_Raws.csv from the raw-data folder and display it.
# No index_col is passed, so the file's first column is read as a regular column.
raw_table=pd.read_csv(raw_location.joinpath(f'M_Raws.csv'))
raw_table

  raw_table=pd.read_csv(raw_location.joinpath(f'M_Raws.csv'))


Unnamed: 0,Time,realtime_start_RECPROUSM156N,value_RECPROUSM156N,Time_RECPROUSM156N,realtime_start_PCEDGC96,value_PCEDGC96,Time_PCEDGC96,realtime_start_PCENDC96,value_PCENDC96,Time_PCENDC96,...,Time_BOPTEXP,realtime_start_BOPTIMP,value_BOPTIMP,Time_BOPTIMP,realtime_start_BOPSTB,value_BOPSTB,Time_BOPSTB,realtime_start_TWEXBGSMTH,value_TWEXBGSMTH,Time_TWEXBGSMTH
0,0,2014-03-03,0.00,1967-02-01,1979-11-19,49.2,1959-01-01,1979-11-19,201.3,1959-01-01,...,1992-01-01,2010-04-13,52277.0,1992-01-01,1997-01-17,4845.0,1992-01-01,2019-04-01,28.3166,1973-03-01
1,1,2014-04-01,,1967-02-01,1980-12-22,48.0,1959-01-01,1985-12-23,450.0,1959-01-01,...,1992-02-01,2010-04-13,52513.0,1992-02-01,1997-06-19,4761.0,1992-01-01,2019-04-08,,1973-03-01
2,2,2014-03-03,0.00,1967-03-01,1985-12-23,91.5,1959-01-01,1991-12-23,510.6,1959-01-01,...,1992-03-01,2010-04-13,52935.0,1992-03-01,1998-06-18,4789.0,1992-01-01,2019-04-01,28.5161,1973-04-01
3,3,2014-04-01,,1967-03-01,1991-12-23,108.5,1959-01-01,1996-01-23,595.6,1959-01-01,...,1992-04-01,2010-04-13,53411.0,1992-04-01,1999-06-17,4940.0,1992-01-01,2019-04-08,,1973-04-01
4,4,2014-03-03,0.00,1967-04-01,1996-01-23,97.5,1959-01-01,1999-11-02,,1959-01-01,...,1992-05-01,2010-04-13,53963.0,1992-05-01,2000-06-20,5052.0,1992-01-01,2019-04-01,28.3601,1973-05-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84466,84466,2025-09-02,0.38,2025-06-01,,,,,,,...,,,,,,,,,,
84467,84467,2025-10-01,0.34,2025-06-01,,,,,,,...,,,,,,,,,,
84468,84468,2025-09-02,0.80,2025-07-01,,,,,,,...,,,,,,,,,,
84469,84469,2025-10-01,0.64,2025-07-01,,,,,,,...,,,,,,,,,,


In [57]:
# Select entries of value_GDPC1 that are NOT plain decimal strings.
# Missing entries count as non-matching (na=False), so they are selected too.
# NOTE(review): `.str` needs a string/object column, so this only works before
# the column is coerced with pd.to_numeric.
mask = ~test_raws["value_GDPC1"].str.match(r"^-?\d+(\.\d+)?$", na=False)
bad_values = test_raws.loc[mask, "value_GDPC1"]
bad_values

Time
0       1239.5
1          NaT
2       1402.5
3          NaT
4       1481.7
         ...  
5333       NaN
5334       NaN
5335       NaN
5336       NaN
5337       NaN
Name: value_GDPC1, Length: 5338, dtype: object

In [38]:
# Display the table loaded from M_Raws.csv.
raw_table

Unnamed: 0,Time,realtime_start_RECPROUSM156N,value_RECPROUSM156N,Time_RECPROUSM156N,realtime_start_PCEDGC96,value_PCEDGC96,Time_PCEDGC96,realtime_start_PCENDC96,value_PCENDC96,Time_PCENDC96,...,Time_BOPTEXP,realtime_start_BOPTIMP,value_BOPTIMP,Time_BOPTIMP,realtime_start_BOPSTB,value_BOPSTB,Time_BOPSTB,realtime_start_TWEXBGSMTH,value_TWEXBGSMTH,Time_TWEXBGSMTH
0,0,2014-03-03,0.00,1967-02-01,1979-11-19,49.2,1959-01-01,1979-11-19,201.3,1959-01-01,...,1992-01-01,2010-04-13,52277.0,1992-01-01,1997-01-17,4845.0,1992-01-01,2019-04-01,28.3166,1973-03-01
1,1,2014-04-01,,1967-02-01,1980-12-22,48.0,1959-01-01,1985-12-23,450.0,1959-01-01,...,1992-02-01,2010-04-13,52513.0,1992-02-01,1997-06-19,4761.0,1992-01-01,2019-04-08,,1973-03-01
2,2,2014-03-03,0.00,1967-03-01,1985-12-23,91.5,1959-01-01,1991-12-23,510.6,1959-01-01,...,1992-03-01,2010-04-13,52935.0,1992-03-01,1998-06-18,4789.0,1992-01-01,2019-04-01,28.5161,1973-04-01
3,3,2014-04-01,,1967-03-01,1991-12-23,108.5,1959-01-01,1996-01-23,595.6,1959-01-01,...,1992-04-01,2010-04-13,53411.0,1992-04-01,1999-06-17,4940.0,1992-01-01,2019-04-08,,1973-04-01
4,4,2014-03-03,0.00,1967-04-01,1996-01-23,97.5,1959-01-01,1999-11-02,,1959-01-01,...,1992-05-01,2010-04-13,53963.0,1992-05-01,2000-06-20,5052.0,1992-01-01,2019-04-01,28.3601,1973-05-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84466,84466,2025-09-02,0.38,2025-06-01,,,,,,,...,,,,,,,,,,
84467,84467,2025-10-01,0.34,2025-06-01,,,,,,,...,,,,,,,,,,
84468,84468,2025-09-02,0.80,2025-07-01,,,,,,,...,,,,,,,,,,
84469,84469,2025-10-01,0.64,2025-07-01,,,,,,,...,,,,,,,,,,


In [44]:
def isNewRelease(ticker: str,freq: str,raw: pd.DataFrame) -> bool:
    """Return True when `raw` holds at most one release row for the ticker's latest period.

    Parameters
    ----------
    ticker : str
        Series identifier; `raw` must contain a column named f"Time_{ticker}".
    freq : str
        Frequency passed through to GetLevel.
    raw : pd.DataFrame
        Release table with one `Time_<ticker>` column per series. It is not modified.

    Returns
    -------
    bool
        False if more than one row matches the latest period (i.e. the period
        already has several releases), True otherwise.
    """
    latest_date, value, period = GetLevel(ticker, n_lags=1, freq=freq)
    period = pd.to_datetime(period)
    # Convert into a local Series instead of writing back into `raw`, so the
    # caller's table keeps its original dtype and contents.
    release_periods = pd.to_datetime(raw[f"Time_{ticker}"])
    release_time = int((release_periods == period).sum())
    print(release_time)
    return release_time <= 1

# Try the check for one monthly ticker against the table loaded from M_Raws.csv.
a=isNewRelease('ADPMNUSNERSA','M',raw_table)
print(a)

2025-12-03
2025-11-01 00:00:00
1
True


In [21]:
def isNewReleaseOnline(ticker: str,freq: str,fred) -> bool:
    """Return True when the online release history has at most one row for the latest period.

    The latest period comes from GetLevel; the release history is downloaded
    with `fred.get_series_all_releases(ticker)`. The latest date, the period
    and the number of matching rows are printed along the way.
    """
    latest_date, value, period = GetLevel(ticker, n_lags=1, freq=freq)
    releases = fred.get_series_all_releases(ticker)
    print(latest_date)
    print(period)
    releases['date'] = pd.to_datetime(releases['date'])
    # Rows whose observation date equals the latest period.
    same_period = releases['date'] == period
    n_releases = releases[same_period].shape[0]
    print(n_releases)
    return not n_releases > 1

# Try the online check for one monthly ticker.
a=isNewReleaseOnline('ADPMNUSNERSA','M',fred)
print(a)

2025-12-03
2025-11-01 00:00:00
1
True


In [46]:
# Download every release of CPIAUCSL, parse the observation dates and order
# the rows by observation date.
raw = (
    fred.get_series_all_releases("CPIAUCSL")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
)
raw

Unnamed: 0,realtime_start,date,value
0,1994-02-17 00:00:00,1947-01-01,21.48
1,1994-02-17 00:00:00,1947-02-01,21.62
2,1994-02-17 00:00:00,1947-03-01,22.0
3,1994-02-17 00:00:00,1947-04-01,22.0
4,1994-02-17 00:00:00,1947-05-01,21.95
...,...,...,...
3289,2025-08-12 00:00:00,2025-07-01,322.132
3290,2025-09-11 00:00:00,2025-08-01,323.364
3291,2025-10-24 00:00:00,2025-09-01,324.368
3292,2025-12-18 00:00:00,2025-10-01,NaT


In [16]:
# Inspect the period column of one ticker in the loaded release table.
raw_table['Time_ADPMNUSNERSA']

0        2010-01-01
1        2010-01-01
2        2010-01-01
3        2010-01-01
4        2010-01-01
            ...    
84466           NaN
84467           NaN
84468           NaN
84469           NaN
84470           NaN
Name: Time_ADPMNUSNERSA, Length: 84471, dtype: object

In [25]:
# Display monthly_raws.
# NOTE(review): no assignment to monthly_raws is visible in the cells shown
# here; confirm where it is created before relying on this cell.
monthly_raws

Unnamed: 0_level_0,realtime_start_RECPROUSM156N,value_RECPROUSM156N,Time_RECPROUSM156N,realtime_start_PCEDGC96,value_PCEDGC96,Time_PCEDGC96,realtime_start_PCENDC96,value_PCENDC96,Time_PCENDC96,realtime_start_PCESC96,...,Time_IR,realtime_start_BOPTEXP,value_BOPTEXP,Time_BOPTEXP,realtime_start_BOPTIMP,value_BOPTIMP,Time_BOPTIMP,realtime_start_BOPSTB,value_BOPSTB,Time_BOPSTB
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2014-03-03,0.0,1967-02-01,1979-11-19,49.2,1959-01-01,1979-11-19,201.3,1959-01-01,1979-11-19,...,1982-09-01,2010-04-13,50251.0,1992-01-01,2010-04-13,52277.0,1992-01-01,1997-01-17,4845.0,1992-01-01
1,2014-04-01,NaT,1967-02-01,1980-12-22,48.0,1959-01-01,1985-12-23,450.0,1959-01-01,1980-12-22,...,1982-10-01,2010-04-13,51682.0,1992-02-01,2010-04-13,52513.0,1992-02-01,1997-06-19,4761.0,1992-01-01
2,2014-03-03,0.0,1967-03-01,1985-12-23,91.5,1959-01-01,1991-12-23,510.6,1959-01-01,1985-12-23,...,1982-11-01,2010-04-13,50294.0,1992-03-01,2010-04-13,52935.0,1992-03-01,1998-06-18,4789.0,1992-01-01
3,2014-04-01,NaT,1967-03-01,1991-12-23,108.5,1959-01-01,1996-01-23,595.6,1959-01-01,1991-12-23,...,1982-12-01,2010-04-13,50302.0,1992-04-01,2010-04-13,53411.0,1992-04-01,1999-06-17,4940.0,1992-01-01
4,2014-03-03,0.0,1967-04-01,1996-01-23,97.5,1959-01-01,1999-11-02,NaT,1959-01-01,1996-01-23,...,1983-01-01,2010-04-13,50044.0,1992-05-01,2010-04-13,53963.0,1992-05-01,2000-06-20,5052.0,1992-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84466,2025-09-02,0.38,2025-06-01,NaT,,NaT,NaT,,NaT,NaT,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
84467,2025-10-01,0.34,2025-06-01,NaT,,NaT,NaT,,NaT,NaT,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
84468,2025-09-02,0.8,2025-07-01,NaT,,NaT,NaT,,NaT,NaT,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT
84469,2025-10-01,0.64,2025-07-01,NaT,,NaT,NaT,,NaT,NaT,...,NaT,NaT,,NaT,NaT,,NaT,NaT,,NaT


In [13]:
# Display monthly_dates.
monthly_dates

Unnamed: 0,realtime_start,date,value
0,1992-12-22 00:00:00,1946-01-01 00:00:00,199.7
1,1996-01-19 00:00:00,1946-01-01 00:00:00,NaT
2,1997-05-07 00:00:00,1946-01-01 00:00:00,210.4
3,1999-10-28 00:00:00,1946-01-01 00:00:00,NaT
4,1992-12-22 00:00:00,1946-04-01 00:00:00,207.7
...,...,...,...
3226,2025-06-26 00:00:00,2025-01-01 00:00:00,29962.047
3227,2025-09-25 00:00:00,2025-01-01 00:00:00,30042.113
3228,2025-07-30 00:00:00,2025-04-01 00:00:00,30331.117
3229,2025-08-28 00:00:00,2025-04-01 00:00:00,30353.902


In [None]:
def aggregate_by_representation_period(df, ticker, period_code="M"):
    """Collapse a release table to one row per represented period.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns "date" (period the observation refers to),
        "real_time_start" (when it was released) and `ticker` (the value).
        The input frame is not modified.
    ticker : str
        Name of the value column; also used as the column name of both outputs.
    period_code : str, default "M"
        "D" groups by calendar day; any other code is passed to
        `Series.dt.to_period` and the period start is used as the key.

    Returns
    -------
    (values, dates) : tuple of pd.DataFrame
        Both indexed by "Time". `values` holds the value of the most recent
        release that carries a value; `dates` holds the release time of that
        same row.
    """
    # Ensure datetimes
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"])
    df["real_time_start"] = pd.to_datetime(df["real_time_start"])

    # Derive grouping key "Time" from the representation-period column.
    if period_code == "D":
        df["Time"] = df["date"].dt.floor("D")
    else:
        df["Time"] = df["date"].dt.to_period(period_code).dt.to_timestamp()

    # Take ONE row per period: the latest release that has a value, or the
    # latest release at all when every value in the period is missing.
    # groupby(...).agg("last") skips nulls independently per column, which
    # could combine the value of one release with the date of a later empty one.
    df["_has_value"] = df[ticker].notna()
    df = df[df["Time"].notna()]
    df = df.sort_values(["Time", "_has_value", "real_time_start"], na_position="first")
    grouped = df.drop_duplicates(subset="Time", keep="last").set_index("Time")

    # values: latest value per represented period
    values = grouped[[ticker]].copy()
    # dates: release time of that value (column name aligned to ticker)
    dates = grouped[["real_time_start"]].rename(columns={"real_time_start": ticker}).copy()

    values.index.name = "Time"
    dates.index.name = "Time"

    return values, dates

In [12]:
# GetData from the csv files
def NormalizeFreq(code: str) -> str:
    """Return canonical single-letter code: 'A','Q','M','W','D' or raise.

    Matching ignores case and surrounding whitespace, so values such as
    " monthly " are accepted in the same way Fetch accepts them.

    Raises
    ------
    ValueError
        If `code` is not a recognised frequency name or letter.
    """
    aliases = {
        "MONTHLY": "M", "M": "M",
        "QUARTERLY": "Q", "Q": "Q",
        "WEEKLY": "W", "W": "W",
        "ANNUAL": "A", "A": "A", "Y": "A",
        "DAILY": "D", "D": "D",
    }
    # str() so that non-string input ends in the documented ValueError rather
    # than an AttributeError on .upper().
    key = str(code).strip().upper()
    if key not in aliases:
        raise ValueError(f"Unsupported frequency: {code}")
    return aliases[key]

def GetData(ticker: str, freq: str):
    """Load one ticker from the stored CSV tables for the given frequency.

    Reads "<code>_Dates.csv" and "<code>_Values.csv" from the "Raw Data" folder
    under `base_dir`, where <code> is the canonical frequency letter.

    Returns
    -------
    (latest_update, value_col, current_period)
        latest_update  : entry of the dates table at the row of the last
                         non-missing value (as stored in the CSV; only the
                         index is date-parsed on load).
        value_col      : the ticker's full value column, indexed by period.
        current_period : index label of the last non-missing value.

    Raises
    ------
    ValueError
        If `freq` is not a supported frequency.
    KeyError
        If the ticker is missing from either file or has no non-missing value.
    """
    period_code = NormalizeFreq(freq)
    base = base_dir / "Raw Data"

    dates = pd.read_csv(base / f"{period_code}_Dates.csv", index_col=0, parse_dates=True)
    values = pd.read_csv(base / f"{period_code}_Values.csv", index_col=0, parse_dates=True)

    if ticker not in values.columns:
        raise KeyError(f"Ticker '{ticker}' not found in values file.")
    if ticker not in dates.columns:
        raise KeyError(f"Ticker '{ticker}' not found in dates file.")

    value_col = values[ticker]
    current_period = value_col.last_valid_index()
    if current_period is None:
        # Without this guard the lookup below fails with an opaque KeyError(None).
        raise KeyError(f"Ticker '{ticker}' has no non-missing values.")

    # The two tables are read positionally: row i of the dates table is assumed
    # to describe row i of the values table.
    last_pos = values.index.get_loc(current_period)
    latest_update = dates[ticker].iloc[last_pos]
    return latest_update, value_col, current_period

In [52]:
# NOTE(review): GetData returns (latest_update, value_col, current_period), so
# the names on the left do not match what they receive. The empty ticker ""
# also looks like a placeholder; confirm the intended ticker and unpacking order.
current_period,latext_update,value_col=GetData("", freq="M")

In [40]:
# Reload M_Raws.csv from the raw-data folder and display it.
raw_table=pd.read_csv(raw_location.joinpath(f'M_Raws.csv'))
raw_table

  raw_table=pd.read_csv(raw_location.joinpath(f'M_Raws.csv'))


Unnamed: 0,Time,realtime_start_RECPROUSM156N,value_RECPROUSM156N,Time_RECPROUSM156N,realtime_start_PCEDGC96,value_PCEDGC96,Time_PCEDGC96,realtime_start_PCENDC96,value_PCENDC96,Time_PCENDC96,...,Time_IR,realtime_start_BOPTEXP,value_BOPTEXP,Time_BOPTEXP,realtime_start_BOPTIMP,value_BOPTIMP,Time_BOPTIMP,realtime_start_BOPSTB,value_BOPSTB,Time_BOPSTB
0,0,2014-03-03,0.00,1967-02-01,1979-11-19,49.2,1959-01-01,1979-11-19,201.3,1959-01-01,...,1982-09-01,2010-04-13,50251.0,1992-01-01,2010-04-13,52277.0,1992-01-01,1997-01-17,4845.0,1992-01-01
1,1,2014-04-01,,1967-02-01,1980-12-22,48.0,1959-01-01,1985-12-23,450.0,1959-01-01,...,1982-10-01,2010-04-13,51682.0,1992-02-01,2010-04-13,52513.0,1992-02-01,1997-06-19,4761.0,1992-01-01
2,2,2014-03-03,0.00,1967-03-01,1985-12-23,91.5,1959-01-01,1991-12-23,510.6,1959-01-01,...,1982-11-01,2010-04-13,50294.0,1992-03-01,2010-04-13,52935.0,1992-03-01,1998-06-18,4789.0,1992-01-01
3,3,2014-04-01,,1967-03-01,1991-12-23,108.5,1959-01-01,1996-01-23,595.6,1959-01-01,...,1982-12-01,2010-04-13,50302.0,1992-04-01,2010-04-13,53411.0,1992-04-01,1999-06-17,4940.0,1992-01-01
4,4,2014-03-03,0.00,1967-04-01,1996-01-23,97.5,1959-01-01,1999-11-02,,1959-01-01,...,1983-01-01,2010-04-13,50044.0,1992-05-01,2010-04-13,53963.0,1992-05-01,2000-06-20,5052.0,1992-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84466,84466,2025-09-02,0.38,2025-06-01,,,,,,,...,,,,,,,,,,
84467,84467,2025-10-01,0.34,2025-06-01,,,,,,,...,,,,,,,,,,
84468,84468,2025-09-02,0.80,2025-07-01,,,,,,,...,,,,,,,,,,
84469,84469,2025-10-01,0.64,2025-07-01,,,,,,,...,,,,,,,,,,


In [13]:
def GetLevel(ticker: str, freq: str, n_lags: int = 4):
    """Return the most recent level values of a ticker.

    Returns
    -------
    (latest_date, latest_values, current_period)
        latest_values is a list of at most `n_lags` values in chronological
        order whose last element belongs to `current_period` (the last period
        with a non-missing value). For daily data missing days are dropped
        before taking the tail; for other frequencies gaps inside the window
        are kept as NaN.

    NOTE(review): this returns `n_lags` values, while WritePanel pads to
    `n_lags + 1` (present + n_lags lags) and GetDelta returns `n_lags + 1`
    values. Confirm whether the level window should also be `n_lags + 1`.
    """
    latest_date, value_col, current_period = GetData(ticker, freq)
    last_idx = value_col.last_valid_index()
    last_pos = value_col.index.get_loc(last_idx)
    period_code = NormalizeFreq(freq)

    # Cut the column at the last valid observation. The stored table can extend
    # past it (other tickers may have newer periods), and those trailing NaN
    # rows must not be reported as the value for `current_period`.
    history = value_col.iloc[: last_pos + 1]
    if period_code == "D":
        history = history.dropna()

    latest_values = history.tail(n_lags).tolist()
    return latest_date, latest_values, current_period

def GetDelta(ticker: str, freq: str, agg_freq: str, n_lags: int = 4, pct: bool = True) -> Tuple[str, List[float], pd.Timestamp]:
    """
    Return (latest_date, deltas, current_period), where `deltas` is a list of
    the last `n_lags + 1` changes ending at the last valid observation
    (fewer if the series is shorter).

    - `freq` is the native frequency of the data stored (e.g., "M" or "MONTHLY").
    - `agg_freq` is the aggregation period for the delta (e.g., "A" for year-over-year).
    - `pct` selects percent change (True) or level difference (False).
    - When `agg_freq` equals `freq`, the change is against the previous row.
    - If value_col has a DatetimeIndex, date-aware shifts (DateOffset) are used.
    - Otherwise integer row shifts are used where one can be derived.

    Raises ValueError for frequency combinations that are not supported.
    """
    # Row shifts for series without a datetime index.
    int_shifts = {
        ("M", "A"): 12,
        ("M", "Q"): 3,
        ("Q", "A"): 4,
    }
    # Calendar offsets for series with a datetime index.
    date_offsets = {
        ("M", "A"): pd.DateOffset(months=12),
        ("M", "Q"): pd.DateOffset(months=3),
        ("Q", "A"): pd.DateOffset(months=12),
        ("W", "A"): pd.DateOffset(weeks=52),
        ("D", "A"): pd.DateOffset(years=1),
    }

    # normalize freq inputs
    freq_code = NormalizeFreq(freq)
    agg_code = NormalizeFreq(agg_freq)
    key = (freq_code, agg_code)

    # GetData takes (ticker, freq) only; passing n_lags as a third argument
    # raised TypeError on every call.
    latest_date, value_col, current_period = GetData(ticker, freq_code)
    last_idx = value_col.last_valid_index()
    last_pos = value_col.index.get_loc(last_idx)
    start_pos = max(0, last_pos - n_lags)

    idx = value_col.index
    if agg_code == freq_code:
        # Same-frequency change (e.g., MoM when agg == freq)
        if pct:
            result = value_col.pct_change(1, fill_method=None)
        else:
            result = value_col.diff(1)
    elif isinstance(idx, pd.DatetimeIndex) or pd.api.types.is_datetime64_any_dtype(idx):
        if key == ("Q", "M"):
            raise ValueError("Unsupported conversion: quarter-index to month-based aggregation when using DatetimeIndex.")
        if key not in date_offsets:
            raise ValueError(f"Unsupported date-based conversion: {freq_code} -> {agg_code}")
        offset = date_offsets[key]

        # Move historical values so prev[t] == value_col[t - offset]
        prev = value_col.shift(freq=offset)
        # Feb 28 and Feb 29 both land on Feb 28 after a one-year shift; keep
        # the first so the index stays unique and reindex does not raise.
        prev = prev[~prev.index.duplicated(keep="first")]
        prev = prev.reindex(value_col.index)

        if pct:
            result = (value_col - prev) / prev
        else:
            result = value_col - prev
    else:
        # Not a datetime index: fall back to integer shifts
        shift = int_shifts.get(key)
        if shift is None:
            raise ValueError(f"Unsupported conversion for integer-indexed series: {freq_code} -> {agg_code}")
        if pct:
            result = value_col.pct_change(shift, fill_method=None)
        else:
            result = value_col.diff(shift)

    result = result.iloc[start_pos : last_pos + 1].tolist()
    return latest_date, result, current_period


In [6]:
# Project root: two levels above the current working directory.
base_dir = Path.cwd().parent.parent

In [None]:
# Scratch call: change of a daily ("D") ticker over an annual ("A") horizon.
GetDelta('DTWEXBGS','D','A')

In [58]:
# Make sure the raw-data folder exists, then load the stored value/date tables.
raw_location = base_dir / "Raw Data"
raw_location.mkdir(parents=True, exist_ok=True)
# No index_col is given, so the first CSV column is loaded as an ordinary
# column and each frame gets a default integer index.
daily_values=pd.read_csv(raw_location/"D_Values.csv")
monthly_values=pd.read_csv(raw_location/"M_Values.csv")
monthly_dates=pd.read_csv(raw_location/"M_Dates.csv")

In [59]:
def AddRealAvgEarning():
    """Add the column 'RCES0500000003' to the module-level monthly tables.

    In `monthly_values` the new column is 'CES0500000003' divided by
    ('PCEPI' / 100); in `monthly_dates` it is a copy of the 'CES0500000003'
    column. Both frames are modified in place.
    """
    nominal = monthly_values['CES0500000003']
    deflator = monthly_values['PCEPI'] / 100
    monthly_values['RCES0500000003'] = nominal / deflator
    monthly_dates['RCES0500000003'] = monthly_dates['CES0500000003']

AddRealAvgEarning()
monthly_values

Unnamed: 0,Time,RECPROUSM156N,PCEDGC96,PCENDC96,PCESC96,RSAFS,DSPIC96,PSAVERT,TOTALSA,REVOLSL,...,AHETPI,CSUSHPINSA,IQ,IR,BOPTEXP,BOPTIMP,BOPSTB,EXHOSLUSM495S,RCES0500000003*,RCES0500000003
0,1919-01-01,,,,,,,,,,...,,,,,,,,,,
1,1919-02-01,,,,,,,,,,...,,,,,,,,,,
2,1919-03-01,,,,,,,,,,...,,,,,,,,,,
3,1919-04-01,,,,,,,,,,...,,,,,,,,,,
4,1919-05-01,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1277,2025-06-01,0.34,2113.9,3522.8,10863.2,723033.0,18036.2,5.0,16.042,1300321.06,...,31.26,331.685,152.4,140.7,279650.0,338736.0,26631.0,3930000.0,28.648525,28.648525
1278,2025-07-01,0.64,2152.2,3528.1,10888.2,727414.0,18077.3,4.8,16.896,1311491.90,...,31.34,331.003,152.8,141.0,280464.0,358775.0,25565.0,4010000.0,28.696563,28.696563
1279,2025-08-01,0.96,2171.4,3547.1,10910.9,732010.0,18097.2,4.6,16.802,1305533.90,...,31.46,330.022,153.2,141.4,,,,4000000.0,28.699375,28.699375
1280,2025-09-01,,,,,,,,,,...,,,,,,,,4060000.0,,


In [54]:
# Scratch check: move the index forward by 12 months (values stay on their rows).
# NOTE(review): shift(freq=...) needs a datetime-like index; confirm that
# monthly_values is indexed by time when this cell runs.
monthly_values.shift(freq=pd.DateOffset(months=12))

Unnamed: 0_level_0,SAHMREALTIME,RECPROUSM156N,PCEDGC96,PCENDC96,PCESC96,RSAFS,DSPIC96,PSAVERT,TOTALSA,REVOLSL,...,T5YIFR,CES0500000003,AHETPI,CSUSHPINSA,DTWEXBGS,IQ,IR,BOPTEXP,BOPTIMP,BOPSTB
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1920-01-01,,,,,,,,,,,...,,,,,,,,,,
1920-02-01,,,,,,,,,,,...,,,,,,,,,,
1920-03-01,,,,,,,,,,,...,,,,,,,,,,
1920-04-01,,,,,,,,,,,...,,,,,,,,,,
1920-05-01,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2026-06-01,0.17,0.34,2113.9,3522.8,10863.2,723033.0,18036.2,5.0,16.042,1300321.06,...,2.27,36.31,31.26,331.671,119.8269,152.4,140.7,279650.0,338736.0,26631.0
2026-07-01,0.10,0.64,2152.2,3528.1,10888.2,727414.0,18077.3,4.8,16.896,1311491.90,...,2.32,36.43,31.34,331.127,122.1088,152.8,141.0,280464.0,358775.0,25565.0
2026-08-01,0.13,0.96,2171.4,3547.1,10910.9,732010.0,18097.2,4.6,16.802,1305533.90,...,2.35,36.53,31.46,,120.6028,153.2,141.4,,,
2026-09-01,,,,,,,,,,,...,2.30,,,,120.5624,,,,,


In [32]:
# --- Constants ---

from openpyxl.styles import PatternFill

def isNewRelease(ticker: str,freq: str,raw: pd.DataFrame) -> bool:
    """Return True when `raw` has at most one row for the ticker's latest period.

    The latest update, the period and the number of matching rows are printed.
    NOTE(review): a second function with this name is defined further down in
    the same cell and replaces this one when the cell runs.
    """
    latest_date, value, period = GetLevel(ticker, n_lags=1, freq=freq)
    print(latest_date)
    print(period)
    # Number of rows of `raw` whose period column equals the latest period.
    n_matches = raw[raw[f"Time_{ticker}"] == period].shape[0]
    print(n_matches)
    return n_matches <= 1

# Template workbook to read and the separate file the updated copy is saved to.
folder = base_dir / "MacroDashboard Versions"
input_xlsx = folder / "Economic Dashboard V1-Template.xlsx"
output_xlsx = folder / "Economic Dashboard V1-Template-UPDATE.xlsx"

# Open the template and work on its active sheet.
wb = load_workbook(input_xlsx)
ws = wb.active
# Rows from start_row to the sheet's last used row are processed.
start_row = 3
max_row = ws.max_row
def _to_python_datetime(dt: Union[pd.Timestamp, str, None]):
    """
    Convert pandas.Timestamp -> python datetime for openpyxl.
    If dt is None or unparseable, return it unchanged.
    """
    if isinstance(dt, pd.Timestamp):
        return dt.to_pydatetime()
    return dt

def isNewRelease(ticker: str,freq: str,raw: pd.DataFrame) -> bool:
    """Return True when `raw` has at most one release row for the ticker's latest period.

    Parameters
    ----------
    ticker : str
        Series identifier; `raw` must contain a column named f"Time_{ticker}".
    freq : str
        Frequency passed through to GetLevel.
    raw : pd.DataFrame
        Release table with one `Time_<ticker>` column per series. It is not modified.

    Returns
    -------
    bool
        False if the latest period appears in more than one row, True otherwise.
    """
    latest_date, value, period = GetLevel(ticker, n_lags=1, freq=freq)
    print(latest_date)
    print(period)
    # Compare as datetimes: a table read from CSV holds the period column as
    # text, which never equals a Timestamp.
    release_periods = pd.to_datetime(raw[f"Time_{ticker}"])
    n_matches = int((release_periods == pd.to_datetime(period)).sum())
    print(n_matches)
    # The row count is compared directly; the previous code used the count as
    # a column key into `raw`, which raised KeyError.
    return n_matches <= 1




# yellow fill for "fresh" release dates
_New_FILL = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
# green fill for revision
# NOTE(review): _Revision_FILL is defined but not referenced in the code shown here.
_Revision_FILL = PatternFill(start_color="CCFFCC", end_color="CCFFCC", fill_type="solid")
# "no fill" sentinel to clear existing fills
_NO_FILL = PatternFill(fill_type=None)

def WritePanel(
    row: int,
    ticker_col: str,
    freq_col: str,
    date_col: str,
    units_col: str,
    present_col: str,
    lag_cols: List[str],
    n_lags: int = 4,
) -> None:
    """
    Write one row for a panel (left or right) of the worksheet `ws`.

    - ticker_col, freq_col, date_col, units_col, present_col: column letters
      (e.g. "B", "E", "C", "D", "F")
    - lag_cols: list of lag column letters in order [Lag1_col, Lag2_col, ...]
    - n_lags: number of lags expected (defaults to 4)

    The row is skipped when the ticker cell is empty, when the freq cell reads
    "Freq" (a header row) or when the ticker text is longer than 20 characters.
    If the units cell contains "delta" the values come from GetDelta (percent
    change when the units contain "%", aggregation letter taken from the
    character after "/"); otherwise they come from GetLevel.
    Any failure while obtaining the data is written into the date cell as
    "ERR: ..." and the row is left otherwise untouched.
    """
    # read ticker and freq values from the worksheet
    raw_ticker = ws[f"{ticker_col}{row}"].value
    raw_freq = ws[f"{freq_col}{row}"].value
    raw_units = ws[f"{units_col}{row}"].value

    # If ticker cell is empty -> skip row
    if raw_ticker is None or str(raw_ticker).strip() == "" \
            or str(raw_freq).strip() == "Freq" \
            or len(str(raw_ticker).strip()) > 20:
        return

    # normalize strings safely
    ticker = str(raw_ticker).strip()
    freq = str(raw_freq).strip() if raw_freq is not None else "M"
    units = str(raw_units).strip().lower()

    date_cell = ws[f"{date_col}{row}"]

    # get data using GetLevel/GetDelta (both return 3 values)
    try:
        if 'delta' not in units:
            latest_date, recent_values, current_period = GetLevel(ticker, freq, n_lags=n_lags)
        else:
            pct = "%" in units
            # Parsed inside the try so that a units cell without "/" marks only
            # this row as an error instead of aborting the whole write loop.
            if "/" not in units:
                raise ValueError(f"units '{units}' has no '/<freq>' part")
            slash = units.index("/")
            agg = units[slash + 1:slash + 2]
            latest_date, recent_values, current_period = GetDelta(ticker, freq, agg, n_lags=n_lags, pct=pct)
    except Exception as exc:
        # write error to date cell and skip writing values for this row
        date_cell.value = f"ERR: {exc}"
        # clear any fill to avoid stale highlight
        date_cell.fill = _NO_FILL
        return

    # recent_values expected chronological oldest ... most recent
    values = list(recent_values)

    # Ensure fixed length = n_lags + 1 by padding at front (older side) with None
    expected_len = n_lags + 1
    if len(values) < expected_len:
        values = [None] * (expected_len - len(values)) + values

    # Present is last element, lag1 is second-last, etc.
    # (lag i is values[-1 - i]; None when the list is too short for that lag)
    present_value = values[-1]
    lag_values = [
        values[-1 - i] if i < len(values) else None
        for i in range(1, len(lag_cols) + 1)
    ]

    # write the representation-period (current_period) as a date (no time)
    try:
        if isinstance(current_period, pd.Timestamp):
            date_cell.value = current_period.date()
        else:
            date_cell.value = _to_python_datetime(current_period).date()
    except Exception:
        # not date-like: store it as it is
        date_cell.value = current_period

    # Highlight date cell if latest_date is within 7 days of now; else clear fill
    try:
        is_fresh = (
            pd.notnull(latest_date)
            and pd.Timestamp.now() - pd.to_datetime(latest_date) <= pd.Timedelta(days=7)
        )
    except Exception:
        # In case of any parsing/comparison errors, ensure no fill is applied
        is_fresh = False
    date_cell.fill = _New_FILL if is_fresh else _NO_FILL

    # write present and lag values
    ws[f"{present_col}{row}"].value = present_value
    for col_letter, lag_value in zip(lag_cols, lag_values):
        ws[f"{col_letter}{row}"].value = lag_value


# Output section is the left panel, and other three sections are in the right panel.
# The column letters are fixed here; detecting them from the sheet could be automated later.
# Each dict is passed to WritePanel as keyword arguments.
# Left panel: B = ticker, C = date, D = units, E = freq, F = Present, G,H,I,J = Lag1..Lag4
left_panel = {
    "ticker_col": "B",
    "freq_col": "E",
    "date_col": "C",
    "units_col": "D",
    "present_col": "F",
    "lag_cols": ["G", "H", "I", "J"],
}

# Right panel: M = ticker, N = date, O = units, P = freq, Q = Present, R,S,T,U = Lag1..Lag4
right_panel = {
    "ticker_col": "M",
    "freq_col": "P",
    "date_col": "N",
    "units_col": "O",
    "present_col": "Q",
    "lag_cols": ["R", "S", "T", "U"],
}

# Fill both panels for every row from start_row to the sheet's last row.
for r in range(start_row, max_row + 1):
    WritePanel(r, **left_panel)
    WritePanel(r, **right_panel)

# Save to the separate output file; the template file itself is not overwritten.
wb.save(output_xlsx)
