In [1]:
import os
import warnings
import zipfile
from datetime import datetime
from io import BytesIO, StringIO

import pandas as pd
import plotly.graph_objects as go
import requests
from fredapi import Fred

# plotly.express is imported inside the context manager so that any warnings
# raised while importing it are suppressed; the previous filters are restored
# when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import plotly.express as px

#### Define web-scraping helper functions and the desired series IDs

In [2]:
def get_bls_txt(url, timeout=30):
    '''Download a tab-delimited text file and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the tab-separated file to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The parsed table, with surrounding whitespace stripped from the
        column names.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    response = requests.get(
        url,
        headers={"user-agent": "xinxianwang21@gmail.com"},
        timeout=timeout,
    )
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()
    # low_memory=False parses the file in a single pass, so each column gets one
    # consistently inferred dtype and pandas does not emit a mixed-type
    # DtypeWarning.
    df = pd.read_csv(StringIO(response.text), sep='\t', low_memory=False)
    df.columns = df.columns.str.strip()
    return df

def get_bls_xls(url, sheet = 'Table 1', header = 2, timeout = 30):
    '''Download an Excel workbook and return one of its sheets as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the workbook to fetch.
    sheet : str or int, optional
        Sheet to read, passed to ``pandas.read_excel`` as ``sheet_name``
        (default ``'Table 1'``).
    header : int, optional
        Zero-based row used for the column labels (default 2).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The parsed sheet, with surrounding whitespace stripped from the
        column names.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    response = requests.get(
        url,
        headers={"user-agent": "xinxianwang21@gmail.com"},
        timeout=timeout,
    )
    # Fail loudly on an error status rather than handing an error page to the
    # Excel parser, which would surface as an unrelated parsing error.
    response.raise_for_status()
    df = pd.read_excel(BytesIO(response.content), sheet_name=sheet, header=header)
    df.columns = df.columns.str.strip()
    return df

def get_bls_zip(zip_url, target_file_name, start_row, timeout=30):
    '''Download a zip archive and parse one fixed-width member into a DataFrame.

    Parameters
    ----------
    zip_url : str
        Address of the zip archive to fetch.
    target_file_name : str
        Suffix identifying the member to read; the first archive entry whose
        name ends with this string is used.
    start_row : int
        Number of leading lines to skip, passed to ``pandas.read_fwf`` as
        ``skiprows``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The member parsed as fixed-width text with no header row, so the
        columns are labelled 0, 1, 2, ...

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    FileNotFoundError
        If no archive member ends with ``target_file_name``.
    '''
    response = requests.get(
        zip_url,
        headers={"user-agent": "xinxianwang21@gmail.com"},
        timeout=timeout,
    )
    response.raise_for_status()
    # The context manager closes the archive once the member has been read.
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        for member_name in archive.namelist():
            if member_name.endswith(target_file_name):
                with archive.open(member_name) as member:
                    return pd.read_fwf(member, skiprows=start_row, header=None)
    # Raise a clear error instead of returning None, which would only fail
    # later with an unrelated AttributeError at the call site.
    raise FileNotFoundError(
        f"No file ending with {target_file_name!r} found in archive {zip_url}"
    )
            
def order_df(df, list, mapping_df):
    '''Build a wide table of values per series, ordered and labelled by title.

    Rows of ``df`` whose ``series_id`` is in ``list`` are pivoted so that each
    series is a row and each ``date`` a column, then reordered to follow
    ``list``. ``mapping_df`` supplies the ``series_title`` for each
    ``series_id``; the titles become the index of the result and the
    ``series_id`` column is removed.
    '''
    wanted_rows = df.loc[df['series_id'].isin(list)]
    wide = wanted_rows.pivot_table(
        index='series_id',
        columns='date',
        values='value',
    ).reindex(list)
    # Left merge keeps every requested series in the order given by `list`.
    labelled = wide.merge(mapping_df, how='left', on='series_id')
    return labelled.set_index('series_title').drop(columns='series_id')

def expand_yearly_dec(df):
    '''Repeat each yearly row over the twelve months that follow it.

    The index of ``df`` must hold integer years. The row for year ``Y`` is
    copied onto every month start from December ``Y`` through November
    ``Y + 1``. The result is indexed by those month-start timestamps under
    the name ``Date``.
    '''
    records = [
        {**{col: row[col] for col in df.columns}, 'Date': month_start}
        for year, row in df.iterrows()
        for month_start in pd.date_range(
            start=f'{year}-12-01', end=f'{year+1}-11-01', freq='MS'
        )
    ]
    expanded = pd.DataFrame(records)
    return expanded.set_index('Date')

In [3]:
# Series IDs to pull from the CPI data file. The list order is the row order of
# the table built by order_df (it reindexes on this list), and therefore the
# column order of sa_df and everything derived from it.
sa_ids = [
    'CUSR0000SA0',  # All items
    'CUSR0000SAF1',  # Food
    'CUSR0000SAF11',  # Food at home
    'CUSR0000SEFV',  # Food away from home
    'CUSR0000SA0E',  # Energy
    'CUSR0000SA0L1E',  # All items less food and energy
    'CUSR0000SACL1E',  # Commodities less food and energy commodities
    'CUSR0000SAA',  # Apparel
    'CUSR0000SETA01',  # New vehicles
    'CUSR0000SETA02',  # Used cars and trucks
    'CUSR0000SAM1',  # Medical care commodities
    'CUSR0000SAF116', # Alcoholic Beverages
    'CUSR0000SEGA',  # Tobacco and smoking products
    'CUSR0000SASLE', # Services less Energy Services
    'CUSR0000SAH1',  # Shelter
    'CUSR0000SEHA',  # Rent of primary residence
    'CUSR0000SEHC',  # Owners' equivalent rent of residences
    'CUSR0000SAM2',  # Medical care services
    'CUSR0000SEMC01',  # Physicians' services
    'CUSR0000SEMD01',  # Hospital services
    'CUSR0000SAS4',  # Transportation services
    'CUSR0000SETD',  # Motor vehicle maintenance and repair
    'CUSR0000SETE',  # Motor vehicle insurance
    'CUSR0000SETG01'  # Airline fares
]

#### Build the seasonally adjusted (SA) index, MoM and YoY DataFrames

In [4]:
## get series id and value df
index_url = 'https://download.bls.gov/pub/time.series/cu/cu.data.0.Current'
df = get_bls_txt(index_url)
# The two digits in `period` are used as the month number; rows whose code is
# '13' are dropped (presumably annual averages - TODO confirm), along with
# everything up to and including 2012.
df['month'] = df['period'].str.extract(r'(\d{2})')[0]
# drop=True: the old positional index is not needed and would otherwise be
# carried along as an extra 'index' column.
df = df[(df['month'] != '13') & (df['year'] > 2012)].reset_index(drop=True)
df['date'] = pd.to_datetime(df['year'].astype(str) + '-' + df['month'], format='%Y-%m')
df['series_id'] = df['series_id'].str.strip()
df = df[['date', 'series_id', 'value']]

## get series id and names mapping
name_url = 'https://download.bls.gov/pub/time.series/cu/cu.series'
# .copy() makes name_df an independent frame, so the column assignment below is
# not a write to a slice of the downloaded table (no SettingWithCopyWarning).
name_df = get_bls_txt(name_url)[['series_id', 'series_title']].copy()
name_df['series_id'] = name_df['series_id'].str.strip()

## wide index-level table: one row per month, one column per series title
sa_df = order_df(df, sa_ids, name_df)
sa_df.index = sa_df.index.str.replace(" in U.S. city average, all urban consumers, seasonally adjusted", "", regex=False)
sa_df = sa_df.T
sa_df.index = pd.to_datetime(sa_df.index)

# Percent changes are computed before trimming so the first retained months
# still have the 1- and 12-month lookback they need.
sa_df_mom = (sa_df.pct_change()*100).dropna()
sa_df_yoy = (sa_df.pct_change(12)*100).dropna()

# Both derived frames inherit sa_df's DatetimeIndex, so they can be compared
# against the cutoff strings directly.
sa_df = sa_df[sa_df.index > '2013-11']
sa_df_mom = sa_df_mom[sa_df_mom.index > '2013-11']
sa_df_yoy = sa_df_yoy[sa_df_yoy.index > '2013-12']

  df = pd.read_csv(data, sep='\t')


#### Get updated relative importance (RI) and back out Core Services Ex Rent

In [6]:
# 2020 onwards RI are available here https://www.bls.gov/cpi/tables/relative-importance/home.htm#Weights
current_year = datetime.now().year - 1

# Components are selected by sa_df's columns rather than sa_df_mom's: a later
# cell adds a derived 'Core Services Ex Rent' column to sa_df_mom, which has no
# entry in the RI tables and would make re-running this cell raise a KeyError.
ri_columns = sa_df.columns

# One single-row frame per year is collected and concatenated once at the end.
frames_2020onward = []
for year in range(2020, current_year+1):
    url = f"https://www.bls.gov/cpi/tables/relative-importance/{year}.xlsx"
    df = get_bls_xls(url, header=4).dropna().drop(['Unnamed: 0', 'CPI-W'], axis=1)
    df = df.rename({'Unnamed: 1': 'series_title', 'CPI-U': f"{year}"}, axis=1)
    df = df.set_index('series_title').T
    df = df[ri_columns]
    # Keep only the first occurrence of any title that appears more than once.
    ri_df = df.loc[:,~df.columns.duplicated()].copy()
    frames_2020onward.append(ri_df)
ri_2020onward = pd.concat(frames_2020onward, axis=0) if frames_2020onward else None

zip_url = "https://www.bls.gov/cpi/tables/relative-importance/ri-archive-2010-2019.zip"
# NOTE(review): get_bls_zip issues a new HTTP request on every call, so the
# same archive is downloaded once per year in this loop.
frames_before2020 = []
for year in range(2013, 2020):
    target_file_name = f"{year}.txt"
    df = get_bls_zip(zip_url, target_file_name, start_row=13)
    df.columns = ['series_title', str(year), 'CPI-W']
    # .copy() so the title clean-up below edits an independent frame rather
    # than a slice of the parsed table.
    df = df[['series_title', str(year)]].copy()
    # Remove the dots embedded in the titles of the fixed-width text tables.
    df['series_title'] = df['series_title'].str.replace('.', '', regex=False)
    # Some archive years use the singular title; align it with the title used
    # for the index series.
    if 'Airline fare' in df['series_title'].values:
        df['series_title'] = df['series_title'].str.replace('Airline fare', 'Airline fares', regex=False)
    df = df.set_index('series_title').T
    df = df[ri_columns]
    df = df.loc[:,~df.columns.duplicated()].copy()
    frames_before2020.append(df)
ri_before2020 = pd.concat(frames_before2020, axis=0)

ri_all = pd.concat([ri_before2020, ri_2020onward], axis=0)

In [7]:
# expand_yearly_dec does integer arithmetic on the index (year + 1), so both
# inputs are re-labelled with plain integer years first.
ri_all.index = ri_all.index.astype(int)

# December index levels, one row per year.
sa_dec = sa_df.loc[sa_df.index.month == 12]
sa_dec.index = sa_dec.index.year.astype(int)

# Each yearly row is repeated from that December through the next November.
monthly_ri = expand_yearly_dec(ri_all)
monthly_sa = expand_yearly_dec(sa_dec)

In [8]:
# Scale each December weight by the growth of its index since that December.
chg_since_dec = monthly_ri * (sa_df / monthly_sa)

# Re-express every component relative to 'All items' (x100) and discard months
# for which nothing could be computed.
updated_ri = (
    chg_since_dec.div(chg_since_dec['All items'], axis=0) * 100
).dropna(how='all')

# Weight of core services excluding the two rent components.
updated_ri['Core Services Ex Rent'] = (
    updated_ri['Services less energy services']
    - updated_ri['Rent of primary residence']
    - updated_ri['Owners\' equivalent rent of residences']
)

In [9]:
# Shift the weights forward one month so each month's change is paired with
# the weights dated one month earlier.
updated_ri_mom = updated_ri.copy()
updated_ri_mom.index = updated_ri.index + pd.DateOffset(months=1)


def _weighted_mom(component):
    '''Monthly percent change of `component` multiplied by its shifted weight.'''
    return sa_df_mom[component] * updated_ri_mom[component]


# Services less energy services, minus the two rent components, re-normalised
# by the weight of the remainder.
sa_df_mom['Core Services Ex Rent'] = (
    _weighted_mom('Services less energy services')
    - _weighted_mom('Rent of primary residence')
    - _weighted_mom('Owners\' equivalent rent of residences')
) / updated_ri_mom['Core Services Ex Rent']

### Generate display table and charts

In [10]:
# Latest 13 months, newest first, labelled by calendar month.
sa_df_clean = sa_df_mom.iloc[-13:].sort_index(ascending=False)
sa_df_clean.index = pd.to_datetime(sa_df_clean.index).to_period('M')

# Trailing averages over the newest 3 and 6 months. Each row is appended at the
# bottom, so the leading rows used for the averages are unaffected.
for label, window in (('3m-MA', 3), ('6m-MA', 6)):
    sa_df_clean.loc[label] = sa_df_clean.iloc[0:window].mean()

# Move the two average rows to the top of the table.
new_order = ['3m-MA', '6m-MA'] + [row for row in sa_df_clean.index if row not in ['3m-MA', '6m-MA']]
sa_df_clean = sa_df_clean.loc[new_order]

# Two decimals for the headline core measures, one decimal for everything else.
columns_not_to_round = ['All items less food and energy','Core Services Ex Rent']
rounding_dict = {
    col: 2 if col in columns_not_to_round else 1
    for col in sa_df_clean.columns
}

sa_df_clean = sa_df_clean.round(rounding_dict)
sa_df_clean = sa_df_clean.astype(object).T ## need astype object to not mess up rounding
sa_df_clean

Unnamed: 0_level_0,3m-MA,6m-MA,2024-03,2024-02,2024-01,2023-12,2023-11,2023-10,2023-09,2023-08,2023-07,2023-06,2023-05,2023-04,2023-03
series_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
All items,0.4,0.3,0.4,0.4,0.3,0.2,0.2,0.1,0.4,0.5,0.2,0.2,0.1,0.4,0.1
Food,0.2,0.2,0.1,0.0,0.4,0.2,0.2,0.3,0.2,0.2,0.2,0.2,0.2,0.0,0.1
Food at home,0.1,0.1,-0.0,-0.0,0.4,0.1,0.0,0.3,0.1,0.2,0.2,0.0,0.1,-0.2,-0.2
Food away from home,0.3,0.3,0.3,0.1,0.5,0.3,0.4,0.4,0.4,0.3,0.2,0.4,0.5,0.4,0.6
Energy,0.8,-0.2,1.1,2.3,-0.9,-0.2,-1.6,-2.1,1.2,4.4,-0.0,0.5,-3.0,0.7,-2.7
All items less food and energy,0.37,0.32,0.36,0.36,0.39,0.28,0.31,0.24,0.32,0.23,0.23,0.19,0.36,0.47,0.32
Commodities less food and energy commodities,-0.1,-0.1,-0.2,0.1,-0.3,-0.1,-0.2,-0.0,-0.2,-0.2,-0.3,-0.1,0.4,0.4,0.2
Apparel,0.2,-0.0,0.7,0.6,-0.7,-0.0,-0.6,0.0,-0.3,0.2,0.1,0.1,0.2,0.3,0.2
New vehicles,-0.1,-0.0,-0.2,-0.1,-0.0,0.2,-0.0,-0.1,0.2,0.2,-0.0,-0.0,-0.1,-0.1,0.3
Used cars and trucks,-1.3,-0.4,-1.1,0.5,-3.4,0.6,1.4,-0.4,-1.8,-1.9,-1.5,-0.6,3.2,3.2,-0.6


In [11]:
# Each component's monthly change weighted by its share of the core
# ('All items less food and energy') weight; headline, food and energy columns
# are not part of core and are removed.
contri_to_core = (
    sa_df_mom * updated_ri_mom.div(updated_ri_mom['All items less food and energy'], axis=0)
).drop(columns=['All items', 'Food', 'Food at home', 'Food away from home', 'Energy'])
contri_to_core.round(2).dropna()

series_title,All items less food and energy,Commodities less food and energy commodities,Apparel,New vehicles,Used cars and trucks,Medical care commodities,Alcoholic beverages,Tobacco and smoking products,Services less energy services,Shelter,Rent of primary residence,Owners' equivalent rent of residences,Medical care services,Physicians' services,Hospital services,Transportation services,Motor vehicle maintenance and repair,Motor vehicle insurance,Airline fares,Core Services Ex Rent
2014-01-01,0.09,-0.06,-0.02,-0.01,-0.01,0.01,-0.0,0.00,0.15,0.10,0.02,0.07,0.02,-0.00,0.02,-0.01,0.00,0.01,-0.03,0.06
2014-02-01,0.09,-0.04,-0.02,0.00,-0.01,0.01,-0.0,0.00,0.14,0.09,0.02,0.07,0.02,-0.00,0.02,0.01,0.00,0.01,-0.01,0.05
2014-03-01,0.19,-0.02,0.01,0.00,-0.02,-0.00,0.0,0.00,0.20,0.13,0.03,0.08,0.03,0.00,0.02,0.02,-0.00,0.01,0.01,0.09
2014-04-01,0.19,0.01,0.00,0.01,-0.01,0.01,0.0,0.00,0.18,0.10,0.03,0.07,0.01,0.00,0.01,0.02,0.00,0.02,0.01,0.08
2014-05-01,0.19,0.00,0.01,0.00,-0.00,0.01,0.0,0.00,0.19,0.12,0.03,0.06,0.01,0.01,0.01,0.03,0.00,0.01,0.02,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01,0.31,-0.06,-0.02,-0.00,0.04,0.01,-0.0,0.01,0.34,0.20,0.04,0.16,0.04,0.01,0.00,0.08,0.00,0.04,-0.00,0.14
2023-12-01,0.28,-0.02,-0.00,0.01,0.02,-0.00,0.0,-0.00,0.28,0.18,0.04,0.14,0.04,0.00,0.01,0.01,-0.00,0.06,0.01,0.11
2024-01-01,0.39,-0.08,-0.02,-0.00,-0.08,-0.01,0.0,0.00,0.50,0.29,0.03,0.19,0.06,0.01,0.04,0.08,0.01,0.05,0.01,0.28
2024-02-01,0.36,0.03,0.02,-0.00,0.01,0.00,-0.0,0.01,0.35,0.19,0.04,0.15,-0.00,-0.00,-0.01,0.11,0.01,0.03,0.03,0.16


In [12]:
# Components drawn as bars, in legend order.
bar_components = [
    'Commodities less food and energy commodities',
    'Rent of primary residence',
    'Owners\' equivalent rent of residences',
    'Medical care services',
    'Transportation services',
]
core_label = 'All items less food and energy'

# Bars first, then the core line on top, matching the trace order of the legend.
fig = go.Figure(
    data=[
        go.Bar(x=contri_to_core.index, y=contri_to_core[component], name=component)
        for component in bar_components
    ]
    + [
        go.Scatter(
            x=contri_to_core.index,
            y=contri_to_core[core_label],
            name=core_label,
            mode='lines',
            line={'color': 'rgb(0, 70, 140)'},
        )
    ]
)

# barmode='relative' stacks the bars, with positive values above the axis and
# negative values below it.
fig.update_layout(
    barmode='relative',
    title='MoM Contribution to Core',
    xaxis_title='Date',
    yaxis_title='Contribution Value',
    font={
        'family': "Times New Roman",  # font for all text in the figure
        'size': 14,
        'color': "black",
    },
)

fig.show()

In [30]:
# The FRED API key is read from the environment so that no credential is stored
# in the notebook.
# NOTE(review): the key that used to be hard-coded here should be treated as
# exposed and rotated.
fred_api_key = os.environ.get("FRED_API_KEY")
if not fred_api_key:
    raise RuntimeError(
        "Set the FRED_API_KEY environment variable before running this cell."
    )

# TLS certificate verification is left at its default. This cell used to
# replace ssl._create_default_https_context with an unverified context, which
# turned verification off for the whole kernel. If certificate errors appear,
# fix the local certificate store instead of disabling verification.
fred = Fred(api_key=fred_api_key)

# Core PCE price index from FRED, as a 12-month percent change.
core_pce = pd.DataFrame({series: fred.get_series(series) for series in ['PCEPILFE']})
pce_cpi_yoy = core_pce.pct_change(12, fill_method=None) * 100
# Aligned on the date index; months missing from either series are dropped.
pce_cpi_yoy['Core CPI YoY'] = sa_df_yoy['All items less food and energy']
pce_cpi_yoy = pce_cpi_yoy.dropna().round(2)
pce_cpi_yoy.columns = ['Core_PCE_yoy', 'Core_CPI_yoy']

time_series_fig = go.Figure()
for series_name, line_color in (('Core_PCE_yoy', 'red'), ('Core_CPI_yoy', 'blue')):
    time_series_fig.add_trace(go.Scatter(
        x=pce_cpi_yoy.index,
        y=pce_cpi_yoy[series_name],
        name=series_name,
        mode='lines',
        line=dict(color=line_color)
    ))

start_date = pce_cpi_yoy.index.min()
end_date = pce_cpi_yoy.index.max()

# One tick every 12 months. Without an explicit frequency date_range defaults
# to daily, which produced one tick per calendar day across the whole sample.
# Adjust the multiple (e.g. '6MS') for denser ticks.
tickvals = pd.date_range(start=start_date, end=end_date, freq='12MS')

time_series_fig.update_layout(
    title='Core PCE vs CPI',
    xaxis_title='Date',
    yaxis_title='Percent Change, year ago',
    xaxis=dict(
        tickmode='array',
        tickvals=tickvals,
        tickformat="%Y-%m"
    )
)
time_series_fig.show()