In [2]:
import requests
import pandas as pd
from io import BytesIO,StringIO
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")


In [2]:
def get_bls_txt(url, timeout=60):
    '''Download a tab-separated text file and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the tab-delimited file to fetch.
    timeout : float, optional
        Seconds `requests` waits on the server before giving up (default 60).
        Without it a stalled connection would hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The parsed file. Whitespace is stripped from the column names only;
        cell values are returned exactly as parsed.

    Raises
    ------
    requests.HTTPError
        If the server replies with a 4xx/5xx status.
    '''
    response = requests.get(
        url,
        headers={"user-agent": "xinxianwang21@gmail.com"},
        timeout=timeout,
    )
    # Fail loudly instead of feeding an HTML error page to the CSV parser.
    response.raise_for_status()
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file rather than chunk by chunk, which avoids mixed-type columns and
    # the DtypeWarning they trigger.
    df = pd.read_csv(StringIO(response.text), sep='\t', low_memory=False)
    df.columns = df.columns.str.strip()
    return df

def get_bls_xls(url, timeout=60):
    '''Download an Excel workbook and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the workbook to fetch.
    timeout : float, optional
        Seconds `requests` waits on the server before giving up (default 60).

    Returns
    -------
    pandas.DataFrame
        Whatever `pd.read_excel` returns with its default arguments, with
        whitespace stripped from the column names.

    Raises
    ------
    requests.HTTPError
        If the server replies with a 4xx/5xx status.
    '''
    response = requests.get(
        url,
        headers={"user-agent": "xinxianwang21@gmail.com"},
        timeout=timeout,
    )
    # Fail loudly instead of handing an error page to the Excel reader.
    response.raise_for_status()
    df = pd.read_excel(BytesIO(response.content))
    df.columns = df.columns.str.strip()
    return df

def order_df(df, list, mapping_df):
    '''Build a wide table (one row per series, one column per date) in a given row order.

    `df` must have 'series_id', 'date' and 'value' columns; `list` holds the
    series ids to keep, in the order the rows should appear; `mapping_df`
    supplies a 'series_title' for each 'series_id'.  The returned frame is
    indexed by 'series_title' and no longer carries 'series_id'.
    '''
    wanted = df.loc[df['series_id'].isin(list)]
    wide = wanted.pivot_table(index='series_id', columns='date', values='value')
    # reindex imposes the caller's ordering on the pivoted rows
    wide = wide.reindex(list)
    labelled = wide.merge(mapping_df, how='left', on='series_id')
    return labelled.set_index('series_title').drop(columns='series_id')

In [3]:
## get series id and value df
index_url = 'https://download.bls.gov/pub/time.series/cu/cu.data.0.Current'
df = get_bls_txt(index_url)
# Keep monthly observations only. The pattern accepts M01..M12 and nothing
# else, so M13 is still excluded, and any other period code that merely
# contains two digits (BLS also publishes semiannual codes such as S01 --
# confirm against the CU documentation) is no longer parsed as a month.
df['month'] = df['period'].str.extract(r'^\s*M(0[1-9]|1[0-2])\s*$')[0]
# drop=True: do not keep the pre-filter row numbers as a stray 'index' column
df = df[df['month'].notna() & (df['year'] > 2012)].reset_index(drop=True)
df['date'] = pd.to_datetime(df['year'].astype(str) + '-' + df['month'], format='%Y-%m')
df['series_id'] = df['series_id'].str.strip()
df = df[['date', 'series_id', 'value']]

## get series id and names mapping
name_url = 'https://download.bls.gov/pub/time.series/cu/cu.series'
name_df = get_bls_txt(name_url)
# .copy() so the assignment below writes to an independent frame rather than
# to a slice of the downloaded one (avoids SettingWithCopyWarning)
name_df = name_df[['series_id','series_title']].copy()
name_df['series_id'] = name_df['series_id'].str.strip()

  df = pd.read_csv(data, sep='\t')


In [4]:
# Series ids to include in the table, listed in the order the rows should
# appear (order_df reindexes its pivot by this list).  The trailing comment on
# each entry is a short label for the series.
sa_ids = [
    'CUSR0000SA0',  # All items
    'CUSR0000SAF1',  # Food
    'CUSR0000SAF11',  # Food at home
    'CUSR0000SEFV',  # Food away from home
    'CUSR0000SA0E',  # Energy
    'CUSR0000SA0L1E',  # All items less food and energy
    'CUSR0000SACL1E',  # Commodities less food and energy commodities
    'CUSR0000SAA',  # Apparel
    'CUSR0000SETA01',  # New vehicles
    'CUSR0000SETA02',  # Used cars and trucks
    'CUSR0000SAM1',  # Medical care commodities
    'CUSR0000SAF116', # Alcoholic beverages
    'CUSR0000SEGA',  # Tobacco and smoking products
    'CUSR0000SASLE', # Services less energy services
    'CUSR0000SAH1',  # Shelter
    'CUSR0000SEHA',  # Rent of primary residence
    'CUSR0000SEHC',  # Owners' equivalent rent of residences
    'CUSR0000SAM2',  # Medical care services
    'CUSR0000SEMC01',  # Physicians' services
    'CUSR0000SEMD01',  # Hospital services
    'CUSR0000SAS4',  # Transportation services
    'CUSR0000SETD',  # Motor vehicle maintenance and repair
    'CUSR0000SETE',  # Motor vehicle insurance
    'CUSR0000SETG01'  # Airline fares
]

In [5]:
# Wide table of the selected series, labelled by title with the shared
# descriptive suffix removed so only the item name remains.
sa_df = order_df(df, sa_ids, name_df)
sa_df = sa_df.set_axis(
    sa_df.index.str.replace(
        " in U.S. city average, all urban consumers, seasonally adjusted",
        "",
        regex=False,
    ),
    axis=0,
)

In [15]:
# Work row-wise: after the transpose each row is one column of sa_df.
sa_df_clean = sa_df.T
# Percentage change from the previous row. fill_method=None compares each value
# with the actual previous row instead of relying on the implicit forward-fill
# default, which would report a made-up 0% change across a missing observation
# (and which newer pandas releases warn about).
sa_df_clean = sa_df_clean.pct_change(fill_method=None)*100
# Keep the last 13 rows and list them newest first.
sa_df_clean = sa_df_clean.iloc[-13:].sort_index(ascending=False)
sa_df_clean.index = pd.to_datetime(sa_df_clean.index).to_period('M')
# Averages of the first 3 and first 6 rows (the most recent months), appended
# as extra rows and then moved to the top of the table.
sa_df_clean.loc['3m-MA'] = sa_df_clean.iloc[0:3].mean()
sa_df_clean.loc['6m-MA'] = sa_df_clean.iloc[0:6].mean()
new_order = ['3m-MA', '6m-MA'] + [row for row in sa_df_clean.index if row not in ['3m-MA', '6m-MA']]
sa_df_clean = sa_df_clean.loc[new_order]

# Despite the name, these columns are rounded too, to 2 decimals instead of 1.
columns_not_to_round = ['All items less food and energy']

rounding_dict = {col: 1 for col in sa_df_clean.columns if col not in columns_not_to_round}
for col in columns_not_to_round:
    rounding_dict[col] = 2

sa_df_clean = sa_df_clean.round(rounding_dict)

sa_df_clean = sa_df_clean.astype(object).T ## need astype object to not mess up rounding

In [16]:
# Show the finished table: one row per series, the two moving-average columns
# first, then the monthly percentage changes from newest to oldest.
sa_df_clean

Unnamed: 0_level_0,3m-MA,6m-MA,2024-03,2024-02,2024-01,2023-12,2023-11,2023-10,2023-09,2023-08,2023-07,2023-06,2023-05,2023-04,2023-03
series_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
All items,0.4,0.3,0.4,0.4,0.3,0.2,0.2,0.1,0.4,0.5,0.2,0.2,0.1,0.4,0.1
Food,0.2,0.2,0.1,0.0,0.4,0.2,0.2,0.3,0.2,0.2,0.2,0.2,0.2,0.0,0.1
Food at home,0.1,0.1,-0.0,-0.0,0.4,0.1,0.0,0.3,0.1,0.2,0.2,0.0,0.1,-0.2,-0.2
Food away from home,0.3,0.3,0.3,0.1,0.5,0.3,0.4,0.4,0.4,0.3,0.2,0.4,0.5,0.4,0.6
Energy,0.8,-0.2,1.1,2.3,-0.9,-0.2,-1.6,-2.1,1.2,4.4,-0.0,0.5,-3.0,0.7,-2.7
All items less food and energy,0.37,0.32,0.36,0.36,0.39,0.28,0.31,0.24,0.32,0.23,0.23,0.19,0.36,0.47,0.32
Commodities less food and energy commodities,-0.1,-0.1,-0.2,0.1,-0.3,-0.1,-0.2,-0.0,-0.2,-0.2,-0.3,-0.1,0.4,0.4,0.2
Apparel,0.2,-0.0,0.7,0.6,-0.7,-0.0,-0.6,0.0,-0.3,0.2,0.1,0.1,0.2,0.3,0.2
New vehicles,-0.1,-0.0,-0.2,-0.1,-0.0,0.2,-0.0,-0.1,0.2,0.2,-0.0,-0.0,-0.1,-0.1,0.3
Used cars and trucks,-1.3,-0.4,-1.1,0.5,-3.4,0.6,1.4,-0.4,-1.8,-1.9,-1.5,-0.6,3.2,3.2,-0.6


In [5]:
    def get_bls_txt(url, timeout=60):
        '''Download a tab-separated text file and return it as a DataFrame.

        Parameters
        ----------
        url : str
            Address of the tab-delimited file to fetch.
        timeout : float, optional
            Seconds `requests` waits on the server before giving up (default 60).
            Without it a stalled connection would hang the notebook indefinitely.

        Returns
        -------
        pandas.DataFrame
            The parsed file. Whitespace is stripped from the column names only;
            cell values are returned exactly as parsed.

        Raises
        ------
        requests.HTTPError
            If the server replies with a 4xx/5xx status.
        '''
        response = requests.get(
            url,
            headers={"user-agent": "xinxianwang21@gmail.com"},
            timeout=timeout,
        )
        # Fail loudly instead of feeding an HTML error page to the CSV parser.
        response.raise_for_status()
        # low_memory=False makes pandas infer each column's dtype from the whole
        # file rather than chunk by chunk, which avoids mixed-type columns and
        # the DtypeWarning they trigger.
        df = pd.read_csv(StringIO(response.text), sep='\t', low_memory=False)
        df.columns = df.columns.str.strip()
        return df

    def order_df(df, list, mapping_df):
        '''Build a wide table (one row per series, one column per date) in a given row order.

        `df` must have 'series_id', 'date' and 'value' columns; `list` holds the
        series ids to keep, in the order the rows should appear; `mapping_df`
        supplies a 'series_title' for each 'series_id'.  The returned frame is
        indexed by 'series_title' and no longer carries 'series_id'.
        '''
        wanted = df.loc[df['series_id'].isin(list)]
        wide = wanted.pivot_table(index='series_id', columns='date', values='value')
        # reindex imposes the caller's ordering on the pivoted rows
        wide = wide.reindex(list)
        labelled = wide.merge(mapping_df, how='left', on='series_id')
        return labelled.set_index('series_title').drop(columns='series_id')

    ## get series id and value df
    index_url = 'https://download.bls.gov/pub/time.series/cu/cu.data.0.Current'
    df = get_bls_txt(index_url)
    # Keep monthly observations only. The pattern accepts M01..M12 and nothing
    # else, so M13 is still excluded, and any other period code that merely
    # contains two digits (BLS also publishes semiannual codes such as S01 --
    # confirm against the CU documentation) is no longer parsed as a month.
    df['month'] = df['period'].str.extract(r'^\s*M(0[1-9]|1[0-2])\s*$')[0]
    # drop=True: do not keep the pre-filter row numbers as a stray 'index' column
    df = df[df['month'].notna() & (df['year'] > 2012)].reset_index(drop=True)
    df['date'] = pd.to_datetime(df['year'].astype(str) + '-' + df['month'], format='%Y-%m')
    df['series_id'] = df['series_id'].str.strip()
    df = df[['date', 'series_id', 'value']]

    ## get series id and names mapping
    name_url = 'https://download.bls.gov/pub/time.series/cu/cu.series'
    name_df = get_bls_txt(name_url)
    # .copy() so the assignment below writes to an independent frame rather than
    # to a slice of the downloaded one (avoids SettingWithCopyWarning)
    name_df = name_df[['series_id','series_title']].copy()
    name_df['series_id'] = name_df['series_id'].str.strip()


    # Series ids to include in the table, listed in the order the rows should
    # appear (order_df reindexes its pivot by this list).  The trailing comment
    # on each entry is a short label for the series.
    sa_ids = [
        'CUSR0000SA0',  # All items
        'CUSR0000SAF1',  # Food
        'CUSR0000SAF11',  # Food at home
        'CUSR0000SEFV',  # Food away from home
        'CUSR0000SA0E',  # Energy
        'CUSR0000SA0L1E',  # All items less food and energy
        'CUSR0000SACL1E',  # Commodities less food and energy commodities
        'CUSR0000SAA',  # Apparel
        'CUSR0000SETA01',  # New vehicles
        'CUSR0000SETA02',  # Used cars and trucks
        'CUSR0000SAM1',  # Medical care commodities
        'CUSR0000SAF116', # Alcoholic beverages
        'CUSR0000SEGA',  # Tobacco and smoking products
        'CUSR0000SASLE', # Services less energy services
        'CUSR0000SAH1',  # Shelter
        'CUSR0000SEHA',  # Rent of primary residence
        'CUSR0000SEHC',  # Owners' equivalent rent of residences
        'CUSR0000SAM2',  # Medical care services
        'CUSR0000SEMC01',  # Physicians' services
        'CUSR0000SEMD01',  # Hospital services
        'CUSR0000SAS4',  # Transportation services
        'CUSR0000SETD',  # Motor vehicle maintenance and repair
        'CUSR0000SETE',  # Motor vehicle insurance
        'CUSR0000SETG01'  # Airline fares
    ]

    # Wide table of the selected series, labelled by title with the shared
    # descriptive suffix removed so only the item name remains.
    sa_df = order_df(df, sa_ids, name_df)
    sa_df.index = sa_df.index.str.replace(
        " in U.S. city average, all urban consumers, seasonally adjusted",
        "",
        regex=False,
    )

    # Transpose, convert to percentage change from the previous row (no gap
    # filling), keep the last 13 rows and list them newest first.
    sa_df_clean = (
        sa_df.T
        .pct_change(fill_method=None)
        .mul(100)
        .iloc[-13:]
        .sort_index(ascending=False)
    )
    sa_df_clean.index = pd.to_datetime(sa_df_clean.index).to_period('M')

    # Append the averages of the first 3 and first 6 rows, then move those two
    # summary rows to the top.
    sa_df_clean.loc['3m-MA'] = sa_df_clean.iloc[:3].mean()
    sa_df_clean.loc['6m-MA'] = sa_df_clean.iloc[:6].mean()
    new_order = ['3m-MA', '6m-MA']
    new_order += [label for label in sa_df_clean.index if label not in ('3m-MA', '6m-MA')]
    sa_df_clean = sa_df_clean.loc[new_order]

    # Every column is rounded to 1 decimal except the ones named here, which
    # get 2 decimals.
    columns_not_to_round = ['All items less food and energy']
    rounding_dict = dict.fromkeys(sa_df_clean.columns, 1)
    rounding_dict.update(dict.fromkeys(columns_not_to_round, 2))

    # object dtype keeps the per-column rounding intact once the frame is
    # transposed; reset_index turns the series titles into a regular column.
    sa_df_clean = (
        sa_df_clean
        .round(rounding_dict)
        .astype(object)
        .T
        .reset_index()
    )
    

  df = pd.read_csv(data, sep='\t')


In [7]:
# Inspect the column labels after reset_index(): 'series_title', the two
# moving-average labels, then one label per monthly period.
sa_df_clean.columns

Index(['series_title',        '3m-MA',        '6m-MA',        2024-03,
              2024-02,        2024-01,        2023-12,        2023-11,
              2023-10,        2023-09,        2023-08,        2023-07,
              2023-06,        2023-05,        2023-04,        2023-03],
      dtype='object')