### bd econ CPS price level retrieval

bd_CPS_cpi.ipynb

January 30, 2019

Brian Dew, @bd_econ

-----
This file is used to download the 1989-present consumer price indices for all urban consumers, as well as for urban consumers in the four census regions: Northeast, Midwest, South, and West. These data are used to adjust the wage and overtime values reported in the Current Population Survey (CPS) for changes in the price level. The 1989 to 1999 consumer price data tend to overstate the inflation rate, which makes real wage growth seem lower than it actually was over the period. Therefore, I may opt, at a later date, to replace the regional CPI approach with the BLS research series, referred to as the CPI-U-RS.

In [1]:
# Import packages and bls api key
import pandas as pd
print('pandas:', pd.__version__)
import os
import requests
print('requests:', requests.__version__)
import json
import config
import time

# Work from the CPS data directory: later cells use paths relative to it
# (os.listdir() for the raw files and 'clean/cpi.csv' for the output).
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
os.chdir('/home/brian/Documents/CPS/data/')

def fred_df(series, start='1988'):
    '''Fetch one FRED series as a date-indexed DataFrame.

    The CSV for `series` is read from the FRED download URL, with the
    DATE column parsed as the index and '.' entries treated as missing.
    Only rows from `start` onward (label-based slice) are returned.
    '''
    url = f'http://research.stlouisfed.org/fred2/series/{series}/downloaddata/{series}.csv'
    full_history = pd.read_csv(
        url,
        na_values=['.'],
        parse_dates=True,
        index_col='DATE',
    )
    # Label slice on the date index keeps everything from `start` forward
    recent = full_history.loc[start:]
    return recent

pandas: 1.5.1
requests: 2.28.1


In [2]:
# Most recent month covered by the raw monthly CPS files.
# The first five characters of each *pub.dat file name are parsed
# as an abbreviated month name followed by a two-digit year.
files = [name for name in os.listdir() if name.endswith('pub.dat')]

file_months = pd.Series([pd.to_datetime(name[:5], format='%b%y')
                         for name in files])
cps_mo = file_months.sort_values().iloc[-1]

# FRED series IDs mapped to the output column for each price index
srs = {
    'CPIAUCNS': 'ALL',
    'CUUR0100SA0': 'Northeast',
    'CUUR0200SA0': 'Midwest',
    'CUUR0300SA0': 'South',
    'CUUR0400SA0': 'West',
}

# Download each index into its own column; columns after the first
# are aligned to the index established by the first series
df = pd.DataFrame()
for series_id, column in srs.items():
    df[column] = fred_df(series_id)['VALUE']

# When the newest CPS month is later than the newest CPI month,
# extend the CPI by one row using the Cleveland Fed inflation nowcast
if cps_mo > df.index[-1]:
    cpsdt = cps_mo.strftime('%B %Y')
    print(f'CPI not yet available for {cpsdt}, retrieving nowcast')
    url = 'https://www.clevelandfed.org/indicators-and-data/inflation-nowcasting'
    r = pd.read_html(url)[0].set_index('Month')
    # The table value is treated as a percent and turned into a growth factor
    nowcast_pct = float(r.loc[cpsdt, 'CPI'])
    nowcast = 1 + (nowcast_pct / 100)
    df.loc[cps_mo] = df.iloc[-1] * nowcast

# Adjustment factors: latest index value divided by each month's value
cpi = df.iloc[-1] / df
cpi.to_csv('clean/cpi.csv')

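The BLS API version below stopped working, so it is commented out and kept for reference only; the FRED-based cell above is the active version.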

In [3]:
# # Code to update CPI as needed
# api_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

# # API key in config.py which contains: bls_key = 'key'
# key = f'?registrationkey={config.bls_key}'

# # Series stored as a dictionary
# series_dict = {'CUUR0000SA0': 'ALL',
#                'CUUR0100SA0': 'Northeast',
#                'CUUR0200SA0': 'Midwest',
#                'CUUR0300SA0': 'South',
#                'CUUR0400SA0': 'West'}

# # Start year and end year
# date_r = (1989, 2023)

# # Because API requests are limited to 10 years at a time,
# # this code splits the dates above in 10-year chunks
# dates = [(str(date_r[0]), str(date_r[1]))]
# while int(dates[-1][1]) - int(dates[-1][0]) > 10:
#     dates = [(str(date_r[0]), str(date_r[0]+9))]
#     d1 = int(dates[-1][0])
#     while int(dates[-1][1]) < date_r[1]:
#         d1 = d1 + 10
#         d2 = min([date_r[1], d1+9])
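# # NOTE(review): d2 is appended as an int while the other tuple entries are str;
# # use str(d2) for consistency if this code is revived
#         dates.append((str(d1),(d2)))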
        
# df = pd.DataFrame()

# for start, end in dates:
#     # Submit the list of series as data
#     data = json.dumps({
#         "seriesid": list(series_dict.keys()),
#         "startyear": start, "endyear": end})

#     # Post request for the data
#     p = requests.post(f'{api_url}{key}', 
#         headers={'Content-type': 'application/json',
#                  'User-Agent': 
#                  'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}, 
#         data=data).json()
#     time.sleep(0.1)
#     for s in p['Results']['series']:
#         col = series_dict[s['seriesID']]
#         for r in s['data']:
#             date = pd.to_datetime(f'{r["periodName"]} {r["year"]}')
#             df.at[date, col] = float(r['value'])
# df = df.sort_index()
# # Output results
# print(f'Post Request Status: {p["status"]}')
# print('Latest month: ', df.index[-1])
      
# cpi = (df.iloc[-1] / df)
# cpi.to_csv('cpi.csv')