# CPI Special Calculations over time with Python

Brian Dew, @bd_econ, Center for Economic and Policy Research

March 22, 2018

----- 
Every two years the relative importance of CPI items is updated. I'd like to know how a few special aggregates contribute to inflation (unfortunately not seasonally adjusted), and to do this I need to compensate for the updates. BLS explains how to do this here: https://www.bls.gov/cpi/tables/relative-importance/home.htm



In [1]:
# Import preliminaries
import pandas as pd
import numpy as np
import requests
import json
import config

#### Find series ids

In [2]:
# Download the BLS flat file that lists every CPI series with its metadata,
# then keep only the rows matching all four codes checked below.
s_url = 'https://download.bls.gov/pub/time.series/cu/cu.series'
s = pd.read_table(s_url, sep='\t')
wanted_rows = (
    s['seasonal'].eq('U')
    & s['base_code'].eq('S')
    & s['area_code'].eq('0000')
    & s['periodicity_code'].eq('R')
)
s2 = s[wanted_rows]

#### Manually look up the relative importance

In [3]:
# Relative weights for series of interest, from here:
# https://www.bls.gov/cpi/tables/relative-importance/home.htm
# Every series shares the same three (start, end) windows, so the windows are
# written once and paired with each series' weights, in window order.
_wgt_periods = [('2013-12-01', '2015-12-01'),
                ('2015-12-01', '2017-12-01'),
                ('2017-12-01', '2019-12-01')]
_wgt_values = {
    'CUUR0000SAF1': (13.891, 14.015, 13.384),
    'CUUR0000SA0': (100.0, 100.0, 100.0),
    'CUUR0000SA0E': (9.046, 6.816, 7.513),
    'CUUR0000SAH1': (32.029, 33.15, 32.843),
    'CUUR0000SACL1E': (19.71, 19.613, 19.849),
    'CUUR0000SASLE': (57.353, 59.556, 59.254),
}
# Each entry is a list of ((start, end), weight) tuples, one per window.
rel_wgt = {series_id: list(zip(_wgt_periods, weights))
           for series_id, weights in _wgt_values.items()}

#### Location of BLS API and parameters

In [13]:
# Endpoint of the Bureau of Labor Statistics public API
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
# The registration key is read from config.py, which contains: bls_key = 'key'
key = f'?registrationkey={config.bls_key}'

# Ask for JSON; request every series that has a relative weight defined
headers = {'Content-type': 'application/json'}
param = {"seriesid": list(rel_wgt),
         "startyear": "2013",
         "endyear": "2018"}

#### Request data from BLS API 

In [4]:
# Request every series listed in param['seriesid'] from the BLS API
p = requests.post(f'{url}{key}', data=json.dumps(param), headers=headers).json()
print(f"Status: {p['status']}") # Print request status
p = p['Results']['series']  # Keep data portion of post results

# Use first series to get the datetime values. The period code looks like
# 'M02', so the leading letter is dropped and an explicit year-month-01 date
# is built (the same conversion used when the values are stored) instead of
# relying on free-form parsing of strings such as "M02 2018".
idx = pd.to_datetime([f"{i['year']}-{i['period'][1:]}-01" for i in p[0]['data']],
                     format='%Y-%m-%d')
# The first observation is treated as the most recent one
print(f'Latest: {idx[0]:%B %Y}') # Print latest value

Status: REQUEST_SUCCEEDED
Latest: February 2018


#### Store date and value for each series

In [5]:
# For every returned series, store its observations as a list of
# (first-of-month timestamp, float value) tuples keyed by series ID.
data = {}
for series in p:
    observations = []
    for obs in series['data']:
        # 'period' is a code such as 'M02'; drop the leading letter
        stamp = pd.to_datetime(f'{obs["year"]}-{obs["period"][1:]}-01')
        observations.append((stamp, float(obs['value'])))
    data[series['seriesID']] = observations

#### Store all the above in dictionary

In [6]:
# Gather title, observations and relative weights for each series of interest
series_ids = s2['series_id'].str.strip()  # IDs are stripped before matching
d = {}
for sid in rel_wgt:
    # First matching title from the flat-file metadata
    title = s2.loc[series_ids == sid, 'series_title'].values[0]
    d[sid] = {'name': title,
              'values': data[sid],
              'rel_wgt': rel_wgt[sid]}

#### Adjust for changes to relative importance

In [7]:
# Adjust for changes to relative importance
def _linked_importance(values, weights):
    """Return one linked relative-importance series per weight period.

    Parameters
    ----------
    values : list of (timestamp, float) tuples
        Index observations for one series, in the order stored in ``data``.
    weights : list of ((start, end), weight) tuples
        Relative-importance windows for the same series, in time order.

    Returns
    -------
    list of pandas.Series
        One series per window. Within a window the index is rebased to 1.0
        at ``start`` and multiplied by that window's weight. From the second
        window on, the result is also multiplied by a link factor (previous
        window's value at its end date divided by the new weight) so the
        series joins the previous window at the shared boundary date.

    Raises
    ------
    KeyError
        If a window's start date (or the end date of a window that has a
        successor) is missing from the observations.
    """
    # Reverse the stored order so dates ascend, then index values by date
    # (the notebook treats the first API observation as the latest one).
    levels = pd.DataFrame(values).iloc[::-1].set_index(0)[1]
    pieces = []
    link = 1.0  # the first window is not linked to anything
    for n, ((start, end), weight) in enumerate(weights):
        start, end = pd.Timestamp(start), pd.Timestamp(end)
        window = levels.loc[start:end]
        # Rebase to the window start, then scale by weight and link factor
        piece = window / window.loc[start] * weight * link
        pieces.append(piece)
        if n + 1 < len(weights):
            # Ratio of the updated importance at the boundary to the newly
            # published weight; carried into the next window.
            link = piece.loc[end] / weights[n + 1][1]
    return pieces


# One list of three linked series per series ID ...
_linked = {i: _linked_importance(v['values'], v['rel_wgt'])
           for i, v in d.items()}
# ... regrouped into one frame per weight window, one column per series ID
df1, df2, df3 = (pd.DataFrame({i: pieces[n] for i, pieces in _linked.items()})
                 for n in range(3))

#### Calculate contribution to total

In [53]:
# Calculate exactly what I want to show
# Stack the three weight windows; each boundary month appears in two adjacent
# frames, so duplicated dates are dropped (keeping the first occurrence)
# before taking the 12-row difference.
final = pd.concat([df1, df2, df3])
final = final[~final.index.duplicated(keep='first')].diff(12)
# Derived aggregates built from the series columns
final['core_services'] = final['CUUR0000SASLE'] - final['CUUR0000SAH1']
final['food_energy'] = final['CUUR0000SAF1'] + final['CUUR0000SA0E']
final = final.dropna().round(2)
# Take an explicit copy: d2 is renamed and extended below, and mutating a
# slice of `final` would trigger pandas' SettingWithCopyWarning.
d2 = final[['core_services', 'CUUR0000SAH1', 'CUUR0000SACL1E', 'food_energy']].loc['2015-01-01':].copy()
col_names = ['core services', 'shelter', 'core goods', 'food & energy']
d2.columns = col_names
d2['total'] = final['CUUR0000SA0'].loc['2015-01-01':]
# Extra columns used by the plot: a formatted date label and the raw date
d2['strdate'] = d2.index.strftime('%B %Y')
d2['date'] = d2.index

#### Bokeh requires separate positive and negative values

In [9]:
# Build two parallel column dicts: `infl` keeps the values above -0.0001 and
# `defl` keeps the values below -0.0001; everything else is replaced by 0.
infl, defl = {}, {}
for name in col_names:
    column = list(d2[name])
    infl[name] = [val if val > -0.0001 else 0 for val in column]
    defl[name] = [val if val < -0.0001 else 0 for val in column]
# Both dicts carry the same date columns
for side in (infl, defl):
    side['dates'] = list(d2.index)
    side['strdate'] = d2.index.strftime('%b %Y')

#### Bokeh settings

In [10]:
# Bokeh settings
# Plotting entry points used by the plotting cell below
from bokeh.plotting import figure, show
import bokeh.models
import bokeh.plotting
# Label and CDSView are imported here but not used in the cells shown
from bokeh.models import ColumnDataSource, Span, Label, HoverTool, Range1d, CDSView
# components() produces the script/div pair written to the HTML file below
from bokeh.embed import components
from bokeh.io import output_notebook
# Presumably routes show() output inline into the notebook -- see Bokeh docs
output_notebook()

#### Plot in bokeh

In [62]:
# HTML text that shows when hovering.
# Each @name (or @{name with spaces}) is filled from the column of that name
# in the hovered renderer's data source; the trailing {1.1} is the number
# format applied to the value. The label on the last row now matches the
# 'food & energy' column it displays (it previously read "Food & Shelter").
tooltips = """
    <div style="line-height: 110%;">
        <span style="font-size: 11px; font-style: italic;">@strdate:</span><br>
        <span style="font-size: 10px; padding: 0 5px;">CPI Inflation: </span>
        <span style="font-size: 11px; font-weight: bold;">@total{1.1}%</span><br>
        <span style="font-size: 9px; font-style: italic;">Contribution from:</span><br>
        <span style="font-size: 10px; padding: 0 5px;">Core goods: </span>
        <span>@{core goods}{1.1}</span><br>
        <span style="font-size: 10px; padding: 0 5px;">Core services: </span>
        <span>@{core services}{1.1}</span><br>
        <span style="font-size: 10px; padding: 0 5px;">Shelter: </span>
        <span>@shelter{1.1}</span><br>
        <span style="font-size: 10px; padding: 0 5px;">Food & energy: </span>
        <span>@{food & energy}{1.1}</span>
    </div>
"""
# Bar width scales with the plotted time span divided by the number of bars
# (presumably 750.0 converts seconds to milliseconds at 75% of each slot --
# confirm against Bokeh's datetime axis units).
span_seconds = (d2.index[-1] - d2.index[0]).total_seconds()
width = span_seconds * 750.0 / len(d2.index)

# Gray horizontal reference line at zero
zero_line = Span(location=0, dimension='width',
                 line_color='gray', line_width=1)

# One color and one legend label per stacked component, in col_names order
palette = ['#FFEA00', '#80DEEA', '#00c853', '#304FFE']
legend_labels = ['Core services', 'Shelter', 'Core goods', 'Food & energy']

p = figure(
    plot_width=700,
    plot_height=300,
    x_axis_type='datetime',
    tools=['pan, wheel_zoom, box_zoom, reset'],
    logo=None,
    toolbar_location=None,
    y_range=Range1d(-2.4, 3.8),
)

# Stack built from the `infl` columns; this one carries the legend entries
p.vbar_stack(col_names, x='dates', width=width, color=palette,
             legend=legend_labels, line_alpha=0,
             source=ColumnDataSource(infl))
# Stack built from the `defl` columns, same colors, no legend
p.vbar_stack(col_names, x='dates', width=width, color=palette,
             line_alpha=0, source=ColumnDataSource(defl))

p.add_layout(zero_line)

# Fully transparent line over the totals; it exists only so the hover tool
# below has a renderer backed by all of d2's columns.
r1 = p.line('date', 'total', color='white', alpha=0.0,
            source=ColumnDataSource(d2))

# General figure appearance
p.sizing_mode = 'scale_width'
p.xgrid.grid_line_color = None
p.outline_line_color = 'white'
p.axis.axis_line_color = 'white'
p.toolbar.active_drag = None

# Legend appearance, applied in the order listed
legend_settings = {
    'orientation': "horizontal",
    'location': "top_left",
    'border_line_alpha': 0,
    'background_fill_alpha': 0,
    'margin': 5,
    'padding': 0,
    'spacing': 10,
    'glyph_height': 14,
    'glyph_width': 14,
    'label_text_font_size': '9pt',
}
for prop, setting in legend_settings.items():
    setattr(p.legend, prop, setting)

hover = HoverTool(tooltips=tooltips, show_arrow=False, mode='vline',
                  renderers=[r1])
p.add_tools(hover)

# Export the embeddable script and div, separated by a space, to the site folder
script, div = components(p)
with open('C:/Working/bdecon.github.io/plots/cpi_comp.html', 'w') as out_file:
    out_file.write(f'{script} {div}')

show(p)