# Data_Collector: Jobs Report
------

Updated: January 23, 2018

Status: Collects series, need to do calculations and export

Descr: Python code for reading the BLS jobs report, which is released on the first Friday of the month at 8:30am. [PDF release](https://www.bls.gov/news.release/pdf/empsit.pdf)

To Do: Text calculations. Make charts. Combine charts and save as picture.

------




#### Packages - Run early

In [1]:
import sys # Check which version of python is being used
print(f'python {sys.version_info[0]}.{sys.version_info[1]}')
import pandas as pd    # Pandas to organize and make calcs
print(f'pandas {pd.__version__}')
from pandas_datareader.data import DataReader as pdr 
import requests        # Make requests to econ data APIs
import json            # Work with JSON data
import config          # Local file config.py with BLS API key

python 3.6
pandas 0.22.0


#### Define custom functions - run early

In [2]:
# Define some tools to use later in calculations
def pr(series):
    '''Scale to percent (x100) and round to one decimal place'''
    scaled = series * 100
    return scaled.round(1)

def agr(series):
    '''Return the 12-period percent change, scaled and rounded by pr'''
    year_over_year = series.pct_change(periods=12)
    return pr(year_over_year)

def gr3(series):
    '''Return annualized 3-period-over-3-period growth, scaled by pr'''
    # A ratio of 3-period sums equals the ratio of 3-period averages
    window_total = series.rolling(3).sum()
    three_period_growth = window_total.pct_change(3)
    # Compound the 3-period growth rate four times to annualize it
    return pr(three_period_growth.add(1).pow(4).sub(1))

# Write text for chart annotations
def wt(filename, filetext):
    '''Write filetext to filename, replacing any existing content'''
    with open(filename, 'w') as out:
        out.write(filetext)

#### Parameters - Inspect and run early

In [3]:
# Endpoint of the Bureau of Labor Statistics time series API
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
# Query string carrying the API key; config.py contains: bls_key = 'key'
key = f'?registrationkey={config.bls_key}'
# Series of interest, one ('Fred ID', 'BLS ID') pair per series
#s = [('OPHNFB', 'PRS85006092')]
s = [
    ('PAYEMS', 'CES0000000001'),
    ('UNRATE', 'LNS14000000'),
    ('U6RATE', 'LNS13327709'),
    ('LNS12300060', 'LNS12300060'),
    ('AHETPI', 'CES0500000008'),
    ('LNS12032197', 'LNS12032197'),
    ('LNS12032200', 'LNS12032200'),
    ('LNS12035019', 'LNS12035019'),
    ('LNS13025703', 'LNS13025703'),
]

# The request body is sent as JSON
headers = {'Content-type': 'application/json'}
# Request body: the BLS IDs and the year range (update for 2018)
param = {
    'seriesid': [bls_id for fred_id, bls_id in s],
    'startyear': '2016',
    'endyear': '2018',
}

#### Use BLS text table with series info to collect the name of each series in s - run early

In [4]:
d = {}    # Maps BLS series ID -> series title
for i in ['ln', 'ce']:
    # Series listing published for this BLS database prefix
    url2 = f'https://download.bls.gov/pub/time.series/{i}/{i}.series'
    # The regex separator also absorbs whitespace that precedes each tab
    t = pd.read_table(url2, sep=r'\s*\t', engine='python')
    # Keep only the rows whose ID is one of the BLS IDs in s
    wanted = t['series_id'].isin([bls_id for fred_id, bls_id in s])
    t = t[wanted].set_index('series_id')['series_title']
    d.update(t.to_dict())

#### Get long-term values from FRED - Inspect and run early

In [5]:
# Three dates as datetimes: rng[0] and rng[1] bound the FRED request below;
# rng[2] is kept for later cells
rng = pd.to_datetime(['1973-01-01', '2015-12-31', '2017-12-31'])
fred_start, fred_end = rng[0], rng[1]
dft = pdr([fred_id for fred_id, bls_id in s], 'fred', fred_start, fred_end)
# Relabel columns with the BLS IDs so they match the API results
# NOTE(review): assumes FRED returns columns in the requested order - confirm
dft.columns = [bls_id for fred_id, bls_id in s]

#### Request Jobs Report data from BLS API - run at 8:31

In [20]:
# Request all of the series in s
response = requests.post(f'{url}{key}', data=json.dumps(param),
                         headers=headers, timeout=30)
# Stop on an HTTP error here rather than on a confusing KeyError below
response.raise_for_status()
p = response.json()
print(f"Status: {p['status']}") # Print request status
p = p['Results']['series']  # Keep data portion of post results

# Use first series to get the datetime values
# The first character of each period code is dropped, leaving a month
# number that is parsed together with the year (explicit format, no guessing)
idx = pd.to_datetime(
    [f"{obs['period'][1:]} {obs['year']}" for obs in p[0]['data']],
    format='%m %Y')
print(f'Latest: {idx[0]:%B %Y}') # Print latest value

Status: REQUEST_SUCCEEDED
Latest: March 2018


In [21]:
# Number of observations returned for each series (idx is shared, so they should match)
[len(series['data']) for series in p]

[27, 27, 27, 27, 27, 27, 27, 27, 27]

#### Clean up and make calculations for series of interest

In [22]:
# Build dataframe df from the API results, p
df = pd.DataFrame()
# The loop variable must not be named s: that would overwrite the list of
# series pairs that the earlier cells read when they are re-run
for series in p:
    values = [obs['value'] for obs in series['data']]
    # idx[0] is the latest date, so reverse to get chronological order
    df[series['seriesID']] = pd.Series(
        index=idx, data=values, dtype='float').iloc[::-1]
# Stack the earlier FRED data on top of the new BLS data
# (pd.concat replaces DataFrame.append, which newer pandas no longer has)
df = pd.concat([dft, df])

# Derived indicators, built with the pr, gr3 and agr helpers
denom = df['LNS12035019']          # Shared denominator of both part-time ratios
wage = df['CES0500000008']         # Input to both AHE growth measures
df['ptec'] = pr(df['LNS12032197'].div(denom))   # PT economic
df['ptnec'] = pr(df['LNS12032200'].div(denom))  # PT non-econ
df['payroll'] = df['CES0000000001'].diff()      # Payroll growth (period-to-period change)
df['ahe'] = gr3(wage)              # AHE 3m/3m annualized
df['ahe2'] = agr(wage)             # AHE 12m pct change
# CPI 12m pct change; the request ends at rng[2], later rows stay empty
df['cpi'] = agr(pdr('CPIAUCSL', 'fred', rng[0], rng[2]))

In [23]:
# Show the last rows of df to check the merged data and new columns
df.tail()

Unnamed: 0,CES0000000001,CES0500000008,LNS12032197,LNS12032200,LNS12035019,LNS12300060,LNS13025703,LNS13327709,LNS14000000,ptec,ptnec,payroll,ahe,ahe2,cpi
2017-11-01,147450.0,22.23,4759.0,20645.0,151453.0,79.0,23.9,8.0,4.1,3.1,13.6,216.0,2.6,2.3,2.2
2017-12-01,147625.0,22.31,4856.0,20703.0,151478.0,79.1,22.9,8.1,4.1,3.2,13.7,175.0,2.1,2.4,2.1
2018-01-01,147801.0,22.34,4926.0,20434.0,151894.0,79.0,21.5,8.2,4.1,3.2,13.5,176.0,2.4,2.4,
2018-02-01,148127.0,22.38,5091.0,20641.0,152713.0,79.3,20.7,8.2,4.1,3.3,13.5,326.0,2.5,2.4,
2018-03-01,148230.0,22.42,4944.0,20999.0,152747.0,79.2,20.3,8.0,4.1,3.2,13.7,103.0,2.5,2.4,


#### Chart data and annotation text

In [24]:
# Payrolls: chart data from 2000 onward, plus the LaTeX annotation text
df['payroll'].loc['2000-01-01':].to_csv('payroll.csv', header=True, index_label='DATE')
latest = df.index[-1]              # Date of the last row; labels the annotation
change = df['payroll'].iloc[-1]    # Last value, looked up by position
sign = '+' if change >= 0 else ''  # A negative value already carries its '-'
# Raw strings keep every LaTeX backslash literal (no invalid escape sequences)
payrolls1 = (r'\scriptsize{' + f'{latest:%Y}' + r'}\\\scriptsize{'
             + f'{latest:%b}' + r'}\\ \textbf{' + sign)
payrolls2 = '{0:g}}}'.format(change)
payroll = '{}{}'.format(payrolls1, payrolls2)
wt('payroll.txt', payroll)

In [25]:
# Epop: chart data from 2000 onward, plus the LaTeX annotation text
df['LNS12300060'].loc['2000-01-01':].to_csv('LNS12300060.csv', header=True, index_label='DATE')
latest = df.index[-1]              # Date of the last row; labels the annotation
# Raw strings keep every LaTeX backslash literal (no invalid escape sequences)
epop1 = (r'\scriptsize{' + f'{latest:%Y}' + r'}\\\scriptsize{'
         + f'{latest:%b}' + r'}\\ \textbf{')
# .iloc[-1] takes the last value by position
epop2 = r'{}\%}}'.format(df['LNS12300060'].iloc[-1])
epop = '{}{}'.format(epop1, epop2)
wt('LNS12300060.txt', epop)

In [26]:
# AHE (3m/3m annualized): chart data from 2000 onward, plus annotation text
df['ahe'].loc['2000-01-01':].to_csv('ahe.csv', header=True, index_label='DATE')
latest = df.index[-1]              # Date of the last row; labels the annotation
# Raw strings keep every LaTeX backslash literal (no invalid escape sequences)
ahe1 = (r'\scriptsize{' + f'{latest:%Y}' + r'}\\\scriptsize{'
        + f'{latest:%b}' + r'}\\ \textbf{')
# .iloc[-1] takes the last value by position
ahe2 = r'{}\%}}'.format(df['ahe'].iloc[-1])
ahe = '{}{}'.format(ahe1, ahe2)
wt('ahe.txt', ahe)

In [27]:
# AHE 12m: chart data from 2000 onward, plus the LaTeX annotation text
df['ahe2'].loc['2000-01-01':].to_csv('ahe2.csv', header=True, index_label='DATE')
latest = df.index[-1]              # Date of the last row; labels the annotation
# Raw strings keep every LaTeX backslash literal (no invalid escape sequences)
ahe1 = (r'\scriptsize{' + f'{latest:%Y}' + r'}\\\scriptsize{'
        + f'{latest:%b}' + r'}\\ \textbf{')
# .iloc[-1] takes the last value by position
ahe2 = r'{}\%}}'.format(df['ahe2'].iloc[-1])
ahe = '{}{}'.format(ahe1, ahe2)
wt('ahe2.txt', ahe)

In [35]:
# Write the two annotation pieces to payroll.txt, followed by a newline
with open("payroll.txt", "w") as out:
    out.write(f'{payrolls1}{payrolls2}\n')

In [33]:
# Display the payroll annotation string
payroll

'\\scriptsize{2018}\\\\scriptsize{Jan}\\\textbf{+148}'

In [34]:
# Write the payroll annotation string to a second file, payroll2.txt
wt('payroll2.txt', payroll)

In [10]:
# Show the last rows of df
df.tail()

Unnamed: 0,CES0000000001,CES0500000008,LNS12032197,LNS12032200,LNS12035019,LNS12300060,LNS13025703,LNS13327709,LNS14000000,ptec,ptnec,payroll,ahe,ahe2,cpi
2017-09-01,146963.0,22.2,5048.0,20619.0,151942.0,78.9,25.5,8.3,4.2,3.3,13.6,14.0,2.6,2.6,2.2
2017-10-01,147234.0,22.18,4799.0,20552.0,151334.0,78.9,25.0,8.0,4.1,3.2,13.6,271.0,2.6,2.2,2.0
2017-11-01,147450.0,22.23,4759.0,20645.0,151453.0,79.0,23.9,8.0,4.1,3.1,13.6,216.0,2.6,2.3,2.2
2017-12-01,147610.0,22.31,4856.0,20703.0,151478.0,79.1,22.9,8.1,4.1,3.2,13.7,160.0,2.1,2.4,2.1
2018-01-01,147810.0,22.34,4926.0,20434.0,151894.0,79.0,21.5,8.2,4.1,3.2,13.5,200.0,2.4,2.4,


In [18]:
# Closing part of the payroll annotation: latest value plus the closing brace
# (.iloc[-1] takes the last value by position)
payrolls2 = '{0:g}}}'.format(df['payroll'].iloc[-1])

In [20]:
# Write the payroll annotation string to payroll.txt
# NOTE(review): payroll is not rebuilt in this cell, so it may be stale
wt('payroll.txt', payroll)

'\\scriptsize{2018}\\\\scriptsize{Jan}\\\textbf{+148}'

In [21]:
# Export the payroll column from 2000 onward, with DATE as the index label
df['payroll'].loc['2000-01-01':].to_csv('payroll.csv', header=True, index_label='DATE')

In [8]:
# Show only the last row of df
df.tail(1)

Unnamed: 0,CES0000000001,CES0500000008,LNS12032197,LNS12032200,LNS12035019,LNS12300060,LNS13025703,LNS13327709,LNS14000000,ptec,ptnec,payroll,ahe,ahe2,cpi
2017-12-01,147380.0,22.3,4856.0,20703.0,151478.0,79.1,22.9,8.1,4.1,3.2,13.7,148.0,1.7,2.3,2.1


#### Use BLS text files to look up series IDs

In [None]:
# Address of the BLS series listing for the 'ln' prefix
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
# The regex separator also absorbs whitespace that precedes each tab
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')

In [21]:
# Series whose title contains both 'Unemployment Rate' and 'Black',
# limited to periodicity_code 'M' and seasonal 'S'
titles = ln['series_title']
mask = (titles.str.contains('Unemployment Rate')
        & titles.str.contains('Black')
        & (ln['periodicity_code'] == 'M')
        & (ln['seasonal'] == 'S'))
ln.loc[mask, ['series_id', 'series_title']]

Unnamed: 0,series_id,series_title
915,LNS14000006,(Seas) Unemployment Rate - Black or African Am...
917,LNS14000007,(Seas) Unemployment Rate - Black or African Am...
919,LNS14000008,(Seas) Unemployment Rate - Black or African Am...
935,LNS14000018,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
937,LNS14000019,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
939,LNS14000020,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
959,LNS14000030,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."
961,LNS14000031,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."
963,LNS14000032,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."


#### Labor Productivity and Cost
PRS85006092

In [34]:
# Display the dataframe downloaded from FRED
dft

Unnamed: 0_level_0,PRS85006092
DATE,Unnamed: 1_level_1
1973-01-01,51.435
1973-04-01,51.483
1973-07-01,50.984
1973-10-01,50.654
1974-01-01,50.551
1974-04-01,50.418
1974-07-01,49.883
1974-10-01,50.325
1975-01-01,50.724
1975-04-01,51.570


In [15]:
# Address of the BLS series listing for the 'ln' prefix
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
# The regex separator also absorbs whitespace that precedes each tab
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')

In [None]:
# Inspect the string accessor of the series_id column
# NOTE(review): ce is only assigned in a cell further down - run that first
ce['series_id'].str

In [9]:
# Addresses of the BLS series listings for the 'ce' and 'ln' prefixes
ce_series = 'https://download.bls.gov/pub/time.series/ce/ce.series'
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'

In [None]:
# Second element of the first entry in s
s[0][1]

In [None]:
d = {}    # Maps BLS series ID -> series title
for i in ['ln', 'ce']:
    # Named url2 so the API endpoint stored in url is not overwritten
    url2 = f'https://download.bls.gov/pub/time.series/{i}/{i}.series'
    # The regex separator also absorbs whitespace that precedes each tab
    t = pd.read_table(url2, sep=r'\s*\t', engine='python')
    # Keep only the rows whose ID is one of the BLS IDs in s
    t = t[t['series_id'].isin([i[1] for i in s])].set_index('series_id')['series_title']
    for k, v in t.items(): d[k] = v

In [None]:
# Titles of the series listed in s, indexed by series_id
# NOTE(review): needs t to still have a series_id column - confirm before running
t[t['series_id'].isin([i[1] for i in s])].set_index('series_id')['series_title']

In [None]:
    
# Addresses of the BLS series listings for the 'ce' and 'ln' prefixes
ce_series = 'https://download.bls.gov/pub/time.series/ce/ce.series'
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
# Keep only the ID and title columns of each listing
ce = pd.read_table(ce_series, sep=r'\s*\t', engine='python')[['series_id', 'series_title']]
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')[['series_id', 'series_title']]

# Matching rows from both listings, stacked (pd.concat replaces
# DataFrame.append, which newer pandas no longer has)
pd.concat([ce[ce['series_id'].isin([i[1] for i in s])],
           ln[ln['series_id'].isin([i[1] for i in s])]])

In [None]:
# series_id stored under index label 0
ce['series_id'][0]

In [None]:
# Rows of ce and ln whose series_id is listed in s, stacked (pd.concat
# replaces DataFrame.append, which newer pandas no longer has)
pd.concat([ce[ce['series_id'].isin([i[1] for i in s])],
           ln[ln['series_id'].isin([i[1] for i in s])]])

In [None]:
# series_id stored under index label 0
ce['series_id'][0]

In [None]:
# series_title of the first row, selected by position
ce.iloc[0]['series_title']

In [None]:
# Display the ce table
ce

In [None]:
# Change in the 3-period rolling sum versus three periods earlier
three_period_growth = df['CES0500000008'].rolling(3).sum().pct_change(3)
# Compound four times to annualize, convert to percent, round to one decimal
df['ahe'] = three_period_growth.add(1).pow(4).sub(1).mul(100).round(1)

In [None]:
# Annualized 3-period growth of the rolling sum, in percent, unrounded
(((df['CES0500000008'].rolling(3).sum().pct_change(3)+1)**4)-1)*100 

In [None]:
# Compute the 12-period change two ways: via agr and written out by hand
df['ahe3'] = agr(df['CES0500000008'])
df['ahe2'] = (df['CES0500000008'].pct_change(12) * 100).round(1)

# Show both columns side by side for comparison
df[['ahe2', 'ahe3']].tail()

In [None]:
# Display the dictionary of series titles
d

In [None]:
# Rename the single column of cpi to 'cpi'
cpi.columns = ['cpi']

In [None]:
# Display cpi
cpi

In [None]:
# Download CPIAUCSL from FRED, from rng[0] through 2017-12-31
cpi = pdr(['CPIAUCSL'], 'fred', rng[0], pd.to_datetime('2017-12-31'))

In [None]:
# Display cpi
cpi

In [None]:
# Join the 12-period percent change of cpi (in percent) onto df by index
df = df.join(cpi.pct_change(12) * 100)

In [None]:
# 12-period percent change of CES0500000008, in percent, unrounded
df['ahe2'] = df['CES0500000008'].pct_change(12) * 100

In [None]:
# IPython magic: render matplotlib figures inside the notebook
%matplotlib inline
# Plot LNS14000000 and LNS13327709 from 1995 onward
df.loc['1995-01-01':,['LNS14000000', 'LNS13327709']].plot()

In [None]:
# Plot LNS12300060 from 1995 onward
df.loc['1995-01-01':,'LNS12300060'].plot()

In [None]:
# Plot both AHE growth measures together with CWSR0000SA0 from 1995 onward
# NOTE(review): no visible cell adds a CWSR0000SA0 column to df - confirm it exists
df.loc['1995-01-01':,['ahe2', 'ahe', 'CWSR0000SA0']].plot()

In [None]:
# Plot 12-period rolling means of the two part-time shares from 1994 onward
df.loc['1994-01-01':,['ptnec', 'ptec']].rolling(12).mean().plot()

In [None]:
# Plot the payroll change from 1995 onward
df.loc['1995-01-01':,'payroll'].plot()

In [None]:
# Put the rows of df_prev ahead of the rows of df (pd.concat replaces
# DataFrame.append, which newer pandas no longer has)
# NOTE(review): df_prev is not assigned in any visible cell - confirm its source
df = pd.concat([df_prev, df])

In [None]:
# Annualized 3-period growth of the 3-period rolling mean, in percent,
# rounded to one decimal, written as a method chain
# NOTE(review): CES0500000003 is not among the IDs listed in s - confirm df has it
(df['CES0500000003'].rolling(3).mean().pct_change(3)
             .add(1).pow(4).subtract(1).multiply(100).round(1))

In [None]:
# Relabel the columns of df_prev with the second element of each pair in s
df_prev.columns = [s_id for name, s_id in s]

In [None]:
# Display df_prev
df_prev

In [None]:
# Display df
df

In [None]:
# First of the two dates after conversion to datetime
pd.to_datetime(['1973-01-01', '2015-12-31'])[0]

### Turn json data into a pandas dataframe

In [None]:
# Build dataframe df from the API results, p
df = pd.DataFrame() # Empty dataframe to fill with values
# The loop variable must not be named s: that would overwrite the list of
# series pairs defined in the parameters cell
for series in p:
    values = [obs['value'] for obs in series['data']]
    # Convert to float, then reverse the row order
    df[series['seriesID']] = pd.Series(
        index=idx, data=values).astype(float).iloc[::-1]

### Calculate series for graphs

In [None]:
# Print the first date in idx as month name and year
print(f"Latest: {idx[0]:%B %Y}") 

In [None]:
# datetime is not imported by the packages cell, so import it here
import datetime

# Record and print when this cell was run
today = datetime.datetime.now()
print(f'Cell run: {today:%b %d, %Y}')

In [None]:
# 'period year' strings for each record in r
# NOTE(review): r is not assigned in any visible cell - confirm its source
['{} {}'.format(i['period'], i['year']) for i in r]

In [None]:
# Dates parsed from the first series in p
# NOTE(review): unlike the request cell, this keeps the first character of
# 'period' in the string - confirm it still parses as intended
idx = pd.to_datetime([f"{i['period']} {i['year']}" for i in p[0]['data']])

In [None]:
# NOTE(review): unfinished draft - the if statement below has no condition,
# so this cell is a syntax error as written
dd = {}
for x, y in s:
    if 
    dd[x]
    data = {param['seriesid'][0]: [float(i['value']) for i in r]}

In [None]:
# Empty dictionary to fill with values
dd = {}

# Build a pandas series from each of the API results in p
# The loop variable must not be named s: that would overwrite the list of
# series pairs defined in the parameters cell
for series in p:
    values = [obs['value'] for obs in series['data']]
    # Convert to float, then reverse the row order
    dd[series['seriesID']] = pd.Series(
        index=idx, data=values).astype(float).iloc[::-1]

# Combine series into one pandas dataframe
df = pd.DataFrame(dd)

In [None]:
# Display the API results
p

In [None]:
# List comprehensions - clean json data and convert the
# dates and values into a pandas dataframe (index and one column)
# NOTE(review): r is not assigned in any visible cell - confirm its source
dates = ['{} {}'.format(i['period'], i['year']) for i in r]
index = pd.to_datetime(dates)
# One column, named after the first requested series ID
data = {param['seriesid'][0]: [float(i['value']) for i in r]}
# .iloc[::-1] reverses the sorting/order of the dataframe
df = pd.DataFrame(index=index, data=data).iloc[::-1]