## Gather Employment to Population Ratio Data for Several Demographic Groups

First identify the series IDs for several demographic groups from the BLS series listing, then use the BLS API to retrieve each group's employment-to-population ratio.

In [14]:
import pandas as pd
import math
import requests
import json
import config

In [15]:
# Address of the `ln.series` listing inside the BLS `ln` time-series directory
series_url = '/'.join(['https://download.bls.gov/pub/time.series', 'ln', 'ln.series'])

In [16]:
# Read the tab-delimited series listing into a dataframe (one row per series)
df = pd.read_csv(series_url, sep='\t')

In [33]:
# Pick the series to download: unadjusted ('U'), monthly ('M') series with
# lfst_code 23, restricted to the listed age-group and sex codes and to the
# zero code for origin, veteran status, and race.
# NOTE(review): the zero codes presumably mean "all groups" -- confirm against
# the BLS code tables.
keep_cols = ['series_id', 'series_title', 'ages_code', 'sexs_code', 'orig_code', 'race_code']
mask = (
    (df['lfst_code'] == 23)
    & (df['seasonal'] == 'U')
    & (df['periodicity_code'] == 'M')
    & df['ages_code'].isin([8, 20, 31, 33, 38, 42, 49, 65])
    & df['sexs_code'].isin([1, 2])
    & df['orig_code'].isin([0])
    & df['vets_code'].isin([0])
    & df['race_code'].isin([0])
)

# A single .loc selection plus .copy() gives df1 its own data, so adding the
# 'series' column below does not trigger SettingWithCopyWarning.
df1 = df.loc[mask, keep_cols].copy()

# Alternative selection (disabled): keep only rows with a race or origin breakdown
#df1 = df1[~(df1['race_code'] == 0) | ~(df1['orig_code'] == 0)]

# Series IDs in the listing are padded with spaces; strip them before use
df1['series'] = df1['series_id'].str.strip(' ')

# Map series ID -> series title, split into two batches of at most 25 IDs.
# NOTE(review): 25 is presumably a per-request series limit of the BLS API -- confirm.
MAX_SERIES_PER_REQUEST = 25
titles = pd.Series(df1['series_title'].values, index=df1['series'])
series_dict1 = titles.iloc[:MAX_SERIES_PER_REQUEST].to_dict()
series_dict2 = titles.iloc[MAX_SERIES_PER_REQUEST:].to_dict()

In [35]:
def split_date_range(start_year, end_year, span=10):
    """Split an inclusive year range into consecutive chunks of at most `span` years.

    Parameters
    ----------
    start_year, end_year : int
        First and last year wanted; both ends are included.
    span : int, default 10
        Maximum number of years covered by one chunk.

    Returns
    -------
    list of (str, str)
        (first_year, last_year) pairs as strings, in chronological order.

    Raises
    ------
    ValueError
        If `end_year` is earlier than `start_year` or `span` is not positive.
    """
    if span < 1:
        raise ValueError('span must be a positive number of years')
    if end_year < start_year:
        raise ValueError('end_year must not be earlier than start_year')
    chunks = []
    # Stepping from the start year keeps every chunk within `span` years, even
    # when the inclusive range length is an exact multiple of `span` plus one
    # (e.g. 1993-2003), which the previous ceil((end - start) / 10) count missed.
    for first in range(start_year, end_year + 1, span):
        last = min(first + span - 1, end_year)
        chunks.append((str(first), str(last)))
    return chunks


# Include the start and end year here
date_range = (1993, 2012)

# Divide the date range into BLS-API-friendly length requests
dates = split_date_range(date_range[0], date_range[1])
req_no = len(dates)  # number of requests needed per batch of series

dates

[('1993', '2002'), ('2003', '2012')]

In [36]:
# Blank dataframes that the download loop fills in, one per batch of series
df3, df4 = pd.DataFrame(), pd.DataFrame()

# BLS API endpoint, registration-key query string (the key itself lives in
# the local config module), and JSON request headers
headers = dict([('Content-type', 'application/json')])
key = '?registrationkey={0}'.format(config.bls_key)
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

In [37]:
# Download every series in each batch from the BLS API, one request per year
# range, and assemble the responses into one date-indexed dataframe per batch
# (columns are named by series title). Only the first batch is requested here.
for sd in [series_dict1]:
    frames = []  # one wide dataframe per (start, end) year range

    for start, end in dates:

        # The data sent in the post request includes a start and end year.
        # list(...) is needed because a dict view is not JSON serializable.
        data = json.dumps({"seriesid": list(sd.keys()), "startyear": start, "endyear": end})
        p = requests.post('{}{}'.format(url, key), headers=headers, data=data).json()

        # Fail with the API's own message rather than an opaque KeyError
        results = p.get('Results')
        if not isinstance(results, dict) or 'series' not in results:
            raise RuntimeError(
                'BLS API returned no series for {}-{}: {}'.format(start, end, p.get('message')))

        d = {}  # series ID -> pandas series of values for this year range
        for series in results['series']:
            s = series['seriesID']  # Shorten name to 's'

            if len(series['data']) > 0:  # This if/else is to allow for series of different lengths
                # Reverse the order in which the API returned the observations
                obs = pd.DataFrame(series['data']).iloc[::-1]

                # Convert BLS API dates to readable format (YYYY-MM-DD)
                obs = obs.assign(date=pd.to_datetime(obs['period'] + ' ' + obs['year']))

                # Keep only date and series values, as floats, named by series title
                d[s] = obs.set_index('date')['value'].astype(float).rename(sd[s])
            else:
                # No observations in this range: empty, date-indexed float series
                d[s] = pd.Series(dtype=float, name=sd[s],
                                 index=pd.DatetimeIndex([], name='date'))

        # One column per requested series, in the order of the batch dictionary
        frames.append(pd.concat([d[k] for k in sd.keys()], axis=1))

    if not frames:
        continue  # nothing was requested for this batch

    # Stack the year ranges. Assigning (rather than appending to the existing
    # frame) means re-running this cell does not duplicate rows.
    combined = pd.concat(frames)
    if sd is series_dict1:
        df3 = combined
    else:
        df4 = combined

In [38]:
# Attach the second batch's columns to the first batch, aligned on the index
# (a left join, so the rows of df3 are kept)
full_data = df3.join(df4, how='left')

In [40]:
# Average each series over two sub-periods: 1993-2000 and 2001-2012.
# .loc replaces the removed .ix indexer; both ends of a label slice are included.
# NOTE(review): the names suggest the periods are compared as tax regimes -- confirm.
high_tax = full_data.loc['1993-01-01':'2000-12-01'].mean()
low_tax = full_data.loc['2001-01-01':'2012-12-01'].mean()

In [41]:
# Show the two period averages side by side: column 0 is high_tax, column 1 is low_tax
period_means = [high_tax, low_tax]
pd.concat(period_means, axis=1)

Unnamed: 0,0,1
"(Unadj) Employment-Population Ratio - 25-34 yrs., Men",88.705208,85.05625
"(Unadj) Employment-Population Ratio - 55-64 yrs., Women",48.485417,55.320139
"(Unadj) Employment-Population Ratio - 35-44 yrs., Men",89.127083,87.013889
"(Unadj) Employment-Population Ratio - 20-24 yrs., Men",75.183333,68.889583
"(Unadj) Employment-Population Ratio - 45-54 yrs., Men",86.13125,83.183333
"(Unadj) Employment-Population Ratio - 20-24 yrs., Women",65.91875,63.097222
"(Unadj) Employment-Population Ratio - 16-19 yrs., Men",44.1125,32.564583
"(Unadj) Employment-Population Ratio - 16-19 yrs., Women",43.658333,34.478472
"(Unadj) Employment-Population Ratio - 25-34 yrs., Women",71.292708,69.634028
"(Unadj) Employment-Population Ratio - 45-54 yrs., Women",73.035417,72.5625


In [61]:
# Load the saved monthly data with its 'date' column parsed and used as the
# index. Reading it as the index directly avoids leaving a string 'date' column
# in the frame, which would break numeric aggregation on current pandas.
# Note that this rebinds `df`, which earlier held the BLS series listing.
# NOTE(review): no visible cell writes full_data.csv -- confirm where it comes from.
df = pd.read_csv('full_data.csv', index_col='date', parse_dates=True)

In [62]:
# Annual average of every numeric column, one row per calendar year.
# Aggregating with mean() directly (instead of transform + drop_duplicates)
# keeps two different years that happen to have identical averages.
years = df.index.year
annual = df.select_dtypes(include=['number']).groupby(years).mean()

# Label each year by its first observation date, as the previous approach did
first_obs = df.index.to_series().groupby(years).first()
annual.index = pd.DatetimeIndex(first_obs.values, name=df.index.name)

In [63]:
# Display the annual averages
annual

Unnamed: 0_level_0,"(Unadj) Employment-Population Ratio - 16-19 yrs., Black or African American Men","(Unadj) Employment-Population Ratio - 35-44 yrs., White Men","(Unadj) Employment-Population Ratio - 25-34 yrs., White Men","(Unadj) Employment-Population Ratio - 16-19 yrs., White Men","(Unadj) Employment-Population Ratio - 16-19 yrs., White Women","(Unadj) Employment-Population Ratio - 25-54 yrs., Black or African American Women",(Unadj) Employment-Population Ratio - 25-34 yrs. Hispanic or Latino Men,"(Unadj) Employment-Population Ratio - 25-54 yrs., White Women","(Unadj) Employment-Population Ratio - 25-54 yrs., White Men","(Unadj) Employment-Population Ratio - 25-54 yrs., Black or African American Men",...,"(Unadj) Employment-Population Ratio - 55-64 yrs., Black or African American Women","(Unadj) Employment-Population Ratio - 35-44 yrs., Black or African American Men","(Unadj) Employment-Population Ratio - 45-54 yrs., White Men","(Unadj) Employment-Population Ratio - 25-34 yrs., White Women","(Unadj) Employment-Population Ratio - 25-34 yrs., Black or African American Men","(Unadj) Employment-Population Ratio - 55-64 yrs., Black or African American Men","(Unadj) Employment-Population Ratio - 65 yrs. & over, Hispanic or Latino Women","(Unadj) Employment-Population Ratio - 65 yrs. & over, White Women","(Unadj) Employment-Population Ratio - 25-34 yrs., Black or African American Women","(Unadj) Employment-Population Ratio - 35-44 yrs., Black or African American Women"
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1993-01-01,23.633333,89.808333,88.775,46.6,45.658333,,,,,,...,42.083333,77.091667,87.05,69.991667,76.608333,52.658333,,7.891667,61.75,70.233333
1994-01-01,25.416667,90.233333,89.0,48.291667,47.541667,67.05,85.75,72.65,88.908333,76.75,...,43.008333,78.05,86.941667,71.091667,77.125,51.191667,7.0,8.866667,63.516667,70.366667
1995-01-01,25.158333,89.9,89.816667,49.408333,48.108333,68.583333,85.983333,73.25,89.133333,77.533333,...,45.458333,77.75,87.183333,72.183333,79.341667,51.983333,6.1,8.658333,66.1,71.866667
1996-01-01,24.883333,90.308333,90.4,48.233333,47.616667,69.925,87.1,73.575,89.558333,77.125,...,45.408333,77.841667,87.533333,72.058333,78.708333,52.725,6.3,8.375,67.525,72.833333
1997-01-01,23.758333,90.666667,90.791667,48.125,47.191667,70.875,88.691667,74.333333,89.95,78.325,...,45.658333,79.133333,88.041667,73.083333,79.283333,52.025,7.566667,8.341667,69.6,72.775
1998-01-01,28.4,91.291667,91.15,48.583333,49.308333,72.708333,89.125,74.116667,90.233333,79.216667,...,46.916667,79.708333,87.916667,73.341667,80.775,54.808333,5.975,8.45,72.091667,74.55
1999-01-01,26.675,91.5,91.425,49.308333,48.341667,74.591667,90.008333,74.466667,90.383333,80.341667,...,46.466667,81.516667,87.95,73.3,83.008333,53.3,6.2,8.683333,75.25,75.975
2000-01-01,28.958333,91.508333,91.691667,49.475,48.783333,74.633333,90.6,74.566667,90.383333,79.458333,...,47.041667,80.266667,87.775,73.041667,81.933333,55.85,7.316667,9.141667,75.666667,75.458333
2001-01-01,26.366667,90.641667,90.341667,46.2,46.458333,73.65,89.041667,73.775,89.4,77.725,...,48.375,79.125,87.158333,71.7,79.4,56.091667,6.408333,9.3,74.691667,74.966667
2002-01-01,25.633333,89.433333,88.733333,42.341667,44.1,71.483333,87.758333,72.841667,88.141667,76.558333,...,49.358333,77.925,86.175,71.291667,77.808333,54.841667,7.825,9.575,71.516667,73.116667


In [64]:
# Save the annual averages to a CSV file in the working directory
annual_csv_path = 'full_data_ann.csv'
annual.to_csv(annual_csv_path)