# Current Population Survey Dashboard

### Data generation from bd CPS

-----

January 17, 2019

Brian W. Dew (brian.w.dew@gmail.com; @bd_econ)

-----

To Do:

- Next chart: prime-age EPOP by state (change?)

In [1]:
import pandas as pd
import numpy as np
import os

# Work from the directory holding the cleaned CPS files so that they can be
# read by bare file name (e.g. 'cps2018.ft')
os.chdir('/home/brian/Documents/CPS/data/clean/')

# Directory prefix for the csv/txt files written by this notebook
path = '/home/brian/Documents/CPS-dashboard/'

def write_txt(filename, filetext):
    """Write `filetext` to `filename`, replacing any existing contents."""
    with open(filename, mode='w') as handle:
        handle.write(filetext)
        
def epop(group, variable, weight):
    """Return the weighted mean of `variable` within `group`, in percent.

    `group` is indexed by column name; `variable` names the column being
    averaged (e.g. an employment indicator) and `weight` names the column
    holding the weights.
    """
    weights = group[weight]
    weighted_total = (group[variable] * weights).sum()
    return weighted_total / weights.sum() * 100.0

#### EPOP line chart

In [2]:
# Monthly EPOP for ages 25-54: one value per HRMONTH in each annual file,
# 1994 through 2018.
# Each year's result is collected and concatenated once at the end.
# Series.append was removed in pandas 2.0, and calling it inside the loop
# re-copied everything accumulated so far on every pass.
monthly = []
for year in range(1994, 2019):
    df = (pd.read_feather(f'cps{year}.ft')
            .filter(['HRMONTH', 'AGE', 'BASICWGT', 'EMP'])
            .query('25 <= AGE <= 54')
            .groupby('HRMONTH')
            .apply(epop, 'EMP', 'BASICWGT'))
    # Swap the month-number index for the first day of that month
    df.index = pd.to_datetime([f'{year}-{i}-01' for i in df.index])
    monthly.append(df)
data = pd.concat(monthly)

In [3]:
# 12-observation trailing mean; positions without a full window come out as
# NaN and are dropped before saving
smoothed = data.rolling(12).mean()
final = smoothed.dropna()

final.to_csv(f'{path}epop.csv', index_label='DATE', header=['epop'])

In [15]:
# Sum of PWORWGT over the age 25-54 records with HRMONTH == 11 in the 2018 file
nov_prime_age = pd.read_feather('cps2018.ft').query('HRMONTH == 11 and 25 <= AGE <= 54')
pop = nov_prime_age['PWORWGT'].sum()

In [21]:
# Difference between `pop` scaled by 0.746 and `pop` scaled by 0.793.
# NOTE(review): both rates are hard-coded; presumably two prime-age EPOP
# levels being compared -- confirm where they come from.
(pop * 0.746) - (pop * 0.793)

-5959145.416000009

In [20]:
# Display the entries of `data` dated 2010
data.loc['2010']

2010-01-01    74.585414
2010-02-01    74.571988
2010-03-01    74.781929
2010-04-01    75.518383
2010-05-01    75.332567
2010-06-01    75.148762
2010-07-01    74.829120
2010-08-01    75.018997
2010-09-01    75.414555
2010-10-01    75.586410
2010-11-01    75.227490
2010-12-01    75.078694
dtype: float64

In [19]:
# Build the LaTeX label (month and value) for the most recent entry of
# `final` and save it next to the chart data.
date = final.index[-1].strftime('%Y-%m')
# Use .iloc for the positional lookup: integer keys passed to [] on a
# datetime-indexed Series are deprecated as positional access in pandas.
val = round(final.iloc[-1], 1)
# Raw f-string so every backslash reaches LaTeX literally. The emitted text is
# unchanged; the previous literal relied on invalid escape sequences
# (\s, "\ " and \%), which newer Python versions warn about.
text = rf'\scriptsize{{{date}}}\\ \footnotesize{{\textbf{{{val}\%}}}}'
write_txt(f'{path}epop.txt', text)

#### EPOP bar range chart

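Save three CSV files, one per educational group. (The code below currently writes a single combined file, `epop_group.csv`, with one row per group.)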

In [62]:
# Education categories; the query strings below refer to these lists by name
# (`@HS_LTHS`, `@SC`, `@COLL_ADV`)
HS_LTHS = ['LTHS', 'HS']
SC = ['SC']
COLL_ADV = ['COLL', 'ADV']

# (label, youngest age, oldest age, FEMALE flag) for each sex-by-age cell
sex_age_cells = [('Men 25-39', 25, 39, 0),
                 ('Men 40-54', 40, 54, 0),
                 ('Women 25-39', 25, 39, 1),
                 ('Women 40-54', 40, 54, 1)]

# One (label, query string, education key) tuple per sex-age-education cell,
# ordered by education group first and sex-age cell second
groups = [(label,
           f'{lo} <= AGE <={hi} and FEMALE == {female} and EDUC in @{educ}',
           educ)
          for educ in ('HS_LTHS', 'SC', 'COLL_ADV')
          for label, lo, hi, female in sex_age_cells]

In [63]:
# Monthly EPOP for every entry in `groups`: one column per group, one row per
# month, 1994 through 2018.
# Each year's frame is collected and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it inside the loop
# re-copied everything accumulated so far on every pass.
yearly_frames = []
for year in range(1994, 2019):
    year_data = pd.DataFrame()
    df = (pd.read_feather(f'cps{year}.ft')
            .filter(['HRMONTH', 'AGE', 'BASICWGT', 'EMP', 'FEMALE', 'EDUC'])
            .query('25 <= AGE <= 54'))
    for label, condition, educ in groups:
        g = (df.query(condition)
               .groupby('HRMONTH')
               .apply(epop, 'EMP', 'BASICWGT'))
        # Swap the month-number index for the first day of that month
        g.index = pd.to_datetime([f'{year}-{i}-01' for i in g.index])
        # Column key is the group label immediately followed by its education key
        grp_name = label + educ
        year_data[grp_name] = g
    yearly_frames.append(year_data)
data = pd.concat(yearly_frames)

In [64]:
# One row per group with the figures for the range chart. Note that 'max'
# stores the max-minus-min spread, not the maximum itself.
combined_data = pd.DataFrame()
for label, _, educ in groups:
    row_key = label + educ
    series = data[row_key]
    lowest = series.min()
    stats = [('max', series.max() - lowest),
             ('min', lowest),
             ('12m', series.iloc[-13]),   # 12 rows before the last one
             ('last', series.iloc[-1])]
    for stat_name, stat_value in stats:
        combined_data.at[row_key, stat_name] = stat_value

In [68]:
# Bar labels: latest value followed by the signed change from 12 rows earlier,
# e.g. "79.4\% (+0.3)" or "79.4\% (-0.3)"
label = list(round(combined_data['last'] - combined_data['12m'], 1))
plus_minus = ['+' if val >= 0 else '-' for val in label]
last_val = list(combined_data['last'].round(1))
# The sign character comes from plus_minus, so only the magnitude is printed.
# Formatting the signed value as well rendered declines as "(--0.3)".
# Raw f-string keeps the LaTeX "\%" literal without an invalid escape.
final = [rf'{a}\% ({b}{abs(c)})' for a, b, c in zip(last_val, plus_minus, label)]

combined_data['Label'] = final

# Legend text
# Raw strings so the LaTeX backslashes are literal; the emitted text is
# unchanged from the previous non-raw literals.
rng = r'\scriptsize 1994-present range'
ltdt = data.index[-1].strftime('%b %Y')
prdt = data.index[-13].strftime('%b %Y')

legend = rf'\legend{{{rng}, \scriptsize {prdt}, \scriptsize {ltdt}}}'

In [69]:
# Write the legend text to epop_grp_legend.txt under `path`
legend_file = f'{path}epop_grp_legend.txt'
write_txt(legend_file, legend)

In [70]:
# Write the per-group table to epop_group.csv under `path`, with the row keys
# in a column named 'group'
group_csv = f'{path}epop_group.csv'
combined_data.to_csv(group_csv, index_label='group')