# What's driving prime-age non-participation since March 2001?

In [1]:
# Import preliminaries
import pandas as pd
import numpy as np

import os

# Work from the cleaned CPS data directory; the cells below open their
# 'cps<year>.ft' files by bare filename, i.e. relative to this directory.
os.chdir('/home/brian/Documents/CPS/data/clean/')

In [2]:
# Age and sex demographics in March 2001
filter_cols = ['AGE', 'FEMALE', 'NILFREASON', 'BASICWGT', 'MONTH']
df = pd.read_feather('cps2001.ft', columns=filter_cols)
df = df.query('MONTH == 3 and 25 <= AGE <= 54')

# Map every (sex, age) pair to that age's weighted share of its own sex
# group in March 2001 (shares sum to one within each sex)
dem_2001 = {}
for sex in [0, 1]:
    dfs = df.loc[df['FEMALE'] == sex]
    weight_by_age = dfs.groupby('AGE')['BASICWGT'].sum()
    age_dict = (weight_by_age / dfs['BASICWGT'].sum()).to_dict()
    dem_2001.update({(sex, age): share for age, share in age_dict.items()})

In [3]:
# Prime-age sample for the comparison month (February of the 2019 file)
df = pd.read_feather('cps2019.ft', columns=filter_cols)
df = df.query('MONTH == 2 and 25 <= AGE <= 54')

# For every (sex, age) pair: the share stored in dem_2001 divided by the
# same within-sex age share measured in this month's sample
dem_adj = {}
for sex in [0, 1]:
    dfs = df.loc[df['FEMALE'] == sex]
    weight_by_age = dfs.groupby('AGE')['BASICWGT'].sum()
    age_dict = (weight_by_age / dfs['BASICWGT'].sum()).to_dict()
    for age, share in age_dict.items():
        dem_adj[(sex, age)] = dem_2001[(sex, age)] / share

In [6]:
# Key each row by its (sex, age) pair, then scale the basic weight by the
# adjustment ratio stored for that pair
df['COMBO'] = [*zip(df['FEMALE'], df['AGE'])]
cell_ratio = df['COMBO'].map(dem_adj)
df['ADJWGT'] = cell_ratio * df['BASICWGT']

In [7]:
# Display df; the rendered rows below include the COMBO and ADJWGT columns
df

Unnamed: 0,MONTH,AGE,BASICWGT,FEMALE,NILFREASON,COMBO,ADJWGT
5,2,31,2719.619873,0,,"(0, 31)",2682.596074
8,2,29,2759.212646,1,,"(1, 29)",2276.132577
12,2,33,1813.669556,0,Disabled/Ill,"(0, 33)",1547.781231
15,2,52,2095.983643,1,,"(1, 52)",2095.924769
19,2,45,2676.436035,0,,"(0, 45)",3468.070368
20,2,28,2652.018311,1,,"(1, 28)",2216.506332
21,2,28,3119.887695,0,,"(0, 28)",2489.851822
22,2,31,1689.695557,1,,"(1, 31)",1530.422333
23,2,42,2751.528320,0,,"(0, 42)",3029.011909
27,2,33,2745.097168,0,,"(0, 33)",2342.659313


In [None]:
# Display the (sex, age) -> adjustment ratio dictionary
dem_adj

In [None]:
df = (pd.read_feather('cps2019.ft', columns=filter_cols)
        .query('MONTH == 2 and 25 <= AGE <= 54'))

# Create dictionary with the ratio of the March 2001 within-sex age share
# (dem_2001) to the current month's share, keyed by (sex, age)
dem_adj = {}
for sex in [0, 1]:
    dfs = df[df['FEMALE'] == sex]
    age_dict = (dfs.groupby('AGE').BASICWGT.sum()
                / dfs.BASICWGT.sum()).to_dict()
    for age, share in age_dict.items():
        # This assignment previously had no right-hand side, which is a
        # syntax error; it now stores base-period share / current share.
        dem_adj[(sex, age)] = dem_2001[(sex, age)] / share

# Key each row by its (sex, age) pair so the ratios can be mapped onto rows
df['COMBO'] = list(zip(df['FEMALE'], df['AGE']))

In [None]:
# Key each row by its (sex, age) pair and show the dem_2001 value that
# each row's key maps to
df['COMBO'] = [*zip(df['FEMALE'], df['AGE'])]
df['COMBO'].map(dem_2001)

In [None]:
# Show, for each row, the dem_2001 value stored under its (sex, age) key
df['COMBO'].map(dem_2001)

In [None]:
# Prime-age sample for the comparison month
df = pd.read_feather('cps2019.ft', columns=filter_cols)
df = df.query('MONTH == 2 and 25 <= AGE <= 54')

# Nested mapping: sex -> {age -> that age's weighted share within the sex}
rows_by_sex = {sex: df.loc[df['FEMALE'] == sex] for sex in (0, 1)}
dem_curr = {
    sex: (rows.groupby('AGE')['BASICWGT'].sum()
          / rows['BASICWGT'].sum()).to_dict()
    for sex, rows in rows_by_sex.items()
}

In [None]:
# Spot check for men aged 29: base-period share over current share.
# dem_2001 is keyed by (sex, age) tuples, while dem_curr is nested as
# sex -> age, so the two lookups are spelled differently.
dem_2001[(0, 29)] / dem_curr[0][29]

In [None]:
df = (pd.read_feather('cps2019.ft', columns=filter_cols)
        .query('MONTH == 2 and 25 <= AGE <= 54'))

# Base-period (March 2001) shares of total weight by age and by sex.
# `dem_01` was read below without ever being assigned, which raised a
# NameError, so it is built here from the same prime-age sample definition.
base = (pd.read_feather('cps2001.ft', columns=filter_cols)
          .query('MONTH == 3 and 25 <= AGE <= 54'))
dem_01 = {
    'age': (base.groupby('AGE').BASICWGT.sum()
            / base.BASICWGT.sum()).to_dict(),
    'sex': (base.groupby('FEMALE').BASICWGT.sum()
            / base.BASICWGT.sum()).to_dict(),
}

# What share of total is each age in the current month?
age_curr = (df.groupby('AGE').BASICWGT.sum() / df.BASICWGT.sum()).to_dict()

# Ratio of March 2001 age distribution to current age distribution
age_adj = {age: dem_01['age'][age] / share for age, share in age_curr.items()}

# Ratio then used to create adjusted weight
wgt_adj = df['AGE'].map(age_adj) * df['BASICWGT']

# Same for sex; the sex ratio is applied on top of the age-adjusted weight
sex_curr = (df.groupby('FEMALE').BASICWGT.sum() / df.BASICWGT.sum()).to_dict()
sex_adj = {sex: dem_01['sex'][sex] / share for sex, share in sex_curr.items()}
wgt_adj = df['FEMALE'].map(sex_adj) * wgt_adj

df['ADJWGT'] = wgt_adj

In [None]:
# Display df to inspect its contents
df

In [None]:

    
    
    
#    d_age = (df.groupby('AGE').BASICWGT.sum() / df.BASICWGT.sum()).to_dict()
#d_sex = (df.groupby('FEMALE').BASICWGT.sum() / df.BASICWGT.sum()).to_dict()
#dem_01 = {'age': d_age, 'sex': d_sex}

In [None]:
# Display the base-period (sex, age) -> share dictionary
dem_2001

In [None]:
# Calculations for each month since March 2001
results = pd.DataFrame()
filter_cols = ['AGE', 'FEMALE', 'NILFREASON', 'EDUC', 'BASICWGT', 'MONTH']

# Walk every annual file, keep ages 25-54, and visit one month at a time
for year in range(2001, 2020):
    annual = pd.read_feather(f'cps{year}.ft', columns=filter_cols)
    annual = annual.query('25 <= AGE <= 54')
    for month, df in annual.groupby('MONTH'):
        # First day of the month being processed
        date = pd.to_datetime(f'{year}-{month}-01')

In [None]:
# Base period demographics
def base_period_demographics(base_period):
    """Return weighted age, sex, and education shares for a base period.

    `base_period` is a (year, month) pair: the year selects the file
    ``cps{year}.ft`` and the month is matched against its MONTH column.
    Only rows with AGE between 16 and 80 (inclusive) are kept. For each of
    AGE, FEMALE, and EDUC, the BASICWGT total of every category is divided
    by the BASICWGT total of the kept rows.

    Returns a dictionary with keys 'da_age', 'da_sex', and 'da_educ', each
    holding a {category value: share} dictionary.
    """
    year, month = base_period[0], base_period[1]
    # Only these columns are used below, so skip loading the rest of the file
    needed_cols = ['AGE', 'FEMALE', 'EDUC', 'BASICWGT', 'MONTH']
    df = pd.read_feather(f'cps{year}.ft', columns=needed_cols)
    df = df[(df['AGE'].between(16, 80)) & (df['MONTH'] == month)]
    # The denominator is the same for all three breakdowns; compute it once
    total_weight = df['BASICWGT'].sum()
    breakdowns = (('da_age', 'AGE'), ('da_sex', 'FEMALE'), ('da_educ', 'EDUC'))
    return {key: (df.groupby(column)['BASICWGT'].sum() / total_weight).to_dict()
            for key, column in breakdowns}