In [177]:
from pathlib import Path
import pandas as pd
import json
from itertools import islice
import re

In [178]:
# Mapping of year -> path of the OEWS feather file (iterated by the merge loop further down).
oews_path_dict = json.loads(Path('../code_output/oews_feather_path_dict.json').read_text())

In [179]:
# Mapping of year -> path of the collapsed (pre-2014) OPM feather file, looked up by year key.
opm_path_dict = json.loads(Path('../code_output/opm_collapsed_pre2014_feather_path_dict.json').read_text())

In [180]:
# Census occupation-code crosswalks between vintages.
# The 2010<->2018 table is trimmed to just its two code columns right after loading.
census_occ_1018_xwalk_df = pd.read_feather(
    '../code_output/census_occ_1018_xwalk.feather'
)[['2018_census_occ_code', '2010_census_occ_code']]
census_occ_0010_xwalk_df = pd.read_feather('../code_output/census_occ_0010_xwalk.feather')

# SOC <-> Census occupation crosswalks, one per SOC vintage (2000, 2010, 2018).
occ_soc_00_xwalk_df = pd.read_feather('../code_output/occ_soc_00_xwalk.feather')
occ_soc_10_xwalk_df = pd.read_feather('../code_output/occ_soc_10_xwalk.feather')
occ_soc_18_xwalk_df = pd.read_feather('../code_output/occ_soc_18_xwalk.feather')

In [181]:
# Directory that receives the merged feather files.
binary_path = Path('../cleaned_binaries/')
# Filled in by the merge loop: year -> path of the merged feather file written for that year.
oews_opm_merged_feather_path_dict = dict()

In [182]:
# Merge, year by year, the OEWS wage/employment table with the collapsed OPM table.
#
# For each year up to 2014:
#   1. attach Census occupation codes to OEWS rows via the SOC crosswalk of the
#      matching vintage (2000 codes before 2010, 2010 codes for 2010-2014);
#   2. collapse OEWS to one row per (area, Census occupation), with an
#      employment-weighted mean annual wage;
#   3. inner-join with the OPM table on (MSA code, Census occupation);
#   4. write the result to a feather file and record its path.

# Make sure the output directory exists; to_feather does not create it.
binary_path.mkdir(parents = True, exist_ok = True)

for year, oews_path in oews_path_dict.items():

    year_num = int(year)

    # Years after 2014 are not handled by this notebook.
    if year_num > 2014:
        continue

    print(f'Merging {year}')

    # Pick the code vintage once instead of duplicating the whole pipeline per branch.
    if year_num < 2010:
        vintage, occ_soc_xwalk_df = '2000', occ_soc_00_xwalk_df
    else:
        vintage, occ_soc_xwalk_df = '2010', occ_soc_10_xwalk_df
    soc_col = f'{vintage}_soc_code'
    census_occ_col = f'{vintage}_census_occ_code'

    oews_df = pd.read_feather(Path(oews_path))

    # Inner merge: OEWS rows whose occ_code has no crosswalk entry are dropped.
    oews_df = oews_df.merge(occ_soc_xwalk_df, how = 'inner', left_on = ['occ_code'], right_on = [soc_col])

    # Wage bill per row; NaN whenever employment or the mean wage is missing.
    oews_df['tot_inc'] = oews_df['tot_emp']*oews_df['a_mean']

    # Employment restricted to rows that actually contribute to tot_inc.
    # groupby-sum skips NaN, so dividing the summed tot_inc by the *full* summed
    # tot_emp would bias the mean downwards when some wages are missing.
    wage_emp_col = '_emp_with_wage'
    oews_df[wage_emp_col] = oews_df['tot_emp'].where(oews_df['tot_inc'].notna())

    oews_collapsed_df = oews_df.groupby(by = ['area', census_occ_col]).agg({
        'area_name': 'first',
        'occ_title': 'first',
        'tot_emp': 'sum',
        soc_col: 'first',
        'tot_inc': 'sum',
        wage_emp_col: 'sum'
    })

    # Employment-weighted mean wage; 0/0 (no usable wage in the group) yields NaN.
    oews_collapsed_df['a_mean'] = oews_collapsed_df['tot_inc']/oews_collapsed_df[wage_emp_col]
    # Drop the helper column so the output schema is unchanged.
    oews_collapsed_df = oews_collapsed_df.drop(columns = [wage_emp_col]).reset_index()

    if year_num > 2004:
        oews_collapsed_df['area'] = oews_collapsed_df['area'].str.slice(stop = 4) # MSA code has 1 trailing 0 after 2004

    opm_path = opm_path_dict[year]
    opm_df = pd.read_feather(Path(opm_path))
    # Drops the first character of the OPM MSA code; presumably so that it lines up
    # with the OEWS area code -- TODO confirm against the OPM cleaning step.
    opm_df['msa_code'] = opm_df['msa_code'].str.slice(start = 1)

    # Inner merge: only (area, occupation) cells present in both sources are kept.
    oews_opm_merged_df = oews_collapsed_df.merge(
        opm_df,
        how = 'inner',
        left_on = ['area', census_occ_col],
        right_on = ['msa_code', census_occ_col]
    )
    oews_opm_merged_df = oews_opm_merged_df.reset_index(drop = True)

    target_path = binary_path.joinpath(f'oews_opm_merged_{year}.feather')
    oews_opm_merged_df.to_feather(target_path)

    oews_opm_merged_feather_path_dict[year] = str(target_path)

Merging 2000
Merging 2001
Merging 2002
Merging 1997
Merging 1998
Merging 1999
Merging 2003
Merging 2004
Merging 2005
Merging 2006
Merging 2007
Merging 2008
Merging 2009
Merging 2010
Merging 2011
Merging 2012
Merging 2013
Merging 2014


In [183]:
# Persist the year -> merged-feather-path mapping as JSON
Path('../code_output/oews_opm_merged_feather_path_dict.json').write_text(
    json.dumps(oews_opm_merged_feather_path_dict, indent = 4)
)