In [1]:
# This script uses the seed persons (PUMS) data to create a cross-tabulation of INDUSTRY and OCCUPATION SECTORS.
# Row percentages the cross-tabulation are used as size term coefficients for work location choice model.

#date: 07/28/2023

import pandas as pd, numpy as np, os

In [2]:
# Define data paths
### Synthetic population file location
input_path=r'\\sandag.org\transdata\ABM\RSG_Space\ABM3\2022 data\resident_input\persons.csv'
### Location to save size terms
output_path=r'C:\abm_runs\rohans\calibration\workplace_location\output'

### Read synthetic persons data
seed_persons = pd.read_csv(input_path)

### Crosswalks for NAICS and SOC codes
naics_xwalk = {
    '11': 'emp_ag_min',
    '21': 'emp_ag_min',
    '22': 'emp_utl',
    '23': 'emp_con',
    '31': 'emp_mnf',
    '32': 'emp_mnf',
    '33': 'emp_mnf',
    '3M': '',
    '42': 'emp_whl',
    '44': 'emp_ret',
    '45': 'emp_ret',
    '48': 'emp_trn_wrh',
    '49': 'emp_trn_wrh',
    '4M': '',
    '51': 'emp_bus_svcs',
    '52': 'emp_fin_res_mgm',
    '53': 'emp_fin_res_mgm',
    '54': 'emp_bus_svcs',
    '55': 'emp_fin_res_mgm',
    '56': 'emp_bus_svcs',
    '61': 'emp_educ',
    '62': 'emp_hlth',
    '71': 'emp_ent',
    '721': 'emp_accm',
    '722': 'emp_food',
    '81': 'emp_oth',
    '92': 'emp_gov',
    '99': '',
    'MIL': 'emp_mil'
}

soc_xwalk = {
    '11': 'mngt_busi_scic_arts', #Management, Business, Science, and Arts Labor
    '13': 'mngt_busi_scic_arts',
    '15': 'mngt_busi_scic_arts',
    '17': 'mngt_busi_scic_arts',
    '19': 'mngt_busi_scic_arts',
    '21': 'mngt_busi_scic_arts',
    '23': 'mngt_busi_scic_arts',
    '25': 'services', #Services Labor
    '27': 'mngt_busi_scic_arts',
    '29': 'health', #Healthcare
    '31': 'health',
    '33': 'services', 
    '35': 'services',
    '37': 'services',
    '39': 'services',
    '41': 'sales_office', #Sales and Office Labor
    '43': 'sales_office',
    '45': 'constr_maint', #Natural Resources, Construction, and Maintenance Labor
    '47': 'constr_maint',
    '49': 'constr_maint',
    '51': 'prod_trans_move', #Production Transportation and Material Moving Labor
    '53': 'prod_trans_move',
    '55': 'military', #Military Labor
    '99': ''
}

In [3]:
### Rename the NAICS and SOC codes
seed_persons[['naics2_original_code', 'soc2']] = seed_persons[['naics2_original_code', 'soc2']].astype(str)
seed_persons['INDUSTRY'] = seed_persons['naics2_original_code'].map(naics_xwalk)
seed_persons['OCCUPATION'] = seed_persons['soc2'].map(soc_xwalk)

In [4]:
seed_persons

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,hhid,perid,household_serial_no,pnum,age,sex,miltary,...,indcen,weeks,hours,rac1p,hisp,version,naics2_original_code,soc2,INDUSTRY,OCCUPATION
0,0,0,0,1,1,0,1,34,2,0,...,0,1,40,1,2,0,33,41,emp_mnf,sales_office
1,1,1,1,1,2,0,2,16,2,0,...,0,0,0,1,2,0,0,0,,
2,2,2,2,1,3,0,3,15,2,0,...,0,0,0,1,2,0,0,0,,
3,3,3,3,1,4,0,4,14,2,0,...,0,0,0,1,2,0,0,0,,
4,4,4,4,1,5,0,5,12,1,0,...,0,0,0,1,2,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3283875,3283875,3283875,3283875,1276879,3283876,0,1,41,1,0,...,0,0,0,1,1,0,0,0,,
3283876,3283876,3283876,3283876,1276880,3283877,0,1,28,1,0,...,0,5,40,1,2,0,23,47,emp_con,constr_maint
3283877,3283877,3283877,3283877,1276881,3283878,0,1,76,1,0,...,0,0,0,1,1,0,0,0,,
3283878,3283878,3283878,3283878,1276882,3283879,0,1,46,1,0,...,0,0,0,9,2,0,0,0,,


In [5]:
### Create a pivot table and calculate size terms
size_work = seed_persons.pivot_table(index = ['OCCUPATION'], columns = ['INDUSTRY'], values = 'hhid', aggfunc = 'count', fill_value=0)
size_work.drop('', axis=0, inplace=True)
size_work.to_csv(os.path.join(output_path, 'work_size_terms_counts.csv'), index=True)
size_work

INDUSTRY,Unnamed: 1_level_0,emp_accm,emp_ag_min,emp_bus_svcs,emp_con,emp_educ,emp_ent,emp_fin_res_mgm,emp_food,emp_gov,emp_hlth,emp_mil,emp_mnf,emp_oth,emp_ret,emp_trn_wrh,emp_utl,emp_whl
OCCUPATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
constr_maint,282,429,6358,8315,55544,815,1189,2995,102,7649,436,18539,4344,8939,3206,2335,1201,1080
health,21,81,0,4503,16,1376,430,1971,6,12659,69721,4170,210,5482,3965,0,87,19
military,0,0,0,0,0,0,0,0,0,5,0,46083,0,0,0,0,0,0
mngt_busi_scic_arts,944,2701,4288,185223,19537,8524,8221,96425,13276,66135,24074,16912,64025,18873,11705,3548,5560,6283
prod_trans_move,2002,1260,1433,9238,1908,346,1452,1778,3723,5392,1630,8056,31886,9569,18430,22068,1837,5111
sales_office,2475,2167,818,34384,3564,3349,3627,69505,15055,23472,11853,4272,14040,9936,63537,14361,758,12097
services,17,6810,2738,40665,1174,22564,13181,5475,85178,48132,12359,6185,2402,34375,3977,655,105,173


In [6]:
### Convert size terms to ratio
size_work = size_work.div(size_work.max(axis=1), axis=0)
size_work.reset_index(inplace=True)
size_work.to_csv(os.path.join(output_path, 'work_size_terms.csv'), index=False)
size_work

INDUSTRY,OCCUPATION,Unnamed: 2,emp_accm,emp_ag_min,emp_bus_svcs,emp_con,emp_educ,emp_ent,emp_fin_res_mgm,emp_food,emp_gov,emp_hlth,emp_mil,emp_mnf,emp_oth,emp_ret,emp_trn_wrh,emp_utl,emp_whl
0,constr_maint,0.005077,0.007724,0.114468,0.149701,1.0,0.014673,0.021406,0.053921,0.001836,0.137711,0.00785,0.333771,0.078208,0.160935,0.05772,0.042039,0.021622,0.019444
1,health,0.000301,0.001162,0.0,0.064586,0.000229,0.019736,0.006167,0.02827,8.6e-05,0.181567,1.0,0.05981,0.003012,0.078628,0.05687,0.0,0.001248,0.000273
2,military,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000108,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,mngt_busi_scic_arts,0.005097,0.014582,0.02315,1.0,0.105478,0.04602,0.044384,0.520589,0.071676,0.357056,0.129973,0.091306,0.345664,0.101893,0.063194,0.019155,0.030018,0.033921
4,prod_trans_move,0.062786,0.039516,0.044941,0.28972,0.059838,0.010851,0.045537,0.055761,0.11676,0.169102,0.05112,0.25265,1.0,0.3001,0.577997,0.692091,0.057611,0.16029
5,sales_office,0.035609,0.031178,0.011769,0.494698,0.051277,0.048184,0.052183,1.0,0.216603,0.337702,0.170534,0.061463,0.202,0.142954,0.914136,0.206618,0.010906,0.174045
6,services,0.0002,0.07995,0.032144,0.477412,0.013783,0.264904,0.154747,0.064277,1.0,0.565075,0.145096,0.072613,0.0282,0.403567,0.04669,0.00769,0.001233,0.002031
