In [1]:
import numpy as  np
import pandas as pd
import os

In [2]:
# This should point to base dir of regional_forecast git repo
# https://github.com/BayAreaMetro/regional_forecast
REGIONAL_FORECAST_GIT_DIR = r"\\mainmodel\MainModelShare\regional_forecast"

# The location of REMI outputs
REMI_OUTPUT_BOX_DIR       = os.path.join(os.environ["USERPROFILE"],
                            "Box\Modeling and Surveys\Regional Modeling\REMI\REMI files\PBA50 Blueprint\REMI output")

# e.g. Draft Forecast Base Case, Draft Forecast Upper Case, Forecast 03122020
FORECAST_DIR              = "Forecast 04132020"

# e.g. Blueprint Baseline, Blueprint Basic, Blueprint Crossing, Blueprint Plus Fix it First
FORECAST_VARIANT          = "Blueprint Plus Fix it First"

# Figure out corresponding UrbanSim Scenario number
# see https://github.com/BayAreaMetro/bayarea_urbansim/blob/zoning_mods_update/configs/scenarios.md
URBANSIM_VARIANT_MAPPING  = {"Blueprint Baseline"         :20,
                             "Blueprint Basic"            :21,
                             "Blueprint Plus Fix it First":22,
                             "Blueprint Crossing"         :23}
URBANSIM_VARIANT          = URBANSIM_VARIANT_MAPPING[FORECAST_VARIANT]

# read employment data
employment_input_file     = os.path.join(REMI_OUTPUT_BOX_DIR, FORECAST_DIR, FORECAST_VARIANT,
                                         "employment_projections_s{}.csv".format(URBANSIM_VARIANT))
employment_output_file    = os.path.join(REMI_OUTPUT_BOX_DIR, FORECAST_DIR, FORECAST_VARIANT, 
                                         "emp by ind11_s{}.csv".format(URBANSIM_VARIANT))

In [3]:
#Map the relationship between ACS sectors and REMI sectors.
#We use ACS sector aggregration because self employment data comes from ACS Table C24070, which uses this aggregration
sectormap = pd.read_csv(os.path.join(REGIONAL_FORECAST_GIT_DIR, "regionals post process", "sectormap.csv"))

#remi BEA concept employment translates to EDD+ACS concept with a adjustment factor calculated using 2015 data
emp_translate = pd.read_csv(os.path.join(REGIONAL_FORECAST_GIT_DIR, "regionals post process", "emp_translate.csv"))

In [4]:
sectormap.head()

Unnamed: 0,ind,Industries
0,Agriculture & Natural Resources,"Forestry, fishing, and hunting"
1,Agriculture & Natural Resources,Mining
2,Agriculture & Natural Resources,Farm
3,Construction,Construction
4,Manufacturing & Wholesale,Manufacturing


In [5]:
emp_translate.head()

Unnamed: 0,ind,adjustment
0,Agriculture & Natural Resources,0.642857
1,Construction,0.850598
2,Manufacturing & Wholesale,0.905314
3,Retail,0.854841
4,Transportation & Utilities,0.594682


In [6]:
emp = pd.read_csv(employment_input_file)

# merge with sectormap
emp_merge = pd.merge(sectormap,emp, how='inner', on=['Industries'])

In [7]:
emp_merge

Unnamed: 0,ind,Industries,Units,2015,2016,2017,2018,2019,2020,2021,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,Agriculture & Natural Resources,"Forestry, fishing, and hunting",Thousands (Jobs),10205,10954,10840,11178,11410,10627,10649,...,13890,14017,14145,14270,14397,14519,14641,14765,14889,14997
1,Agriculture & Natural Resources,Mining,Thousands (Jobs),10199,9945,8739,8927,8998,8062,8070,...,10951,11118,11291,11467,11638,11819,12000,12182,12370,12554
2,Agriculture & Natural Resources,Farm,Thousands (Jobs),18321,18108,17853,18265,18503,17252,17111,...,19066,19080,19090,19102,19109,19114,19115,19114,19113,19096
3,Construction,Construction,Thousands (Jobs),244277,255041,263329,269013,270213,249803,260240,...,342775,344186,345731,347380,349088,350856,352672,354568,356530,358350
4,Manufacturing & Wholesale,Manufacturing,Thousands (Jobs),356984,364871,378070,382830,386756,361849,349789,...,311242,311095,310962,310858,310790,310737,310690,310631,310578,310329
5,Manufacturing & Wholesale,Wholesale trade,Thousands (Jobs),165195,157972,155785,160451,163576,152644,150448,...,165828,166527,167219,167914,168607,169282,169949,170608,171259,171800
6,Retail,Retail trade,Thousands (Jobs),418874,417551,418616,432873,444082,421677,414488,...,493176,496161,499164,502190,505221,508220,511220,514211,517198,519890
7,Transportation & Utilities,Transportation and warehousing,Thousands (Jobs),171172,196279,204922,211449,215097,178869,191728,...,255708,258047,260415,262836,265315,267833,270409,273012,275648,278167
8,Transportation & Utilities,Utilities,Thousands (Jobs),15237,16344,17706,18363,18591,17356,17365,...,19586,19699,19812,19936,20064,20200,20341,20489,20639,20784
9,Information,Information,Thousands (Jobs),187626,200837,218673,225915,233552,224481,223367,...,327642,334254,341037,348001,355138,362465,369975,377671,385561,393586


In [8]:
emp_sum = emp_merge.groupby(['ind']).sum().reset_index()

In [9]:
emp_sum

Unnamed: 0,ind,2015,2016,2017,2018,2019,2020,2021,2022,2023,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,Agriculture & Natural Resources,38725,39007,37432,38370,38911,35941,35830,35866,35865,...,43907,44215,44526,44839,45144,45452,45756,46061,46372,46647
1,"Arts, Recreation & Other Services",784241,797899,810911,845278,857191,664864,687155,716419,741773,...,1007025,1013322,1019648,1026118,1032708,1039347,1046071,1052844,1059683,1065973
2,Construction,244277,255041,263329,269013,270213,249803,260240,303662,299549,...,342775,344186,345731,347380,349088,350856,352672,354568,356530,358350
3,Financial & Leasing,489377,509424,523346,527025,532871,495537,494240,496543,494739,...,602076,604863,607671,610492,613328,616133,618936,621730,624526,626947
4,Government,486379,495282,501000,507430,515247,542401,538427,531335,523044,...,530232,528978,527726,526492,525262,523993,522723,521446,520152,518394
5,Health & Educational Services,697502,714984,730117,745437,760424,729199,732110,738974,740292,...,978883,986308,993760,1001404,1009205,1017075,1025091,1033218,1041444,1049169
6,Information,187626,200837,218673,225915,233552,224481,223367,222504,221515,...,327642,334254,341037,348001,355138,362465,369975,377671,385561,393586
7,Manufacturing & Wholesale,522179,522843,533855,543281,550332,514493,500237,489913,476778,...,477070,477622,478181,478772,479397,480019,480639,481239,481837,482129
8,Professional & Managerial Services,1051828,1080501,1096488,1132434,1167509,1110132,1110583,1117713,1117415,...,1463802,1479826,1495953,1512185,1528489,1544803,1561164,1577587,1594073,1609634
9,Retail,418874,417551,418616,432873,444082,421677,414488,411386,402588,...,493176,496161,499164,502190,505221,508220,511220,514211,517198,519890


In [10]:
emp_merge2 = pd.merge(emp_sum,emp_translate, how='inner', on=['ind']).set_index('ind')

In [11]:
emp_merge2

Unnamed: 0_level_0,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,...,2042,2043,2044,2045,2046,2047,2048,2049,2050,adjustment
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agriculture & Natural Resources,38725,39007,37432,38370,38911,35941,35830,35866,35865,35985,...,44215,44526,44839,45144,45452,45756,46061,46372,46647,0.642857
"Arts, Recreation & Other Services",784241,797899,810911,845278,857191,664864,687155,716419,741773,771905,...,1013322,1019648,1026118,1032708,1039347,1046071,1052844,1059683,1065973,0.762703
Construction,244277,255041,263329,269013,270213,249803,260240,303662,299549,295340,...,344186,345731,347380,349088,350856,352672,354568,356530,358350,0.850598
Financial & Leasing,489377,509424,523346,527025,532871,495537,494240,496543,494739,494580,...,604863,607671,610492,613328,616133,618936,621730,624526,626947,0.428895
Government,486379,495282,501000,507430,515247,542401,538427,531335,523044,512965,...,528978,527726,526492,525262,523993,522723,521446,520152,518394,0.958208
Health & Educational Services,697502,714984,730117,745437,760424,729199,732110,738974,740292,742893,...,986308,993760,1001404,1009205,1017075,1025091,1033218,1041444,1049169,0.847728
Information,187626,200837,218673,225915,233552,224481,223367,222504,221515,220569,...,334254,341037,348001,355138,362465,369975,377671,385561,393586,0.916083
Manufacturing & Wholesale,522179,522843,533855,543281,550332,514493,500237,489913,476778,465912,...,477622,478181,478772,479397,480019,480639,481239,481837,482129,0.905314
Professional & Managerial Services,1051828,1080501,1096488,1132434,1167509,1110132,1110583,1117713,1117415,1119176,...,1479826,1495953,1512185,1528489,1544803,1561164,1577587,1594073,1609634,0.754617
Retail,418874,417551,418616,432873,444082,421677,414488,411386,402588,395043,...,496161,499164,502190,505221,508220,511220,514211,517198,519890,0.854841


In [12]:
#choose only column needed and multiply by adjustment
emp_merge2 = emp_merge2[['2015','2020','2025','2030','2035','2040','2045','2050']].mul(emp_merge2.adjustment, axis = 0)
#round to int and bring industry back to it's own column
emp_merge3 = emp_merge2.astype(int).reset_index()

In [13]:
emp_merge3

Unnamed: 0,ind,2015,2020,2025,2030,2035,2040,2045,2050
0,Agriculture & Natural Resources,24894,23104,23321,26106,27039,28027,29021,29987
1,"Arts, Recreation & Other Services",598143,507093,615796,696554,725950,763296,787649,813021
2,Construction,207781,212482,249657,261216,273236,290458,296933,304811
3,Financial & Leasing,209891,212533,212738,235757,244942,257022,263053,268894
4,Government,466052,519733,480841,498460,502939,509267,503310,496729
5,Health & Educational Services,591292,618162,633314,740983,780092,823641,855531,889410
6,Information,171880,205643,201243,245178,266979,294253,325335,360557
7,Manufacturing & Wholesale,472735,465777,414945,434519,428737,431378,434004,436478
8,Professional & Managerial Services,793726,837724,848437,977492,1033410,1092555,1153423,1214656
9,Retail,358070,360466,332291,379739,393971,419014,431883,444423


In [14]:
emp_merge3.to_csv(employment_output_file, header=True, index=False)