In [1]:
import pandas as pd
import numpy as np

# Importing EDD Data

In [2]:
import pyodbc

# Query that pulls every column of the CA_EDD_EMP2019_all_months table.
edd_query = """
    SELECT * 
    FROM [EMPCORE].[dbo].[CA_EDD_EMP2019_all_months]
    """

# Trusted (integrated) connection to the EMPCORE database.
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=sql2014b8.sandag.org;'
                    'Database=EMPCORE;'
                    'Trusted_Connection=yes;')

# Close the connection as soon as the result is in memory (even if the query
# fails) so the notebook does not keep a server session open for the life of
# the kernel. `conn` is rebound by the crosswalk cell before it is used again.
try:
    edd_emp = pd.read_sql_query(edd_query, conn)
finally:
    conn.close()

# Regrouping EDD sectors to match the sector columns in the MGRA file

In [3]:
# MGRA-based input table (Version 5 folder) that is compared against EDD below.
# NOTE(review): absolute, user-specific path - the notebook only runs on a machine with this OneDrive layout.
mgra_emp_csv = r'C:\Users\jchu\OneDrive - San Diego Association of Governments\Projects\2023\2023-028 MGRA15 Input Table 2022\2023-028-02 [Employment]\Data\Version 5\mgra15_based_input_2022_02.csv'
mgra_emp = pd.read_csv(mgra_emp_csv)

In [4]:
# --- Government employment flag ---------------------------------------------
# Map the numeric `own` code to a readable ownership label.
own_code = {1: 'Federal government',
            2: 'State government',
            3: 'Local government',
            5: 'Private sector'}
edd_emp['ownership'] = edd_emp['own'].replace(own_code)

# One 0/1 indicator column per ownership label.
# dtype=int is explicit because pandas >= 2.0 returns boolean dummies by
# default; the additions, `= 0` assignments and multiplication below need
# numeric columns.
edd_emp = pd.concat([edd_emp, pd.get_dummies(edd_emp['ownership'], dtype=int)], axis=1)
edd_emp['emp_gov'] = (edd_emp['Local government']
                      + edd_emp['State government']
                      + edd_emp['Federal government'])

# --- Sector names from the 2-digit NAICS prefix -------------------------------
# astype(str) turns a missing NAICS code into the string 'nan', so its prefix
# is 'na' and it ends up in its own indicator column.
edd_emp['naics_2_digit'] = edd_emp['NAICS(6)'].astype(str).str[:2]
emp_code = {'11': 'emp_ag_min',
            '21': 'emp_ag_min',
            '22': 'emp_utl_mnf_whl',
            '23': 'emp_trn_wrh_con',
            '31': 'emp_utl_mnf_whl',
            '32': 'emp_utl_mnf_whl',
            '33': 'emp_utl_mnf_whl',
            '42': 'emp_utl_mnf_whl',
            '44': 'emp_ret',
            '45': 'emp_ret',
            '48': 'emp_trn_wrh_con',
            '49': 'emp_trn_wrh_con',
            '51': 'emp_bus_svcs',
            '52': 'emp_fin_res_mgm',
            '53': 'emp_fin_res_mgm',
            '54': 'emp_bus_svcs',
            '55': 'emp_fin_res_mgm',
            '56': 'emp_bus_svcs',
            '61': 'emp_educ',
            '62': 'emp_hlth',
            '71': 'emp_ent',
            '72': 'emp_accm_food',
            '81': 'emp_oth'}
# Prefixes that are not keys of emp_code (e.g. '92', '99') keep the raw prefix
# as their name.
edd_emp['naics_names'] = edd_emp['naics_2_digit'].replace(emp_code)
edd_emp = pd.concat([edd_emp, pd.get_dummies(edd_emp['naics_names'], dtype=int)], axis=1)

# --- Resolve overlap between government ownership and sector ------------------
# Government-owned establishments are removed from every sector column except
# education, health and entertainment ...
kept_for_gov = ['emp_educ', 'emp_hlth', 'emp_ent']
zeroed_for_gov = [name for name in edd_emp['naics_names'].unique() if name not in kept_for_gov]
edd_emp.loc[edd_emp['emp_gov'] == 1, zeroed_for_gov] = 0

# ... and establishments in those three sectors are removed from emp_gov.
in_kept_sector = (edd_emp[kept_for_gov] > 0).any(axis=1)
edd_emp.loc[in_kept_sector, ['emp_gov']] = 0

# --- Turn the 0/1 indicators into employment counts ---------------------------
# Row-wise multiplication by avg_emp (axis=0 aligns on the row index).
emp_cols = ['emp_gov',
            '92',
            '99',
            'emp_accm_food',
            'emp_ag_min',
            'emp_bus_svcs',
            'emp_educ',
            'emp_ent',
            'emp_fin_res_mgm',
            'emp_hlth',
            'emp_oth',
            'emp_ret',
            'emp_trn_wrh_con',
            'emp_utl_mnf_whl']
edd_emp[emp_cols] = edd_emp[emp_cols].mul(edd_emp['avg_emp'], axis=0)

In [5]:
# EDD has no emp_mil column, so take military employment out of the MGRA
# table and keep it aside as its own Series for the total-employment maths.
military = mgra_emp.pop('emp_mil')

# Combined accommodation + food sector column
mgra_emp['emp_accm_food'] = mgra_emp['emp_accm'].add(mgra_emp['emp_food'])

# Regional Sectors

In [6]:
# Regional totals per sector from the MGRA-based file.
# Keep every column containing 'emp' except the non-wage-and-salary columns,
# the overall total, the stand-alone accm/food columns and military.
excluded_fragments = ('non_ws', 'emp_tot', 'emp_food', 'emp_mil')
sector_cols = [
    col for col in mgra_emp.columns
    if 'emp' in col
    and col != 'emp_accm'
    and not any(fragment in col for fragment in excluded_fragments)
]
region_sector_emp = mgra_emp[sector_cols]
est_sector_sums = region_sector_emp.sum()
print(est_sector_sums.sum())
est_sector_sums

1498589


emp_gov            121320
emp_ag_min           3305
emp_bus_svcs       267270
emp_fin_res_mgm    105702
emp_educ           132111
emp_hlth           198703
emp_ret            144185
emp_trn_wrh_con    109568
emp_utl_mnf_whl    175188
emp_ent             31417
emp_oth             50794
emp_accm_food      159026
dtype: int64

In [7]:
# Regional totals per sector from EDD, using the same sector columns
# (and column order) as the MGRA-based table.
edd_region_sector_emp = edd_emp[list(region_sector_emp.columns)]
edd_sector_sums = edd_region_sector_emp.sum()
print(edd_sector_sums.sum())
edd_sector_sums

1477741.0


emp_gov            101165.0
emp_ag_min           9800.0
emp_bus_svcs       250255.0
emp_fin_res_mgm     99632.0
emp_educ           130478.0
emp_hlth           206628.0
emp_ret            144671.0
emp_trn_wrh_con    113085.0
emp_utl_mnf_whl    161920.0
emp_ent             36674.0
emp_oth             52202.0
emp_accm_food      171231.0
dtype: float64

In [8]:
# MGRA-based minus EDD: overall difference (printed) and per-sector difference
est_by_sector = region_sector_emp.sum()
edd_by_sector = edd_region_sector_emp.sum()
print(est_by_sector.sum() - edd_by_sector.sum())
est_by_sector - edd_by_sector

20848.0


emp_gov            20155.0
emp_ag_min         -6495.0
emp_bus_svcs       17015.0
emp_fin_res_mgm     6070.0
emp_educ            1633.0
emp_hlth           -7925.0
emp_ret             -486.0
emp_trn_wrh_con    -3517.0
emp_utl_mnf_whl    13268.0
emp_ent            -5257.0
emp_oth            -1408.0
emp_accm_food     -12205.0
dtype: float64

In [9]:
# Difference in each sector's share (%) of regional employment:
# MGRA-based share minus EDD share
est_share_pct = (region_sector_emp.sum() / region_sector_emp.sum().sum()) * 100
edd_share_pct = (edd_region_sector_emp.sum() / edd_region_sector_emp.sum().sum()) * 100
est_share_pct - edd_share_pct

emp_gov            1.249693
emp_ag_min        -0.442634
emp_bus_svcs       0.899806
emp_fin_res_mgm    0.311252
emp_educ          -0.013865
emp_hlth          -0.723355
emp_ret           -0.168627
emp_trn_wrh_con   -0.341148
emp_utl_mnf_whl    0.732931
emp_ent           -0.385322
emp_oth           -0.143099
emp_accm_food     -0.975633
dtype: float64

In [10]:
# Display the EDD establishment table as it stands at this point in the notebook
edd_emp

Unnamed: 0,emp_id,dba,mgra,address,city,zip,own,NAICS(6),NAICS(4),NAICS(4)_sector_name,...,emp_bus_svcs,emp_educ,emp_ent,emp_fin_res_mgm,emp_hlth,emp_oth,emp_ret,emp_trn_wrh_con,emp_utl_mnf_whl,na
0,7,CHARLES E WARNER,3390,4133 KANSAS ST,SAN DIEGO,92104.0,5,814110.0,8141.0,Private Households,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0
1,8,"T I T SERVICE, INC.",22609,440 INDUSTRIAL ST,TECATE,91980.0,5,484110.0,4841.0,General Freight Trucking,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,12,JAMES E JOHNSON,3433,904 57TH STREET,SAN DIEGO,92114.0,5,814110.0,8141.0,Private Households,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0
3,13,"CC EQUIPMENT RENTAL, INC.",20276,2744 SURREY LN,ESCONDIDO,92029.0,5,238911.0,2389.0,Other Specialty Trade Contractors,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
4,14,"TELLES GLOBAL CONSULTING, INC.",2646,12577 CAMINITO ROSITA,SAN DIEGO,92128.0,5,561611.0,5616.0,Investigation and Security Services,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113592,500003,Ambleside Academy,19180,936 Miramar Drive,VISTA,92081.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
113593,500004,Valley Christian School,19743,1350 Discovery Street,San Marcos,92078.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
113594,500005,Classical Academy,20924,2950 South Bear Valley Parkway,Escondido,92025.0,3,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
113595,500006,Maranatha Christian Schools,23705,9050 Maranatha Drive,San Diego,92127.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,160.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


# MGRA Employment total differences

In [11]:
# MGRA total employment as EDD defines it: wage & salary, non-military.
# .copy() gives an independent frame, so adding a column no longer triggers
# pandas' SettingWithCopyWarning.
est_mgra_emp = mgra_emp[['mgra']].copy()
est_mgra_emp['est_emp'] = (mgra_emp['emp_tot']
                           - mgra_emp['emp_non_ws_wfh']
                           - mgra_emp['emp_non_ws_oth']
                           - military)

# EDD total employment per MGRA
edd_mgra_emp = (
    edd_emp[['mgra', 'avg_emp']]
    .rename(columns={'avg_emp': 'edd_emp'})
    .groupby('mgra')
    .sum()
    .reset_index()
)

# Difference of MGRA - EDD. The outer merge keeps MGRAs present on only one
# side; their missing employment is filled with 0.
mgra_diffs = est_mgra_emp.merge(edd_mgra_emp, how='outer', on='mgra').fillna(0)
mgra_diffs['diff'] = mgra_diffs['est_emp'] - mgra_diffs['edd_emp']
# pct_diff is NaN (0/0) or +/-inf where EDD employment is 0
mgra_diffs['pct_diff'] = (mgra_diffs['diff'] / mgra_diffs['edd_emp']) * 100
mgra_diffs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  est_mgra_emp['est_emp'] = mgra_emp['emp_tot']-mgra_emp['emp_non_ws_wfh']-mgra_emp['emp_non_ws_oth']-military


Unnamed: 0,mgra,est_emp,edd_emp,diff,pct_diff
0,1,22,22.0,0.0,0.000000
1,2,162,161.0,1.0,0.621118
2,3,278,276.0,2.0,0.724638
3,4,281,278.0,3.0,1.079137
4,5,12,11.0,1.0,9.090909
...,...,...,...,...,...
24316,24317,0,0.0,0.0,
24317,24318,3,3.0,0.0,0.000000
24318,24319,0,0.0,0.0,
24319,24320,0,0.0,0.0,


# MGRA employment differences by sector, and each sector's share (%) within the MGRA

### dist = the sector's share (%) of total employment within the MGRA

In [12]:
# EDD side: long table of (mgra, sector, edd_emp) plus the MGRA's EDD total
edd_sector_names = [name for name in edd_emp['naics_names'].unique() if 'emp' in name]
edd_mgra_sector_emp = (
    edd_emp[['mgra', 'avg_emp'] + edd_sector_names + ['emp_gov']]
    .melt(id_vars='mgra', var_name='sector', value_name='edd_emp')
    # avg_emp is melted along with the sectors; drop it again here
    .loc[lambda long: long['sector'] != 'avg_emp']
    .reset_index(drop=True)
    .sort_values('mgra')
    .groupby(['mgra', 'sector'])
    .sum()
    .reset_index()
    .merge(mgra_diffs[['mgra', 'edd_emp']], how='left', on='mgra', suffixes=('', '_total'))
)

# MGRA-based side: same sectors, same long layout, plus the MGRA's estimated total
shared_sectors = list(edd_mgra_sector_emp['sector'].unique())
est_mgra_sector_emp = (
    mgra_emp[['mgra'] + shared_sectors]
    .melt(id_vars='mgra', var_name='sector', value_name='est_emp')
    .merge(mgra_diffs[['mgra', 'est_emp']], how='left', on='mgra', suffixes=('', '_total'))
)

# Sector-level difference and within-MGRA shares (%).
# Shares are NaN/inf where the MGRA total is 0.
mgra_sector_diffs = (
    edd_mgra_sector_emp
    .merge(est_mgra_sector_emp, how='outer', on=['mgra', 'sector'])
    .fillna(0)
    .assign(
        diff=lambda d: d['est_emp'] - d['edd_emp'],
        edd_dist=lambda d: (d['edd_emp'] / d['edd_emp_total']) * 100,
        est_dist=lambda d: (d['est_emp'] / d['est_emp_total']) * 100,
        dist_diff=lambda d: d['est_dist'] - d['edd_dist'],
    )
)
mgra_sector_diffs.sort_values('diff')

Unnamed: 0,mgra,sector,edd_emp,edd_emp_total,est_emp,est_emp_total,diff,edd_dist,est_dist,dist_diff
57111,5682,emp_educ,15425.0,15740.0,1193,1519,-14232.0,97.998729,78.538512,-19.460217
86790,8527,emp_gov,11193.0,11388.0,4113,4256,-7080.0,98.287671,96.640038,-1.647634
139590,14148,emp_gov,6694.0,6694.0,62,62,-6632.0,100.000000,100.000000,0.000000
74286,7318,emp_gov,8409.0,8683.0,3348,3635,-5061.0,96.844409,92.104539,-4.739869
87319,8578,emp_hlth,8623.0,8626.0,4512,4515,-4111.0,99.965221,99.933555,-0.031667
...,...,...,...,...,...,...,...,...,...,...
286407,6834,emp_utl_mnf_whl,0.0,0.0,1677,1677,1677.0,,100.000000,
287093,10032,emp_utl_mnf_whl,0.0,0.0,2029,2029,2029.0,,100.000000,
145672,14817,emp_ent,0.0,3161.0,2290,2337,2290.0,0.000000,97.988875,97.988875
138199,14008,emp_hlth,187.0,825.0,3152,3828,2965.0,22.666667,82.340648,59.673981


# Crosswalk

In [13]:
import pyodbc

# Read the crosswalk query first, so a missing .sql file fails before any
# database connection is opened.
with open(r'LUZ_MGRA_cross.sql', 'r') as sql_file:
    sql_query = sql_file.read()

# Trusted (integrated) connection to the `estimates` database.
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=estimates;'
                    'Trusted_Connection=yes;')

# Always release the connection, even if the query raises.
try:
    crosswalk = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

crosswalk

Unnamed: 0,mgra_id,mgra,census_tract,cpa,jurisdiction,sra,luz,region
0,150000100,1,2705,Mid-City:Eastern Area,San Diego,MID-CITY,10,San Diego
1,150000200,2,5601,Downtown,San Diego,CENTRAL SAN DIEGO,28,San Diego
2,150000300,3,15407,*Not in a CPA*,El Cajon,EL CAJON,239,San Diego
3,150000400,4,17407,*Not in a CPA*,Encinitas,SAN DIEGUITO,151,San Diego
4,150000500,5,17407,*Not in a CPA*,Encinitas,SAN DIEGUITO,151,San Diego
...,...,...,...,...,...,...,...,...
24316,152431700,24317,14806,County Islands,Unincorporated,LA MESA,11,San Diego
24317,152431800,24318,20706,North County Metro,Unincorporated,ESCONDIDO,212,San Diego
24318,152431900,24319,21001,Mountain Empire,Unincorporated,ANZA-BORREGO SPRINGS,225,San Diego
24319,152432000,24320,21002,Desert,Unincorporated,ANZA-BORREGO SPRINGS,227,San Diego


In [14]:
# Attach the crosswalk geography columns to every MGRA row (left join on mgra)
mgra_emp_crosswalk = pd.merge(mgra_emp, crosswalk, how='left', on='mgra')
mgra_emp_crosswalk

Unnamed: 0,mgra,taz,LUZ,pop,hhp,hs,hs_sf,hs_mf,hs_mh,hh,...,effective_acres,truckregiontype,emp_accm_food,mgra_id,census_tract,cpa,jurisdiction,sra,luz,region
0,1,3010,10,440,440,176,84,92,0,174,...,18.837621,1,0,150000100,2705,Mid-City:Eastern Area,San Diego,MID-CITY,10,San Diego
1,2,1797,28,130,68,56,0,56,0,48,...,2.872330,1,26,150000200,5601,Downtown,San Diego,CENTRAL SAN DIEGO,28,San Diego
2,3,4361,239,549,549,200,23,177,0,192,...,25.713898,1,0,150000300,15407,*Not in a CPA*,El Cajon,EL CAJON,239,San Diego
3,4,340,151,5,5,3,3,0,0,2,...,2.678374,1,192,150000400,17407,*Not in a CPA*,Encinitas,SAN DIEGUITO,151,San Diego
4,5,388,151,90,90,43,43,0,0,36,...,4.057765,1,0,150000500,17407,*Not in a CPA*,Encinitas,SAN DIEGUITO,151,San Diego
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24316,24317,3691,11,5,5,2,2,0,0,2,...,0.648684,1,0,152431700,14806,County Islands,Unincorporated,LA MESA,11,San Diego
24317,24318,3683,212,136,136,46,46,0,0,46,...,50.514707,1,0,152431800,20706,North County Metro,Unincorporated,ESCONDIDO,212,San Diego
24318,24319,4943,225,0,0,0,0,0,0,0,...,1892.855556,1,0,152431900,21001,Mountain Empire,Unincorporated,ANZA-BORREGO SPRINGS,225,San Diego
24319,24320,4940,227,0,0,0,0,0,0,0,...,-1.847529,1,0,152432000,21002,Desert,Unincorporated,ANZA-BORREGO SPRINGS,227,San Diego


In [15]:
# Attach the crosswalk geography columns to every EDD establishment (left join on mgra)
edd_emp_crosswalk = pd.merge(edd_emp, crosswalk, how='left', on='mgra')

# MANUAL OVERRIDE OF MGRA FOR POWAY SHERIFFS
# The 'COUNTY OF SAN DIEGO' records located in MGRA 14148 are re-labelled to
# jurisdiction 'San Diego' / SRA 'CENTRAL SAN DIEGO'; the mgra value itself
# is left unchanged.
county_in_14148 = (
    edd_emp_crosswalk['dba'].eq('COUNTY OF SAN DIEGO')
    & edd_emp_crosswalk['mgra'].eq(14148)
)
edd_emp_crosswalk.loc[county_in_14148, ['jurisdiction']] = 'San Diego'
edd_emp_crosswalk.loc[county_in_14148, ['sra']] = 'CENTRAL SAN DIEGO'

edd_emp_crosswalk

Unnamed: 0,emp_id,dba,mgra,address,city,zip,own,NAICS(6),NAICS(4),NAICS(4)_sector_name,...,emp_trn_wrh_con,emp_utl_mnf_whl,na,mgra_id,census_tract,cpa,jurisdiction,sra,luz,region
0,7,CHARLES E WARNER,3390,4133 KANSAS ST,SAN DIEGO,92104.0,5,814110.0,8141.0,Private Households,...,0.0,0.0,0,150339000,1301,Greater North Park,San Diego,CENTRAL SAN DIEGO,4,San Diego
1,8,"T I T SERVICE, INC.",22609,440 INDUSTRIAL ST,TECATE,91980.0,5,484110.0,4841.0,General Freight Trucking,...,0.0,0.0,0,152260900,21101,Mountain Empire,Unincorporated,MOUNTAIN EMPIRE,230,San Diego
2,12,JAMES E JOHNSON,3433,904 57TH STREET,SAN DIEGO,92114.0,5,814110.0,8141.0,Private Households,...,0.0,0.0,0,150343300,3001,Southeastern:Encanto Neighborhoods,San Diego,SOUTHEASTERN SAN DIEGO,12,San Diego
3,13,"CC EQUIPMENT RENTAL, INC.",20276,2744 SURREY LN,ESCONDIDO,92029.0,5,238911.0,2389.0,Other Specialty Trade Contractors,...,1.0,0.0,0,152027600,20311,San Dieguito,Unincorporated,ESCONDIDO,199,San Diego
4,14,"TELLES GLOBAL CONSULTING, INC.",2646,12577 CAMINITO ROSITA,SAN DIEGO,92128.0,5,561611.0,5616.0,Investigation and Security Services,...,0.0,0.0,0,150264600,17051,Rancho Bernardo,San Diego,NORTH SAN DIEGO,130,San Diego
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113592,500003,Ambleside Academy,19180,936 Miramar Drive,VISTA,92081.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,0.0,0,151918000,19904,North County Metro,Unincorporated,VISTA,165,San Diego
113593,500004,Valley Christian School,19743,1350 Discovery Street,San Marcos,92078.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,0.0,0,151974300,20029,*Not in a CPA*,San Marcos,SAN MARCOS,195,San Diego
113594,500005,Classical Academy,20924,2950 South Bear Valley Parkway,Escondido,92025.0,3,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,0.0,0,152092400,20706,*Not in a CPA*,Escondido,ESCONDIDO,211,San Diego
113595,500006,Maranatha Christian Schools,23705,9050 Maranatha Drive,San Diego,92127.0,5,611110.0,6111.0,Elementary and Secondary Schools,...,0.0,0.0,0,152370500,17066,San Dieguito,Unincorporated,NORTH SAN DIEGO,144,San Diego


# All geographies

In [16]:
def rollup_diff(geography, est_df=None, edd_df=None, military_emp=None):
    """Compare MGRA-based and EDD employment rolled up to ``geography``.

    Parameters
    ----------
    geography : str
        Name of the column to aggregate on; it must exist in both input
        frames (e.g. ``'luz'``).
    est_df : pandas.DataFrame, optional
        MGRA-based table with ``emp_tot``, ``emp_non_ws_wfh``,
        ``emp_non_ws_oth`` and one column per sector. Defaults to the
        notebook-level ``mgra_emp_crosswalk``.
    edd_df : pandas.DataFrame, optional
        EDD table with ``avg_emp``, ``naics_names``, ``emp_gov`` and one
        column per sector. Defaults to the notebook-level
        ``edd_emp_crosswalk``.
    military_emp : pandas.Series, optional
        Military employment subtracted from ``emp_tot``; it is aligned to
        ``est_df`` by index. Defaults to the notebook-level ``military``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``geography_diff``: one row per geography with ``est_emp``,
        ``edd_emp``, ``diff`` (est - EDD) and ``pct_diff``.
        ``geo_sector_diffs``: one row per geography and sector with the
        sector employment, the geography totals, ``diff``, the sector shares
        ``edd_dist`` / ``est_dist`` (%) and ``dist_diff``.
        Percentages are NaN/inf where the denominator is 0.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if est_df is None:
        est_df = mgra_emp_crosswalk
    if edd_df is None:
        edd_df = edd_emp_crosswalk
    if military_emp is None:
        military_emp = military

    # geography level mgra based totals (wage & salary, non-military).
    # assign() builds a new frame, which avoids the SettingWithCopyWarning
    # raised when a column is added to a slice.
    est_emp_rollup = (
        est_df[[geography]]
        .assign(est_emp=est_df['emp_tot']
                - est_df['emp_non_ws_wfh']
                - est_df['emp_non_ws_oth']
                - military_emp)
        .groupby(geography)
        .sum()
        .reset_index()
    )

    # geography level based edd totals
    edd_emp_rollup = (
        edd_df[[geography, 'avg_emp']]
        .rename(columns={'avg_emp': 'edd_emp'})
        .groupby(geography)
        .sum()
        .reset_index()
    )

    # merge edd and mgra based totals
    geography_diff = est_emp_rollup.merge(edd_emp_rollup, how='outer', on=geography)
    geography_diff['diff'] = geography_diff['est_emp'] - geography_diff['edd_emp']
    geography_diff['pct_diff'] = (geography_diff['diff'] / geography_diff['edd_emp']) * 100

    # edd geography and sector level
    # (raw NAICS prefixes without 'emp' in the name are left out)
    sector_cols = [name for name in edd_df['naics_names'].unique() if 'emp' in name] + ['emp_gov']
    edd_geo_sector_emp = (
        edd_df[[geography] + sector_cols]
        .melt(id_vars=geography, var_name='sector', value_name='edd_emp')
        .sort_values(geography)
        .groupby([geography, 'sector'])
        .sum()
        .reset_index()
        .merge(geography_diff[[geography, 'edd_emp']], how='left', on=geography, suffixes=('', '_total'))
    )

    # est geography and sector level, restricted to the sectors found in EDD
    est_geo_sector_emp = (
        est_df[[geography] + list(edd_geo_sector_emp['sector'].unique())]
        .groupby(geography)
        .sum()
        .reset_index()
        .melt(id_vars=geography, var_name='sector', value_name='est_emp')
        .merge(geography_diff[[geography, 'est_emp']], how='left', on=geography, suffixes=('', '_total'))
    )

    # calculate sector level diffs and distributions
    geo_sector_diffs = edd_geo_sector_emp.merge(est_geo_sector_emp, how='outer', on=[geography, 'sector']).fillna(0)
    geo_sector_diffs['diff'] = geo_sector_diffs['est_emp'] - geo_sector_diffs['edd_emp']
    geo_sector_diffs['edd_dist'] = (geo_sector_diffs['edd_emp'] / geo_sector_diffs['edd_emp_total']) * 100
    geo_sector_diffs['est_dist'] = (geo_sector_diffs['est_emp'] / geo_sector_diffs['est_emp_total']) * 100
    geo_sector_diffs['dist_diff'] = geo_sector_diffs['est_dist'] - geo_sector_diffs['edd_dist']

    return (geography_diff, geo_sector_diffs)

In [17]:
# LUZ-level totals: first element of the pair returned by rollup_diff
luz_total_diffs = rollup_diff(geography='luz')[0]
luz_total_diffs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  est_emp_rollup['est_emp'] = mgra_emp['emp_tot']-mgra_emp['emp_non_ws_wfh']-mgra_emp['emp_non_ws_oth']-military


Unnamed: 0,luz,est_emp,edd_emp,diff,pct_diff
0,1,6978,6526.0,452.0,6.926142
1,2,8772,8558.0,214.0,2.500584
2,3,24282,25891.0,-1609.0,-6.214515
3,4,8001,8119.0,-118.0,-1.453381
4,5,54394,54013.0,381.0,0.705386
...,...,...,...,...,...
240,241,2456,2590.0,-134.0,-5.173745
241,242,12354,8486.0,3868.0,45.580957
242,243,14368,13573.0,795.0,5.857217
243,244,25127,22639.0,2488.0,10.989885


In [18]:
# LUZ-by-sector comparison: second element of the pair returned by rollup_diff
luz_sector_diffs = rollup_diff(geography='luz')[1]
luz_sector_diffs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  est_emp_rollup['est_emp'] = mgra_emp['emp_tot']-mgra_emp['emp_non_ws_wfh']-mgra_emp['emp_non_ws_oth']-military


Unnamed: 0,luz,sector,edd_emp,edd_emp_total,est_emp,est_emp_total,diff,edd_dist,est_dist,dist_diff
0,1,emp_accm_food,1901.0,6526.0,1790,6978,-111.0,29.129635,25.652049,-3.477586
1,1,emp_ag_min,1.0,6526.0,0,6978,-1.0,0.015323,0.000000,-0.015323
2,1,emp_bus_svcs,1534.0,6526.0,1631,6978,97.0,23.505976,23.373459,-0.132517
3,1,emp_educ,613.0,6526.0,615,6978,2.0,9.393196,8.813414,-0.579783
4,1,emp_ent,100.0,6526.0,72,6978,-28.0,1.532332,1.031814,-0.500518
...,...,...,...,...,...,...,...,...,...,...
2935,214,emp_trn_wrh_con,0.0,0.0,0,0,0.0,,,
2936,217,emp_trn_wrh_con,0.0,0.0,0,0,0.0,,,
2937,72,emp_utl_mnf_whl,0.0,0.0,0,0,0.0,,,
2938,214,emp_utl_mnf_whl,0.0,0.0,0,0,0.0,,,


In [19]:
# Write one QA workbook per crosswalk geography (every crosswalk column after
# the first), with a totals sheet and a sector sheet.
for geo in crosswalk.columns[1:]:
    # Compute first, so a failure in the roll-up does not leave a
    # half-written workbook behind.
    rollup_output = rollup_diff(geo)

    # The context manager saves and closes the file on exit;
    # ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(r'v5\{}_emp_est_minus_EDD_QA_v5.xlsx'.format(geo), engine='xlsxwriter') as writer:
        rollup_output[0].to_excel(writer, sheet_name='{}_diff'.format(geo), index=False)
        rollup_output[1].to_excel(writer, sheet_name='{}_sector_diff'.format(geo), index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  est_emp_rollup['est_emp'] = mgra_emp['emp_tot']-mgra_emp['emp_non_ws_wfh']-mgra_emp['emp_non_ws_oth']-military
