# Employment Controls Analysis

In [126]:
# Library Download
import pandas as pd
import urllib.request  # For downloading the xlsx file
import pandas as pd
from sodapy import Socrata
import ssl
import sqlalchemy
import pyodbc

# Downloading Data From EDD

In [127]:
# Sector Level Data
client = Socrata("data.edd.ca.gov", None)
results = client.get_all("pwx8-ztk5", area_name='San Diego County', Year=2019)
results_df = pd.DataFrame.from_records(results)



In [128]:
results_df

Unnamed: 0,area_type,area_name,year,month,series_code,industry_title,seasonally_adjusted_y_n,current_employment
0,County,San Diego County,2019,Annual,1000000,Total Wage and Salary,N,1512800
1,County,San Diego County,2019,Annual,11000000,Total Farm,N,9700
2,County,San Diego County,2019,Annual,0,Total Nonfarm,N,1503100
3,County,San Diego County,2019,Annual,5000000,Total Private,N,1254500
4,County,San Diego County,2019,Annual,6000000,Goods Producing,N,200000
...,...,...,...,...,...,...,...,...
95,County,San Diego County,2019,Annual,90931611,Local Government Education,N,80400
96,County,San Diego County,2019,Annual,90932000,Local Government Excluding Education,N,70200
97,County,San Diego County,2019,Annual,90939012,County,N,20300
98,County,San Diego County,2019,Annual,90939022,City,N,19300


In [129]:
# Cleaning - this is taken from the QCEW Prep Code
results_df.rename(columns={'series_code': 'Series Code'}, inplace=True)
results_df['Series Code']= results_df['Series Code'].astype('int')
results_df['agg_code'] = results_df['Series Code'] // 10**6 # Floor division (how many times does 10^6 go into that series code)

results_df


Unnamed: 0,area_type,area_name,year,month,Series Code,industry_title,seasonally_adjusted_y_n,current_employment,agg_code
0,County,San Diego County,2019,Annual,1000000,Total Wage and Salary,N,1512800,1
1,County,San Diego County,2019,Annual,11000000,Total Farm,N,9700,11
2,County,San Diego County,2019,Annual,0,Total Nonfarm,N,1503100,0
3,County,San Diego County,2019,Annual,5000000,Total Private,N,1254500,5
4,County,San Diego County,2019,Annual,6000000,Goods Producing,N,200000,6
...,...,...,...,...,...,...,...,...,...
95,County,San Diego County,2019,Annual,90931611,Local Government Education,N,80400,90
96,County,San Diego County,2019,Annual,90932000,Local Government Excluding Education,N,70200,90
97,County,San Diego County,2019,Annual,90939012,County,N,20300,90
98,County,San Diego County,2019,Annual,90939022,City,N,19300,90


# Downloading Label Information

In [130]:
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                      'Server=DDAMWSQL16.sandag.org;'
                      'Database=demographic_warehouse;'
                      'Trusted_Connection=yes;')

qry = '''SELECT * FROM [socioec_data].ca_edd.[xref_sandag_industry_edd_sector]'''


sandag_sectors = pd.read_sql_query(qry, conn)
sandag_sectors['edd_sector_v2'] = sandag_sectors['edd_sector'].str.replace('-', '').astype('int')
sandag_sectors

Unnamed: 0,yr,sandag_industry_id,edd_sector,sandag_sector,edd_sector_v2
0,2013,1,11-000000,11,11000000
1,2013,2,10-000000,21,10000000
2,2013,3,43-220000,22,43220000
3,2013,4,20-000000,23,20000000
4,2013,5,30-000000,31-33,30000000
5,2013,6,41-000000,42,41000000
6,2013,7,42-000000,44-45,42000000
7,2013,8,43-400089,48-49,43400089
8,2013,9,50-000000,51,50000000
9,2013,10,55-520000,52,55520000


# Merging Data

In [131]:
temp = pd.merge(results_df, sandag_sectors[['sandag_industry_id', 'edd_sector_v2']],
               left_on = 'Series Code', right_on = 'edd_sector_v2', how = 'left')

In [135]:
temp[temp['industry_title'] == 'Federal Government excluding Department of Def...']

Unnamed: 0,area_type,area_name,year,month,Series Code,industry_title,seasonally_adjusted_y_n,current_employment,agg_code,sandag_industry_id,edd_sector_v2


In [133]:
final_df = temp[~temp['sandag_industry_id'].isna()]
final_df

Unnamed: 0,area_type,area_name,year,month,Series Code,industry_title,seasonally_adjusted_y_n,current_employment,agg_code,sandag_industry_id,edd_sector_v2
1,County,San Diego County,2019,Annual,11000000,Total Farm,N,9700,11,1.0,11000000.0
6,County,San Diego County,2019,Annual,10000000,Mining and Logging,N,400,10,2.0,10000000.0
7,County,San Diego County,2019,Annual,20000000,Construction,N,84000,20,4.0,20000000.0
14,County,San Diego County,2019,Annual,30000000,Manufacturing,N,115700,30,5.0,30000000.0
24,County,San Diego County,2019,Annual,41000000,Wholesale Trade,N,44000,41,6.0,41000000.0
27,County,San Diego County,2019,Annual,42000000,Retail Trade,N,145600,42,7.0,42000000.0
41,County,San Diego County,2019,Annual,43220000,Utilities,N,4600,43,3.0,43220000.0
42,County,San Diego County,2019,Annual,43400089,Transportation and Warehousing,N,29700,43,8.0,43400089.0
44,County,San Diego County,2019,Annual,50000000,Information,N,23500,50,9.0,50000000.0
50,County,San Diego County,2019,Annual,55520000,Finance and Insurance,N,46400,55,10.0,55520000.0


In [138]:
for i in final_df['industry_title']:
    print(i)

Total Farm
Mining and Logging
Construction
Manufacturing
Wholesale Trade
Retail Trade
Utilities
Transportation and Warehousing
Information
Finance and Insurance
Real Estate and Rental and Leasing
Professional, Scientific and Technical S
Management of Companies and Enterprises
Administrative and Support and Waste Ser
Educational Services
Health Care and Social Assistance
Arts, Entertainment, and Recreation
Accommodation
Food Services and Drinking Places
Other Services
Federal Government excluding Department of Defense
Department of Defense
State Government Education
State Government Excluding Education
Local Government Education
Local Government Excluding Education


# Checking Values

#### EDD Total Farm Value 
This is an aggregate of Total Farm and Total NonFarm

In [39]:
raw_csv = pd.read_csv('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-47 Base Year Forecast Output QC/Data/mgra13_based_input2019_01.csv')

In [57]:
raw_csv['emp_total'].sum()

1686273.0

In [None]:
# Purva: 1,513,000

In [27]:

mgra_ind = pd.read_csv('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-47 Base Year Forecast Output QC/Data/mgra_ind.csv')

In [30]:
mgra_ind['emp_total'].sum()

1686273.0

In [90]:
final_df['current_employment'] = final_df['current_employment'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['current_employment'] = final_df['current_employment'].astype('int')


In [178]:
final_df.to_excel('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-47 Base Year Forecast Output QC/Data/Employment Bridge/final_df.xlsx')

# Checking Employment Total Values 

In [63]:
emp_types = ['emp_ag',
'emp_health',
'emp_personal_svcs_office',
'emp_personal_svcs_retail',
'emp_amusement',
'emp_hotel',
'emp_restaurant_bar',
'emp_trans',
'emp_retail',
'emp_whsle_whs',
'emp_fed_non_mil',
'emp_const_non_bldg_prod',
'emp_const_non_bldg_office',
'emp_const_bldg_prod',
'emp_const_bldg_office',
'emp_utilities_prod',
'emp_utilities_office',
'emp_mfg_prod',
'emp_mfg_office',
'emp_pvt_ed_k12',
'emp_pvt_ed_post_k12_oth',
'emp_prof_bus_svcs',
'emp_prof_bus_svcs_bldg_maint',
'emp_state_local_gov_ent',
'emp_state_local_gov_blue',
'emp_state_local_gov_white',
'emp_public_ed',
'emp_fed_mil',
'emp_pvt_hh',
]

In [159]:
raw_data = pd.DataFrame()
for emp in emp_types:
    raw_data[emp] = [raw_csv[emp].sum()]

In [162]:
raw_data

Unnamed: 0,emp_ag,emp_health,emp_personal_svcs_office,emp_personal_svcs_retail,emp_amusement,emp_hotel,emp_restaurant_bar,emp_trans,emp_retail,emp_whsle_whs,...,emp_pvt_ed_k12,emp_pvt_ed_post_k12_oth,emp_prof_bus_svcs,emp_prof_bus_svcs_bldg_maint,emp_state_local_gov_ent,emp_state_local_gov_blue,emp_state_local_gov_white,emp_public_ed,emp_fed_mil,emp_pvt_hh
0,9500,213650.5,27137.0,41987.0,48192,33518,131594,41425.5,158239.5,46625,...,33087.5,112814.5,273570.5,116837.5,19079.5,9480.0,16608.5,0,129765.0,0


In [152]:
final_df['current_employment'] = final_df['current_employment'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['current_employment'] = final_df['current_employment'].astype('int')


In [158]:
sandag_emp = pd.DataFrame()

sandag_emp['emp_ag'] = [final_df[final_df['industry_title'] == 'Mining and Logging']['current_employment'].sum() + final_df[final_df['industry_title'] == 'Total Farm']['current_employment'].sum()]


sandag_emp['emp_health'] = [final_df[final_df['industry_title'] == 'Health Care and Social Assistance']['current_employment'].sum()]

sandag_emp['emp_personal_svcs_office'] = [final_df[final_df['industry_title'] == 'Other Services']['current_employment'].sum()]

sandag_emp['emp_personal_svcs_retail'] = [final_df[final_df['industry_title'] == 'Other Services']['current_employment'].sum()]

sandag_emp['emp_amusement'] = [final_df[final_df['industry_title'] == 'Arts, Entertainment, and Recreation']['current_employment'].sum()]

sandag_emp['emp_hotel'] = [final_df[final_df['industry_title'] == 'Accommodation']['current_employment'].sum()]

# This one is different
sandag_emp['emp_restaurant_bar'] = [final_df[final_df['industry_title'] == 'Food Services and Drinking Places']['current_employment'].sum()]

sandag_emp['emp_trans'] = [final_df[final_df['industry_title'] == 'Transportation and Warehousing']['current_employment'].sum()]

sandag_emp['emp_retail'] = [final_df[final_df['industry_title'] == 'Retail Trade']['current_employment'].sum()]

sandag_emp['emp_whsle_whs'] = [final_df[final_df['industry_title'] == 'Wholesale Trade']['current_employment'].sum()]

# This feels right unsure why in the excel it's labeled as "Defense"
sandag_emp['emp_fed_non_mil'] = [final_df[final_df['industry_title'] == 'Federal Government excluding Department of Defense']['current_employment'].sum()]

sandag_emp['emp_const_non_bldg_prod'] = [final_df[final_df['industry_title'] == 'Construction']['current_employment'].sum()]

sandag_emp['emp_const_non_bldg_office'] = [final_df[final_df['industry_title'] == 'Construction']['current_employment'].sum()]

sandag_emp['emp_const_bldg_prod'] = [final_df[final_df['industry_title'] == 'Construction']['current_employment'].sum()]

sandag_emp['emp_const_bldg_office'] = [final_df[final_df['industry_title'] == 'Construction']['current_employment'].sum()]

sandag_emp['emp_utilities_prod'] = [final_df[final_df['industry_title'] == 'Utilities']['current_employment'].sum()]

sandag_emp['emp_utilities_office'] = [final_df[final_df['industry_title'] == 'Utilities']['current_employment'].sum()]

sandag_emp['emp_mfg_prod'] = [final_df[final_df['industry_title'] == 'Manufacturing']['current_employment'].sum()]

sandag_emp['emp_mfg_office'] = [final_df[final_df['industry_title'] == 'Manufacturing']['current_employment'].sum()]

sandag_emp['emp_pvt_ed_k12'] = [final_df[final_df['industry_title'] == 'Educational Services']['current_employment'].sum()]

sandag_emp['emp_pvt_ed_post_k12_oth'] = [final_df[final_df['industry_title'] == 'Educational Services']['current_employment'].sum()]

sandag_emp['emp_prof_bus_svcs'] = [final_df[final_df['industry_title'] == 'Information']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Finance and Insurance']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Real Estate and Rental and Leasing']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Professional, Scientific and Technical S']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Management of Companies and Enterprises']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Administrative and Support and Waste Ser']['current_employment'].sum()]

sandag_emp['emp_prof_bus_svcs_bldg_maint'] = [final_df[final_df['industry_title'] == 'Information']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Finance and Insurance']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Real Estate and Rental and Leasing']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Professional, Scientific and Technical S']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Management of Companies and Enterprises']['current_employment'].sum()+ final_df[final_df['industry_title'] == 'Administrative and Support and Waste Ser']['current_employment'].sum()]


sandag_emp['emp_state_local_gov_ent'] = [final_df[final_df['industry_title'] == 'State Government Excluding Education']['current_employment'].sum() + final_df[final_df['industry_title'] == 'Local Government Excluding Education']['current_employment'].sum()]

sandag_emp['emp_state_local_gov_blue'] = [final_df[final_df['industry_title'] == 'State Government Excluding Education']['current_employment'].sum() + final_df[final_df['industry_title'] == 'Local Government Excluding Education']['current_employment'].sum()]

sandag_emp['emp_state_local_gov_white'] = [final_df[final_df['industry_title'] == 'State Government Excluding Education']['current_employment'].sum() + final_df[final_df['industry_title'] == 'Local Government Excluding Education']['current_employment'].sum()]


sandag_emp['emp_public_ed'] = [final_df[final_df['industry_title'] == 'State Government Education']['current_employment'].sum() + final_df[final_df['industry_title'] == 'Local Government Education']['current_employment'].sum()]

sandag_emp['emp_fed_mil'] = [final_df[final_df['industry_title'] == 'Department of Defense']['current_employment'].sum()] # This also says to add number 27 there isn't a 27

sandag_emp

Unnamed: 0,emp_ag,emp_health,emp_personal_svcs_office,emp_personal_svcs_retail,emp_amusement,emp_hotel,emp_restaurant_bar,emp_trans,emp_retail,emp_whsle_whs,...,emp_mfg_office,emp_pvt_ed_k12,emp_pvt_ed_post_k12_oth,emp_prof_bus_svcs,emp_prof_bus_svcs_bldg_maint,emp_state_local_gov_ent,emp_state_local_gov_blue,emp_state_local_gov_white,emp_public_ed,emp_fed_mil
0,10100,186400,56400,56400,30200,32100,139400,29700,145600,44000,...,115700,30200,30200,355900,355900,87900,87900,87900,113100,23700


In [99]:
final_df[final_df['industry_title'] == 'Mining and Logging']['current_employment'].sum()

400

In [170]:
comparison = raw_data.T.merge(sandag_emp.T, how='left', left_index=True, right_index=True)
comparison = comparison.rename(columns={'0_x': 'CSV', '0_y': 'SANDAG Sectors from EDD'})


comparison

Unnamed: 0,CSV,SANDAG Sectors from EDD
emp_ag,9500.0,10100.0
emp_health,213650.5,186400.0
emp_personal_svcs_office,27137.0,56400.0
emp_personal_svcs_retail,41987.0,56400.0
emp_amusement,48192.0,30200.0
emp_hotel,33518.0,32100.0
emp_restaurant_bar,131594.0,139400.0
emp_trans,41425.5,29700.0
emp_retail,158239.5,145600.0
emp_whsle_whs,46625.0,44000.0


In [177]:
# comparison.to_excel('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-47 Base Year Forecast Output QC/Data/emp_comparison_totals.xlsx')