In [1]:
# Libraries 
import numpy as np
import pandas as pd
import pyodbc
import copy

# EC2_Data_file_2_Build

## Downloading the SQL Data

In [2]:
# Staging Data (SQL)
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=ws;'
                    'Trusted_Connection=yes;')

In [47]:
query = """
-- For JT00
DECLARE	@return_value int

EXEC	@return_value = [employment_centers].[sp_wac_characteristics_by_center_JT00]
		@release_id = 2
"""

sql_jt00 =  pd.read_sql_query(query, conn)

In [48]:
query = """
-- For JT02
DECLARE	@return_value int

EXEC	@return_value = [employment_centers].[sp_wac_characteristics_by_center_JT02]
		@release_id = 2
"""

sql_jt02 =  pd.read_sql_query(query, conn)

In [20]:
query = """
-- Jobs by Average Wage
DECLARE	@return_value int

EXEC	@return_value = [employment_centers].[sp_avg_wage_by_center]
		@release_id = 2
"""

sql_j_avg_wage =  pd.read_sql_query(query, conn)

In [22]:
sql_j_avg_wage[['employment_center', 'avg_wage']]

Unnamed: 0,employment_center,avg_wage
0,1,49360
1,2,50038
2,3,52719
3,4,86672
4,5,58862
...,...,...
142,1039,107258
143,1040,111149
144,1041,86679
145,1042,124000


In [26]:
def avg_wage_grab(conn):
    query = """
        DECLARE	@return_value int

        EXEC	@return_value = [employment_centers].[sp_avg_wage_by_center]
                @release_id = 2
        """

    return pd.read_sql_query(query, conn)[['employment_center', 'avg_wage']].set_index('employment_center')

In [27]:
avg_wage_grab()

Unnamed: 0_level_0,avg_wage
employment_center,Unnamed: 1_level_1
1,49360
2,50038
3,52719
4,86672
5,58862
...,...
1039,107258
1040,111149
1041,86679
1042,124000


In [5]:
query = """
-- EC List Info 
SELECT [employment_center_id],
	[employment_center_name],
	[tier],
	[parent]
  FROM [ws].[employment_centers].[dim_employment_center_2.0]
"""

ec_list =  pd.read_sql_query(query, conn)

In [51]:
ec_list['Type'] = np.where((ec_list['tier'] == 0) & (ec_list['parent'].isna()), 'Combined Center',
                            np.where((ec_list['tier'] == 0) & (~ec_list['parent'].isna()), 'Sub-Center',
                                     np.nan))

In [88]:
ec_list.set_index('employment_center_id')

Unnamed: 0_level_0,employment_center_name,tier,parent,Type
employment_center_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
55,Otay Mesa Brown Field,4,,
56,Otay Mesa East,4,,
57,Pacific Beach,3,,
58,Poway,4,,
59,Ramona,4,,
...,...,...,...,...
1004,Carslabd Palomar Airport Sub-Center: Melrose,0,4.0,Sub-Center
1005,Carslabd Palomar Airport Sub-Center: Palomar Oaks,0,4.0,Sub-Center
1006,Downtown Sub-Center: Bankers Hill,0,18.0,Sub-Center
1007,Downtown Sub-Center: Columbia,0,18.0,Sub-Center


# JT00 Work

In [2]:
# Staging Data (SQL)
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=ws;'
                    'Trusted_Connection=yes;')

In [25]:
def download_and_clean_employment_center_data(conn):
    query = """
            -- EC List Info 
            SELECT [employment_center_id],
                [employment_center_name],
                [tier],
                [parent]
            FROM [ws].[employment_centers].[dim_employment_center_2.0]
            """
    ec_list =  pd.read_sql_query(query, conn)

    ec_list['Type'] = np.where((ec_list['tier'] == 0) & (ec_list['parent'].isna()), 'Combined Center',
                            np.where((ec_list['tier'] == 0) & (~ec_list['parent'].isna()), 'Sub-Center',
                                     np.nan))
    ec_list['Type'].replace('nan', np.nan, inplace=True)
    return ec_list.set_index('employment_center_id').sort_index(ascending=True)

In [8]:
def avg_wage_grab(conn):
    query = """
        DECLARE	@return_value int

        EXEC	@return_value = [employment_centers].[sp_avg_wage_by_center]
                @release_id = 2
        """

    return pd.read_sql_query(query, conn)[['employment_center', 'avg_wage']].set_index('employment_center')

In [4]:
def download_jt_data(JT_Val, conn):
    """
    Downloads the proper JTtable
    Input opitons are: 'JT00' and 'JT02' 
    """
    query = f"""
    DECLARE	@return_value int

    EXEC	@return_value = [employment_centers].[sp_wac_characteristics_by_center_{JT_Val}]
            @release_id = 2
    """

    return  pd.read_sql_query(query, conn)

In [5]:
def build_df_and_find_percentages(SQL_Data, is_all):
    # Cleaning 
    sql_prep = copy.deepcopy(SQL_Data)
    sql_prep = sql_prep.drop(['tier', 'employment_center_name'], axis = 1)
    sql_prep = sql_prep.set_index('employment_center_id')

    # Drop columns that sum to zero, should only effect JT00
    to_drop = sql_prep.columns[(sql_prep == 0).all()].tolist()
    sql_prep = sql_prep.drop(columns=to_drop)

    # Grab and delete total 
    total = sql_prep['jobs']
    sql_prep = sql_prep.drop('jobs', axis = 1)

    output_df = pd.DataFrame(index=sql_prep.index)

    # Check and set status of employment level that we are looking at 
    if is_all:
        employment_status = 'All_Jobs'
    else:
        employment_status = 'Priv_Jobs'

    # Set the core columns 
    for col in sql_prep.columns: 
        output_df[f"{employment_status}_{col}"] = sql_prep[col]

    # Calculate the remaining columns 
    for col in sql_prep.columns:
        if 'educ30' in col:
            output_df[f"{employment_status}_%_{col}"] = round(sql_prep[col] / (total-sql_prep['age_lt30']) , 4) * 100
        else: 
            output_df[f"{employment_status}_%_{col}"] = round(sql_prep[col] / total, 4) * 100

    output_df.insert(0, f'{employment_status}_Total', total)

    output_df = output_df.sort_index(ascending=True)

    return output_df

In [13]:
def main():
    ec_list = download_and_clean_employment_center_data(conn)
    JT00 = download_jt_data(JT_Val='JT00', conn=conn)
    print('JT00 is downloaded')
    JT02 = download_jt_data(JT_Val='JT02', conn=conn)
    print('JT02 is downloaded')
    averge_wage = avg_wage_grab(conn)
    print('Average wage downloaded')

    print('Now Processing JT00 and JT02 Data')
    processed_JT00 = build_df_and_find_percentages(JT00, is_all=True)
    processed_JT02 = build_df_and_find_percentages(JT02, is_all=False)

    output = ec_list.merge(processed_JT00, how='left', left_index=True, right_index=True).merge(processed_JT02, how='left', left_index=True, right_index=True).merge(averge_wage, how='left', left_index=True, right_index=True)

    output.fillna(0, inplace=True)

    return output
    

In [14]:
output = main()
output

JT00 is downloaded
JT02 is downloaded
Average wage downloaded
Now Processing JT00 and JT02 Data


Unnamed: 0_level_0,employment_center_name,tier,parent,Type,All_Jobs_Total,All_Jobs_male,All_Jobs_female,All_Jobs_age_lt30,All_Jobs_age_30to54,All_Jobs_age_55plus,...,Priv_Jobs_%_jobs_firms_age_2_to_3,Priv_Jobs_%_jobs_firms_age_4_to_5,Priv_Jobs_%_jobs_firms_age_6_to_10,Priv_Jobs_%_jobs_firms_age_11_plus,Priv_Jobs_%_jobs_firms_size_0_to_19,Priv_Jobs_%_jobs_firms_size_20_to_49,Priv_Jobs_%_jobs_firms_size_50_to_249,Priv_Jobs_%_jobs_firms_size_250_to_499,Priv_Jobs_%_jobs_firms_size_500_plus,avg_wage
employment_center_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,San Diego Airport,3,0.0,,18782.0,10570.0,8212.0,4508.0,10083.0,4191.0,...,5.69,3.59,9.18,79.01,15.87,12.97,21.67,3.89,45.60,49360
2,Alpine,4,0.0,,1230.0,541.0,689.0,324.0,630.0,276.0,...,8.01,5.40,18.03,66.81,34.06,24.48,29.70,0.26,11.50,50038
3,Barrio Logan,4,0.0,,4230.0,2555.0,1675.0,934.0,2348.0,948.0,...,5.59,2.79,5.36,82.96,20.41,17.21,11.02,2.94,48.41,52719
4,Carlsbad Palomar Airport,2,0.0,,31627.0,17763.0,13864.0,6369.0,18450.0,6808.0,...,4.87,2.63,9.41,80.40,15.28,11.92,20.31,7.74,44.76,86672
5,Carlsbad State Beach,3,0.0,,11510.0,6049.0,5461.0,3822.0,5458.0,2230.0,...,1.58,3.00,9.02,85.75,9.16,5.98,14.67,10.63,59.57,58862
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1039,Sorrento Valley West Sub-Center: Scripps_Research,0,71.0,Sub-Center,14210.0,7067.0,7143.0,1801.0,9168.0,3241.0,...,2.25,3.05,7.77,83.40,4.48,5.10,8.30,7.29,74.82,107258
1040,Sorrento Valley West Sub-Center: UCSD,0,71.0,Sub-Center,22865.0,9702.0,13163.0,2923.0,14823.0,5119.0,...,2.26,9.15,6.55,79.55,16.50,10.85,44.41,0.90,27.34,111149
1041,Sorrento Valley West Sub-Center: UCSD East Hea...,0,71.0,Sub-Center,18304.0,8340.0,9964.0,2738.0,11790.0,3776.0,...,0.39,1.24,9.74,88.03,4.32,1.33,9.83,7.62,76.90,86679
1042,Sorrento Valley West Sub-Center: UTC Office,0,71.0,Sub-Center,14171.0,7774.0,6397.0,3149.0,8578.0,2444.0,...,4.83,6.67,10.56,75.61,12.35,8.45,17.54,2.22,59.44,124000


In [21]:
output['Type'][2]

'nan'