# Demographic Data by Employment Centers in 2022
#### Purpose:
This document shows the process of building five tables containing demographic data by employment centers about the people who lived in each center in 2022.

#### Data Source:
 All data come from the following sources:\
$\;\;\;\;\;\;$ 1. "J:\DataScience\DSEconProdDessem\EC2\Emp_Counts_2022\Inputs\EC2_to_MGRA15_XRef.csv" \
$\;\;\;\;\;\;$ 2. "J:\DataScience\DSEconProdDessem\EC2\Emp_Counts_2022\Inputs\sub_centers_mgra15.csv" \
$\;\;\;\;\;\;$ The following tables are in DDAM SQL Server: \
$\;\;\;\;\;\;$ 3. [demographic_warehouse].[fact].[age] \
$\;\;\;\;\;\;$ 4. [demographic_warehouse].[dim].[age_group] \
$\;\;\;\;\;\;$ 5. [demographic_warehouse].[dim].[mgra_denormalize] \
$\;\;\;\;\;\;$ 6. [demographic_warehouse].[fact].[ethnicity] \
$\;\;\;\;\;\;$ 7.  [demographic_warehouse].[dim].[ethnicity] \
$\;\;\;\;\;\;$ 8. [demographic_warehouse].[fact].[population] \
$\;\;\;\;\;\;$ 9. [demographic_warehouse].[dim].[housing_type] \
$\;\;\;\;\;\;$ 10. [demographic_warehouse].[fact].[housing] \
$\;\;\;\;\;\;$ 11. [demographic_warehouse].[dim].[structure_type] \
$\;\;\;\;\;\;$ 12. [ws].[mgra_base].[sr15_2022_01]


#### Transformations being performed:
At a high level, the transformation process for this project combines all the datasets together. Finally, the data is broken down by the employment centers in San Diego County. 

#### Location of Outputs:
"J:\DataScience\DSEconProdDessem\EC2\Dem_Data by_EC yr_2022\Outputs"

#### Author: 
Navid Hedayati (navid.hedayati@sandag.org)

#### Date Created 
6/16/2023

In [3]:
# Needed libraries
import pandas as pd
import numpy as np
import pyodbc
import cursor
import geopandas as gpd
import sqlalchemy as sql
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Download the data

In [4]:
# Open an ODBC connection to the DDAM SQL Server (Trusted_Connection=yes,
# so no credentials are embedded in the notebook).
ddam_conn_str = (
    'Driver={SQL Server};'
    'Server=DDAMWSQL16.sandag.org;'
    'Trusted_Connection=yes;'
)
conn_DDAM = pyodbc.connect(ddam_conn_str)

# Cursor bound to the same connection
cursor_DDAM = conn_DDAM.cursor()

In [5]:
# Query from DDAM sql server
# Pulls (mgra, age_group_id, population) from fact.age, joined to
# dim.mgra_denormalize on mgra_id so each row carries the MGRA number.
# The result is restricted to datasource_id = 46 and yr_id = 2022.
# NOTE(review): datasource_id 46 is a hard-coded magic number; confirm it is
# the intended data vintage before re-running.
age_mgra = pd.read_sql_query('''   
--AGE
  select mgra.mgra,age.age_group_id,age.population
  from  [demographic_warehouse].[fact].[age] as age
  join [demographic_warehouse].[dim].[mgra_denormalize] as mgra
  on age.mgra_id =mgra.mgra_id
  where age.datasource_id = 46 and age.yr_id = 2022; ''',conn_DDAM)

In [6]:
# Query from DDAM sql server
# Pulls (mgra, ethnicity_id, population) from fact.ethnicity, joined to
# dim.mgra_denormalize on mgra_id so each row carries the MGRA number.
# Uses the same filter as the age query: datasource_id = 46, yr_id = 2022.
# NOTE(review): datasource_id 46 is a hard-coded magic number; confirm it is
# the intended data vintage before re-running.
eth_mgra = pd.read_sql_query(''' 
--ETHNICITY
   select mgra.mgra,ethnicity.ethnicity_id,ethnicity.population
  from  [demographic_warehouse].[fact].[ethnicity] as ethnicity
  join [demographic_warehouse].[dim].[mgra_denormalize] as mgra
  on ethnicity.mgra_id = mgra.mgra_id
  where ethnicity.datasource_id = 46 and ethnicity.yr_id = 2022;  ''',conn_DDAM)

In [7]:
# Query from DDAM sql server
# Loads the full age-group dimension table; the next cell keeps only the
# age_group_id and name columns.
age_dim = pd.read_sql_query('''  SELECT *
  FROM [demographic_warehouse].[dim].[age_group] ''',conn_DDAM)

In [8]:
# Reduce the age-group dimension to its id and label, ordered by id
age_dim = (
    age_dim
    .loc[:, ['age_group_id', 'name']]
    .sort_values(by='age_group_id')
)
age_dim.head()

Unnamed: 0,age_group_id,name
0,1,Under 5
1,2,5 to 9
2,3,10 to 14
3,4,15 to 17
4,5,18 and 19


In [9]:
# Query from DDAM sql server
# Loads the full ethnicity dimension table; the next cell keeps only the
# ethnicity_id, code and long_name columns.
eth_dim = pd.read_sql_query(''' SELECT *
  FROM [demographic_warehouse].[dim].[ethnicity] ''',conn_DDAM)

In [10]:
# Reduce the ethnicity dimension to id, short code and long label, ordered by id
eth_dim = (
    eth_dim
    .loc[:, ['ethnicity_id', 'code', 'long_name']]
    .sort_values(by='ethnicity_id')
)
eth_dim

Unnamed: 0,ethnicity_id,code,long_name
0,1,hisp,Hispanic
1,2,nhw,"Non-Hispanic, White"
2,3,nhb,"Non-Hispanic, Black"
3,4,nhai,"Non-Hispanic, American Indian or Alaska Native"
4,5,nha,"Non-Hispanic, Asian"
5,6,nhh,"Non-Hispanic, Hawaiian or Pacific Islander"
6,7,nho,"Non-Hispanic, Other"
7,8,nh2m,"Non-Hispanic, Two or More Races"


In [12]:
# Load the two MGRA crosswalks: employment centers and their sub-centers.
# Each gets one constant column (Parent = 0 for centers, Tier = 0 for
# sub-centers).
# NOTE(review): these paths (EC2_&_MGRA15_XRef\...) differ from the input paths
# listed under "Data Source" at the top of the notebook
# (Emp_Counts_2022\Inputs\...) - confirm which location is current.
ec_mgra = pd.read_csv(
    r"J:\DataScience\DSEconProdDessem\EC2\EC2_&_MGRA15_XRef\ec2_mgra15.csv"
).assign(Parent=0)

sub_ctrs_mgra = pd.read_csv(
    r"J:\DataScience\DSEconProdDessem\EC2\EC2_&_MGRA15_XRef\sub_centers_mgra15.csv"
).assign(Tier=0)

# Population by Age and by the Employment Centers in 2022


In [15]:
# Attach the age-group label to every MGRA/age-group population row
pop_age = age_mgra.merge(age_dim, how="left", on="age_group_id")

In [16]:
# Fix the column order: MGRA, age-group id, age-group label, population
pop_age_cols = ['mgra', 'age_group_id', 'name', 'population']
pop_age = pop_age[pop_age_cols]
pop_age.head()

Unnamed: 0,mgra,age_group_id,name,population
0,40,1,Under 5,1
1,40,2,5 to 9,3
2,40,3,10 to 14,4
3,40,4,15 to 17,3
4,40,5,18 and 19,3


In [21]:
# Tag each MGRA/age row with its employment center (left join keeps every
# population row; rows with no matching center get NaN center fields).
# NOTE(review): this joins on a lowercase 'mgra15' column, while later cells
# list ec_mgra's columns as ['EC_ID', 'EC_Name', 'Tier', 'MGRA15', 'Parent']
# and join on 'MGRA15'. The crosswalk schema appears to have changed between
# runs - confirm the column name before a Restart & Run All.
age_ec = pop_age.merge(
    ec_mgra,
    how="left",
    left_on="mgra",
    right_on="mgra15",
)

In [22]:
# Tag each MGRA/age row with its employment sub-center (left join keeps every
# population row).
age_sc = pop_age.merge(
    sub_ctrs_mgra,
    how="left",
    left_on="mgra",
    right_on="MGRA15",
)

In [25]:
age_ec.columns

Index(['mgra', 'age_group_id', 'name', 'population', 'mgra15', 'ec_id',
       'ec_name', 'Parent'],
      dtype='object')

In [26]:
# Keep the center identifiers plus age-group fields, ordered by age group
age_ec = (
    age_ec
    .loc[:, ['ec_id','ec_name','Parent','age_group_id','name','population']]
    .sort_values(by='age_group_id')
)
age_ec.head(3)

Unnamed: 0,ec_id,ec_name,Parent,age_group_id,name,population
0,,,0,1,Under 5,1
318954,,,0,1,Under 5,0
318934,,,0,1,Under 5,0


In [27]:
# Keep the sub-center identifiers plus age-group fields, ordered by age group
age_sc = (
    age_sc
    .loc[:, ['EC_ID','EC_Name','Tier','Parent','age_group_id','name','population']]
    .sort_values(by='age_group_id')
)
age_sc.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,age_group_id,name,population
0,,,0,,1,Under 5,1
318954,,,0,,1,Under 5,0
318934,,,0,,1,Under 5,0


In [28]:
# Stack the center-level and sub-center-level rows into one long table.
# NOTE(review): concat aligns on column names, so the two inputs must use the
# same identifier column names; the displayed output shows both 'ec_id' and
# 'EC_ID' columns, i.e. they did not line up in that run.
age = pd.concat([age_ec,age_sc])

In [30]:
age

Unnamed: 0,ec_id,ec_name,Parent,age_group_id,name,population,EC_ID,EC_Name,Tier
0,,,0.0,1,Under 5,1,,,
318954,,,0.0,1,Under 5,0,,,
318934,,,0.0,1,Under 5,0,,,
318914,,,0.0,1,Under 5,0,,,
51985,,,0.0,1,Under 5,0,,,
...,...,...,...,...,...,...,...,...,...
246959,,,,20,85 and Older,0,,,0.0
246939,,,,20,85 and Older,3,,,0.0
246919,,,,20,85 and Older,3,,,0.0
74579,,,,20,85 and Older,2,,,0.0


In [29]:
age.shape

(972840, 9)

In [909]:
# Split the long table into one frame per age group (age_group_id 1 through 20)
(age_1, age_2, age_3, age_4, age_5,
 age_6, age_7, age_8, age_9, age_10,
 age_11, age_12, age_13, age_14, age_15,
 age_16, age_17, age_18, age_19, age_20) = (
    age[age['age_group_id'] == group_id] for group_id in range(1, 21)
)

In [910]:
age_1.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,age_group_id,name,population
0,,,,,1,Under 5,1
452659,,,,,1,Under 5,0
145185,14.0,Chula Vista Southwest,3.0,0.0,1,Under 5,31
145205,,,,,1,Under 5,0
145225,71.0,Sorrento Valley West,1.0,0.0,1,Under 5,33


In [911]:
# Sum population per center within each age-group frame. Passing the set
# {'sum'} to agg produces two-level columns (population, sum), which a later
# cell flattens.
(age_1, age_2, age_3, age_4, age_5,
 age_6, age_7, age_8, age_9, age_10,
 age_11, age_12, age_13, age_14, age_15,
 age_16, age_17, age_18, age_19, age_20) = (
    frame.groupby(['EC_ID','EC_Name','Tier','Parent','age_group_id','name']).agg({'sum'})
    for frame in (age_1, age_2, age_3, age_4, age_5,
                  age_6, age_7, age_8, age_9, age_10,
                  age_11, age_12, age_13, age_14, age_15,
                  age_16, age_17, age_18, age_19, age_20)
)

In [912]:
age_1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,sum
EC_ID,EC_Name,Tier,Parent,age_group_id,name,Unnamed: 6_level_2
1.0,San Diego Airport,3.0,0.0,1,Under 5,318
2.0,Alpine,4.0,0.0,1,Under 5,247
3.0,Barrio Logan,4.0,0.0,1,Under 5,409
4.0,Carlsbad Palomar Airport,2.0,0.0,1,Under 5,27
5.0,Carlsbad State Beach,3.0,0.0,1,Under 5,46


In [913]:
age_20.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,sum
EC_ID,EC_Name,Tier,Parent,age_group_id,name,Unnamed: 6_level_2
1.0,San Diego Airport,3.0,0.0,20,85 and Older,121
2.0,Alpine,4.0,0.0,20,85 and Older,83
3.0,Barrio Logan,4.0,0.0,20,85 and Older,116
4.0,Carlsbad Palomar Airport,2.0,0.0,20,85 and Older,27
5.0,Carlsbad State Beach,3.0,0.0,20,85 and Older,15


In [914]:
age.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,age_group_id,name,population
0,,,,,1,Under 5,1
452659,,,,,1,Under 5,0
145185,14.0,Chula Vista Southwest,3.0,0.0,1,Under 5,31


In [915]:
# Center identifiers plus population only; summed in the next cell to get the
# total population per center across all age groups
age_test = age.loc[:, ['EC_ID','EC_Name','Tier','Parent','population']]
age_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,population
0,,,,,1
452659,,,,,0
145185,14.0,Chula Vista Southwest,3.0,0.0,31


In [916]:
# Total population per center (all age groups combined)
center_keys = ['EC_ID', 'EC_Name', 'Tier', 'Parent']
age_test = age_test.groupby(center_keys).agg({'sum'})
age_test.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,sum
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_2
1.0,San Diego Airport,3.0,0.0,5765
2.0,Alpine,4.0,0.0,4364
3.0,Barrio Logan,4.0,0.0,5066


In [917]:
# Flatten the two-level (population, sum) column index left by agg({'sum'})
# down to its first level on every per-age-group frame
for _frame in (age_1, age_2, age_3, age_4, age_5,
               age_6, age_7, age_8, age_9, age_10,
               age_11, age_12, age_13, age_14, age_15,
               age_16, age_17, age_18, age_19, age_20):
    _frame.columns = _frame.columns.levels[0]

In [918]:
# Flatten the two-level (population, sum) column index to its first level
age_test.columns = age_test.columns.levels[0]

In [919]:
age_10.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,population
EC_ID,EC_Name,Tier,Parent,age_group_id,name,Unnamed: 6_level_1
1.0,San Diego Airport,3.0,0.0,10,40 to 44,487
2.0,Alpine,4.0,0.0,10,40 to 44,243
3.0,Barrio Logan,4.0,0.0,10,40 to 44,413


In [920]:
# Move the groupby keys back from the index into ordinary columns
(age_1, age_2, age_3, age_4, age_5,
 age_6, age_7, age_8, age_9, age_10,
 age_11, age_12, age_13, age_14, age_15,
 age_16, age_17, age_18, age_19, age_20) = (
    frame.reset_index()
    for frame in (age_1, age_2, age_3, age_4, age_5,
                  age_6, age_7, age_8, age_9, age_10,
                  age_11, age_12, age_13, age_14, age_15,
                  age_16, age_17, age_18, age_19, age_20)
)

In [921]:
# Move the groupby keys (EC_ID, EC_Name, Tier, Parent) back into columns
age_test = age_test.reset_index()

In [922]:
age_20.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,age_group_id,name,population
0,1.0,San Diego Airport,3.0,0.0,20,85 and Older,121
1,2.0,Alpine,4.0,0.0,20,85 and Older,83
2,3.0,Barrio Logan,4.0,0.0,20,85 and Older,116


In [923]:
age_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,population
0,1.0,San Diego Airport,3.0,0.0,5765
1,2.0,Alpine,4.0,0.0,4364
2,3.0,Barrio Logan,4.0,0.0,5066


In [924]:
# Rename each frame's 'population' column to its age-band label, pairing the
# frames with the labels in age_group_id order
(age_1, age_2, age_3, age_4, age_5,
 age_6, age_7, age_8, age_9, age_10,
 age_11, age_12, age_13, age_14, age_15,
 age_16, age_17, age_18, age_19, age_20) = (
    frame.rename(columns={'population': band_label})
    for frame, band_label in zip(
        (age_1, age_2, age_3, age_4, age_5,
         age_6, age_7, age_8, age_9, age_10,
         age_11, age_12, age_13, age_14, age_15,
         age_16, age_17, age_18, age_19, age_20),
        ('Pop_0-4_Yrs', 'Pop_5-9_Yrs', 'Pop_10-14_Yrs', 'Pop_15-17_Yrs',
         'Pop_18-19_Yrs', 'Pop_20-24_Yrs', 'Pop_25-29_Yrs', 'Pop_30-34_Yrs',
         'Pop_35-39_Yrs', 'Pop_40-44_Yrs', 'Pop_45-49_Yrs', 'Pop_50-54_Yrs',
         'Pop_55-59_Yrs', 'Pop_60-61_Yrs', 'Pop_62-64_Yrs', 'Pop_65-69_Yrs',
         'Pop_70-74_Yrs', 'Pop_75-79_Yrs', 'Pop_80-84_Yrs', 'Pop_85+_Yrs'),
    )
)

In [925]:
age_2.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,age_group_id,name,Pop_5-9_Yrs
0,1.0,San Diego Airport,3.0,0.0,2,5 to 9,367
1,2.0,Alpine,4.0,0.0,2,5 to 9,251
2,3.0,Barrio Logan,4.0,0.0,2,5 to 9,428


In [926]:
# The age band is now encoded in the column name, so drop the id/label columns
(age_1, age_2, age_3, age_4, age_5,
 age_6, age_7, age_8, age_9, age_10,
 age_11, age_12, age_13, age_14, age_15,
 age_16, age_17, age_18, age_19, age_20) = (
    frame.drop(columns = ['age_group_id','name'])
    for frame in (age_1, age_2, age_3, age_4, age_5,
                  age_6, age_7, age_8, age_9, age_10,
                  age_11, age_12, age_13, age_14, age_15,
                  age_16, age_17, age_18, age_19, age_20)
)

In [927]:
age_19.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_80-84_Yrs
0,1.0,San Diego Airport,3.0,0.0,140
1,2.0,Alpine,4.0,0.0,122
2,3.0,Barrio Logan,4.0,0.0,62


In [928]:
# Join the twenty per-age-group frames into one wide table keyed by EC_ID
import functools as ft

dfs = [
    age_1, age_2, age_3, age_4, age_5,
    age_6, age_7, age_8, age_9, age_10,
    age_11, age_12, age_13, age_14, age_15,
    age_16, age_17, age_18, age_19, age_20,
]

# NOTE(review): only EC_ID is used as the join key, so EC_Name/Tier/Parent from
# every frame are carried along and suffixed, leaving many columns that share
# the names EC_Name_x / Tier_x / Parent_x (see the column listing further down).
# Joining on all four identifier columns would avoid this, but the following
# cells select those '_x' columns by label and position and would have to be
# changed at the same time.
age = ft.reduce(lambda acc, nxt: acc.merge(nxt, on='EC_ID'), dfs)

  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  age = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)


In [929]:
# Keep the center identifiers and the twenty age-band counts. Because the
# preceding join leaves several columns sharing each '_x' name, selecting by
# label returns every duplicate; the positional selection below trims them.
center_cols = ['EC_ID', 'EC_Name_x', 'Tier_x', 'Parent_x']
age_band_cols = [
    'Pop_0-4_Yrs', 'Pop_5-9_Yrs', 'Pop_10-14_Yrs', 'Pop_15-17_Yrs',
    'Pop_18-19_Yrs', 'Pop_20-24_Yrs', 'Pop_25-29_Yrs', 'Pop_30-34_Yrs',
    'Pop_35-39_Yrs', 'Pop_40-44_Yrs', 'Pop_45-49_Yrs', 'Pop_50-54_Yrs',
    'Pop_55-59_Yrs', 'Pop_60-61_Yrs', 'Pop_62-64_Yrs', 'Pop_65-69_Yrs',
    'Pop_70-74_Yrs', 'Pop_75-79_Yrs', 'Pop_80-84_Yrs', 'Pop_85+_Yrs',
]
age = age[center_cols + age_band_cols]

age.head()

Unnamed: 0,EC_ID,EC_Name_x,EC_Name_x.1,EC_Name_x.2,EC_Name_x.3,EC_Name_x.4,EC_Name_x.5,EC_Name_x.6,EC_Name_x.7,EC_Name_x.8,...,Pop_45-49_Yrs,Pop_50-54_Yrs,Pop_55-59_Yrs,Pop_60-61_Yrs,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs
0,1.0,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,...,353,277,259,89,132,352,335,249,140,121
1,2.0,Alpine,Alpine,Alpine,Alpine,Alpine,Alpine,Alpine,Alpine,Alpine,...,323,298,224,80,147,317,243,242,122,83
2,3.0,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,...,378,303,211,75,135,197,155,77,62,116
3,4.0,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,...,132,77,62,32,21,39,23,31,23,27
4,5.0,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,...,56,33,76,48,29,36,40,51,16,15


In [930]:
list(age)

['EC_ID',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Pop_0-4_Yrs',
 'Pop_5-9_Yrs',
 'Pop_10-14_Yrs',
 'Pop_15-17_Yrs',
 'Pop_18-19_Yrs',
 'Pop_20-24_Yrs',
 'Pop_25-29_Yrs',
 'Pop_30-34_Yrs',
 'Pop_35-39_Yrs',
 'Pop_40-44_Yrs',
 'Pop_45-49_Yrs',
 'Pop_50-54_Yrs',
 'Pop_55-59_Yrs',
 'Pop_60-61_Yrs',
 'Pop_62-64_Yrs',
 'Pop_65-69_Yrs',
 'Pop_70-74_Yrs',
 'Pop_75-79_Yrs',
 'Pop_80-84_Yrs',
 'Pop_85+_Yrs']

In [931]:
# Keep one copy of each identifier column by position: 0 = EC_ID, 1 = first
# EC_Name_x, 11 = first Tier_x, 21 = first Parent_x; positions 31-50 are the
# twenty age-band counts (per the column listing above).
keep_positions = [0, 1, 11, 21] + list(range(31, 51))
age = age.iloc[:, keep_positions]
age.head()

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,Pop_45-49_Yrs,Pop_50-54_Yrs,Pop_55-59_Yrs,Pop_60-61_Yrs,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs
0,1.0,San Diego Airport,3.0,0.0,318,367,263,156,94,374,...,353,277,259,89,132,352,335,249,140,121
1,2.0,Alpine,4.0,0.0,247,251,331,200,107,218,...,323,298,224,80,147,317,243,242,122,83
2,3.0,Barrio Logan,4.0,0.0,409,428,262,167,160,328,...,378,303,211,75,135,197,155,77,62,116
3,4.0,Carlsbad Palomar Airport,2.0,0.0,27,71,77,67,35,59,...,132,77,62,32,21,39,23,31,23,27
4,5.0,Carlsbad State Beach,3.0,0.0,46,65,51,25,14,26,...,56,33,76,48,29,36,40,51,16,15


In [932]:
# Strip the merge suffix from the identifier columns
suffix_fix = {
    'EC_Name_x': 'EC_Name',
    'Tier_x': 'Tier',
    'Parent_x': 'Parent',
}
age = age.rename(columns=suffix_fix)
age.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,Pop_45-49_Yrs,Pop_50-54_Yrs,Pop_55-59_Yrs,Pop_60-61_Yrs,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs
0,1.0,San Diego Airport,3.0,0.0,318,367,263,156,94,374,...,353,277,259,89,132,352,335,249,140,121
1,2.0,Alpine,4.0,0.0,247,251,331,200,107,218,...,323,298,224,80,147,317,243,242,122,83
2,3.0,Barrio Logan,4.0,0.0,409,428,262,167,160,328,...,378,303,211,75,135,197,155,77,62,116


In [933]:
# EC_ID arrives as float (1.0, 2.0, ...); cast it to integer
age = age.astype({'EC_ID': 'int'})
age.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,Pop_45-49_Yrs,Pop_50-54_Yrs,Pop_55-59_Yrs,Pop_60-61_Yrs,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs
0,1,San Diego Airport,3.0,0.0,318,367,263,156,94,374,...,353,277,259,89,132,352,335,249,140,121
1,2,Alpine,4.0,0.0,247,251,331,200,107,218,...,323,298,224,80,147,317,243,242,122,83
2,3,Barrio Logan,4.0,0.0,409,428,262,167,160,328,...,378,303,211,75,135,197,155,77,62,116


In [934]:
age.shape

(145, 24)

In [935]:
# Cast EC_ID to integer so it matches the wide table's key, and label the
# summed population as the per-center total
age_test = (
    age_test
    .astype({'EC_ID': 'int'})
    .rename(columns={'population': 'Pop_tot'})
)

age_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_tot
0,1,San Diego Airport,3.0,0.0,5765
1,2,Alpine,4.0,0.0,4364
2,3,Barrio Logan,4.0,0.0,5066


In [936]:
# Attach the per-center total population. Both frames carry EC_Name/Tier/Parent,
# so those come back suffixed _x/_y and are cleaned up in the next cell.
age = age.merge(age_test, how="left", on="EC_ID")
age.head(3)

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs,EC_Name_y,Tier_y,Parent_y,Pop_tot
0,1,San Diego Airport,3.0,0.0,318,367,263,156,94,374,...,132,352,335,249,140,121,San Diego Airport,3.0,0.0,5765
1,2,Alpine,4.0,0.0,247,251,331,200,107,218,...,147,317,243,242,122,83,Alpine,4.0,0.0,4364
2,3,Barrio Logan,4.0,0.0,409,428,262,167,160,328,...,135,197,155,77,62,116,Barrio Logan,4.0,0.0,5066


In [937]:
# Discard the duplicate identifier columns from the totals frame and restore
# the unsuffixed names on the ones that remain
age = (
    age
    .drop(columns=['EC_Name_y', 'Tier_y', 'Parent_y'])
    .rename(columns={'EC_Name_x': 'EC_Name', 'Tier_x': 'Tier', 'Parent_x': 'Parent'})
)
age.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,Pop_50-54_Yrs,Pop_55-59_Yrs,Pop_60-61_Yrs,Pop_62-64_Yrs,Pop_65-69_Yrs,Pop_70-74_Yrs,Pop_75-79_Yrs,Pop_80-84_Yrs,Pop_85+_Yrs,Pop_tot
0,1,San Diego Airport,3.0,0.0,318,367,263,156,94,374,...,277,259,89,132,352,335,249,140,121,5765
1,2,Alpine,4.0,0.0,247,251,331,200,107,218,...,298,224,80,147,317,243,242,122,83,4364
2,3,Barrio Logan,4.0,0.0,409,428,262,167,160,328,...,303,211,75,135,197,155,77,62,116,5066
3,4,Carlsbad Palomar Airport,2.0,0.0,27,71,77,67,35,59,...,77,62,32,21,39,23,31,23,27,1011
4,5,Carlsbad State Beach,3.0,0.0,46,65,51,25,14,26,...,33,76,48,29,36,40,51,16,15,831


In [938]:
def percent(p, total=None):
    """Return `p` as a percentage of `total`, rounded to one decimal place.

    Parameters
    ----------
    p : pandas.Series
        Counts to convert (for example one population column).
    total : pandas.Series, optional
        Denominator aligned with `p`. When omitted, the notebook-level
        ``age['Pop_tot']`` column is used, which is what the original
        one-argument form always did.

    Returns
    -------
    pandas.Series
        ``p / total * 100`` rounded to 1 decimal. Rows whose total is 0
        yield NaN (never inf).
    """
    if total is None:
        # Backward-compatible default: the age table's per-center total
        total = age['Pop_tot']

    # Mask zero totals so empty centers give NaN rather than inf
    safe_total = total.where(total != 0)
    value = ((p / safe_total) * 100).round(1)

    return value

In [939]:
# Add a '%'-prefixed share column for every age-band count, in band order
for pop_col in [
    'Pop_0-4_Yrs', 'Pop_5-9_Yrs', 'Pop_10-14_Yrs', 'Pop_15-17_Yrs',
    'Pop_18-19_Yrs', 'Pop_20-24_Yrs', 'Pop_25-29_Yrs', 'Pop_30-34_Yrs',
    'Pop_35-39_Yrs', 'Pop_40-44_Yrs', 'Pop_45-49_Yrs', 'Pop_50-54_Yrs',
    'Pop_55-59_Yrs', 'Pop_60-61_Yrs', 'Pop_62-64_Yrs', 'Pop_65-69_Yrs',
    'Pop_70-74_Yrs', 'Pop_75-79_Yrs', 'Pop_80-84_Yrs', 'Pop_85+_Yrs',
]:
    age['%' + pop_col] = percent(age[pop_col])

In [940]:
# The total was only needed for the percentages; drop it and store Tier and
# Parent as integers
age = (
    age
    .drop(columns='Pop_tot')
    .astype({'Tier': 'int', 'Parent': 'int'})
)
age.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,...,%Pop_45-49_Yrs,%Pop_50-54_Yrs,%Pop_55-59_Yrs,%Pop_60-61_Yrs,%Pop_62-64_Yrs,%Pop_65-69_Yrs,%Pop_70-74_Yrs,%Pop_75-79_Yrs,%Pop_80-84_Yrs,%Pop_85+_Yrs
0,1,San Diego Airport,3,0,318,367,263,156,94,374,...,6.1,4.8,4.5,1.5,2.3,6.1,5.8,4.3,2.4,2.1
1,2,Alpine,4,0,247,251,331,200,107,218,...,7.4,6.8,5.1,1.8,3.4,7.3,5.6,5.5,2.8,1.9
2,3,Barrio Logan,4,0,409,428,262,167,160,328,...,7.5,6.0,4.2,1.5,2.7,3.9,3.1,1.5,1.2,2.3


In [941]:
# Use the four center identifiers as the row index so only the count and
# percentage columns remain as data columns
age = age.set_index(['EC_ID','EC_Name','Tier','Parent'])
age.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,Pop_25-29_Yrs,Pop_30-34_Yrs,Pop_35-39_Yrs,Pop_40-44_Yrs,...,%Pop_45-49_Yrs,%Pop_50-54_Yrs,%Pop_55-59_Yrs,%Pop_60-61_Yrs,%Pop_62-64_Yrs,%Pop_65-69_Yrs,%Pop_70-74_Yrs,%Pop_75-79_Yrs,%Pop_80-84_Yrs,%Pop_85+_Yrs
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,San Diego Airport,3,0,318,367,263,156,94,374,420,434,545,487,...,6.1,4.8,4.5,1.5,2.3,6.1,5.8,4.3,2.4,2.1
2,Alpine,4,0,247,251,331,200,107,218,184,229,275,243,...,7.4,6.8,5.1,1.8,3.4,7.3,5.6,5.5,2.8,1.9
3,Barrio Logan,4,0,409,428,262,167,160,328,469,391,330,413,...,7.5,6.0,4.2,1.5,2.7,3.9,3.1,1.5,1.2,2.3


In [942]:
age.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pop_0-4_Yrs,Pop_5-9_Yrs,Pop_10-14_Yrs,Pop_15-17_Yrs,Pop_18-19_Yrs,Pop_20-24_Yrs,Pop_25-29_Yrs,Pop_30-34_Yrs,Pop_35-39_Yrs,Pop_40-44_Yrs,...,%Pop_45-49_Yrs,%Pop_50-54_Yrs,%Pop_55-59_Yrs,%Pop_60-61_Yrs,%Pop_62-64_Yrs,%Pop_65-69_Yrs,%Pop_70-74_Yrs,%Pop_75-79_Yrs,%Pop_80-84_Yrs,%Pop_85+_Yrs
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1041,Sorrento Valley West Sub-Center: UCSD East Health Care,0,71,122,50,32,30,492,1043,557,325,196,97,...,2.6,1.3,0.5,0.1,0.5,0.8,1.3,0.1,0.3,0.5
1042,Sorrento Valley West Sub-Center: UTC Office,0,71,48,30,23,5,3,220,247,118,39,50,...,7.1,4.5,1.8,0.9,2.2,4.1,3.7,3.6,1.2,1.6
1043,Sorrento Valley West Sub-Center: UTC Retail,0,71,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [422]:
#age.to_csv("pop_by_age.csv",sep = ",")

# Population by Race/Ethnicity and by the Employment Centers in 2022


In [943]:
eth_dim

Unnamed: 0,ethnicity_id,code,long_name
0,1,hisp,Hispanic
1,2,nhw,"Non-Hispanic, White"
2,3,nhb,"Non-Hispanic, Black"
3,4,nhai,"Non-Hispanic, American Indian or Alaska Native"
4,5,nha,"Non-Hispanic, Asian"
5,6,nhh,"Non-Hispanic, Hawaiian or Pacific Islander"
6,7,nho,"Non-Hispanic, Other"
7,8,nh2m,"Non-Hispanic, Two or More Races"


In [944]:
# Attach the ethnicity code and label to every MGRA/ethnicity population row
pop_eth = eth_mgra.merge(eth_dim, how="left", on="ethnicity_id")

In [945]:
list(pop_eth)

['mgra', 'ethnicity_id', 'population', 'code', 'long_name']

In [946]:
# Drop the short code; keep MGRA, ethnicity id, long label and population
pop_eth_cols = ['mgra', 'ethnicity_id', 'long_name', 'population']
pop_eth = pop_eth[pop_eth_cols]
pop_eth.head()

Unnamed: 0,mgra,ethnicity_id,long_name,population
0,1,1,Hispanic,120
1,1,2,"Non-Hispanic, White",73
2,1,3,"Non-Hispanic, Black",43
3,1,4,"Non-Hispanic, American Indian or Alaska Native",1
4,1,5,"Non-Hispanic, Asian",173


In [947]:
list(ec_mgra)

['EC_ID', 'EC_Name', 'Tier', 'MGRA15', 'Parent']

In [948]:
list(sub_ctrs_mgra)

['MGRA15', 'EC_ID', 'EC_Name', 'Parent', 'Tier']

In [949]:
# Tag each MGRA/ethnicity row with its employment center (left join keeps
# every population row; rows with no matching center get NaN center fields)
eth_ec = pop_eth.merge(
    ec_mgra,
    how="left",
    left_on="mgra",
    right_on="MGRA15",
)

In [950]:
# Tag each MGRA/ethnicity row with its employment sub-center (left join keeps
# every population row)
eth_sc = pop_eth.merge(
    sub_ctrs_mgra,
    how="left",
    left_on="mgra",
    right_on="MGRA15",
)

In [951]:
list(eth_ec)

['mgra',
 'ethnicity_id',
 'long_name',
 'population',
 'EC_ID',
 'EC_Name',
 'Tier',
 'MGRA15',
 'Parent']

In [952]:
list(eth_sc)

['mgra',
 'ethnicity_id',
 'long_name',
 'population',
 'MGRA15',
 'EC_ID',
 'EC_Name',
 'Parent',
 'Tier']

In [953]:
eth_ec.head(3)

Unnamed: 0,mgra,ethnicity_id,long_name,population,EC_ID,EC_Name,Tier,MGRA15,Parent
0,1,1,Hispanic,120,,,,,
1,1,2,"Non-Hispanic, White",73,,,,,
2,1,3,"Non-Hispanic, Black",43,,,,,


In [954]:
eth_sc.head(3)

Unnamed: 0,mgra,ethnicity_id,long_name,population,MGRA15,EC_ID,EC_Name,Parent,Tier
0,1,1,Hispanic,120,1,,,,0
1,1,2,"Non-Hispanic, White",73,1,,,,0
2,1,3,"Non-Hispanic, Black",43,1,,,,0


In [955]:
# Keep the center identifiers plus ethnicity fields, ordered by ethnicity id
eth_ec = (
    eth_ec
    .loc[:, ['EC_ID','EC_Name','Tier','Parent','ethnicity_id','long_name','population']]
    .sort_values(by='ethnicity_id')
)
eth_ec.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,population
0,,,,,1,Hispanic,120
166384,,,,,1,Hispanic,7
122496,,,,,1,Hispanic,0


In [956]:
# Keep the sub-center identifiers plus ethnicity fields, ordered by ethnicity id
eth_sc = (
    eth_sc
    .loc[:, ['EC_ID','EC_Name','Tier','Parent','ethnicity_id','long_name','population']]
    .sort_values(by='ethnicity_id')
)
eth_sc.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,population
0,,,0,,1,Hispanic,120
166384,,,0,,1,Hispanic,7
122496,,,0,,1,Hispanic,0


In [957]:
# Stack the center-level and sub-center-level rows into one long table
# (both inputs were reduced to the same seven columns in the cells above)
eth = pd.concat([eth_ec,eth_sc])

In [958]:
eth.shape

(389136, 7)

In [959]:
eth.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,population
0,,,,,1,Hispanic,120
166384,,,,,1,Hispanic,7
122496,,,,,1,Hispanic,0


In [960]:
eth_dim

Unnamed: 0,ethnicity_id,code,long_name
0,1,hisp,Hispanic
1,2,nhw,"Non-Hispanic, White"
2,3,nhb,"Non-Hispanic, Black"
3,4,nhai,"Non-Hispanic, American Indian or Alaska Native"
4,5,nha,"Non-Hispanic, Asian"
5,6,nhh,"Non-Hispanic, Hawaiian or Pacific Islander"
6,7,nho,"Non-Hispanic, Other"
7,8,nh2m,"Non-Hispanic, Two or More Races"


In [961]:
# Split the long table into one frame per ethnicity (ethnicity_id 1 through 8)
(eth_1, eth_2, eth_3, eth_4,
 eth_5, eth_6, eth_7, eth_8) = (
    eth[eth['ethnicity_id'] == eth_id] for eth_id in range(1, 9)
)

In [962]:
eth_1.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,population
0,,,,,1,Hispanic,120
166384,,,,,1,Hispanic,7
122496,,,,,1,Hispanic,0


In [963]:
# Sum population per center within each ethnicity frame. Passing the set
# {'sum'} to agg produces two-level columns (population, sum), which a later
# cell flattens.
(eth_1, eth_2, eth_3, eth_4,
 eth_5, eth_6, eth_7, eth_8) = (
    frame.groupby(['EC_ID','EC_Name','Tier','Parent','ethnicity_id','long_name']).agg({'sum'})
    for frame in (eth_1, eth_2, eth_3, eth_4,
                  eth_5, eth_6, eth_7, eth_8)
)

In [964]:
# Inspect one aggregated subset; note the (population, sum) two-level header.
eth_4.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,sum
EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,Unnamed: 6_level_2
1.0,San Diego Airport,3.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",21
2.0,Alpine,4.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",2
3.0,Barrio Logan,4.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",11


In [965]:
# Keep only the centre keys and population so a total across all ethnicities can be summed.
eth_test = eth.loc[:, ['EC_ID', 'EC_Name', 'Tier', 'Parent', 'population']]
eth_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,population
0,,,,,120
166384,,,,,7
122496,,,,,0


In [966]:
# Total population per employment centre (all ethnicities combined).
eth_test = (
    eth_test
    .groupby(['EC_ID', 'EC_Name', 'Tier', 'Parent'])
    .agg({'sum'})
)
eth_test.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,sum
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_2
1.0,San Diego Airport,3.0,0.0,5765
2.0,Alpine,4.0,0.0,4364
3.0,Barrio Logan,4.0,0.0,5066


In [967]:
# Collapse the two-level (column, aggregation) header left by .agg({'sum'}) to plain names.
# get_level_values(0) yields one label per column in column order. The previous
# `.levels[0]` is the sorted set of unique labels, which only lines up when exactly one
# value column exists and raises AttributeError if this cell is re-run on a flat header.
for _grouped in (eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8):
    _grouped.columns = _grouped.columns.get_level_values(0)

In [968]:
# Flatten the (population, sum) header to 'population'. get_level_values(0) keeps one label
# per column in order and is a no-op on an already-flat header, unlike `.levels[0]`.
eth_test.columns = eth_test.columns.get_level_values(0)

In [969]:
# Confirm the column header is now a single level.
eth_4.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,population
EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,Unnamed: 6_level_1
1.0,San Diego Airport,3.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",21
2.0,Alpine,4.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",2
3.0,Barrio Logan,4.0,0.0,4,"Non-Hispanic, American Indian or Alaska Native",11


In [970]:
# Move the group keys out of the index and back into ordinary columns.
eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8 = [
    grouped.reset_index()
    for grouped in (eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8)
]

In [971]:
# Turn the group keys back into columns.
eth_test = eth_test.reset_index(drop=False)

In [972]:
# Spot-check one per-ethnicity table after the index reset.
eth_2.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,population
0,1.0,San Diego Airport,3.0,0.0,2,"Non-Hispanic, White",3362
1,2.0,Alpine,4.0,0.0,2,"Non-Hispanic, White",3087
2,3.0,Barrio Logan,4.0,0.0,2,"Non-Hispanic, White",860


In [973]:
# Spot-check the per-centre totals after the index reset.
eth_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,population
0,1.0,San Diego Airport,3.0,0.0,5765
1,2.0,Alpine,4.0,0.0,4364
2,3.0,Barrio Logan,4.0,0.0,5066


In [975]:
# Give each subset's population column an ethnicity-specific name (order follows ids 1-8).
eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8 = [
    table.rename(columns={'population': label})
    for table, label in zip(
        (eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8),
        ('Pop_Hisp', 'Pop_Wht', 'Pop_Blk', 'Pop_Am-Ind', 'Pop_Asn', 'Pop_Pac-Isl', 'Pop_Oth', 'Pop_2+'),
    )
]

In [976]:
# Verify the renamed population column on one subset.
eth_6.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,ethnicity_id,long_name,Pop_Pac-Isl
0,1.0,San Diego Airport,3.0,0.0,6,"Non-Hispanic, Hawaiian or Pacific Islander",60
1,2.0,Alpine,4.0,0.0,6,"Non-Hispanic, Hawaiian or Pacific Islander",4
2,3.0,Barrio Logan,4.0,0.0,6,"Non-Hispanic, Hawaiian or Pacific Islander",18


In [977]:
# The ethnicity id/label are now encoded in the column name, so drop them from every subset.
eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8 = [
    table.drop(columns=['ethnicity_id', 'long_name'])
    for table in (eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8)
]

In [978]:
# Each subset should now hold only the centre keys plus its own population column.
eth_5.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_Asn
0,1.0,San Diego Airport,3.0,0.0,344
1,2.0,Alpine,4.0,0.0,221
2,3.0,Barrio Logan,4.0,0.0,168


In [979]:
# Join the eight per-ethnicity tables side by side on EC_ID (inner joins, left to right).
# NOTE(review): joining on EC_ID alone makes EC_Name/Tier/Parent collide, so the result
# carries repeated *_x / *_y columns that later cells strip by position; newer pandas
# releases may reject such duplicate suffixed columns - confirm before upgrading.
dfs = [eth_1, eth_2, eth_3, eth_4, eth_5, eth_6, eth_7, eth_8]

import functools as ft

eth = ft.reduce(ft.partial(pd.merge, on='EC_ID'), dfs)

  eth = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  eth = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)
  eth = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)


In [980]:
# List the merged column names; the repeated *_x / *_y entries come from the chained merges.
list(eth)

['EC_ID',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'Pop_Hisp',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'Pop_Wht',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'Pop_Blk',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'Pop_Am-Ind',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'Pop_Asn',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'Pop_Pac-Isl',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'Pop_Oth',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'Pop_2+']

In [981]:
# Keep the key, the *_x descriptor columns and the eight population columns.
# Label-based selection returns every column that shares a name, so duplicates remain.
eth = eth[
    ['EC_ID', 'EC_Name_x', 'Tier_x', 'Parent_x']
    + ['Pop_Hisp', 'Pop_Wht', 'Pop_Blk', 'Pop_Am-Ind', 'Pop_Asn', 'Pop_Pac-Isl', 'Pop_Oth', 'Pop_2+']
]

eth.head()

Unnamed: 0,EC_ID,EC_Name_x,EC_Name_x.1,EC_Name_x.2,EC_Name_x.3,Tier_x,Tier_x.1,Tier_x.2,Tier_x.3,Parent_x,...,Parent_x.1,Parent_x.2,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+
0,1.0,San Diego Airport,San Diego Airport,San Diego Airport,San Diego Airport,3.0,3.0,3.0,3.0,0.0,...,0.0,0.0,1325,3362,244,21,344,60,92,317
1,2.0,Alpine,Alpine,Alpine,Alpine,4.0,4.0,4.0,4.0,0.0,...,0.0,0.0,672,3087,108,2,221,4,0,270
2,3.0,Barrio Logan,Barrio Logan,Barrio Logan,Barrio Logan,4.0,4.0,4.0,4.0,0.0,...,0.0,0.0,3727,860,260,11,168,18,1,21
3,4.0,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,Carlsbad Palomar Airport,2.0,2.0,2.0,2.0,0.0,...,0.0,0.0,223,509,17,0,213,0,0,49
4,5.0,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,Carlsbad State Beach,3.0,3.0,3.0,3.0,0.0,...,0.0,0.0,113,531,3,0,125,3,0,56


In [982]:
# Column positions here determine the positional selection in the next step.
list(eth)

['EC_ID',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'EC_Name_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Tier_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Parent_x',
 'Pop_Hisp',
 'Pop_Wht',
 'Pop_Blk',
 'Pop_Am-Ind',
 'Pop_Asn',
 'Pop_Pac-Isl',
 'Pop_Oth',
 'Pop_2+']

In [983]:
# Keep the first copy of each duplicated descriptor column (positions 1, 5, 9)
# together with EC_ID (0) and the eight population columns (13-20).
eth = eth.iloc[:, [0, 1, 5, 9] + list(range(13, 21))]
eth.head()

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+
0,1.0,San Diego Airport,3.0,0.0,1325,3362,244,21,344,60,92,317
1,2.0,Alpine,4.0,0.0,672,3087,108,2,221,4,0,270
2,3.0,Barrio Logan,4.0,0.0,3727,860,260,11,168,18,1,21
3,4.0,Carlsbad Palomar Airport,2.0,0.0,223,509,17,0,213,0,0,49
4,5.0,Carlsbad State Beach,3.0,0.0,113,531,3,0,125,3,0,56


In [984]:
# Drop the merge suffix from the descriptor column names.
eth = eth.rename({'EC_Name_x': 'EC_Name', 'Tier_x': 'Tier', 'Parent_x': 'Parent'}, axis='columns')
eth.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+
0,1.0,San Diego Airport,3.0,0.0,1325,3362,244,21,344,60,92,317
1,2.0,Alpine,4.0,0.0,672,3087,108,2,221,4,0,270
2,3.0,Barrio Logan,4.0,0.0,3727,860,260,11,168,18,1,21


In [985]:
# EC_ID arrives as float (1.0, 2.0, ...); store it as an integer.
eth = eth.astype({'EC_ID': 'int'})
eth.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+
0,1,San Diego Airport,3.0,0.0,1325,3362,244,21,344,60,92,317
1,2,Alpine,4.0,0.0,672,3087,108,2,221,4,0,270
2,3,Barrio Logan,4.0,0.0,3727,860,260,11,168,18,1,21


In [986]:
# Sanity check: one row per centre and 12 columns expected at this point.
eth.shape

(145, 12)

In [987]:
# Align the totals table with `eth`: integer EC_ID and a distinct name for the total column.
eth_test = (
    eth_test
    .astype({'EC_ID': 'int'})
    .rename(columns={'population': 'Pop_tot'})
)

eth_test.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_tot
0,1,San Diego Airport,3.0,0.0,5765
1,2,Alpine,4.0,0.0,4364
2,3,Barrio Logan,4.0,0.0,5066


In [988]:
# Attach the per-centre total; the shared descriptor columns pick up _x / _y suffixes.
eth = eth.merge(eth_test, how="left", on="EC_ID")
eth.head(3)

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+,EC_Name_y,Tier_y,Parent_y,Pop_tot
0,1,San Diego Airport,3.0,0.0,1325,3362,244,21,344,60,92,317,San Diego Airport,3.0,0.0,5765
1,2,Alpine,4.0,0.0,672,3087,108,2,221,4,0,270,Alpine,4.0,0.0,4364
2,3,Barrio Logan,4.0,0.0,3727,860,260,11,168,18,1,21,Barrio Logan,4.0,0.0,5066


In [989]:
# Discard the right-hand duplicates and restore the plain descriptor names.
eth = (
    eth
    .drop(columns=['EC_Name_y', 'Tier_y', 'Parent_y'])
    .rename(columns={'EC_Name_x': 'EC_Name', 'Tier_x': 'Tier', 'Parent_x': 'Parent'})
)
eth.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+,Pop_tot
0,1,San Diego Airport,3.0,0.0,1325,3362,244,21,344,60,92,317,5765
1,2,Alpine,4.0,0.0,672,3087,108,2,221,4,0,270,4364
2,3,Barrio Logan,4.0,0.0,3727,860,260,11,168,18,1,21,5066
3,4,Carlsbad Palomar Airport,2.0,0.0,223,509,17,0,213,0,0,49,1011
4,5,Carlsbad State Beach,3.0,0.0,113,531,3,0,125,3,0,56,831


In [990]:
def perc(e, total=None):
    """Return `e` as a percentage of `total`, rounded to one decimal place.

    Parameters
    ----------
    e : numeric Series (or scalar) holding the numerator.
    total : optional denominator. When omitted, the notebook-level column
        ``eth['Pop_tot']`` is used, which is the original behaviour; passing it
        explicitly lets the helper work after that column is dropped or on
        another frame.

    Returns
    -------
    ``(e / total) * 100`` rounded to 1 decimal. Division follows pandas rules,
    so a 0/0 row yields NaN.
    """
    if total is None:
        # Looked up at call time, so `eth` must still contain 'Pop_tot'.
        total = eth['Pop_tot']
    val = ((e / total) * 100).round(1)

    return val

In [991]:
# Add a '%<column>' share-of-total column for every ethnicity count, in the same order.
for eth_col in ['Pop_Hisp', 'Pop_Wht', 'Pop_Blk', 'Pop_Am-Ind',
                'Pop_Asn', 'Pop_Pac-Isl', 'Pop_Oth', 'Pop_2+']:
    eth['%' + eth_col] = perc(eth[eth_col])

In [992]:
# The total was only needed for the percentages; drop it and store Tier/Parent as integers.
eth = (
    eth
    .drop(columns='Pop_tot')
    .astype({'Tier': 'int', 'Parent': 'int'})
)
eth.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+,%Pop_Hisp,%Pop_Wht,%Pop_Blk,%Pop_Am-Ind,%Pop_Asn,%Pop_Pac-Isl,%Pop_Oth,%Pop_2+
0,1,San Diego Airport,3,0,1325,3362,244,21,344,60,92,317,23.0,58.3,4.2,0.4,6.0,1.0,1.6,5.5
1,2,Alpine,4,0,672,3087,108,2,221,4,0,270,15.4,70.7,2.5,0.0,5.1,0.1,0.0,6.2
2,3,Barrio Logan,4,0,3727,860,260,11,168,18,1,21,73.6,17.0,5.1,0.2,3.3,0.4,0.0,0.4


In [993]:
# Use the centre descriptors as the row index so they lead each row in the exported file.
eth = eth.set_index(keys=['EC_ID', 'EC_Name', 'Tier', 'Parent'])
eth.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pop_Hisp,Pop_Wht,Pop_Blk,Pop_Am-Ind,Pop_Asn,Pop_Pac-Isl,Pop_Oth,Pop_2+,%Pop_Hisp,%Pop_Wht,%Pop_Blk,%Pop_Am-Ind,%Pop_Asn,%Pop_Pac-Isl,%Pop_Oth,%Pop_2+
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,San Diego Airport,3,0,1325,3362,244,21,344,60,92,317,23.0,58.3,4.2,0.4,6.0,1.0,1.6,5.5
2,Alpine,4,0,672,3087,108,2,221,4,0,270,15.4,70.7,2.5,0.0,5.1,0.1,0.0,6.2
3,Barrio Logan,4,0,3727,860,260,11,168,18,1,21,73.6,17.0,5.1,0.2,3.3,0.4,0.0,0.4


In [526]:
# Export the ethnicity table. The relative path writes into the current working directory.
# NOTE(review): the notebook header names a shared Outputs folder - confirm the intended location.
eth.to_csv(path_or_buf="pop_by_ethnicity-race.csv", sep=",")

# Household Population by the Employment Centers in 2022


In [994]:
# Pull MGRA-level population by housing type from the DDAM SQL Server.
# The filter is fixed to datasource_id 46 and yr_id 2022.
pop_mgra = pd.read_sql_query(
    sql='''   select mgra.mgra,pop.housing_type_id,pop.population
  from  [demographic_warehouse].[fact].[population] as pop
  join [demographic_warehouse].[dim].[mgra_denormalize] as mgra
  on pop.mgra_id = mgra.mgra_id
  where pop.datasource_id = 46 and pop.yr_id = 2022
  ''',
    con=conn_DDAM,
)

  pop_mgra = pd.read_sql_query('''   select mgra.mgra,pop.housing_type_id,pop.population


In [995]:
# Sanity check: (rows, columns) returned by the population query.
pop_mgra.shape

(97284, 3)

In [996]:
# Preview the raw query result: one row per MGRA and housing type.
pop_mgra.head()

Unnamed: 0,mgra,housing_type_id,population
0,446,2,0
1,446,3,0
2,446,1,150
3,447,2,0
4,447,3,0


In [997]:
# Load the housing-type lookup table from the DDAM SQL Server.
pop_dim = pd.read_sql_query(
    sql=''' 
SELECT *
  FROM [demographic_warehouse].[dim].[housing_type]  ''',
    con=conn_DDAM,
)

  pop_dim = pd.read_sql_query('''


In [998]:
# Display the housing-type lookup (household vs. the three group-quarters types).
pop_dim

Unnamed: 0,housing_type_id,short_name,long_name
0,1,hh,Household Population
1,2,gq_mil,Group Quarters - Military
2,3,gq_college,Group Quarters - College
3,4,gq_other,Group Quarters - Other


In [999]:
# Attach short/long housing-type names to every population row.
pop_mgra = pop_mgra.merge(pop_dim, how="left", on="housing_type_id")

In [1000]:
# Confirm the lookup labels were attached.
pop_mgra.head()

Unnamed: 0,mgra,housing_type_id,population,short_name,long_name
0,446,2,0,gq_mil,Group Quarters - Military
1,446,3,0,gq_college,Group Quarters - College
2,446,1,150,hh,Household Population
3,447,2,0,gq_mil,Group Quarters - Military
4,447,3,0,gq_college,Group Quarters - College


In [1001]:
# Map each MGRA to its employment centre; MGRAs without a match keep NaN centre fields.
pop_ec = pop_mgra.merge(ec_mgra, how="left", left_on="mgra", right_on="MGRA15")

In [1002]:
# Same mapping against the sub-centre cross-reference table.
pop_sc = pop_mgra.merge(sub_ctrs_mgra, how="left", left_on="mgra", right_on="MGRA15")

In [1003]:
# Column names of the centre-level merge, checked before reordering.
list(pop_ec)

['mgra',
 'housing_type_id',
 'population',
 'short_name',
 'long_name',
 'EC_ID',
 'EC_Name',
 'Tier',
 'MGRA15',
 'Parent']

In [1004]:
# Column names of the sub-centre merge (same set as pop_ec, different order).
list(pop_sc)

['mgra',
 'housing_type_id',
 'population',
 'short_name',
 'long_name',
 'MGRA15',
 'EC_ID',
 'EC_Name',
 'Parent',
 'Tier']

In [1005]:
# Shared column layout (drops mgra/MGRA15), rows ordered by housing type.
pop_ec = (
    pop_ec[['EC_ID', 'EC_Name', 'Tier', 'Parent', 'housing_type_id', 'short_name', 'long_name', 'population']]
    .sort_values('housing_type_id')
)
pop_ec.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,population
48641,83.0,El Cajon - Gillespie Field,3.0,0.0,1,hh,Household Population,0
44552,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0
44555,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0


In [1006]:
# Same layout and ordering for the sub-centre frame so the two can be stacked.
pop_sc = (
    pop_sc[['EC_ID', 'EC_Name', 'Tier', 'Parent', 'housing_type_id', 'short_name', 'long_name', 'population']]
    .sort_values('housing_type_id')
)
pop_sc.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,population
48641,,,0,,1,hh,Household Population,0
44552,,,0,,1,hh,Household Population,0
44555,,,0,,1,hh,Household Population,0


In [1007]:
# Stack centre and sub-centre rows into one frame (original index labels are kept).
pop = pd.concat((pop_ec, pop_sc), axis=0)

In [1008]:
# Sanity check: row count should be the sum of the two stacked frames.
pop.shape

(194568, 8)

In [1009]:
# Preview the stacked population frame.
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,population
48641,83.0,El Cajon - Gillespie Field,3.0,0.0,1,hh,Household Population,0
44552,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0
44555,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0


In [1010]:
# Re-display the housing-type ids used to split the frame next.
pop_dim

Unnamed: 0,housing_type_id,short_name,long_name
0,1,hh,Household Population
1,2,gq_mil,Group Quarters - Military
2,3,gq_college,Group Quarters - College
3,4,gq_other,Group Quarters - Other


In [1011]:
# One frame per housing_type_id (1-4), in id order.
pop_1, pop_2, pop_3, pop_4 = [
    pop[pop['housing_type_id'] == type_id] for type_id in range(1, 5)
]

In [1012]:
# Spot-check the housing_type_id == 1 subset.
pop_1.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,population
48641,83.0,El Cajon - Gillespie Field,3.0,0.0,1,hh,Household Population,0
44552,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0
44555,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0


In [1013]:
# Sum population per employment centre within each housing-type subset.
# Rows with NaN keys are dropped by groupby; .agg({'sum'}) yields a (population, sum) header.
pop_1, pop_2, pop_3, pop_4 = [
    subset.groupby(
        ['EC_ID', 'EC_Name', 'Tier', 'Parent', 'housing_type_id', 'short_name', 'long_name']
    ).agg({'sum'})
    for subset in (pop_1, pop_2, pop_3, pop_4)
]

In [1014]:
# Inspect one aggregated subset; the header still has two levels here.
pop_3.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,population
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,sum
EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,Unnamed: 7_level_2
1.0,San Diego Airport,3.0,0.0,3,gq_college,Group Quarters - College,0
2.0,Alpine,4.0,0.0,3,gq_college,Group Quarters - College,0
3.0,Barrio Logan,4.0,0.0,3,gq_college,Group Quarters - College,0


In [1015]:
# `pop` itself is still the stacked, un-aggregated frame at this point.
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,population
48641,83.0,El Cajon - Gillespie Field,3.0,0.0,1,hh,Household Population,0
44552,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0
44555,55.0,Otay Mesa Brown Field,4.0,0.0,1,hh,Household Population,0


In [1016]:
# Collapse the two-level (column, aggregation) header left by .agg({'sum'}) to plain names.
# get_level_values(0) yields one label per column in column order. The previous
# `.levels[0]` is the sorted set of unique labels, which only lines up when exactly one
# value column exists and raises AttributeError if this cell is re-run on a flat header.
for _grouped in (pop_1, pop_2, pop_3, pop_4):
    _grouped.columns = _grouped.columns.get_level_values(0)

In [1017]:
# Confirm the column header is now a single level.
pop_1.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,population
EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,Unnamed: 7_level_1
1.0,San Diego Airport,3.0,0.0,1,hh,Household Population,5524
2.0,Alpine,4.0,0.0,1,hh,Household Population,4255
3.0,Barrio Logan,4.0,0.0,1,hh,Household Population,4420


In [1018]:
# Move the group keys out of the index and back into ordinary columns.
pop_1, pop_2, pop_3, pop_4 = [
    grouped.reset_index() for grouped in (pop_1, pop_2, pop_3, pop_4)
]

In [1019]:
# Lookup shown again as the reference for the column names assigned next.
pop_dim

Unnamed: 0,housing_type_id,short_name,long_name
0,1,hh,Household Population
1,2,gq_mil,Group Quarters - Military
2,3,gq_college,Group Quarters - College
3,4,gq_other,Group Quarters - Other


In [1020]:
# Name each subset's population column after its housing type (order follows ids 1-4).
pop_1, pop_2, pop_3, pop_4 = [
    table.rename(columns={'population': label})
    for table, label in zip((pop_1, pop_2, pop_3, pop_4),
                            ('Pop_HH', 'GQ_Mil', 'GQ_Col', 'GQ_Oth'))
]

In [1021]:
# Verify the renamed population column on one subset.
pop_3.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,housing_type_id,short_name,long_name,GQ_Col
0,1.0,San Diego Airport,3.0,0.0,3,gq_college,Group Quarters - College,0
1,2.0,Alpine,4.0,0.0,3,gq_college,Group Quarters - College,0
2,3.0,Barrio Logan,4.0,0.0,3,gq_college,Group Quarters - College,0


In [1022]:
# The housing type is now encoded in the column name, so drop its id and labels.
pop_1, pop_2, pop_3, pop_4 = [
    table.drop(columns=['housing_type_id', 'short_name', 'long_name'])
    for table in (pop_1, pop_2, pop_3, pop_4)
]

In [1023]:
# Each subset should now hold only the centre keys plus its own population column.
pop_2.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,GQ_Mil
0,1.0,San Diego Airport,3.0,0.0,0
1,2.0,Alpine,4.0,0.0,0
2,3.0,Barrio Logan,4.0,0.0,0


In [1024]:
# Join the four per-housing-type tables side by side on EC_ID (inner joins, left to right).
# NOTE(review): joining on EC_ID alone makes EC_Name/Tier/Parent collide, so the result
# carries repeated *_x / *_y columns that later cells strip by position; newer pandas
# releases may reject such duplicate suffixed columns - confirm before upgrading.
dfs = [pop_1, pop_2, pop_3, pop_4]

import functools as ft

pop = ft.reduce(ft.partial(pd.merge, on='EC_ID'), dfs)

  pop = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)


In [1025]:
# List the merged column names; the repeated *_x / *_y entries come from the chained merges.
list(pop)

['EC_ID',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'Pop_HH',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'GQ_Mil',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'GQ_Col',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'GQ_Oth']

In [1026]:
# Keep the key, the *_x descriptor columns and the four population columns.
# Label-based selection returns every column that shares a name, so duplicates remain.
pop = pop[
    ['EC_ID', 'EC_Name_x', 'Tier_x', 'Parent_x']
    + ['Pop_HH', 'GQ_Mil', 'GQ_Col', 'GQ_Oth']
]

pop.head()

Unnamed: 0,EC_ID,EC_Name_x,EC_Name_x.1,Tier_x,Tier_x.1,Parent_x,Parent_x.1,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth
0,1.0,San Diego Airport,San Diego Airport,3.0,3.0,0.0,0.0,5524,0,0,241
1,2.0,Alpine,Alpine,4.0,4.0,0.0,0.0,4255,0,0,109
2,3.0,Barrio Logan,Barrio Logan,4.0,4.0,0.0,0.0,4420,0,0,646
3,4.0,Carlsbad Palomar Airport,Carlsbad Palomar Airport,2.0,2.0,0.0,0.0,928,0,0,83
4,5.0,Carlsbad State Beach,Carlsbad State Beach,3.0,3.0,0.0,0.0,831,0,0,0


In [1027]:
# Column positions here determine the positional selection in the next step.
list(pop)

['EC_ID',
 'EC_Name_x',
 'EC_Name_x',
 'Tier_x',
 'Tier_x',
 'Parent_x',
 'Parent_x',
 'Pop_HH',
 'GQ_Mil',
 'GQ_Col',
 'GQ_Oth']

In [1028]:
# Keep the first copy of each duplicated descriptor column (positions 1, 3, 5)
# together with EC_ID (0) and the four population columns (7-10).
pop = pop.iloc[:, [0, 1, 3, 5] + list(range(7, 11))]
pop.head()

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth
0,1.0,San Diego Airport,3.0,0.0,5524,0,0,241
1,2.0,Alpine,4.0,0.0,4255,0,0,109
2,3.0,Barrio Logan,4.0,0.0,4420,0,0,646
3,4.0,Carlsbad Palomar Airport,2.0,0.0,928,0,0,83
4,5.0,Carlsbad State Beach,3.0,0.0,831,0,0,0


In [1029]:
# Drop the merge suffix from the descriptor column names.
pop = pop.rename({'EC_Name_x': 'EC_Name', 'Tier_x': 'Tier', 'Parent_x': 'Parent'}, axis='columns')
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth
0,1.0,San Diego Airport,3.0,0.0,5524,0,0,241
1,2.0,Alpine,4.0,0.0,4255,0,0,109
2,3.0,Barrio Logan,4.0,0.0,4420,0,0,646


In [1030]:
# EC_ID arrives as float (1.0, 2.0, ...); store it as an integer.
pop = pop.astype({'EC_ID': 'int'})
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth
0,1,San Diego Airport,3.0,0.0,5524,0,0,241
1,2,Alpine,4.0,0.0,4255,0,0,109
2,3,Barrio Logan,4.0,0.0,4420,0,0,646


In [1031]:
# Confirm the cleaned column names before deriving totals.
list(pop)

['EC_ID', 'EC_Name', 'Tier', 'Parent', 'Pop_HH', 'GQ_Mil', 'GQ_Col', 'GQ_Oth']

In [1032]:
# Derived counts: group-quarters total (three GQ types) and overall total (household + GQ).
pop = pop.assign(
    Pop_GQ=lambda d: d['GQ_Mil'] + d['GQ_Col'] + d['GQ_Oth'],
    Pop_Total=lambda d: d['Pop_HH'] + d['Pop_GQ'],
)

In [1033]:
# Sanity check: one row per centre and 10 columns expected at this point.
pop.shape

(145, 10)

In [1034]:
# Preview the table with the derived Pop_GQ and Pop_Total columns.
pop.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth,Pop_GQ,Pop_Total
0,1,San Diego Airport,3.0,0.0,5524,0,0,241,241,5765
1,2,Alpine,4.0,0.0,4255,0,0,109,109,4364
2,3,Barrio Logan,4.0,0.0,4420,0,0,646,646,5066
3,4,Carlsbad Palomar Airport,2.0,0.0,928,0,0,83,83,1011
4,5,Carlsbad State Beach,3.0,0.0,831,0,0,0,0,831


In [1035]:
def per(h, total=None):
    """Return `h` as a percentage of `total`, rounded to one decimal place.

    Parameters
    ----------
    h : numeric Series (or scalar) holding the numerator.
    total : optional denominator. When omitted, the notebook-level column
        ``pop['Pop_Total']`` is used, which is the original behaviour; passing
        it explicitly removes the dependency on the global frame.

    Returns
    -------
    ``(h / total) * 100`` rounded to 1 decimal. Division follows pandas rules,
    so a 0/0 row yields NaN.
    """
    if total is None:
        # Looked up at call time, so `pop` must still contain 'Pop_Total'.
        total = pop['Pop_Total']
    v = ((h / total) * 100).round(1)

    return v

In [1036]:
# Columns available before the percentage columns are added.
list(pop)

['EC_ID',
 'EC_Name',
 'Tier',
 'Parent',
 'Pop_HH',
 'GQ_Mil',
 'GQ_Col',
 'GQ_Oth',
 'Pop_GQ',
 'Pop_Total']

In [1037]:
# Add a '%<column>' share-of-total column for each population count, in the same order.
for pop_col in ['Pop_HH', 'GQ_Mil', 'GQ_Col', 'GQ_Oth', 'Pop_GQ']:
    pop['%' + pop_col] = per(pop[pop_col])

In [1038]:
# Check the new percentage columns.
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth,Pop_GQ,Pop_Total,%Pop_HH,%GQ_Mil,%GQ_Col,%GQ_Oth,%Pop_GQ
0,1,San Diego Airport,3.0,0.0,5524,0,0,241,241,5765,95.8,0.0,0.0,4.2,4.2
1,2,Alpine,4.0,0.0,4255,0,0,109,109,4364,97.5,0.0,0.0,2.5,2.5
2,3,Barrio Logan,4.0,0.0,4420,0,0,646,646,5066,87.2,0.0,0.0,12.8,12.8


In [1039]:
# Store Tier and Parent as integers instead of floats.
pop = pop.astype({'Tier': 'int', 'Parent': 'int'})
pop.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth,Pop_GQ,Pop_Total,%Pop_HH,%GQ_Mil,%GQ_Col,%GQ_Oth,%Pop_GQ
0,1,San Diego Airport,3,0,5524,0,0,241,241,5765,95.8,0.0,0.0,4.2,4.2
1,2,Alpine,4,0,4255,0,0,109,109,4364,97.5,0.0,0.0,2.5,2.5
2,3,Barrio Logan,4,0,4420,0,0,646,646,5066,87.2,0.0,0.0,12.8,12.8


In [1040]:
# Use the centre descriptors as the row index so they lead each row in the exported file.
pop = pop.set_index(keys=['EC_ID', 'EC_Name', 'Tier', 'Parent'])
pop.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pop_HH,GQ_Mil,GQ_Col,GQ_Oth,Pop_GQ,Pop_Total,%Pop_HH,%GQ_Mil,%GQ_Col,%GQ_Oth,%Pop_GQ
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,San Diego Airport,3,0,5524,0,0,241,241,5765,95.8,0.0,0.0,4.2,4.2
2,Alpine,4,0,4255,0,0,109,109,4364,97.5,0.0,0.0,2.5,2.5
3,Barrio Logan,4,0,4420,0,0,646,646,5066,87.2,0.0,0.0,12.8,12.8


In [669]:
# Export the population table. The relative path writes into the current working directory.
# NOTE(review): the notebook header names a shared Outputs folder - confirm the intended location.
pop.to_csv(path_or_buf="pop_households.csv", sep=",")

# Number of Housing Units by the Employment Centers in 2022


In [1041]:
# HU_Total: Total number of housing units

In [1042]:
# Pull MGRA-level housing units by structure type from the DDAM SQL Server.
# The filter is fixed to datasource_id 46 and yr_id 2022.
hu_mgra = pd.read_sql_query(
    sql=''' 
--Housing Units
 select mgra.mgra, house.structure_type_id, house.units
  from  [demographic_warehouse].[fact].[housing] as house
  join [demographic_warehouse].[dim].[mgra_denormalize] as mgra
  on house.mgra_id = mgra.mgra_id
  where house.datasource_id = 46 and house.yr_id = 2022;  ''',
    con=conn_DDAM,
)

  hu_mgra = pd.read_sql_query('''


In [1043]:
# Preview the raw query result: one row per MGRA and structure type.
hu_mgra.head()

Unnamed: 0,mgra,structure_type_id,units
0,446,2,38
1,446,1,19
2,446,3,24
3,447,2,0
4,447,3,0


In [1044]:
# Load the structure-type lookup table from the DDAM SQL Server.
hu_dim = pd.read_sql_query(
    sql='''  SELECT *
  FROM [demographic_warehouse].[dim].[structure_type] ''',
    con=conn_DDAM,
)

  hu_dim = pd.read_sql_query('''  SELECT *


In [1045]:
# Sanity check: number of structure types in the lookup.
hu_dim.shape

(6, 3)

In [1046]:
# Display the structure-type lookup (ids 1-6 with short and long names).
hu_dim

Unnamed: 0,structure_type_id,short_name,long_name
0,1,sf,Single Family - Detached
1,2,sfmu,Single Family - Multiple Unit
2,3,mf,Multifamily
3,4,mh,Mobile Home
4,5,sfd,Single-family Detached
5,6,sfa,Single-family Attached


In [1047]:
# Attach short/long structure-type names to every housing row.
hu_mgra = hu_mgra.merge(hu_dim, how="left", on="structure_type_id")

In [1048]:
# Column names after attaching the lookup labels.
list(hu_mgra)

['mgra', 'structure_type_id', 'units', 'short_name', 'long_name']

In [1049]:
# Reorder so the unit count comes last, after the descriptive columns.
hu_mgra = hu_mgra.loc[:, ['mgra', 'structure_type_id', 'short_name', 'long_name', 'units']]

In [1050]:
# Map each MGRA to its employment centre; MGRAs without a match keep NaN centre fields.
hu_ec = hu_mgra.merge(ec_mgra, how="left", left_on="mgra", right_on="MGRA15")

In [1051]:
# Same mapping against the sub-centre cross-reference table.
hu_sc = hu_mgra.merge(sub_ctrs_mgra, how="left", left_on="mgra", right_on="MGRA15")

In [1052]:
# Column names of the centre-level merge, checked before reordering.
list(hu_ec)

['mgra',
 'structure_type_id',
 'short_name',
 'long_name',
 'units',
 'EC_ID',
 'EC_Name',
 'Tier',
 'MGRA15',
 'Parent']

In [1053]:
# Column names of the sub-centre merge (same set as hu_ec, different order).
list(hu_sc)

['mgra',
 'structure_type_id',
 'short_name',
 'long_name',
 'units',
 'MGRA15',
 'EC_ID',
 'EC_Name',
 'Parent',
 'Tier']

In [1054]:
# Shared column layout (drops mgra/MGRA15), rows ordered by structure type.
hu_ec = (
    hu_ec[['EC_ID', 'EC_Name', 'Tier', 'Parent', 'structure_type_id', 'short_name', 'long_name', 'units']]
    .sort_values('structure_type_id')
)
hu_ec.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
67955,,,,,1,sf,Single Family - Detached,30
62618,54.0,Oceanside Tri-city,4.0,0.0,1,sf,Single Family - Detached,1
22196,45.0,National City,2.0,0.0,1,sf,Single Family - Detached,10


In [1055]:
# Same layout and ordering for the sub-centre frame so the two can be stacked.
hu_sc = (
    hu_sc[['EC_ID', 'EC_Name', 'Tier', 'Parent', 'structure_type_id', 'short_name', 'long_name', 'units']]
    .sort_values('structure_type_id')
)
hu_sc.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
67955,,,0,,1,sf,Single Family - Detached,30
62618,,,0,,1,sf,Single Family - Detached,1
22196,,,0,,1,sf,Single Family - Detached,10


In [1056]:
# Stack centre and sub-centre rows into one frame (original index labels are kept).
hu = pd.concat((hu_ec, hu_sc), axis=0)

In [1057]:
# Sanity check: row count should be the sum of the two stacked frames.
hu.shape

(194568, 8)

In [1058]:
# Re-display the structure-type ids used to split the frame next.
hu_dim

Unnamed: 0,structure_type_id,short_name,long_name
0,1,sf,Single Family - Detached
1,2,sfmu,Single Family - Multiple Unit
2,3,mf,Multifamily
3,4,mh,Mobile Home
4,5,sfd,Single-family Detached
5,6,sfa,Single-family Attached


In [1059]:
# One frame per structure_type_id (1-6), in id order.
hu_1, hu_2, hu_3, hu_4, hu_5, hu_6 = [
    hu[hu['structure_type_id'] == type_id] for type_id in range(1, 7)
]

In [1060]:
# Spot-check the structure_type_id == 1 subset.
hu_1.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
67955,,,,,1,sf,Single Family - Detached,30
62618,54.0,Oceanside Tri-city,4.0,0.0,1,sf,Single Family - Detached,1
22196,45.0,National City,2.0,0.0,1,sf,Single Family - Detached,10


In [1061]:
# Spot-check the structure_type_id == 2 subset.
hu_2.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
47698,,,,,2,sfmu,Single Family - Multiple Unit,0
51170,,,,,2,sfmu,Single Family - Multiple Unit,0
47847,,,,,2,sfmu,Single Family - Multiple Unit,0


In [1062]:
# Spot-check the structure_type_id == 3 subset.
hu_3.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
15346,,,,,3,mf,Multifamily,0
75942,4.0,Carlsbad Palomar Airport,2.0,0.0,3,mf,Multifamily,0
74633,,,,,3,mf,Multifamily,0


In [1063]:
# Spot-check the structure_type_id == 4 subset.
hu_4.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
85925,,,,,4,mh,Mobile Home,0
83424,,,,,4,mh,Mobile Home,0
36668,27.0,Golden Hill,4.0,0.0,4,mh,Mobile Home,0


In [1064]:
# Check whether any rows carry structure_type_id 5; the recorded output shows none.
hu_5.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units


In [1065]:
# Check whether any rows carry structure_type_id 6; the recorded output shows none.
hu_6.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units


In [1066]:
# Sum housing units per employment centre within structure types 1-4.
# Rows with NaN keys are dropped by groupby; .agg({'sum'}) yields a (units, sum) header.
hu_1, hu_2, hu_3, hu_4 = [
    subset.groupby(
        ['EC_ID', 'EC_Name', 'Tier', 'Parent', 'structure_type_id', 'short_name', 'long_name']
    ).agg({'sum'})
    for subset in (hu_1, hu_2, hu_3, hu_4)
]

In [1067]:
# Inspect one aggregated subset; the header still has two levels here.
hu_4.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,units
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,sum
EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,Unnamed: 7_level_2
1.0,San Diego Airport,3.0,0.0,4,mh,Mobile Home,0
2.0,Alpine,4.0,0.0,4,mh,Mobile Home,154
3.0,Barrio Logan,4.0,0.0,4,mh,Mobile Home,0


In [1068]:
# Collapse the two-level (column, aggregation) header left by .agg({'sum'}) to plain names.
# get_level_values(0) yields one label per column in column order. The previous
# `.levels[0]` is the sorted set of unique labels, which only lines up when exactly one
# value column exists and raises AttributeError if this cell is re-run on a flat header.
for _grouped in (hu_1, hu_2, hu_3, hu_4):
    _grouped.columns = _grouped.columns.get_level_values(0)

In [1069]:
# Confirm the column header is now a single level.
hu_4.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,units
EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,Unnamed: 7_level_1
1.0,San Diego Airport,3.0,0.0,4,mh,Mobile Home,0
2.0,Alpine,4.0,0.0,4,mh,Mobile Home,154
3.0,Barrio Logan,4.0,0.0,4,mh,Mobile Home,0


In [1070]:
# Move the group keys out of the index and back into ordinary columns.
hu_1, hu_2, hu_3, hu_4 = [
    grouped.reset_index() for grouped in (hu_1, hu_2, hu_3, hu_4)
]

In [1071]:
# Spot-check one per-structure-type table after the index reset.
hu_2.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,units
0,1.0,San Diego Airport,3.0,0.0,2,sfmu,Single Family - Multiple Unit,556
1,2.0,Alpine,4.0,0.0,2,sfmu,Single Family - Multiple Unit,62
2,3.0,Barrio Logan,4.0,0.0,2,sfmu,Single Family - Multiple Unit,515


In [1072]:
# Lookup shown again as the reference for the column names assigned next.
hu_dim

Unnamed: 0,structure_type_id,short_name,long_name
0,1,sf,Single Family - Detached
1,2,sfmu,Single Family - Multiple Unit
2,3,mf,Multifamily
3,4,mh,Mobile Home
4,5,sfd,Single-family Detached
5,6,sfa,Single-family Attached


In [1073]:
# Name each subset's unit column after its structure type (order follows ids 1-4).
hu_1, hu_2, hu_3, hu_4 = [
    table.rename(columns={'units': label})
    for table, label in zip((hu_1, hu_2, hu_3, hu_4),
                            ('HU_SF', 'HU_SFMU', 'HU_MF', 'HU_MH'))
]

In [1074]:
# Verify the renamed unit column on one subset.
hu_1.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,structure_type_id,short_name,long_name,HU_SF
0,1.0,San Diego Airport,3.0,0.0,1,sf,Single Family - Detached,458
1,2.0,Alpine,4.0,0.0,1,sf,Single Family - Detached,808
2,3.0,Barrio Logan,4.0,0.0,1,sf,Single Family - Detached,286


In [1075]:
# The structure type is now encoded in the column name, so drop its id and labels.
hu_1, hu_2, hu_3, hu_4 = [
    table.drop(columns=['structure_type_id', 'short_name', 'long_name'])
    for table in (hu_1, hu_2, hu_3, hu_4)
]

In [1076]:
# Join the four per-structure-type tables side by side on EC_ID (inner joins, left to right).
# NOTE(review): joining on EC_ID alone makes EC_Name/Tier/Parent collide, so the result
# carries repeated *_x / *_y columns that later cells strip by position; newer pandas
# releases may reject such duplicate suffixed columns - confirm before upgrading.
dfs = [hu_1, hu_2, hu_3, hu_4]

import functools as ft

hu = ft.reduce(ft.partial(pd.merge, on='EC_ID'), dfs)

  hu = ft.reduce(lambda left, right: pd.merge(left, right, on='EC_ID'), dfs)


In [1077]:
list(hu)

['EC_ID',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'HU_SF',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'HU_SFMU',
 'EC_Name_x',
 'Tier_x',
 'Parent_x',
 'HU_MF',
 'EC_Name_y',
 'Tier_y',
 'Parent_y',
 'HU_MH']

In [1078]:
# Keep the identifier columns that came from the first table plus the four unit counts.
# The merged frame holds two columns each labelled EC_Name_x, Tier_x and Parent_x, so
# selecting by label returns both copies of each: the result has 11 columns, not 8.
hu = hu[['EC_ID','EC_Name_x','Tier_x','Parent_x',
 'HU_SF',
 'HU_SFMU',
 'HU_MF',
 'HU_MH']]

hu.head()

Unnamed: 0,EC_ID,EC_Name_x,EC_Name_x.1,Tier_x,Tier_x.1,Parent_x,Parent_x.1,HU_SF,HU_SFMU,HU_MF,HU_MH
0,1.0,San Diego Airport,San Diego Airport,3.0,3.0,0.0,0.0,458,556,1825,0
1,2.0,Alpine,Alpine,4.0,4.0,0.0,0.0,808,62,654,154
2,3.0,Barrio Logan,Barrio Logan,4.0,4.0,0.0,0.0,286,515,917,0
3,4.0,Carlsbad Palomar Airport,Carlsbad Palomar Airport,2.0,2.0,0.0,0.0,46,51,245,0
4,5.0,Carlsbad State Beach,Carlsbad State Beach,3.0,3.0,0.0,0.0,0,0,273,108


In [1079]:
list(hu)

['EC_ID',
 'EC_Name_x',
 'EC_Name_x',
 'Tier_x',
 'Tier_x',
 'Parent_x',
 'Parent_x',
 'HU_SF',
 'HU_SFMU',
 'HU_MF',
 'HU_MH']

In [1080]:
# Positional selection that drops the second copy of each repeated label:
#   0 = EC_ID, 1 = EC_Name_x, 3 = Tier_x, 5 = Parent_x, 7-10 = HU_SF, HU_SFMU, HU_MF, HU_MH
# Positions 2, 4 and 6 are the repeated EC_Name_x, Tier_x and Parent_x columns.
hu = hu.iloc[:,[0,1,3,5,7,8,9,10]]
hu.head()

Unnamed: 0,EC_ID,EC_Name_x,Tier_x,Parent_x,HU_SF,HU_SFMU,HU_MF,HU_MH
0,1.0,San Diego Airport,3.0,0.0,458,556,1825,0
1,2.0,Alpine,4.0,0.0,808,62,654,154
2,3.0,Barrio Logan,4.0,0.0,286,515,917,0
3,4.0,Carlsbad Palomar Airport,2.0,0.0,46,51,245,0
4,5.0,Carlsbad State Beach,3.0,0.0,0,0,273,108


In [1081]:
# Strip the merge suffixes from the identifier columns, then store the identifiers
# as integers (they are floats at this point, e.g. 1.0).
hu = hu.rename(columns={'EC_Name_x': 'EC_Name', 'Tier_x': 'Tier', 'Parent_x': 'Parent'})
hu = hu.astype({'EC_ID': 'int', 'Tier': 'int', 'Parent': 'int'})
hu.head(3)

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,HU_SF,HU_SFMU,HU_MF,HU_MH
0,1,San Diego Airport,3,0,458,556,1825,0
1,2,Alpine,4,0,808,62,654,154
2,3,Barrio Logan,4,0,286,515,917,0


In [1082]:
list(hu)

['EC_ID', 'EC_Name', 'Tier', 'Parent', 'HU_SF', 'HU_SFMU', 'HU_MF', 'HU_MH']

In [1083]:
# Total housing units per center: add up the four structure-type counts.
hu['HU_Total'] = sum(hu[col] for col in ('HU_SF', 'HU_SFMU', 'HU_MF', 'HU_MH'))
hu.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,HU_SF,HU_SFMU,HU_MF,HU_MH,HU_Total
0,1,San Diego Airport,3,0,458,556,1825,0,2839
1,2,Alpine,4,0,808,62,654,154,1678
2,3,Barrio Logan,4,0,286,515,917,0,1718
3,4,Carlsbad Palomar Airport,2,0,46,51,245,0,342
4,5,Carlsbad State Beach,3,0,0,0,273,108,381


In [1084]:
def percent(u, total=None):
    """Return `u` as a percentage of a total, rounded to one decimal place.

    Parameters
    ----------
    u : pandas.Series
        Housing-unit counts for one structure type (e.g. ``hu['HU_SF']``).
    total : pandas.Series, optional
        Denominator. When omitted, the notebook-level ``hu['HU_Total']`` is used,
        which is what one-argument calls rely on.

    Returns
    -------
    pandas.Series
        ``u / total * 100`` rounded to one decimal.
    """
    # Accepting the denominator as an argument lets a caller state it explicitly and
    # makes the function usable without the global `hu` frame.
    # A later cell in this notebook defines another function named `percent`; once
    # that cell has run, this definition is no longer the active one in the kernel.
    if total is None:
        total = hu['HU_Total']
    return ((u / total) * 100).round(1)

In [1085]:
list(hu)

['EC_ID',
 'EC_Name',
 'Tier',
 'Parent',
 'HU_SF',
 'HU_SFMU',
 'HU_MF',
 'HU_MH',
 'HU_Total']

In [1086]:
# Share of each structure type in the center's total housing units, in percent.
for pct_col, count_col in {
    '%HU_SF': 'HU_SF',
    '%HU_SFMU': 'HU_SFMU',
    '%HU_MF': 'HU_MF',
    '%HU_MH': 'HU_MH',
}.items():
    hu[pct_col] = percent(hu[count_col])

hu.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,HU_SF,HU_SFMU,HU_MF,HU_MH,HU_Total,%HU_SF,%HU_SFMU,%HU_MF,%HU_MH
0,1,San Diego Airport,3,0,458,556,1825,0,2839,16.1,19.6,64.3,0.0
1,2,Alpine,4,0,808,62,654,154,1678,48.2,3.7,39.0,9.2
2,3,Barrio Logan,4,0,286,515,917,0,1718,16.6,30.0,53.4,0.0
3,4,Carlsbad Palomar Airport,2,0,46,51,245,0,342,13.5,14.9,71.6,0.0
4,5,Carlsbad State Beach,3,0,0,0,273,108,381,0.0,0.0,71.7,28.3


In [1087]:
# Use the four center identifiers as the row index of the housing-unit table.
hu = hu.set_index(keys=['EC_ID', 'EC_Name', 'Tier', 'Parent'])
hu.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,HU_SF,HU_SFMU,HU_MF,HU_MH,HU_Total,%HU_SF,%HU_SFMU,%HU_MF,%HU_MH
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,San Diego Airport,3,0,458,556,1825,0,2839,16.1,19.6,64.3,0.0
2,Alpine,4,0,808,62,654,154,1678,48.2,3.7,39.0,9.2
3,Barrio Logan,4,0,286,515,917,0,1718,16.6,30.0,53.4,0.0


In [1088]:
# Export the housing-unit table as a comma-separated file; the relative path means
# it lands in the kernel's current working directory.
hu.to_csv(path_or_buf="Number_of_housing-units.csv", sep=",")

# Number of Households by the Employment Centers in 2022


In [1089]:
# Columns of the household table built in this section:
# HH:       Total number of households
# HH_SF:    Number of single family households
# HH_MF:    Number of multi-family households
# HH_MH:    Number of mobile home households
# %HH_SF, %HH_MF, %HH_MH: each household type as a percentage of HH
# Not produced in this section: HH_SFD (single family detached) and HH_SFA (single family attached).

In [1090]:
# Query from DDAM sql server: read every column of [ws].[mgra_base].[sr15_2022_01]
# through the existing conn_DDAM connection.
sr15_2022 = pd.read_sql_query(
    sql='''   
SELECT *
FROM [ws].[mgra_base].[sr15_2022_01] ''',
    con=conn_DDAM,
)

  sr15_2022 = pd.read_sql_query('''


In [1091]:
sr15_2022.shape

(24321, 67)

In [1092]:
sr15_2022.head(3)

Unnamed: 0,mgra,taz,LUZ,pop,hhp,hs,hs_sf,hs_mf,hs_mh,hh,...,hotelroomtotal,parkactive,openspaceparkpreserve,beachactive,district27,milestocoast,acre,landacre,effective_acres,truckregiontype
0,1,3010,10,440,440,176,84,92,0,174,...,0,0.0,0.0,0.0,9,4.35,18.837621,18.837621,18.837621,1
1,2,1797,28,130,68,56,0,56,0,48,...,0,0.0,0.0,0.0,15,0.64,2.87233,2.87233,2.87233,1
2,3,4361,239,549,549,200,23,177,0,192,...,0,0.0,0.0,0.0,13,12.22,25.713898,25.713898,25.713898,1


In [1093]:
# Keep only the MGRA key and the four household-count columns used below.
sr15_2022 = sr15_2022.loc[:, ['mgra', 'hh', 'hh_sf', 'hh_mf', 'hh_mh']]

In [1094]:
sr15_2022.head()

Unnamed: 0,mgra,hh,hh_sf,hh_mf,hh_mh
0,1,174,83,91,0
1,2,48,0,48,0
2,3,192,23,169,0
3,4,2,2,0,0
4,5,36,36,0,0


In [1095]:
# Attach each MGRA's employment center. The left join keeps every MGRA row; MGRAs
# with no match in ec_mgra come back with NaN in the center columns.
hh_ec = sr15_2022.merge(
    ec_mgra,
    how="left",
    left_on="mgra",
    right_on="MGRA15",
)

In [1096]:
# Attach each MGRA's sub-center record from sub_ctrs_mgra; the left join keeps every
# MGRA row from the household table.
hh_sc = sr15_2022.merge(
    sub_ctrs_mgra,
    how="left",
    left_on="mgra",
    right_on="MGRA15",
)

In [1097]:
hh_ec.head(3)

Unnamed: 0,mgra,hh,hh_sf,hh_mf,hh_mh,EC_ID,EC_Name,Tier,MGRA15,Parent
0,1,174,83,91,0,,,,,
1,2,48,0,48,0,18.0,Downtown,1.0,2.0,0.0
2,3,192,23,169,0,19.0,El Cajon,2.0,3.0,0.0


In [1098]:
# Both MGRA key columns were only needed for the join; drop them in one call.
hh_ec = hh_ec.drop(columns=['mgra', 'MGRA15'])

In [1099]:
hh_sc.head()

Unnamed: 0,mgra,hh,hh_sf,hh_mf,hh_mh,MGRA15,EC_ID,EC_Name,Parent,Tier
0,1,174,83,91,0,1,,,,0
1,2,48,0,48,0,2,1008.0,Downtown Sub-Center: Cortez Hill,18.0,0
2,3,192,23,169,0,3,,,,0
3,4,2,2,0,0,4,,,,0
4,5,36,36,0,0,5,,,,0


In [1100]:
# The left-hand MGRA key was only needed for the join.
hh_sc = hh_sc.drop(columns=['mgra'])

In [1101]:
list(hh_ec)

['hh', 'hh_sf', 'hh_mf', 'hh_mh', 'EC_ID', 'EC_Name', 'Tier', 'Parent']

In [1102]:
list(hh_sc)

['hh',
 'hh_sf',
 'hh_mf',
 'hh_mh',
 'MGRA15',
 'EC_ID',
 'EC_Name',
 'Parent',
 'Tier']

In [1103]:
# Drop the right-hand MGRA key as well, then show the remaining column names.
hh_sc = hh_sc.drop(columns=['MGRA15'])
hh_sc.columns.tolist()

['hh', 'hh_sf', 'hh_mf', 'hh_mh', 'EC_ID', 'EC_Name', 'Parent', 'Tier']

In [1104]:
# Sum the household counts per center. Rows with NaN in any group key drop out here,
# because groupby excludes NaN keys by default.
# Passing a collection of function names gives two-level column labels:
# (count column, 'sum').
group_keys = ['EC_ID', 'EC_Name', 'Tier', 'Parent']

hh_ec = hh_ec.groupby(group_keys).agg(['sum'])
hh_sc = hh_sc.groupby(group_keys).agg(['sum'])

In [1105]:
hh_ec.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,hh,hh_sf,hh_mf,hh_mh
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,sum,sum,sum,sum
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1.0,San Diego Airport,3.0,0.0,2585,933,1652,0
2.0,Alpine,4.0,0.0,1608,840,622,146
3.0,Barrio Logan,4.0,0.0,1563,761,802,0


In [1106]:
hh_sc.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,hh,hh_sf,hh_mf,hh_mh
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,sum,sum,sum,sum
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1001.0,Carlsbad Palomar Airport Sub-Center: Airport,0,4.0,0,0,0,0
1002.0,Carlsbad Palomar Airport Sub-Center: El Camino Real,0,4.0,40,40,0,0
1003.0,Carlsbad Palomar Airport Sub-Center: Faraday,0,4.0,41,41,0,0
1004.0,Carlsbad Palomar Airport Sub-Center: Melrose,0,4.0,255,11,244,0
1005.0,Carlsbad Palomar Airport Sub-Center: Palomar Oaks,0,4.0,0,0,0,0


In [1107]:
# Flatten the two-level (count column, 'sum') labels produced by the aggregation
# down to the plain count-column names.
# get_level_values(0) returns one label per column, in column order. That is the
# safe choice here, unlike `columns.levels[0]`, which returns the level's unique
# labels in the MultiIndex's own storage order. That order is not guaranteed to match
# the column order, so it could silently attach hh_sf / hh_mf / hh_mh to the wrong data.
hh_ec.columns = hh_ec.columns.get_level_values(0)
hh_sc.columns = hh_sc.columns.get_level_values(0)

In [1108]:
# Turn the EC_ID / EC_Name / Tier / Parent index levels back into ordinary columns.
hh_ec, hh_sc = hh_ec.reset_index(), hh_sc.reset_index()

In [1109]:
# The identifiers are floats at this point (e.g. 1.0, 1001.0); store them as integers.
hh_ec = hh_ec.astype({'EC_ID': 'int', 'Tier': 'int', 'Parent': 'int'})

# Tier is not cast for the sub-center table.
hh_sc = hh_sc.astype({'EC_ID': 'int', 'Parent': 'int'})

In [1110]:
# Stack the employment-center rows and the sub-center rows into one table.
# ignore_index=True gives the combined table a fresh 0..n-1 row index. Without it
# both inputs contribute their own 0-based labels, leaving duplicate row labels that
# make label-based selection and alignment ambiguous.
hh = pd.concat([hh_ec, hh_sc], ignore_index=True)
hh.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,hh,hh_sf,hh_mf,hh_mh
0,1,San Diego Airport,3,0,2585,933,1652,0
1,2,Alpine,4,0,1608,840,622,146
2,3,Barrio Logan,4,0,1563,761,802,0
3,4,Carlsbad Palomar Airport,2,0,336,92,244,0
4,5,Carlsbad State Beach,3,0,345,0,252,93


In [1111]:
def percent(h, total=None):
    """Return `h` as a percentage of a total, rounded to one decimal place.

    Parameters
    ----------
    h : pandas.Series
        Household counts for one housing type (e.g. ``hh['hh_sf']``).
    total : pandas.Series, optional
        Denominator. When omitted, the notebook-level ``hh['hh']`` is used, which
        is what one-argument calls rely on.

    Returns
    -------
    pandas.Series
        ``h / total * 100`` rounded to one decimal.
    """
    # Accepting the denominator as an argument lets a caller state it explicitly and
    # makes the function usable without the global `hh` frame. The default lookup
    # needs a column named 'hh', so it only works before that column is renamed.
    # NOTE: this reuses the name of the housing-unit `percent` helper defined earlier
    # in the notebook and replaces it in the kernel from this cell onward.
    if total is None:
        total = hh['hh']
    return ((h / total) * 100).round(1)

In [1112]:
# Share of each housing type in the center's total households, in percent.
for pct_col, count_col in {
    '%hh_sf': 'hh_sf',
    '%hh_mf': 'hh_mf',
    '%hh_mh': 'hh_mh',
}.items():
    hh[pct_col] = percent(hh[count_col])

hh.head()

Unnamed: 0,EC_ID,EC_Name,Tier,Parent,hh,hh_sf,hh_mf,hh_mh,%hh_sf,%hh_mf,%hh_mh
0,1,San Diego Airport,3,0,2585,933,1652,0,36.1,63.9,0.0
1,2,Alpine,4,0,1608,840,622,146,52.2,38.7,9.1
2,3,Barrio Logan,4,0,1563,761,802,0,48.7,51.3,0.0
3,4,Carlsbad Palomar Airport,2,0,336,92,244,0,27.4,72.6,0.0
4,5,Carlsbad State Beach,3,0,345,0,252,93,0.0,73.0,27.0


In [1113]:
# Switch the count and percentage columns to the upper-case labels used in the export.
hh = hh.rename(
    columns={
        'hh': 'HH',
        'hh_sf': 'HH_SF',
        'hh_mf': 'HH_MF',
        'hh_mh': 'HH_MH',
        '%hh_sf': '%HH_SF',
        '%hh_mf': '%HH_MF',
        '%hh_mh': '%HH_MH',
    }
)

In [1114]:
# Use the four center identifiers as the row index of the household table.
hh = hh.set_index(keys=['EC_ID', 'EC_Name', 'Tier', 'Parent'])
hh.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,HH,HH_SF,HH_MF,HH_MH,%HH_SF,%HH_MF,%HH_MH
EC_ID,EC_Name,Tier,Parent,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,San Diego Airport,3,0,2585,933,1652,0,36.1,63.9,0.0
2,Alpine,4,0,1608,840,622,146,52.2,38.7,9.1
3,Barrio Logan,4,0,1563,761,802,0,48.7,51.3,0.0
4,Carlsbad Palomar Airport,2,0,336,92,244,0,27.4,72.6,0.0
5,Carlsbad State Beach,3,0,345,0,252,93,0.0,73.0,27.0


In [828]:
# Export the household table as a comma-separated file; the relative path means it
# lands in the kernel's current working directory.
hh.to_csv(path_or_buf="Number_of_HouseHolds.csv", sep=",")