In [1]:
import os 
import pandas as pd
import numpy as np
import glob
import pyodbc
import gc
from mgra_denormalize_creation import create_and_merge_all_data

# Create the MGRA Denormalize Table

In [2]:
# Build the MGRA lookup frame with the project helper imported from
# mgra_denormalize_creation.
mgra_denorm = create_and_merge_all_data()
# Constant label on every row so that geo_level='region' can be used as a
# grouping column like any other geography column.
mgra_denorm['region'] = 'San Diego'
mgra_denorm

Unnamed: 0,mgra_id,mgra,census_tract,LUZ,cpa,jurisdiction,SRA,region
0,1500000100,1,2705,10,Mid-City:Eastern Area,San Diego,MID-CITY,San Diego
1,1500000200,2,5601,28,Downtown,San Diego,CENTRAL SAN DIEGO,San Diego
2,1500000300,3,15407,112,*Not in a CPA*,El Cajon,EL CAJON,San Diego
3,1500000400,4,17407,151,*Not in a CPA*,Encinitas,SAN DIEGUITO,San Diego
4,1500000500,5,17407,151,*Not in a CPA*,Encinitas,SAN DIEGUITO,San Diego
...,...,...,...,...,...,...,...,...
24316,1502431700,24317,2905,11,County Islands,Unincorporated,LA MESA,San Diego
24317,1502431800,24318,20706,133,North County Metro,Unincorporated,ESCONDIDO,San Diego
24318,1502431900,24319,21001,225,Mountain Empire,Unincorporated,ANZA-BORREGO SPRINGS,San Diego
24319,1502432000,24320,21002,224,Desert,Unincorporated,ANZA-BORREGO SPRINGS,San Diego


# Age Group Table

In [9]:
# Run the query stored in age_group_query.sql against the `estimates` database
# and load the result into `df`.
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=estimates;'
                    'Trusted_Connection=yes;')

try:
    with open(r'C:\Users\cra\OneDrive - San Diego Association of Governments\QA_Repository\2023\2023-024 Estimates 2022\SQL_Queries\age_group_query.sql', 'r') as sql_file:
        sql_query = sql_file.read()

    df = pd.read_sql_query(sql_query, conn)
finally:
    # Release the server connection even when reading the file or running the
    # query fails; the connection is not needed once the frame is in memory.
    conn.close()

df

Unnamed: 0,mgra_id,yr_id,name,population
0,1500000100,2020,Under 5,19
1,1500000100,2020,5 to 9,9
2,1500000100,2020,10 to 14,28
3,1500000100,2020,15 to 17,18
4,1500000100,2020,18 and 19,5
...,...,...,...,...
1459255,1502143900,2022,50 to 54,0
1459256,1502143900,2022,55 to 59,0
1459257,1502143900,2022,60 and 61,1
1459258,1502143900,2022,62 to 64,0


In [10]:
def age_cleaning(df, geo_level, lookup=None):
    """Aggregate long-format age-group population to one geography level.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns ``mgra_id``, ``yr_id``, ``name``
        and ``population``. Each (mgra_id, yr_id, name) combination must
        appear at most once, otherwise the pivot raises ``ValueError``.
    geo_level : str
        Column of ``lookup`` to aggregate to (e.g. ``'jurisdiction'``), or
        ``'mgra_id'`` to keep the level the data already has.
    lookup : pandas.DataFrame, optional
        Crosswalk holding ``mgra_id`` and ``geo_level``. When omitted, the
        notebook-level ``mgra_denorm`` frame is used.

    Returns
    -------
    pandas.DataFrame
        Indexed by (geo_level, yr_id) with one column per distinct ``name``
        value; ``'Under 5'`` is moved to the first position when present.

    Notes
    -----
    The crosswalk is attached with a left merge, so rows whose ``mgra_id`` is
    missing from ``lookup`` get a null geography and are left out of the
    grouped totals.
    """
    wide = pd.pivot(df, values='population', index=['mgra_id', 'yr_id'], columns=['name'])
    wide.columns.name = ''
    wide = wide.reset_index()

    # 'mgra_id' is already a column of the pivoted frame; merging it onto
    # itself and then dropping it would remove the grouping key.
    if geo_level != 'mgra_id':
        if lookup is None:
            lookup = mgra_denorm
        wide = wide.merge(lookup[['mgra_id', geo_level]], how='left', on='mgra_id')
        wide = wide.drop('mgra_id', axis=1)

    age_output = wide.groupby([geo_level, 'yr_id']).sum()

    # The pivot sorts the labels as strings, which puts 'Under 5' last; move
    # it to the front so the youngest group leads the table.
    if 'Under 5' in age_output.columns:
        first_column = age_output.pop('Under 5')
        age_output.insert(0, 'Under 5', first_column)

    return age_output

In [11]:
# Aggregate the age-group query result to jurisdiction level and write it to CSV.
# Relies on `df` currently holding the age-group query result, not the
# age/sex/ethnicity one that later reuses the same name.
# NOTE(review): this path points at project folder '2023-023 Estimates 2022',
# while every other path in this notebook uses '2023-024 Estimates 2022' — confirm
# which folder is intended.
age_cleaning(df, geo_level='jurisdiction').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-023 Estimates 2022\Data\age\jurisdiction_age_est_2022_01_ind_QA.csv')

In [6]:
# List the columns of the lookup frame, i.e. the values usable as geo_level.
mgra_denorm.columns

Index(['mgra_id', 'mgra', 'census_tract', 'LUZ', 'cpa', 'jurisdiction', 'SRA',
       'region'],
      dtype='object')

# Age Sex Ethnicity

In [4]:
# Run the query stored in age_sex_eth_2.sql against the `estimates` database
# and load the result into `df` (this replaces whatever `df` held before).
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=estimates;'
                    'Trusted_Connection=yes;')

try:
    with open(r'C:\Users\cra\OneDrive - San Diego Association of Governments\QA_Repository\2023\2023-024 Estimates 2022\SQL_Queries\age_sex_eth_2.sql', 'r') as sql_file:
        sql_query = sql_file.read()

    df = pd.read_sql_query(sql_query, conn)
finally:
    # Release the server connection even when reading the file or running the
    # query fails; the connection is not needed once the frame is in memory.
    conn.close()

df

Unnamed: 0,mgra,yr_id,age_group,sex,race,Unnamed: 6
0,16521,2022,20 to 24,Male,"Non-Hispanic, White",14
1,19041,2021,40 to 44,Female,"Non-Hispanic, Asian",5
2,8616,2020,62 to 64,Female,"Non-Hispanic, White",4
3,10795,2022,18 and 19,Female,"Non-Hispanic, Asian",0
4,11577,2022,60 and 61,Female,"Non-Hispanic, White",5
...,...,...,...,...,...,...
23348155,20301,2022,45 to 49,Male,"Non-Hispanic, Other",0
23348156,9741,2021,10 to 14,Female,Hispanic,47
23348157,21011,2022,60 and 61,Female,"Non-Hispanic, Asian",0
23348158,9311,2022,20 to 24,Male,"Non-Hispanic, Hawaiian or Pacific Islander",0


In [9]:
# Check the column labels of the query result. The recorded output shows the
# last column comes back with an empty-string label, which is the column
# age_sex_eth_cleaning pivots on.
df.columns

Index(['mgra', 'yr_id', 'age_group', 'sex', 'race', ''], dtype='object')

In [10]:
def age_sex_eth_cleaning(df, geo_level, lookup=None, value_col=''):
    """Aggregate long-format age/sex/race counts to one geography level.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns ``mgra``, ``yr_id``, ``age_group``,
        ``sex``, ``race`` and the count column named by ``value_col``. Each
        (mgra, yr_id, age_group, sex, race) combination must appear at most
        once, otherwise the pivot raises ``ValueError``.
    geo_level : str
        Column of ``lookup`` to aggregate to (e.g. ``'census_tract'``), or
        ``'mgra'`` to keep the level the data already has.
    lookup : pandas.DataFrame, optional
        Crosswalk holding ``mgra`` and ``geo_level``. When omitted, the
        notebook-level ``mgra_denorm`` frame is used. Not consulted when
        ``geo_level`` is ``'mgra'``.
    value_col : str, optional
        Label of the count column. Defaults to ``''`` because the query result
        in this notebook returns that column without a name.

    Returns
    -------
    pandas.DataFrame
        Indexed by (geo_level, yr_id, age_group, sex) with one column per
        distinct ``race`` value.

    Notes
    -----
    The crosswalk is attached with a left merge, so rows whose ``mgra`` is
    missing from ``lookup`` get a null geography and are left out of the
    grouped totals.
    """
    wide = pd.pivot(df, values=value_col, index=['mgra', 'yr_id', 'age_group', 'sex'], columns=['race'])
    wide.columns.name = ''
    wide = wide.reset_index()

    # 'mgra' is already a column of the pivoted frame, so the crosswalk is
    # only needed for coarser geographies.
    if geo_level != 'mgra':
        if lookup is None:
            lookup = mgra_denorm
        wide = wide.merge(lookup[['mgra', geo_level]], how='left', on='mgra')
        wide = wide.drop('mgra', axis=1)

    return wide.groupby([geo_level, 'yr_id', 'age_group', 'sex']).sum()

In [12]:
# DataFrame.to_csv returns None when it is given a path, so keep the aggregated
# frame in `output` and write it in a separate step; otherwise there is nothing
# to inspect afterwards.
# NOTE(review): the file name spells 'ethnicty'; left as is because other
# files or readers may already depend on it.
output = age_sex_eth_cleaning(df, geo_level='mgra')
output.to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\mgra_age_sex_ethnicty_est_2022_01_ind_QA.csv')

# Preview only the first rows of what was written
output.head()

In [None]:
# Aggregate the age/sex/ethnicity query result to census-tract level and write it to CSV.
age_sex_eth_cleaning(df, geo_level='census_tract').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\census_tract_age_sex_ethnicty_est_2022_01_ind_QA.csv')

In [None]:
# Aggregate the age/sex/ethnicity query result to the 'LUZ' column of mgra_denorm and write it to CSV.
age_sex_eth_cleaning(df, geo_level='LUZ').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\LUZ_age_sex_ethnicty_est_2022_01_ind_QA.csv')

In [None]:
# Aggregate the age/sex/ethnicity query result to the 'cpa' column of mgra_denorm and write it to CSV.
age_sex_eth_cleaning(df, geo_level='cpa').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\cpa_age_sex_ethnicty_est_2022_01_ind_QA.csv')

In [None]:
# Aggregate the age/sex/ethnicity query result to jurisdiction level and write it to CSV.
age_sex_eth_cleaning(df, geo_level='jurisdiction').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\jurisdiction_age_sex_ethnicty_est_2022_01_ind_QA.csv')

In [None]:
# Aggregate the age/sex/ethnicity query result to the constant 'region' column
# (a single group per year/age group/sex) and write it to CSV.
age_sex_eth_cleaning(df, geo_level='region').to_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-024 Estimates 2022\Data\age_sex_ethnicity\region_age_sex_ethnicty_est_2022_01_ind_QA.csv')

In [54]:
# List the columns of the lookup frame, i.e. the values usable as geo_level.
mgra_denorm.columns

Index(['mgra_id', 'mgra', 'census_tract', 'LUZ', 'cpa', 'jurisdiction',
       'region'],
      dtype='object')