In [1]:
import os
import pandas as pd
import numpy as np
import pyodbc

# Create Builder Functions

In [35]:
# Import Data
def import_mgra_based_data(path):
    """Load the MGRA-based input CSV and strip its own geography columns.

    Parameters
    ----------
    path : str, path-like or file-like
        Source of the CSV data; handed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents without the ``taz`` and ``LUZ`` columns.

    Raises
    ------
    KeyError
        If ``taz`` or ``LUZ`` is not a column of the file.
    """
    # Roll-ups are driven by the mgra denorm table, so the geography
    # columns shipped inside the input file are not needed.
    unwanted_columns = ['taz', 'LUZ']
    return pd.read_csv(path).drop(columns=unwanted_columns)

In [27]:
# Download sql_data
def download_mgra_denorm_data(geo_level):
    """Run the mgra denorm query and return the mgra -> ``geo_level`` crosswalk.

    The query text is read from ``sql_queries\\mgra_denorm.sql`` (relative to
    the current working directory) and executed against the ``estimates``
    database using a trusted (Windows-authenticated) connection.

    Parameters
    ----------
    geo_level : str
        Name of the geography column to keep next to ``mgra``. It must be a
        column of the query result. Passing ``'mgra'`` returns the single
        ``mgra`` column.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``mgra`` and ``geo_level``.

    Raises
    ------
    FileNotFoundError
        If the SQL file cannot be found.
    KeyError
        If ``mgra`` or ``geo_level`` is not in the query result.
    """
    # Read the query before connecting so a missing file never leaves a
    # database connection open.
    with open(r'sql_queries\mgra_denorm.sql', 'r') as sql_file:
        sql_query = sql_file.read()

    conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                          'Server=DDAMWSQL16.sandag.org;'
                          'Database=estimates;'
                          'Trusted_Connection=yes;')
    try:
        denorm = pd.read_sql_query(sql_query, conn)
    finally:
        # Close explicitly: the connection is otherwise left open until it
        # happens to be garbage collected.
        conn.close()

    # De-duplicate so geo_level='mgra' does not select the 'mgra' column
    # twice (duplicate labels break the later merge on 'mgra').
    wanted_columns = list(dict.fromkeys(['mgra', geo_level]))
    return denorm[wanted_columns]

In [29]:
# Merge and Aggregate Data
def merge_and_aggregate(mgra_input_file, mgra_denorm, geo_level):
    """Attach the input data to the crosswalk and sum it per geography.

    Parameters
    ----------
    mgra_input_file : pandas.DataFrame
        MGRA-level input data.
    mgra_denorm : pandas.DataFrame
        Crosswalk whose rows drive the (left) join.
    geo_level : str
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        One row per ``geo_level`` value (used as the index) holding the sum
        of every remaining column.
    """
    # No explicit key is given, so pandas joins on every column name the two
    # frames have in common.
    merged = mgra_denorm.merge(mgra_input_file, how='left')

    # The mgra identifier must not be summed, unless it is the grouping key.
    if geo_level == 'mgra':
        return merged.groupby(geo_level).sum()
    return merged.drop(columns='mgra').groupby(geo_level).sum()

In [38]:
def hhs_adjustment(df):
    """Recompute ``hhs`` as ``hhp / hh`` and return the adjusted dataframe.

    The input frame is left untouched; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``hhp`` and ``hh``. An existing ``hhs``
        column is overwritten in the result; otherwise ``hhs`` is appended.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``hhs`` set to ``hhp / hh``. Rows where ``hh`` is
        zero get NaN instead of ``inf``, since the ratio is undefined there.

    Raises
    ------
    KeyError
        If ``hhp`` or ``hh`` is missing.
    """
    # Mask zero denominators so x/0 gives NaN (like 0/0) rather than +/-inf.
    households = df['hh'].where(df['hh'] != 0)
    # assign() builds a new frame, so the caller's object is not mutated.
    return df.assign(hhs=df['hhp'] / households)

In [43]:
def export_data(output_folder_path, geo_level, version, df):
    """Write ``df`` to an Excel workbook inside ``output_folder_path``.

    The file is named ``mgra_based_input_{geo_level}_{version}.xlsx``.

    Parameters
    ----------
    output_folder_path : str
        Folder that receives the workbook.
    geo_level : str
        Geography level, used in the file name.
    version : str
        Version label, used in the file name.
    df : pandas.DataFrame
        Data to export via ``df.to_excel``.
    """
    file_name = f"mgra_based_input_{geo_level}_{version}.xlsx"
    # os.path.join supplies the platform's separator; the previous "\m"
    # literal was an invalid escape sequence and only worked on Windows.
    df.to_excel(os.path.join(output_folder_path, file_name))

In [44]:
def create_mgra_denorm_table(mgra_denorm_path, geo_level, output_folder_path, version):
    '''
    Build the MGRA-based input table rolled up to one geography level.

    Steps: read the input CSV, download the mgra -> geo_level crosswalk,
    merge and sum per geography, recompute hhs, then optionally export.

    Parameters
    ----------
    mgra_denorm_path : str
        Path of the MGRA-based input CSV (despite the name, this is the file
        handed to import_mgra_based_data). Write Windows paths as raw
        strings, e.g. r'T:\\folder\\file.csv'.
    geo_level : str
        Geography column to aggregate to.
    output_folder_path : str or False
        Folder for the Excel export (raw string as well). Pass False (or any
        other empty value such as None or '') to skip the export.
    version : str
        Version label used in the exported file name.

    Returns
    -------
    pandas.DataFrame
        The aggregated table with the adjusted hhs column.
    '''
    mgra_input = import_mgra_based_data(path=mgra_denorm_path)

    mgra_denorm = download_mgra_denorm_data(geo_level=geo_level)

    aggregated = merge_and_aggregate(mgra_input_file=mgra_input,
                                     mgra_denorm=mgra_denorm,
                                     geo_level=geo_level)

    adjusted = hhs_adjustment(aggregated)

    # Truthiness check instead of `!= False`: None or '' now also mean
    # "no export" rather than reaching export_data with an unusable folder.
    if output_folder_path:
        export_data(output_folder_path=output_folder_path,
                    geo_level=geo_level,
                    version=version,
                    df=adjusted)

    return adjusted

# Create Outputs

In [51]:
# Geography levels to produce; each name must be a column of the mgra denorm
# query result.
geo_levels = [
    'census_tract',
    'cpa',
    'jurisdiction',
    'sra',
    'luz',
    'region',
]

# MGRA-based input CSV that gets rolled up.
mgra_denorm_path = r'T:\socioec\Current_Projects\SR15\S0\version3\abm_csv\mgra15_based_input_2022_01.csv'

# Folder that receives one Excel workbook per geography level.
output_folder_path = r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-028 MGRA15 Input Table 2022\2023-028-02\Data'

# Label embedded in every output file name.
version = '2023-028-02'

In [53]:
# Build (and export) one aggregated table per geography level; the returned
# dataframe is not kept here.
for geo_level in geo_levels:
    create_mgra_denorm_table(
        mgra_denorm_path=mgra_denorm_path,
        geo_level=geo_level,
        output_folder_path=output_folder_path,
        version=version,
    )
    print(f"{geo_level} is completed.")

census_tract is completed.
cpa is completed.
jurisdiction is completed.
sra is completed.
luz is completed.
region is completed.
