In [1]:
import os 
import pandas as pd
import numpy as np
import glob
import pyodbc
import gc

In [2]:
def standard_format_sql_download(sql_file_name, geo_level, estimates_version):
    conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=estimates;'
                    'Trusted_Connection=yes;')

    with open(rf'sql_queries\{sql_file_name}.sql', 'r') as sql_file:
        sql_query = sql_file.read()

    sql_query = sql_query.format(geo_level=geo_level, estimates_version=estimates_version)
    df =  pd.read_sql_query(sql_query, conn)
    df = df.rename(columns={'geo_level':geo_level})
    return df

In [3]:
def standard_format_pivot(df, geo_level):
    output = pd.pivot(df, values='value', index=[geo_level, 'yr_id'], columns=['breakdown_value'])
    output.columns.name = ''
    output = output.reset_index()
    return output

In [8]:
def export_to_j_drive(df, geo_level, estimates_version):
    df.to_excel(rf'J:\DataScience\DataQuality\QAQC\estimates_automation\aggregated_data\{geo_level}_est_{estimates_version}_ind_QA.xlsx', index=False)

In [15]:
def standard_format_output_creation(sql_file_name, geo_level, estimates_version):
    sql_output = standard_format_sql_download(sql_file_name=sql_file_name, geo_level=geo_level, estimates_version=estimates_version)
    
    pivoted_output = standard_format_pivot(df=sql_output, geo_level=geo_level)

    export_to_j_drive(df=pivoted_output, geo_level=geo_level, estimates_version=estimates_version)

    return pivoted_output

In [16]:
test = standard_format_output_creation(sql_file_name='age_query', geo_level='luz', estimates_version='2022_01')
test

Unnamed: 0,luz,yr_id,10 to 14,15 to 17,18 and 19,20 to 24,25 to 29,30 to 34,35 to 39,40 to 44,...,50 to 54,55 to 59,60 and 61,62 to 64,65 to 69,70 to 74,75 to 79,80 to 84,85 and Older,Under 5
0,2.0,2021,,,,,,,,,...,,,,,,,,,,
1,7.0,2021,,,,,,,,,...,,,776.0,,,,,,,
2,9.0,2022,,,,,,,,,...,,,976.0,,,,,,,
3,12.0,2020,,,,,,,,,...,,,,,,,,,,2783.0
4,12.0,2022,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,235.0,2021,,1.0,,,,,,,...,,,,,,,,,,
88,238.0,2020,,,,,,,,,...,,,,515.0,812.0,,,,,
89,238.0,2021,,,,,1682.0,,,,...,,,,,,,,,,
90,242.0,2022,,,,,,,,,...,,,,,,,,,,


In [20]:
def age_manipulations(df, geo_level):
    '''This function puts the columns in the correct order'''
    return df[[geo_level, 'yr_id', 'Under 5', '5 to 9', '10 to 14', '15 to 17']]

In [21]:
table_information = {
    'age': {
            'sql_file_name':'age_query',
            'manipulation_function': age_manipulations
    }
}

In [22]:
table_information['age']['manipulation_function'](df=test, geo_level='luz')

Unnamed: 0,luz,yr_id,Under 5,5 to 9,10 to 14
0,2.0,2021,,234.0,
1,7.0,2021,,,
2,9.0,2022,,,
3,12.0,2020,2783.0,,
4,12.0,2022,,2245.0,
...,...,...,...,...,...
87,235.0,2021,,,
88,238.0,2020,,,
89,238.0,2021,,,
90,242.0,2022,,,
