In [3]:
import pandas as pd

In [6]:
# County lookup table: read the tab-separated FIPS file, then add
#   STCOUNTY - '06' followed by the county code zero-padded to three digits
#   county   - the county name in title case
ca_counties = pd.read_csv(
    '/Users/aolsen/Box/Modeling and Surveys/Projects/Regional Growth Forecast Update/Forecasts/ca_county_fips.csv',
    sep="\t",
).assign(
    STCOUNTY=lambda frame: frame['FIPS Code'].apply(
        lambda code: f'06{code:03d}'),
    county=lambda frame: frame['County Name'].str.title(),
)

In [66]:
# County FIPS code ('06' + three-digit county code) -> council-of-governments
# region label. Codes that are absent from this mapping come out as NaN when a
# county series is mapped through it.
#
# Entries currently disabled (not part of the mapping):
#   '06053', '06069', '06087' -> 'AMBAG'
#   '06077' -> 'SJCOG', '06099' -> 'STANCOG', '06047' -> 'MCAG'
#
# The pairs are listed in the order the mapping has always been displayed in.
cog_regions = dict([
    ('06001', 'ABAG/MTC'),  # Alameda
    ('06013', 'ABAG/MTC'),  # Contra Costa
    ('06041', 'ABAG/MTC'),  # Marin
    ('06055', 'ABAG/MTC'),  # Napa
    ('06075', 'ABAG/MTC'),  # San Francisco
    ('06073', 'SANDAG'),    # San Diego
    ('06081', 'ABAG/MTC'),  # San Mateo
    ('06085', 'ABAG/MTC'),  # Santa Clara
    ('06095', 'ABAG/MTC'),  # Solano
    ('06097', 'ABAG/MTC'),  # Sonoma
    ('06025', 'SCAG'),      # Imperial
    ('06037', 'SCAG'),      # Los Angeles
    ('06071', 'SCAG'),      # San Bernardino
    ('06059', 'SCAG'),      # Orange
    ('06065', 'SCAG'),      # Riverside
    ('06111', 'SCAG'),      # Ventura
    ('06067', 'SACOG'),     # Sacramento
    ('06101', 'SACOG'),     # Sutter
    ('06113', 'SACOG'),     # Yolo
    ('06115', 'SACOG'),     # Yuba
])

cog_regions

{'06001': 'ABAG/MTC',
 '06013': 'ABAG/MTC',
 '06041': 'ABAG/MTC',
 '06055': 'ABAG/MTC',
 '06075': 'ABAG/MTC',
 '06073': 'SANDAG',
 '06081': 'ABAG/MTC',
 '06085': 'ABAG/MTC',
 '06095': 'ABAG/MTC',
 '06097': 'ABAG/MTC',
 '06025': 'SCAG',
 '06037': 'SCAG',
 '06071': 'SCAG',
 '06059': 'SCAG',
 '06065': 'SCAG',
 '06111': 'SCAG',
 '06067': 'SACOG',
 '06101': 'SACOG',
 '06113': 'SACOG',
 '06115': 'SACOG'}

In [67]:
def generate_labels(bin_year, pre_string, post_string, suffix=None):
    """
    Build one text label per bin from a sequence of bin breakpoints.

    Interior bins are labelled ``'<lower> to <upper - 1>'``. The first bin is
    labelled ``pre_string`` followed by its upper breakpoint, and the last bin
    is labelled ``post_string`` followed by its lower breakpoint. When only two
    breakpoints are given, the single bin ends up with the ``post_string`` form
    because the last-label assignment runs after the first-label one.

    Args:
        bin_year (sequence): Ordered breakpoints; ``len(bin_year) - 1`` labels
            are produced. Must contain at least two values.
        pre_string (str): Prefix for the first label, e.g. ``'under '``.
        post_string (str): Prefix for the last label, e.g. ``'More than '``.
        suffix (str, optional): If truthy, appended (after a space) to EVERY
            label, not just the last one.

    Returns:
        list[str]: The labels, in bin order.

    Raises:
        ValueError: If fewer than two breakpoints are supplied, since no bin
            can be formed.

    Example:
        >>> generate_labels([0, 5, 10, 111], 'under ', 'More than ', suffix='years')
        ['under 5 years', '5 to 9 years', 'More than 10 years']
    """
    if len(bin_year) < 2:
        raise ValueError(
            'bin_year needs at least two breakpoints to define a bin')

    # One label per adjacent pair of breakpoints; the upper bound is exclusive,
    # hence the "- 1" in the displayed range.
    labels = [f'{lower} to {upper - 1}'
              for lower, upper in zip(bin_year[:-1], bin_year[1:])]

    # The two open-ended bins get their own wording.
    labels[0] = f'{pre_string}{bin_year[1]}'
    labels[-1] = f'{post_string}{bin_year[-2]}'

    # The suffix applies to all labels.
    if suffix:
        labels = [f'{label} {suffix}' for label in labels]

    return labels


# Age breakpoints every five years from 0 through 85, closed off by 111 so the
# final bin covers everything from 85 upward.
bin_age = [*range(0, 86, 5), 111]

# Generate a label per bin, replacing the generated wording of the last one
# with '85 years and over'.
labels_age = generate_labels(
    bin_age, 'under ', 'More than ', suffix='years')[:-1] + ['85 years and over']
labels_age

['under 5 years',
 '5 to 9 years',
 '10 to 14 years',
 '15 to 19 years',
 '20 to 24 years',
 '25 to 29 years',
 '30 to 34 years',
 '35 to 39 years',
 '40 to 44 years',
 '45 to 49 years',
 '50 to 54 years',
 '55 to 59 years',
 '60 to 64 years',
 '65 to 69 years',
 '70 to 74 years',
 '75 to 79 years',
 '80 to 84 years',
 '85 years and over']

# Load age, race data

In [81]:
import pandas as pd


def _sum_years_long(population_wide, category):
    """Sum the 2001-2050 year columns by county and `category`, in long form.

    Returns a frame with columns ``county``, `category`, ``year`` and
    ``value``, where ``value`` is the summed figure multiplied by 1000 and
    rounded to an integer.
    """
    year_columns = list(range(2001, 2051))
    return (
        population_wide.groupby(['county', category])[year_columns]
        .sum()
        .stack()
        .mul(1000)  # presumably the workbook reports thousands — TODO confirm
        .round(0)
        .astype(int)
        .reset_index(name='value')
        # The stacked year level has no name, so reset_index calls it 'level_2'.
        .rename(columns={'level_2': 'year'})
    )


def process_data(file_path, mnemonic_slug, export_data=False):
    """
    Load a population workbook and build long-form age and race tables.

    Relies on the notebook-level objects ``ca_counties``, ``cog_regions``,
    ``bin_age`` and ``labels_age`` being defined before it is called.

    Args:
        file_path (str): Path to the input Excel workbook. The first four rows
            are skipped when reading.
        mnemonic_slug (str): Slug inserted into the output workbook filename
            (``pba50_summary_{mnemonic_slug}.xlsx``).
        export_data (bool): When True, also write per-year / COG region /
            county summaries for age bins and race to an Excel workbook.
            Defaults to False.

    Returns:
        tuple: ``(fbp_age, fbp_race)``.
            ``fbp_age`` has columns county, age, year, value, cog_region,
            age_bin; ``fbp_race`` has columns county, race_ethn, year, value,
            cog_region.
    """
    # Load data
    population = pd.read_excel(file_path, skiprows=4)

    # Filter unwanted regions and switch to the column names used downstream
    fbp_drop_regions = ['Other US', 'Other CA', 'All Regions']
    population = population.loc[
        ~population.Region.isin(fbp_drop_regions)
    ].rename(
        columns={'Region': 'county', 'Race': 'race_ethn', 'Gender': 'gender', 'Ages': 'age'})

    # County name -> COG region label, built once and shared by both tables
    county_to_cog = ca_counties.set_index('county').STCOUNTY.map(cog_regions)

    # Get age data
    fbp_age = _sum_years_long(population, 'age')
    # Keep the first run of up to three digits in the age text as the integer age
    fbp_age['age'] = fbp_age.age.str.extract(
        r'(\d{1,3})', expand=False).astype(int)
    fbp_age['cog_region'] = fbp_age.county.map(county_to_cog)
    fbp_age['age_bin'] = pd.cut(fbp_age.age, bins=bin_age,
                                labels=labels_age, include_lowest=True,
                                right=False)

    # Get race data
    fbp_race = _sum_years_long(population, 'race_ethn')
    fbp_race['cog_region'] = fbp_race.county.map(county_to_cog)

    if export_data:
        # Write summary outputs to disk; the context manager closes the
        # workbook even if one of the writes fails.
        output_path = f'/Users/aolsen/Box/Modeling and Surveys/Projects/Regional Growth Forecast Update/Forecasts/harvested/pba50_summary_{mnemonic_slug}.xlsx'
        with pd.ExcelWriter(output_path) as wb:
            age_summary_by_yr_cnty = fbp_age.groupby(
                ['year', 'cog_region', 'county', 'age_bin'], observed=True).value.sum().reset_index()
            age_summary_by_yr_cnty.to_excel(
                wb, sheet_name='age_summary_by_yr_cnty')

            race_summary_by_yr_cnty = fbp_race.groupby(
                ['year', 'cog_region', 'county', 'race_ethn'], observed=True).value.sum().reset_index()
            race_summary_by_yr_cnty.to_excel(
                wb, sheet_name='race_summary_by_yr_cnty')

    return fbp_age, fbp_race

In [75]:
# Input workbooks from the REMI model: the FBP and RC variants of the same
# county-level population by age and race export.
FC_FBP_POP = (
    '/Users/aolsen/Box/Modeling and Surveys/Projects/Regional Growth Forecast Update/'
    'PBA50/REMI/A1 hhtot 1 subreg pop age race COUNTY FBP.xlsx'
)
FC_RC_POP = (
    '/Users/aolsen/Box/Modeling and Surveys/Projects/Regional Growth Forecast Update/'
    'PBA50/REMI/A1 hhtot 1 subreg pop age race COUNTY RC.xlsx'
)


In [82]:
# Process the FBP workbook and write its summary workbook to disk.
mnemonic_slug = 'FBP50'
file_path = FC_FBP_POP
fbp_age, fbp_race = process_data(
    file_path=file_path,
    mnemonic_slug=mnemonic_slug,
    export_data=True,
)

  warn("Workbook contains no default style, apply openpyxl's default")


In [84]:
# Process the RC workbook and write its summary workbook to disk.
mnemonic_slug = 'RC'
file_path = FC_RC_POP
rs_age, rs_race = process_data(
    file_path=file_path,
    mnemonic_slug=mnemonic_slug,
    export_data=True,
)

  warn("Workbook contains no default style, apply openpyxl's default")


In [80]:
# Display the long-form race table returned by process_data for the FBP workbook.
fbp_race

Unnamed: 0,county,race_ethn,year,value,cog_region
0,Alameda,Black-NonHispanic,2001,210753,ABAG/MTC
1,Alameda,Black-NonHispanic,2002,204827,ABAG/MTC
2,Alameda,Black-NonHispanic,2003,199786,ABAG/MTC
3,Alameda,Black-NonHispanic,2004,195124,ABAG/MTC
4,Alameda,Black-NonHispanic,2005,190509,ABAG/MTC
...,...,...,...,...,...
2995,Ventura,White-NonHispanic,2046,309740,SCAG
2996,Ventura,White-NonHispanic,2047,307397,SCAG
2997,Ventura,White-NonHispanic,2048,305006,SCAG
2998,Ventura,White-NonHispanic,2049,302586,SCAG
