Knudson et al., (2021). PyEI: A Python package for ecological inference. Journal of Open Source Software, 6(64), 3397, https://doi.org/10.21105/joss.03397

In [4]:
import pyei
import pandas as pd

In [15]:
# Maps a group label to the population-count column for that group in the
# precinct demographics table. Only these four groups are included; the other
# count columns in that table (AIA_NHSP22, HPI_NHSP22, OTH_NHSP22) are left
# out, so any total computed from this mapping excludes them.
race_columns = {
    'WHITE': 'WHT_NHSP22',
    'BLACK': 'BLK_NHSP22',
    'HISPANIC': 'HSP_POP22',
    'ASIAN': 'ASN_NHSP22',
}

### Functions

In [13]:
def calculate_population_distribution(df, race_columns, columns_to_keep):
    """Compute each group's share of the population as a percentage.

    The denominator is the row-wise sum of the columns named in
    ``race_columns`` (stored as ``TOTAL_POP_RECALCULATED``), not any total
    column already present in ``df``, so the percentages of the listed
    groups sum to 100 for every row with a non-zero total.

    Args:
        df: DataFrame holding one population-count column per group.
            It is modified in place: ``TOTAL_POP_RECALCULATED`` and one
            ``<LABEL>_PERCENT`` column per group are added to it.
        race_columns: Mapping of group label -> count column name in ``df``.
        columns_to_keep: Names of existing columns to carry into the result
            (for example an identifier column). This sequence is not
            modified.

    Returns:
        A DataFrame with the ``columns_to_keep`` columns followed by one
        ``<LABEL>_PERCENT`` column per group, in ``race_columns`` order.
        Values are on a 0-100 scale. Rows whose recalculated total is 0
        yield NaN (0 / 0).

    NOTE(review): PyEI appears to expect group fractions in [0, 1]; confirm
    whether these percentages are rescaled before being passed to a model.
    """
    count_columns = list(race_columns.values())
    df['TOTAL_POP_RECALCULATED'] = df[count_columns].sum(axis=1)

    percent_columns = []
    for race, col in race_columns.items():
        percentage_col = f"{race}_PERCENT"
        df[percentage_col] = (df[col] / df['TOTAL_POP_RECALCULATED']) * 100
        percent_columns.append(percentage_col)

    # Build a new list rather than using ``+=``: the augmented assignment
    # extended the caller's list in place, so calling this function again
    # with the same list selected every percent column twice.
    selected_columns = list(columns_to_keep) + percent_columns

    return df[selected_columns]



In [8]:
# Load the two inputs from JSON files, using paths relative to the working
# directory: the election results and the precinct racial population counts.
sc_election_gov_df = pd.read_json('states/south_carolina/election/sc_election_gov_22.json')
sc_race_df = pd.read_json('states/south_carolina/demographics/south_carolina_precincts_racial_population.json')

In [6]:
# Inspect which columns the election table provides.
print(sc_election_gov_df.columns.values)

['UNIQUE_ID' 'TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN']


In [10]:
# Inspect which columns the demographics table provides.
print(sc_race_df.columns.values)

['UNIQUE_ID' 'NAME' 'Prec_Code' 'CONG_DIST' 'TOT_POP22' 'NHSP_POP22'
 'HSP_POP22' 'WHT_NHSP22' 'BLK_NHSP22' 'AIA_NHSP22' 'ASN_NHSP22'
 'HPI_NHSP22' 'OTH_NHSP22']


In [12]:
# Existing column(s) to carry through into the percentage table; UNIQUE_ID is
# the key later used to join against the election table.
columns_to_keep = ['UNIQUE_ID']

In [16]:
# Compute each group's percentage per row. The call also adds the total and
# *_PERCENT columns to sc_race_df itself; the returned frame holds only the
# kept columns plus the *_PERCENT columns.
sc_race_with_percentages = calculate_population_distribution(sc_race_df, race_columns, columns_to_keep)

In [17]:
# Confirm the percentage table contains only the key and the *_PERCENT columns.
print(sc_race_with_percentages.columns.values)

['UNIQUE_ID' 'WHITE_PERCENT' 'BLACK_PERCENT' 'HISPANIC_PERCENT'
 'ASIAN_PERCENT']


In [18]:
# Compare the row counts of the two tables before joining them on UNIQUE_ID.
print(len(sc_race_with_percentages))
print(len(sc_election_gov_df))

2258
2261


In [19]:
# Left join on UNIQUE_ID: every election row is kept, and rows with no
# matching UNIQUE_ID in the percentage table get NaN in the *_PERCENT columns.
sc_merged_ei_df = pd.merge(sc_election_gov_df, sc_race_with_percentages, on='UNIQUE_ID', how='left')

In [20]:
# Replace every NaN in the merged frame with 0, in place. This applies to all
# columns, not only the *_PERCENT ones.
# NOTE(review): election rows without a demographic match end up with 0% for
# every group, which is indistinguishable from real data downstream. Confirm
# this is intended rather than dropping or flagging those rows.
sc_merged_ei_df.fillna(0, inplace=True)

In [24]:
# Confirm the merged table has the election columns followed by the
# *_PERCENT columns.
print(sc_merged_ei_df.columns.values)

['UNIQUE_ID' 'TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'WHITE_PERCENT'
 'BLACK_PERCENT' 'HISPANIC_PERCENT' 'ASIAN_PERCENT']
