In [17]:
import requests
import json
import pandas as pd
import geopandas as gpd

In [18]:
# Racial Population Data Querying, Cleaning, and Formatting

api_key = ' '
api_key_end = f'&key={api_key}'

population_scales = {"State":"in=state:","Counties":"for=county:*","CongressionalDistricts":"for=congressional%20district:*"}
population_scales_savedas = {"State":"estimated_population","Counties":"counties_estimated_populations","CongressionalDistricts":"cd_estimated_total_populations"}
population_scale_keys = {"State":"","Counties":"counties_","CongressionalDistricts":"cd_"}

states = [24, 45]
stateNames = {24:"Maryland",45:"South Carolina"}
stateNamesLower = {24:"maryland",45:"south_carolina"}

state_race_identifiers = {"White":"S2101_C01_014E", 
                       "BlackAfricanAmerican":"S2101_C01_015E",
                       "AmericanIndianAlaskaNative":"S2101_C01_016E",
                       "Asian":"S2101_C01_017E",
                       "NativeHawaiianOtherPacificIslander":"S2101_C01_018E",
                       "HispanicOrLatino":"S2101_C01_021E",
                       "Other":"S2101_C01_019E"}
urls = []
for state in states:
  for population_scale in population_scales:
    for race_key in state_race_identifiers:
      if population_scale == "State":
        url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{state_race_identifiers[race_key]}&for=state:{state}{api_key_end}'
      elif population_scale == "Counties":
        url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{state_race_identifiers[race_key]}&{population_scales["State"]}{state}&{population_scales["Counties"]}{api_key_end}'
      elif population_scale == "CongressionalDistricts":
        url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{state_race_identifiers[race_key]}&{population_scales["State"]}{state}&{population_scales["CongressionalDistricts"]}{api_key_end}'
      # url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{state_race_identifiers[race_key]}&in=state:{state}&for=county:*&key={api_key}'
      resp = requests.get(url)
      if not resp:
        print(f'No response for {race_key} in {state}')
      if resp.status_code == 200:
        df = pd.DataFrame(resp.json())
        df = df.drop([0])

        if population_scale == "State":
          df.insert(1, "Year", [2022] * len(df), False)
          df = df.rename(columns = {0:"State"})
          df = df.rename(columns = {2:"State FIPS Code"})
        if population_scale == "Counties":
          df.insert(1, "Year", [2022] * len(df), False)
          df = df.rename(columns = {0:"County Name"})
          df['County Name']  = df['County Name'].str.replace(f', {stateNames[state]}', '')
          df = df.rename(columns = {1:"Estimated Population"})
          df = df.rename(columns = {2:"State FIPS Code"})
          df = df.rename(columns = {3:"County Number"})
        if population_scale == "CongressionalDistricts":
          df = df.rename(columns = {0:"District Name"})
          df['District Name']  = df['District Name'].str.replace(f' (118th Congress), {stateNames[state]}', '')
          df.insert(1, "Congress", [118] * len(df), False)
          df.insert(1, "Year", [2022] * len(df), False)
          df = df.rename(columns = {2:"State FIPS Code"})
          df = df.rename(columns = {3:"District Number"})
        df = df.rename(columns = {1:"Estimated Population"})
        df.to_json(f'output2/{stateNamesLower[state]}_{population_scale_keys[population_scale]}estimated_populations_{race_key}.json', orient="records", indent=4)
        print(df)
      else:
        print(f'Response code {resp.status_code} for {race_key} in {state}')

JSONDecodeError: Expecting value: line 2 column 1 (char 1)

In [None]:
import requests
import json
import pandas as pd

# Pure Population Data Querying, Cleaning and Formatting

api_key = 'd80bd509a22b6f1f21706609ad0b09afe39b2759'
api_key_end = f'&key={api_key}'

total_population_identifier = "S0101_C01_001E"
voting_age_population_identifier = "S2902_C01_001E" # Citizens over 18
voting_age_population_identifiers = {"18-29":"S2902_C01_002E",
                                     "30-44":"S2902_C01_003E",
                                     "45-64":"S2902_C01_004E",
                                     "65+":"S2902_C01_005E"}
current_voting_age_population_key = "65+"
population_scales = {"State":"in=state:","Counties":"for=county:*","CongressionalDistricts":"for=congressional%20district:*"}
population_scale_paths = {"State":"","Counties":"counties","CongressionalDistricts":"congressional_districts"}
population_scales_savedas = {"State":"estimated_population","Counties":"counties_estimated_populations","CongressionalDistricts":"cd_estimated_total_populations"}
voting_age_population_scales_savedas = {"State":"estimated_voting_age_population_65plus","Counties":"counties_estimated_voting_age_populations_65plus","CongressionalDistricts":"cd_estimated_total_voting_age_populations_65plus"}


states = [24, 45]
stateNames = {24:"Maryland",45:"South Carolina"}
stateNamesLower = {24:"maryland",45:"south_carolina"}

urls = []
for state in states:
  for population_scale in population_scales:
    # print(f'Getting data for {state},{population_scale}')
    # &in=state:{state}&for=congressional%20district:*&key={api_key}
    if population_scale == "State":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{voting_age_population_identifiers[current_voting_age_population_key]}&for=state:{state}{api_key_end}'
    elif population_scale == "Counties":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{voting_age_population_identifiers[current_voting_age_population_key]}&{population_scales["State"]}{state}&{population_scales["Counties"]}{api_key_end}'
    elif population_scale == "CongressionalDistricts":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{voting_age_population_identifiers[current_voting_age_population_key]}&{population_scales["State"]}{state}&{population_scales["CongressionalDistricts"]}{api_key_end}'
    resp = requests.get(url)
    if not resp:
      print(f'No response for {population_scale} in {state}')
    if resp.status_code == 200:
      df = pd.DataFrame(resp.json())
      df = df.drop([0])
      if population_scale == "State":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"State"})
        df = df.rename(columns = {2:"State FIPS Code"})
      if population_scale == "Counties":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"County Name"})
        df['County Name']  = df['County Name'].str.replace(f', {stateNames[state]}', '')
        df = df.rename(columns = {2:"State FIPS Code"})
        df = df.rename(columns = {3:"County Number"})
      if population_scale == "CongressionalDistricts":
        df = df.rename(columns = {0:"District Name"})
        df['District Name']  = df['District Name'].str.replace(f' (118th Congress), {stateNames[state]}', '')
        df.insert(1, "Congress", [118] * len(df), False)
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {2:"State"})
        df = df.rename(columns = {3:"District Number"})
      df = df.rename(columns = {1:"Estimated Voting Age Population 65+"})
      print(df)
      # df.to_json(f'output/{stateNamesLower[state]}_{voting_age_population_scales_savedas[population_scale]}.json', orient="records", indent=4)
      df.to_json(f'states/{stateNamesLower[state]}/{population_scale_paths[population_scale]}/{stateNamesLower[state]}_{voting_age_population_scales_savedas[population_scale]}.json', orient="records", indent=4)
    else:
      print(f'Response code {resp.status_code} for {population_scale} in {state}')

In [None]:
import requests
import json
import pandas as pd

# Median Income Data for last 12 months Querying, Cleaning and Formatting

api_key = 'd80bd509a22b6f1f21706609ad0b09afe39b2759'
api_key_end = f'&key={api_key}'

median_income_identifier_over_18 = "S2101_C05_023E"
median_household_income_identifier = "K201902_001E" # Estimate of median household income

income_bracket_identifiers = {"0-19999":"K201901_002E",
                                     "20000-39999":"K201901_003E",
                                     "40000-59999":"K201901_004E",
                                     "60000-99999":"K201901_005E",
                                     "100000-149000":"K201901_006E",
                                     "150000-199999":"K201901_007E",
                                     "200000+":"K201901_008E"}
current_income_bracket_key = "200000+"

household_income_identifier = "K201901_001E" # Total Household income in past 12 months acc for 2022 inflation

population_scales = {"State":"in=state:","Counties":"for=county:*","CongressionalDistricts":"for=congressional%20district:*"}
population_scales_savedas = {"State":"estimated_median_household_income","Counties":"counties_estimated_median_household_income","CongressionalDistricts":"cd_estimated_median_household_income"}

states = [24, 45]
stateNames = {24:"Maryland",45:"South Carolina"}
stateNamesLower = {24:"maryland",45:"south_carolina"}

urls = []
for state in states:
  for population_scale in population_scales:
    # print(f'Getting data for {state},{population_scale}')
    # &in=state:{state}&for=congressional%20district:*&key={api_key}

    # https://api.census.gov/data/2022/acs/acs5/subject?

    if population_scale == "State":
      # url = f'https://api.census.gov/data/2022/acs/acsse?get=NAME,{income_bracket_identifiers[current_income_bracket_key]}&for=state:{state}{api_key_end}'
      url = f'https://api.census.gov/data/2022/acs/acsse?get=NAME,{median_household_income_identifier}&for=state:{state}{api_key_end}'
    elif population_scale == "Counties":
      url = f'https://api.census.gov/data/2022/acs/acsse?get=NAME,{median_household_income_identifier}&{population_scales["State"]}{state}&{population_scales["Counties"]}{api_key_end}'
    elif population_scale == "CongressionalDistricts":
      url = f'https://api.census.gov/data/2022/acs/acsse?get=NAME,{median_household_income_identifier}&{population_scales["State"]}{state}&{population_scales["CongressionalDistricts"]}{api_key_end}'
    resp = requests.get(url)
    if not resp:
      print(f'No response for {population_scale} in {state}')
    if resp.status_code == 200:
      df = pd.DataFrame(resp.json())
      df = df.drop([0])
      if population_scale == "State":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"State"})
        df = df.rename(columns = {2:"State FIPS Code"})
      if population_scale == "Counties":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"County Name"})
        df['County Name']  = df['County Name'].str.replace(f', {stateNames[state]}', '')
        df = df.rename(columns = {2:"State FIPS Code"})
        df = df.rename(columns = {3:"County Number"})
      if population_scale == "CongressionalDistricts":
        df = df.rename(columns = {0:"District Name"})
        df['District Name']  = df['District Name'].str.replace(f' (118th Congress), {stateNames[state]}', '')
        df.insert(1, "Congress", [118] * len(df), False)
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {2:"State"})
        df = df.rename(columns = {3:"District Number"})
      df = df.rename(columns = {1:"Estimated Median Household Income"})
      print(df)
      df.to_json(f'output2/{stateNamesLower[state]}_{population_scales_savedas[population_scale]}.json', orient="records", indent=4)
    else:
      print(f'Response code {resp.status_code} for {population_scale} in {state}')

      State  Year Estimated Median Household Income State FIPS Code
1  Maryland  2022                             94991              24
               County Name  Year Estimated Median Household Income  \
1          Allegany County  2022                             46913   
2      Anne Arundel County  2022                            113125   
3         Baltimore County  2022                             86526   
4           Calvert County  2022                            120097   
5          Caroline County  2022                             64815   
6           Carroll County  2022                            104942   
7             Cecil County  2022                             86292   
8           Charles County  2022                            115880   
9        Dorchester County  2022                             51399   
10        Frederick County  2022                            119122   
11          Garrett County  2022                             58607   
12          Harford Coun

In [None]:
import requests
import json
import pandas as pd

# Poverty or Below for last 12 months Querying, Cleaning and Formatting

api_key = 'd80bd509a22b6f1f21706609ad0b09afe39b2759'
api_key_end = f'&key={api_key}'

poverty_identifier_over_18 = "S2101_C05_036E"
population_scales = {"State":"in=state:","Counties":"for=county:*","CongressionalDistricts":"for=congressional%20district:*"}
population_scales_savedas = {"State":"estimated_poverty_population_over_18","Counties":"counties_estimated_poverty_population_over_18","CongressionalDistricts":"cd_estimated_poverty_population_over_18"}

states = [24, 45]
stateNames = {24:"Maryland",45:"South Carolina"}
stateNamesLower = {24:"maryland",45:"south_carolina"}

urls = []
for state in states:
  for population_scale in population_scales:
    # print(f'Getting data for {state},{population_scale}')
    # &in=state:{state}&for=congressional%20district:*&key={api_key}
    if population_scale == "State":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{poverty_identifier_over_18}&for=state:{state}{api_key_end}'
    elif population_scale == "Counties":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{poverty_identifier_over_18}&{population_scales["State"]}{state}&{population_scales["Counties"]}{api_key_end}'
    elif population_scale == "CongressionalDistricts":
      url = f'https://api.census.gov/data/2022/acs/acs5/subject?get=NAME,{poverty_identifier_over_18}&{population_scales["State"]}{state}&{population_scales["CongressionalDistricts"]}{api_key_end}'
    resp = requests.get(url)
    if not resp:
      print(f'No response for {population_scale} in {state}')
    if resp.status_code == 200:
      df = pd.DataFrame(resp.json())
      df = df.drop([0])
      if population_scale == "State":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"State"})
        df = df.rename(columns = {2:"State FIPS Code"})
      if population_scale == "Counties":
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {0:"County Name"})
        df['County Name']  = df['County Name'].str.replace(f', {stateNames[state]}', '')
        df = df.rename(columns = {2:"State FIPS Code"})
        df = df.rename(columns = {3:"County Number"})
      if population_scale == "CongressionalDistricts":
        df = df.rename(columns = {0:"District Name"})
        df['District Name']  = df['District Name'].str.replace(f' (118th Congress), {stateNames[state]}', '')
        df.insert(1, "Congress", [118] * len(df), False)
        df.insert(1, "Year", [2022] * len(df), False)
        df = df.rename(columns = {2:"State"})
        df = df.rename(columns = {3:"District Number"})
      df = df.rename(columns = {1:"Population in Poverty"})
      print(df)
      df.to_json(f'output/{stateNamesLower[state]}_{population_scales_savedas[population_scale]}.json', orient="records", indent=4)
    else:
      print(f'Response code {resp.status_code} for {population_scale} in {state}')

Precinct Level Demographics Data

# Block Level
## South Carolina 
### Get Economic Population Data 

In [31]:
df_sc_race = pd.read_csv('raw/census_block/race/sc_race_2022_bg/sc_race_2022_bg.csv')
print(df_sc_race.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'TOT_POP22',
       'NHSP_POP22', 'HSP_POP22', 'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22',
       'ASN_NHSP22', 'HPI_NHSP22', 'OTH_NHSP22', '2OM_NHSP22', 'BLK_ALL22',
       'AIA_ALL22', 'ASN_ALL22', 'HPI_ALL22', 'OTH_ALL22'],
      dtype='object')


In [32]:
state_population = df_sc_race['TOT_POP22'].sum()

print(f"State Population: {state_population}")

State Population: 5142750


In [39]:
columns_to_sum = ['HSP_POP22', 'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22', 'OTH_NHSP22', '2OM_NHSP22']
total_sum_from_columns = df_sc_race[columns_to_sum].sum().sum()
print(f"Total Sum from Columns: {total_sum_from_columns}")

Total Sum from Columns: 5142750


In [42]:
df_sc_race['OTH_NHSP22'] = df_sc_race['OTH_NHSP22'] + df_sc_race['2OM_NHSP22']

In [43]:
selected_columns = [
    "GEOID",
    "STATEFP",
    "STATE",
    "COUNTYFP",
    "COUNTY",
    "TOT_POP22",
    "NHSP_POP22",
    "HSP_POP22",
    "WHT_NHSP22",
    "BLK_NHSP22",
    "AIA_NHSP22",
    "ASN_NHSP22",
    "HPI_NHSP22",
    "OTH_NHSP22",
]


In [44]:
df_sc_race_filtered = df_sc_race[selected_columns]


In [45]:
path = "processed_individual/sc_race_block.csv"
df_sc_race_filtered.to_csv(path, index=False)

In [74]:
df_md_race = pd.read_csv('raw/census_block/race/md_race_2022_bg/md_race_2022_bg.csv')
print(df_md_race.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'TOT_POP22',
       'NHSP_POP22', 'HSP_POP22', 'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22',
       'ASN_NHSP22', 'HPI_NHSP22', 'OTH_NHSP22', '2OM_NHSP22', 'BLK_ALL22',
       'AIA_ALL22', 'ASN_ALL22', 'HPI_ALL22', 'OTH_ALL22'],
      dtype='object')


In [75]:
df_md_race['OTH_NHSP22'] = df_md_race['OTH_NHSP22'] + df_md_race['2OM_NHSP22']

In [76]:
df_md_race_filtered = df_md_race[selected_columns]

In [77]:
path = "processed_individual/md_race_block.csv"
df_md_race_filtered.to_csv(path, index=False)

# Preprocess State Summary

## South Carolina:

In [47]:
sc_precincts_gdf = gpd.read_file('states/south_carolina/geodata/south_carolina_precincts.geojson')

In [48]:
print(sc_precincts_gdf.columns.values)

['UNIQUE_ID' 'COUNTYFP' 'County' 'NAME' 'Prec_Code' 'G22A1NO' 'G22A1YES'
 'G22A2NO' 'G22A2YES' 'G22AGRCNEL' 'G22AGRGEDM' 'G22AGROWRI' 'G22AGRRWEA'
 'G22ATGOWRI' 'G22ATGRWIL' 'G22COMOWRI' 'G22COMRECK' 'G22GOVDCUN'
 'G22GOVLREE' 'G22GOVOWRI' 'G22GOVRMCM' 'G22SOSDBUT' 'G22SOSOWRI'
 'G22SOSRHAM' 'G22SUPAELL' 'G22SUPDELL' 'G22SUPGMIC' 'G22SUPOWRI'
 'G22SUPRWEA' 'G22TREAWOR' 'G22TREOWRI' 'G22TRERLOF' 'G22USSDMAT'
 'G22USSOWRI' 'G22USSRSCO' 'GCON01AODD' 'GCON01DAND' 'GCON01OWRI'
 'GCON01RMAC' 'GCON02DLAR' 'GCON02OWRI' 'GCON02RWIL' 'GCON03OWRI'
 'GCON03RDUN' 'GCON04OWRI' 'GCON04RTIM' 'GCON05DHUN' 'GCON05GGAI'
 'GCON05OWRI' 'GCON05RNOR' 'GCON06DCLY' 'GCON06OWRI' 'GCON06RBUC'
 'GCON07DSCO' 'GCON07OWRI' 'GCON07RFRY' 'GSL001OWRI' 'GSL001RWHI'
 'GSL002OWRI' 'GSL002RSAN' 'GSL003OWRI' 'GSL003RCAR' 'GSL004OWRI'
 'GSL004RHIO' 'GSL005OWRI' 'GSL005RCOL' 'GSL006OWRI' 'GSL006RCRO'
 'GSL007DSAL' 'GSL007OWRI' 'GSL007RWES' 'GSL008ATOD' 'GSL008DMAC'
 'GSL008OWRI' 'GSL008RCHA' 'GSL009DPOL' 'GSL009OWRI' 'GSL009R

In [54]:
state_population = sc_precincts_gdf['TOT_POP22'].sum()

total_votes = sc_precincts_gdf['TOT_VOT_DEM_REP'].sum()
votes_dem = sc_precincts_gdf['TOT_DEM'].sum()
votes_rep = sc_precincts_gdf['TOT_REP'].sum()
percent_dem = (votes_dem / total_votes) * 100
percent_rep = (votes_rep / total_votes) * 100

total_state_pop = sc_precincts_gdf['TOT_POP22'].sum()

white_pop = sc_precincts_gdf['WHT_NHSP22'].sum()
black_pop = sc_precincts_gdf['BLK_NHSP22'].sum()
hispanic_pop = sc_precincts_gdf['HSP_POP22'].sum()
asian_pop = sc_precincts_gdf['ASN_NHSP22'].sum()
native_american_pop = sc_precincts_gdf['AIA_NHSP22'].sum()
islander_pop = sc_precincts_gdf['HPI_NHSP22'].sum()
other_pop = sc_precincts_gdf['OTH_NHSP22'].sum()
percent_white = (white_pop / total_state_pop) * 100
percent_black = (black_pop / total_state_pop) * 100
percent_hispanic = (hispanic_pop / total_state_pop) * 100
percent_asian = (asian_pop / total_state_pop) * 100
percent_native_american = (native_american_pop / total_state_pop) * 100
percent_islander = (islander_pop / total_state_pop) * 100
percent_other = (other_pop / total_state_pop) * 100

In [None]:
print(f"State Population: {state_population}")
print(f"Total Votes: {total_votes}")
print(f"Total Votes for Democrats: {votes_dem}")
print(f"Total Votes for Republicans: {votes_rep}")
print(f"Percentage of Votes for Democrats: {percent_dem}")
print(f"Percentage of Votes for Republicans: {percent_rep}")

State Population: 5142750.0
Total Votes: 14328779.0
Total Votes for Democrats: 3550310.0
Total Votes for Republicans: 10778469.0
Percentage of Votes for Democrats: 24.777477550599393
Percentage of Votes for Republicans: 75.22252244940061


In [55]:
print(f"Total State Population: {total_state_pop}")
print(f"Total White Population: {white_pop}")
print(f"Total Black Population: {black_pop}")
print(f"Total Hispanic Population: {hispanic_pop}")
print(f"Total Asian Population: {asian_pop}")
print(f"Total Native American Population: {native_american_pop}")
print(f"Total Islander Population: {islander_pop}")
print(f"Total Other Population: {other_pop}")
print(f"Percentage of White Population: {percent_white}")
print(f"Percentage of Black Population: {percent_black}")
print(f"Percentage of Hispanic Population: {percent_hispanic}")
print(f"Percentage of Asian Population: {percent_asian}")
print(f"Percentage of Native American Population: {percent_native_american}")
print(f"Percentage of Islander Population: {percent_islander}")
print(f"Percentage of Other Population: {percent_other}")


Total State Population: 5142750.0
Total White Population: 3240171.0
Total Black Population: 1316074.0
Total Hispanic Population: 318875.0
Total Asian Population: 84972.0
Total Native American Population: 10304.0
Total Islander Population: 2443.0
Total Other Population: 169911.0
Percentage of White Population: 63.0046375966166
Percentage of Black Population: 25.590860920713627
Percentage of Hispanic Population: 6.200476398813865
Percentage of Asian Population: 1.6522677555782412
Percentage of Native American Population: 0.20035972971659133
Percentage of Islander Population: 0.047503767439599436
Percentage of Other Population: 3.3038938311214814


In [56]:
income_columns = ['0_35K', '35K_60K', '60K-100K', '100K_125K', '125K_150K', '150K_MORE']
income_totals = sc_precincts_gdf[income_columns].sum()
total_household_population = sc_precincts_gdf['TOT_HOUS22'].sum()
income_distribution = (income_totals / total_household_population) * 100

In [57]:
print(f"Total Household Population: {total_household_population}")
print(f"Income Distribution: {income_distribution}")


Total Household Population: 2023085.0
Income Distribution: 0_35K        27.430978
35K_60K      20.147992
60K-100K     22.765232
100K_125K     9.222796
125K_150K     6.171268
150K_MORE    14.261734
dtype: float64


In [62]:
income_ranges = ['0_35K', '35K_60K', '60K-100K', '100K_125K', '125K_150K', '150K_MORE']
income_distribution_data = dict(zip(income_ranges, income_distribution.values))

In [68]:
sc_summary_data = {
    "NAME": ["South Carolina"],
    "TOT_POP": [state_population],
    "TOT_WHITE": [white_pop],
    "TOT_BLACK": [black_pop],
    "TOT_HISP": [hispanic_pop],
    "TOT_ASIAN": [asian_pop],
    "TOT_NATIVE": [native_american_pop],
    "TOT_ISLANDER": [islander_pop],
    "TOT_OTHER": [other_pop],
    "DEM_VOT_DIS": [percent_dem],
    "REP_VOT_DIS": [percent_rep],
    # "WHITE_DIS": [percent_white],
    # "BLACK_DIS": [percent_black],
    # "HISP_DIS": [percent_hispanic],
    # "ASIAN_DIS": [percent_asian],
    # "NATIVE_DIS": [percent_native_american],
    # "ISLAND_DIS": [percent_islander],
    # "OTHER_DIS": [percent_other],
    "URBAN_DIS": [40.0],
    "SUBURBAN_DIS": [26.3],
    "RURAL_DIS": [33.7],
    "TOT_HOUS": [total_household_population],
    "HOUS_INCOME_DIS": [income_distribution_data],
    "POV_LEVEL": [32470]
}

In [69]:
sc_summary_df = pd.DataFrame(sc_summary_data)


In [70]:
sc_summary_df.to_json('states/south_carolina/summary/south_carolina_summary.json', orient='records', lines=True, indent=4)

## Maryland

In [80]:
md_precincts_gdf = gpd.read_file('states/maryland/geodata/maryland_precincts.geojson')

In [81]:
# Calculate state population
state_population = md_precincts_gdf['TOT_POP22'].sum()

# Calculate total votes and votes by party
total_votes = md_precincts_gdf['TOT_VOT_DEM_REP'].sum()
votes_dem = md_precincts_gdf['TOT_DEM'].sum()
votes_rep = md_precincts_gdf['TOT_REP'].sum()

# Calculate percentage of votes by party
percent_dem = (votes_dem / total_votes) * 100
percent_rep = (votes_rep / total_votes) * 100

# Calculate total state population
total_state_pop = md_precincts_gdf['TOT_POP22'].sum()

# Calculate population by racial/ethnic groups
white_pop = md_precincts_gdf['WHT_NHSP22'].sum()
black_pop = md_precincts_gdf['BLK_NHSP22'].sum()
hispanic_pop = md_precincts_gdf['HSP_POP22'].sum()
asian_pop = md_precincts_gdf['ASN_NHSP22'].sum()
native_american_pop = md_precincts_gdf['AIA_NHSP22'].sum()
islander_pop = md_precincts_gdf['HPI_NHSP22'].sum()
other_pop = md_precincts_gdf['OTH_NHSP22'].sum()

# Calculate percentage of population by racial/ethnic groups
percent_white = (white_pop / total_state_pop) * 100
percent_black = (black_pop / total_state_pop) * 100
percent_hispanic = (hispanic_pop / total_state_pop) * 100
percent_asian = (asian_pop / total_state_pop) * 100
percent_native_american = (native_american_pop / total_state_pop) * 100
percent_islander = (islander_pop / total_state_pop) * 100
percent_other = (other_pop / total_state_pop) * 100


KeyError: 'TOT_VOT_DEM_REP'