In [413]:
import pandas as pd
import geopandas as gpd
import json

## Preprocess SC Election result from Precincts

### Function and declaration

In [None]:
# NOTE(review): this cell has no execution count and `sc_state_election` is not
# referenced anywhere else in the visible notebook -- likely a leftover; confirm
# before deleting.
sc_state_election = gpd.read_file('data/sc.geojson')

In [481]:
def process_precinct_votes(precinct_gdf, columns_to_use, columns_to_ignore):
    """
    Total Republican and Democratic votes per precinct and label each precinct's lean.

    A vote column's party is taken from the 7th character of its name
    (index 6), e.g. ``GCON01RMAC`` -> ``R`` and ``G22GOVDCUN`` -> ``D``.
    Columns with any other party code, or with names shorter than 7
    characters, are neither totalled nor included in the result.

    Args:
        precinct_gdf (pd.DataFrame): The DataFrame containing precinct vote data.
            It is copied, not modified.
        columns_to_use (list): The list of columns to consider for vote calculations.
        columns_to_ignore (list): Non-vote columns (identifiers, geometry, ...) to
            carry through unchanged; they come first in the result.

    Returns:
        pd.DataFrame: A new DataFrame with, in order: ``columns_to_ignore``,
        ``TOT_REP``, ``TOT_DEM``, ``TOT_VOT`` (``TOT_REP + TOT_DEM`` only, i.e. a
        two-party total), ``LEAN`` (``'Republican'``, ``'Democratic'``, or
        ``'Unknown'`` when neither side is ahead, including 0-0), then the
        Republican vote columns followed by the Democratic vote columns.
    """
    # Work on a copy: the caller's frame must not gain the TOT_*/LEAN columns,
    # and writing into a frame that is itself a slice of another frame would
    # trigger pandas' SettingWithCopyWarning.
    result = precinct_gdf.copy()
    result['TOT_REP'] = 0
    result['TOT_DEM'] = 0

    republican_columns = []
    democratic_columns = []

    for col in columns_to_use:
        if len(col) < 7:
            continue  # too short to carry a party code at index 6
        party_code = col[6]
        if party_code == 'R':  # Republican
            result['TOT_REP'] += result[col]
            republican_columns.append(col)
        elif party_code == 'D':  # Democrat
            result['TOT_DEM'] += result[col]
            democratic_columns.append(col)

    result['TOT_VOT'] = result['TOT_REP'] + result['TOT_DEM']

    # Vectorized lean: start from 'Unknown' and overwrite wherever one party is
    # strictly ahead. Ties (0-0 included) keep the default.
    lean = pd.Series('Unknown', index=result.index, dtype=object)
    lean.loc[result['TOT_REP'] > result['TOT_DEM']] = 'Republican'
    lean.loc[result['TOT_DEM'] > result['TOT_REP']] = 'Democratic'
    result['LEAN'] = lean

    filtered_columns = (
        list(columns_to_ignore)
        + ['TOT_REP', 'TOT_DEM', 'TOT_VOT', 'LEAN']
        + republican_columns
        + democratic_columns
    )
    return result[filtered_columns]


### Main script

#### Preprocess congressional district election

In [694]:
# Precinct boundary layer (UNIQUE_ID, NAME, Prec_Code, CONG_DIST, geometry).
sc_precincts_gdf = gpd.read_file('states/south_carolina/geodata/south_carolina_precincts.geojson')
# Precinct-level shapefile carrying the per-candidate GCON* vote columns.
sc_election_gdf = gpd.read_file('raw/precincts/sc_2022_gen_prec/sc_2022_gen_cong_prec/sc_2022_gen_cong_prec.shp')

In [695]:
# Inspect the precinct layer's columns.
print(sc_precincts_gdf.columns)

Index(['UNIQUE_ID', 'NAME', 'Prec_Code', 'CONG_DIST', 'geometry'], dtype='object')


In [696]:
# Preview the first precinct rows to see the UNIQUE_ID format.
print(sc_precincts_gdf.head())

                 UNIQUE_ID          NAME Prec_Code CONG_DIST  \
0  DORCHESTER_PRECINCT_084  BEECH HILL 2       084        01   
1  DORCHESTER_PRECINCT_072       CYPRESS       072        01   
2  DORCHESTER_PRECINCT_036      DELEMARS       036        01   
3  DORCHESTER_PRECINCT_035       GIVHANS       035        01   
4  DORCHESTER_PRECINCT_094     GIVHANS 2       094        01   

                                            geometry  
0  MULTIPOLYGON (((-80.20876 32.94947, -80.20806 ...  
1  MULTIPOLYGON (((-80.23595 33.07929, -80.23603 ...  
2  POLYGON ((-80.29900 32.95621, -80.29874 32.956...  
3  POLYGON ((-80.37569 33.02291, -80.37544 33.022...  
4  POLYGON ((-80.34814 33.06418, -80.34802 33.064...  


In [697]:
# Build the precinct key as <County>_PRECINCT_<Prec_Code>. Spaces in the county
# name are replaced with underscores, the same way the state-wide (governor)
# results build this key, so both SC outputs share one key format.
sc_election_gdf['UNIQUE_ID'] = (
    sc_election_gdf['County'].str.replace(' ', '_')
    + '_PRECINCT_'
    + sc_election_gdf['Prec_Code'].astype(str)
)

In [698]:
# Preview the election rows, including the rebuilt UNIQUE_ID.
print(sc_election_gdf.head())

                 UNIQUE_ID COUNTYFP      County      Precinct Prec_Code  \
0  DORCHESTER_PRECINCT_084      035  DORCHESTER  BEECH HILL 2       084   
1  DORCHESTER_PRECINCT_072      035  DORCHESTER       CYPRESS       072   
2  DORCHESTER_PRECINCT_036      035  DORCHESTER      DELEMARS       036   
3  DORCHESTER_PRECINCT_035      035  DORCHESTER       GIVHANS       035   
4  DORCHESTER_PRECINCT_094      035  DORCHESTER     GIVHANS 2       094   

  CONG_DIST  GCON01AODD  GCON01DAND  GCON01OWRI  GCON01RMAC  ...  GCON05GGAI  \
0        01          10         286           3         540  ...           0   
1        01          16         479           2         682  ...           0   
2        01           2          69           2          85  ...           0   
3        01           4         144           1         296  ...           0   
4        01           8         189           0         360  ...           0   

   GCON05OWRI  GCON05RNOR  GCON06DCLY  GCON06OWRI  GCON06RBUC  GCON0

In [699]:
# List every column so identifier columns can be told apart from vote columns.
print(sc_election_gdf.columns.values)  

['UNIQUE_ID' 'COUNTYFP' 'County' 'Precinct' 'Prec_Code' 'CONG_DIST'
 'GCON01AODD' 'GCON01DAND' 'GCON01OWRI' 'GCON01RMAC' 'GCON02DLAR'
 'GCON02OWRI' 'GCON02RWIL' 'GCON03OWRI' 'GCON03RDUN' 'GCON04OWRI'
 'GCON04RTIM' 'GCON05DHUN' 'GCON05GGAI' 'GCON05OWRI' 'GCON05RNOR'
 'GCON06DCLY' 'GCON06OWRI' 'GCON06RBUC' 'GCON07DSCO' 'GCON07OWRI'
 'GCON07RFRY' 'geometry']


In [700]:
# Identifier and geometry columns: excluded from the vote totals but carried
# through to the processed frame.
columns_to_ignore = ['UNIQUE_ID', 'CONG_DIST','COUNTYFP', 'County', 'Prec_Code', 'Precinct', 'geometry']

In [701]:
# Everything that is not an identifier column is treated as a vote column.
columns_to_use = [col for col in sc_election_gdf.columns if col not in columns_to_ignore]

In [702]:
# Check the selected vote columns (GCON + district + party letter + candidate code).
print(columns_to_use)

['GCON01AODD', 'GCON01DAND', 'GCON01OWRI', 'GCON01RMAC', 'GCON02DLAR', 'GCON02OWRI', 'GCON02RWIL', 'GCON03OWRI', 'GCON03RDUN', 'GCON04OWRI', 'GCON04RTIM', 'GCON05DHUN', 'GCON05GGAI', 'GCON05OWRI', 'GCON05RNOR', 'GCON06DCLY', 'GCON06OWRI', 'GCON06RBUC', 'GCON07DSCO', 'GCON07OWRI', 'GCON07RFRY']


In [703]:
# Result holds the identifier columns, TOT_REP/TOT_DEM/TOT_VOT/LEAN and only the
# Republican/Democratic candidate columns.
sc_election_processed_gdf = process_precinct_votes(sc_election_gdf, columns_to_use, columns_to_ignore)

In [704]:
# Spot-check the computed totals and lean.
print(sc_election_processed_gdf[['TOT_REP', 'TOT_DEM', 'TOT_VOT', 'LEAN']].head())

   TOT_REP  TOT_DEM  TOT_VOT        LEAN
0      540      286      826  Republican
1      682      479     1161  Republican
2       85       69      154  Republican
3      296      144      440  Republican
4      360      189      549  Republican


In [705]:
# Number of precincts per lean label.
print(sc_election_processed_gdf[['LEAN']].value_counts())

LEAN      
Republican    1672
Democratic     597
Unknown          8
Name: count, dtype: int64


In [706]:
# Confirm which columns survived processing.
print(sc_election_processed_gdf.columns.values)

['UNIQUE_ID' 'CONG_DIST' 'COUNTYFP' 'County' 'Prec_Code' 'Precinct'
 'geometry' 'TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'GCON01RMAC' 'GCON02RWIL'
 'GCON03RDUN' 'GCON04RTIM' 'GCON05RNOR' 'GCON06RBUC' 'GCON07RFRY'
 'GCON01DAND' 'GCON02DLAR' 'GCON05DHUN' 'GCON06DCLY' 'GCON07DSCO']


In [707]:
# NOTE(review): identical to the preceding cell and produces the same output;
# this cell can be removed.
print(sc_election_processed_gdf.columns.values)

['UNIQUE_ID' 'CONG_DIST' 'COUNTYFP' 'County' 'Prec_Code' 'Precinct'
 'geometry' 'TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'GCON01RMAC' 'GCON02RWIL'
 'GCON03RDUN' 'GCON04RTIM' 'GCON05RNOR' 'GCON06RBUC' 'GCON07RFRY'
 'GCON01DAND' 'GCON02DLAR' 'GCON05DHUN' 'GCON06DCLY' 'GCON07DSCO']


In [708]:
# Drop the geometry column so the remaining records can be written with json.dump.
# NOTE(review): the frame is reassigned to itself, so re-running this cell raises
# KeyError because 'geometry' is already gone.
sc_election_processed_gdf = sc_election_processed_gdf.drop(columns='geometry')

In [709]:
# Write the processed congressional results as a JSON array, one object per precinct row.
with open('states/south_carolina/election/sc_election.json', 'w') as json_file:
    json.dump(sc_election_processed_gdf.to_dict(orient='records'), json_file, indent=4)

#### Preprocess state-wide gov election

In [836]:
# Precinct-level shapefile carrying the state-wide G22* contest columns
# (governor, attorney general, US Senate, ...).
sc_gov_election_df = gpd.read_file('raw/precincts/sc_2022_gen_prec/sc_2022_gen_st_prec/sc_2022_gen_st_prec.shp')

In [839]:
# Precinct key: <County>_PRECINCT_<Prec_Code>, with spaces in the county name
# turned into underscores.
sc_gov_election_df['UNIQUE_ID'] = (
    sc_gov_election_df['County']
    .str.replace(' ', '_')
    .str.cat(sc_gov_election_df['Prec_Code'].astype(str), sep='_PRECINCT_')
)

In [841]:
# List all contest columns available in the state-wide file.
print(sc_gov_election_df.columns.values)

['UNIQUE_ID' 'COUNTYFP' 'County' 'Precinct' 'Prec_Code' 'G22A1NO'
 'G22A1YES' 'G22A2NO' 'G22A2YES' 'G22AGRCNEL' 'G22AGRGEDM' 'G22AGROWRI'
 'G22AGRRWEA' 'G22ATGOWRI' 'G22ATGRWIL' 'G22COMOWRI' 'G22COMRECK'
 'G22GOVDCUN' 'G22GOVLREE' 'G22GOVOWRI' 'G22GOVRMCM' 'G22SOSDBUT'
 'G22SOSOWRI' 'G22SOSRHAM' 'G22SUPAELL' 'G22SUPDELL' 'G22SUPGMIC'
 'G22SUPOWRI' 'G22SUPRWEA' 'G22TREAWOR' 'G22TREOWRI' 'G22TRERLOF'
 'G22USSDMAT' 'G22USSOWRI' 'G22USSRSCO' 'geometry']


In [842]:
# Keep the identifier columns plus the governor-race (G22GOV*) vote columns only.
columns_to_keep = ['UNIQUE_ID', 'COUNTYFP', 'County', 'Precinct', 'Prec_Code']
columns_to_keep += [col for col in sc_gov_election_df.columns if col.startswith('G22GOV')]
# .copy() makes the subset an independent frame, so later column assignments on
# it are unambiguous and do not raise SettingWithCopyWarning.
sc_gov_election_df = sc_gov_election_df[columns_to_keep].copy()

In [843]:
# Verify that only identifiers and G22GOV* columns remain.
print(sc_gov_election_df.columns.values)

['UNIQUE_ID' 'COUNTYFP' 'County' 'Precinct' 'Prec_Code' 'G22GOVDCUN'
 'G22GOVLREE' 'G22GOVOWRI' 'G22GOVRMCM']


In [844]:
# Identifier columns for the governor frame (no CONG_DIST or geometry here).
# This rebinds the `columns_to_ignore` name used in the congressional section.
columns_to_ignore = ['UNIQUE_ID','COUNTYFP', 'County', 'Prec_Code', 'Precinct']

In [845]:
# The remaining columns are the governor-race vote columns.
columns_to_use = [col for col in sc_gov_election_df.columns if col not in columns_to_ignore]

In [846]:
# Check the selected governor vote columns.
print(columns_to_use)

['G22GOVDCUN', 'G22GOVLREE', 'G22GOVOWRI', 'G22GOVRMCM']


In [847]:
# Same processing as the congressional results; the party letter sits at index 6
# of the G22GOV* names as well (e.g. G22GOVRMCM -> R).
sc_gov_election_processed_df = process_precinct_votes(sc_gov_election_df, columns_to_use, columns_to_ignore)

In [848]:
# Spot-check the computed totals and lean.
print(sc_gov_election_processed_df[['TOT_REP', 'TOT_DEM', 'TOT_VOT', 'LEAN']].head())

   TOT_REP  TOT_DEM  TOT_VOT        LEAN
0      718      257      975  Republican
1      224      404      628  Democratic
2      336      238      574  Republican
3      316      121      437  Republican
4      599      123      722  Republican


In [849]:
# Number of precincts per lean label.
print(sc_gov_election_processed_df[['LEAN']].value_counts())

LEAN      
Republican    1526
Democratic     727
Unknown          8
Name: count, dtype: int64


In [850]:
# Confirm which columns survived processing.
print(sc_gov_election_processed_df.columns.values)

['UNIQUE_ID' 'COUNTYFP' 'County' 'Prec_Code' 'Precinct' 'TOT_REP'
 'TOT_DEM' 'TOT_VOT' 'LEAN' 'G22GOVRMCM' 'G22GOVDCUN']


In [855]:
# Drop the per-candidate governor columns; the identifier columns, party totals
# and LEAN are what remain for export.
sc_gov_election_processed_df = sc_gov_election_processed_df.drop(columns=[ 'G22GOVRMCM', 'G22GOVDCUN'])

In [856]:
# Write the processed governor results as a JSON array, one object per precinct row.
with open('states/south_carolina/election/sc_election_gov_22.json', 'w') as json_file:
    json.dump(sc_gov_election_processed_df.to_dict(orient='records'), json_file, indent=4)

## MD election preprocessing

In [678]:
# Maryland precinct boundary layer (NAME, NUMBER, UNIQUE_ID, geometry).
# NOTE(review): UNIQUE_ID values in this layer carry a leading row-number prefix
# (e.g. 0_HOWARD_PRECINCT_06-001), while the election keys built below have none
# (e.g. HOWARD_PRECINCT_05-023) -- confirm how the two are matched downstream.
md_precincts_gdf = gpd.read_file('states/maryland/geodata/maryland_precincts.geojson')


In [583]:
# Preview the precinct layer to see the NAME / NUMBER / UNIQUE_ID formats.
print(md_precincts_gdf.head())

                     NAME  NUMBER                 UNIQUE_ID  \
0  HOWARD PRECINCT 06-001  06-001  0_HOWARD_PRECINCT_06-001   
1  HOWARD PRECINCT 05-023  05-023  1_HOWARD_PRECINCT_05-023   
2  HOWARD PRECINCT 05-018  05-018  2_HOWARD_PRECINCT_05-018   
3  HOWARD PRECINCT 05-017  05-017  3_HOWARD_PRECINCT_05-017   
4  HOWARD PRECINCT 05-020  05-020  4_HOWARD_PRECINCT_05-020   

                                            geometry  
0  POLYGON Z ((-76.83025 39.14757 0.00000, -76.83...  
1  POLYGON Z ((-76.89107 39.22616 0.00000, -76.89...  
2  POLYGON Z ((-76.88639 39.20934 0.00000, -76.88...  
3  POLYGON Z ((-76.88670 39.20810 0.00000, -76.88...  
4  POLYGON Z ((-76.91361 39.14963 0.00000, -76.91...  


In [774]:
# Load the precinct-level US House general-election results. The file is not
# Maryland-only (the preview shows CALIFORNIA rows), so it is filtered by
# state_po afterwards.
md_election_df = pd.read_csv('raw/precincts/HOUSE_precinct_general.csv')

In [775]:
# Preview the raw long-format rows (one row per precinct, party and voting mode).
print(md_election_df.head())

                    precinct    office party_detailed party_simplified  \
0  061110097164_003026019037  US HOUSE       DEMOCRAT         DEMOCRAT   
1  061110097164_003026019037  US HOUSE     REPUBLICAN       REPUBLICAN   
2  061110097164_003026019037  US HOUSE       DEMOCRAT         DEMOCRAT   
3  061110097164_003026019037  US HOUSE     REPUBLICAN       REPUBLICAN   
4  061110097168_003026019037  US HOUSE       DEMOCRAT         DEMOCRAT   

           mode  votes county_name  county_fips jurisdiction_name  \
0      ABSENTEE     32     VENTURA       6111.0           VENTURA   
1      ABSENTEE      1     VENTURA       6111.0           VENTURA   
2  NOT ABSENTEE      1     VENTURA       6111.0           VENTURA   
3  NOT ABSENTEE      3     VENTURA       6111.0           VENTURA   
4      ABSENTEE    303     VENTURA       6111.0           VENTURA   

   jurisdiction_fips  ...       state  special writein  state_po state_fips  \
0             6111.0  ...  CALIFORNIA    False   False       

In [776]:
# Keep only Maryland rows. .copy() gives an independent frame, so the columns
# assigned to it in later cells do not raise SettingWithCopyWarning.
md_election_df = md_election_df[md_election_df['state_po'] == 'MD'].copy()

In [777]:
# List the available columns of the Maryland subset.
print(md_election_df.columns.values)

['precinct' 'office' 'party_detailed' 'party_simplified' 'mode' 'votes'
 'county_name' 'county_fips' 'jurisdiction_name' 'jurisdiction_fips'
 'candidate' 'district' 'dataverse' 'year' 'stage' 'state' 'special'
 'writein' 'state_po' 'state_fips' 'state_cen' 'state_ic' 'date'
 'readme_check' 'magnitude']


In [778]:
# Row count per county; also shows which county names contain spaces or apostrophes.
print(md_election_df['county_name'].value_counts())

county_name
BALTIMORE CITY     6365
MONTGOMERY         5510
PRINCE GEORGE'S    4545
BALTIMORE          4140
ANNE ARUNDEL       2925
HOWARD             2440
FREDERICK          1880
HARFORD            1335
WASHINGTON         1325
ALLEGANY            925
WICOMICO            810
CARROLL             655
CHARLES             645
DORCHESTER          600
ST MARY'S           540
GARRETT             475
CECIL               420
CALVERT             345
SOMERSET            345
WORCESTER           300
TALBOT              180
QUEEN ANNE'S        165
KENT                150
CAROLINE            120
Name: count, dtype: int64


In [808]:
# Precinct key: <COUNTY>_PRECINCT_<precinct code>, with spaces in the county
# name turned into underscores.
md_election_df['NAME'] = (
    md_election_df['county_name']
    .str.replace(' ', '_')
    .str.cat(md_election_df['precinct'].astype(str), sep='_PRECINCT_')
)

# Move NAME to the front; all other columns keep their relative order.
columns_order = ['NAME', *(col for col in md_election_df.columns if col != 'NAME')]
md_election_df = md_election_df[columns_order]

In [809]:
# Preview the rows with the new NAME key in the first column.
print(md_election_df.head())    

                               NAME precinct    office party_detailed  \
52046      WICOMICO_PRECINCT_01-001   01-001  US HOUSE       DEMOCRAT   
52047    DORCHESTER_PRECINCT_01-001   01-001  US HOUSE       DEMOCRAT   
52048      SOMERSET_PRECINCT_01-001   01-001  US HOUSE       DEMOCRAT   
52049      CAROLINE_PRECINCT_01-001   01-001  US HOUSE       DEMOCRAT   
52050  QUEEN_ANNE'S_PRECINCT_01-001   01-001  US HOUSE       DEMOCRAT   

      party_simplified          mode  votes   county_name  county_fips  \
52046         DEMOCRAT  2ND ABSENTEE      6      WICOMICO      24045.0   
52047         DEMOCRAT  2ND ABSENTEE     36    DORCHESTER      24019.0   
52048         DEMOCRAT  2ND ABSENTEE     37      SOMERSET      24039.0   
52049         DEMOCRAT  2ND ABSENTEE     43      CAROLINE      24011.0   
52050         DEMOCRAT  2ND ABSENTEE     55  QUEEN ANNE'S      24035.0   

      jurisdiction_name  ...  special writein  state_po state_fips  state_cen  \
52046          WICOMICO  ...    Fal

In [810]:
# Spot-check one precinct: it has several rows per party, one per voting mode.
print(md_election_df[md_election_df['NAME'] == 'HOWARD_PRECINCT_05-023'])

                         NAME precinct    office party_detailed  \
79781  HOWARD_PRECINCT_05-023   05-023  US HOUSE       DEMOCRAT   
79782  HOWARD_PRECINCT_05-023   05-023  US HOUSE       DEMOCRAT   
79783  HOWARD_PRECINCT_05-023   05-023  US HOUSE       DEMOCRAT   
79784  HOWARD_PRECINCT_05-023   05-023  US HOUSE       DEMOCRAT   
79785  HOWARD_PRECINCT_05-023   05-023  US HOUSE       DEMOCRAT   
79786  HOWARD_PRECINCT_05-023   05-023  US HOUSE     REPUBLICAN   
79787  HOWARD_PRECINCT_05-023   05-023  US HOUSE     REPUBLICAN   
79788  HOWARD_PRECINCT_05-023   05-023  US HOUSE     REPUBLICAN   
79789  HOWARD_PRECINCT_05-023   05-023  US HOUSE     REPUBLICAN   
79790  HOWARD_PRECINCT_05-023   05-023  US HOUSE     REPUBLICAN   

      party_simplified          mode  votes county_name  county_fips  \
79781         DEMOCRAT  2ND ABSENTEE    198      HOWARD      24027.0   
79782         DEMOCRAT      ABSENTEE    467      HOWARD      24027.0   
79783         DEMOCRAT  EARLY VOTING    288   

In [811]:
# NOTE(review): the recorded output already lists a 'variable' column, which is
# only created in a later cell -- the notebook was executed out of order.
# Re-run top to bottom to refresh this output.
print(md_election_df.columns.values)

['NAME' 'precinct' 'office' 'party_detailed' 'party_simplified' 'mode'
 'votes' 'county_name' 'county_fips' 'jurisdiction_name'
 'jurisdiction_fips' 'candidate' 'district' 'dataverse' 'year' 'stage'
 'state' 'special' 'writein' 'state_po' 'state_fips' 'state_cen'
 'state_ic' 'date' 'readme_check' 'magnitude' 'variable']


In [812]:
# Preview the fields that feed the vote-column name, plus the write-in flag.
print(md_election_df[['NAME','office', 'party_detailed', 'candidate', 'writein', 'district', 'votes']].head())


                               NAME    office party_detailed  candidate  \
52046      WICOMICO_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52047    DORCHESTER_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52048      SOMERSET_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52049      CAROLINE_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52050  QUEEN_ANNE'S_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   

       writein  district  votes  
52046    False         1      6  
52047    False         1     36  
52048    False         1     37  
52049    False         1     43  
52050    False         1     55  


In [813]:
# Drop write-in rows. `!= True` (rather than `~`) also keeps rows whose writein
# value is missing. .copy() gives an independent frame so the 'variable' column
# assigned to it later does not raise SettingWithCopyWarning.
md_election_df = md_election_df[md_election_df['writein'] != True].copy()

In [814]:
# Preview the same fields after the write-in rows were removed.
print(md_election_df[['NAME','office', 'party_detailed', 'candidate', 'district', 'votes']].head())

                               NAME    office party_detailed  candidate  \
52046      WICOMICO_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52047    DORCHESTER_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52048      SOMERSET_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52049      CAROLINE_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   
52050  QUEEN_ANNE'S_PRECINCT_01-001  US HOUSE       DEMOCRAT  MIA MASON   

       district  votes  
52046         1      6  
52047         1     36  
52048         1     37  
52049         1     43  
52050         1     55  


In [815]:
# Build a vote-column name per row:
#   'GCON' + 2-digit district + first letter of the party + candidate initials,
#   e.g. district 1 / DEMOCRAT / MIA MASON -> GCON01DMM.
# process_precinct_votes reads the party from index 6 of this name, so any party
# whose name starts with 'D' or 'R' is counted as Democratic / Republican.
# NOTE(review): the lambda calls x.split(), so a missing (NaN) candidate would
# raise AttributeError -- confirm the column has no gaps.
md_election_df['variable'] = (
    'GCON' + 
    md_election_df['district'].astype(str).str.zfill(2) + 
    md_election_df['party_detailed'].str[0] + 
    md_election_df['candidate'].apply(lambda x: ''.join([name[0] for name in x.split()]))
)

In [816]:
# Keep only the columns needed to pivot votes by precinct and candidate variable.
md_election_df_filtered = md_election_df[['NAME', 'precinct', 'county_name', 'county_fips', 'variable', 'votes', 'district']]
# NOTE(review): prints the whole frame; a .head() preview would keep the output short.
print(md_election_df_filtered)

                               NAME precinct   county_name  county_fips  \
52046      WICOMICO_PRECINCT_01-001   01-001      WICOMICO      24045.0   
52047    DORCHESTER_PRECINCT_01-001   01-001    DORCHESTER      24019.0   
52048      SOMERSET_PRECINCT_01-001   01-001      SOMERSET      24039.0   
52049      CAROLINE_PRECINCT_01-001   01-001      CAROLINE      24011.0   
52050  QUEEN_ANNE'S_PRECINCT_01-001   01-001  QUEEN ANNE'S      24035.0   
...                             ...      ...           ...          ...   
89131     FREDERICK_PRECINCT_26-002   26-002     FREDERICK      24021.0   
89132     FREDERICK_PRECINCT_26-002   26-002     FREDERICK      24021.0   
89133     FREDERICK_PRECINCT_26-002   26-002     FREDERICK      24021.0   
89134     FREDERICK_PRECINCT_26-002   26-002     FREDERICK      24021.0   
89135     FREDERICK_PRECINCT_26-002   26-002     FREDERICK      24021.0   

         variable  votes  district  
52046   GCON01DMM      6         1  
52047   GCON01DMM     36 

In [817]:
# Number of distinct precinct keys; the pivoted table should have this many rows.
print(len(md_election_df_filtered['NAME'].unique()))

2035


In [818]:
# Long -> wide: one row per precinct, one column per candidate variable. Votes
# are summed across the multiple rows a precinct has per candidate (one per
# voting mode). Candidates absent from a precinct become 0; filling NaN this way
# leaves the vote columns as floats.
md_election_pivoted_df = md_election_df_filtered.pivot_table(
    index=['NAME'], 
    columns='variable',
    values='votes',
    aggfunc='sum'
).fillna(0).reset_index() 


In [819]:
# Re-attach each precinct's congressional district, which the pivot dropped.
district_mapping = md_election_df_filtered[['NAME', 'district']].drop_duplicates()
# validate='many_to_one' raises MergeError if a precinct key maps to more than
# one district, instead of silently duplicating that precinct's row.
# NOTE(review): the frame is merged onto itself, so re-running this cell adds
# district_x / district_y columns; re-run the pivot cell first.
md_election_pivoted_df = md_election_pivoted_df.merge(
    district_mapping, on='NAME', how='left', validate='many_to_one'
)

In [820]:
# Check the mapping: its row count should equal the number of distinct precinct keys.
print(district_mapping)

                               NAME  district
52046      WICOMICO_PRECINCT_01-001         1
52047    DORCHESTER_PRECINCT_01-001         1
52048      SOMERSET_PRECINCT_01-001         1
52049      CAROLINE_PRECINCT_01-001         1
52050  QUEEN_ANNE'S_PRECINCT_01-001         1
...                             ...       ...
89041     FREDERICK_PRECINCT_24-002         8
89061     FREDERICK_PRECINCT_24-003         8
89081     FREDERICK_PRECINCT_24-006         8
89101     FREDERICK_PRECINCT_26-001         8
89121     FREDERICK_PRECINCT_26-002         8

[2035 rows x 2 columns]


In [821]:
# Preview the wide table; a precinct has non-zero votes only in its own district's columns.
print(md_election_pivoted_df.head())


                       NAME  GCON01DMM  GCON01RAH  GCON02DCDR  GCON02RJRS  \
0  ALLEGANY_PRECINCT_01-000        0.0        0.0         0.0         0.0   
1  ALLEGANY_PRECINCT_02-000        0.0        0.0         0.0         0.0   
2  ALLEGANY_PRECINCT_03-000        0.0        0.0         0.0         0.0   
3  ALLEGANY_PRECINCT_04-002        0.0        0.0         0.0         0.0   
4  ALLEGANY_PRECINCT_04-003        0.0        0.0         0.0         0.0   

   GCON03DJS  GCON03RCA  GCON04DAGB  GCON04RGEM  GCON05DSHH  GCON05RCP  \
0        0.0        0.0         0.0         0.0         0.0        0.0   
1        0.0        0.0         0.0         0.0         0.0        0.0   
2        0.0        0.0         0.0         0.0         0.0        0.0   
3        0.0        0.0         0.0         0.0         0.0        0.0   
4        0.0        0.0         0.0         0.0         0.0        0.0   

   GCON06DDJT  GCON06GGG  GCON06RNCP  GCON07DKM  GCON07RKK  GCON08DJR  \
0        72.0      

In [822]:
# NOTE(review): prints the whole frame and largely repeats the preview in the
# preceding cell; a .head()/.tail() preview would keep the output short.
print(md_election_pivoted_df)

                           NAME  GCON01DMM  GCON01RAH  GCON02DCDR  GCON02RJRS  \
0      ALLEGANY_PRECINCT_01-000        0.0        0.0         0.0         0.0   
1      ALLEGANY_PRECINCT_02-000        0.0        0.0         0.0         0.0   
2      ALLEGANY_PRECINCT_03-000        0.0        0.0         0.0         0.0   
3      ALLEGANY_PRECINCT_04-002        0.0        0.0         0.0         0.0   
4      ALLEGANY_PRECINCT_04-003        0.0        0.0         0.0         0.0   
...                         ...        ...        ...         ...         ...   
2030  WORCESTER_PRECINCT_05-002      955.0     1592.0         0.0         0.0   
2031  WORCESTER_PRECINCT_06-001      921.0     1742.0         0.0         0.0   
2032  WORCESTER_PRECINCT_06-002      250.0      753.0         0.0         0.0   
2033  WORCESTER_PRECINCT_06-003      430.0     1115.0         0.0         0.0   
2034  WORCESTER_PRECINCT_07-001     1532.0     3005.0         0.0         0.0   

      GCON03DJS  GCON03RCA 

In [826]:
# List the candidate columns produced by the pivot, plus the merged district column.
print(md_election_pivoted_df.columns.values)

['NAME' 'GCON01DMM' 'GCON01RAH' 'GCON02DCDR' 'GCON02RJRS' 'GCON03DJS'
 'GCON03RCA' 'GCON04DAGB' 'GCON04RGEM' 'GCON05DSHH' 'GCON05RCP'
 'GCON06DDJT' 'GCON06GGG' 'GCON06RNCP' 'GCON07DKM' 'GCON07RKK' 'GCON08DJR'
 'GCON08RGTC' 'district']


## Process voting population:

In [827]:
# NAME and district are identifiers; every other column is a candidate vote column.
# This rebinds the `columns_to_ignore` / `columns_to_use` names from the SC sections.
columns_to_ignore = ['NAME', 'district']
columns_to_use = [col for col in md_election_pivoted_df.columns if col not in columns_to_ignore]

In [828]:
# Check the selected Maryland vote columns.
print(columns_to_use)

['GCON01DMM', 'GCON01RAH', 'GCON02DCDR', 'GCON02RJRS', 'GCON03DJS', 'GCON03RCA', 'GCON04DAGB', 'GCON04RGEM', 'GCON05DSHH', 'GCON05RCP', 'GCON06DDJT', 'GCON06GGG', 'GCON06RNCP', 'GCON07DKM', 'GCON07RKK', 'GCON08DJR', 'GCON08RGTC']


In [829]:
# Same processing as for South Carolina; the third-party column (GCON06GGG) is
# neither totalled nor kept because its party letter is not R or D.
md_election_processed_df = process_precinct_votes(md_election_pivoted_df, columns_to_use, columns_to_ignore)


In [830]:
# Confirm which columns survived processing.
print(md_election_processed_df.columns.values)

['NAME' 'district' 'TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'GCON01RAH'
 'GCON02RJRS' 'GCON03RCA' 'GCON04RGEM' 'GCON05RCP' 'GCON06RNCP'
 'GCON07RKK' 'GCON08RGTC' 'GCON01DMM' 'GCON02DCDR' 'GCON03DJS'
 'GCON04DAGB' 'GCON05DSHH' 'GCON06DDJT' 'GCON07DKM' 'GCON08DJR']


In [831]:
# Spot-check totals and lean; the totals are floats because of the pivot's fillna(0).
print(md_election_processed_df.head())

                       NAME  district  TOT_REP  TOT_DEM  TOT_VOT        LEAN  \
0  ALLEGANY_PRECINCT_01-000         6    406.0     72.0    478.0  Republican   
1  ALLEGANY_PRECINCT_02-000         6    398.0     93.0    491.0  Republican   
2  ALLEGANY_PRECINCT_03-000         6    419.0     89.0    508.0  Republican   
3  ALLEGANY_PRECINCT_04-002         6    188.0    144.0    332.0  Republican   
4  ALLEGANY_PRECINCT_04-003         6    354.0    248.0    602.0  Republican   

   GCON01RAH  GCON02RJRS  GCON03RCA  GCON04RGEM  ...  GCON07RKK  GCON08RGTC  \
0        0.0         0.0        0.0         0.0  ...        0.0         0.0   
1        0.0         0.0        0.0         0.0  ...        0.0         0.0   
2        0.0         0.0        0.0         0.0  ...        0.0         0.0   
3        0.0         0.0        0.0         0.0  ...        0.0         0.0   
4        0.0         0.0        0.0         0.0  ...        0.0         0.0   

   GCON01DMM  GCON02DCDR  GCON03DJS  GCON04D

In [832]:
# Number of precincts per lean label; 'Unknown' covers ties, including precincts
# with no Republican or Democratic votes at all.
print(md_election_processed_df[['LEAN']].value_counts())

LEAN      
Democratic    1392
Republican     596
Unknown         47
Name: count, dtype: int64


In [833]:
# Inspect the precincts assigned to congressional district 4. The key column is
# still called NAME at this point, and all columns are printed, not just the key.

print(md_election_processed_df[md_election_processed_df['district'] == 4])

                                 NAME  district  TOT_REP  TOT_DEM  TOT_VOT  \
64       ANNE_ARUNDEL_PRECINCT_02-002         4    517.0    626.0   1143.0   
75       ANNE_ARUNDEL_PRECINCT_02-013         4    709.0    693.0   1402.0   
80       ANNE_ARUNDEL_PRECINCT_02-018         4    354.0    767.0   1121.0   
82       ANNE_ARUNDEL_PRECINCT_02-020         4    727.0    883.0   1610.0   
83       ANNE_ARUNDEL_PRECINCT_02-021         4   1280.0   1170.0   2450.0   
...                               ...       ...      ...      ...      ...   
1795  PRINCE_GEORGE'S_PRECINCT_20-097         4      0.0      0.0      0.0   
1796  PRINCE_GEORGE'S_PRECINCT_20-098         4      0.0      0.0      0.0   
1797  PRINCE_GEORGE'S_PRECINCT_20-099         4      0.0      0.0      0.0   
1802  PRINCE_GEORGE'S_PRECINCT_21-005         4    172.0   1000.0   1172.0   
1811  PRINCE_GEORGE'S_PRECINCT_21-014         4    151.0    716.0    867.0   

            LEAN  GCON01RAH  GCON02RJRS  GCON03RCA  GCON04RGEM 

In [834]:
# Rename NAME to UNIQUE_ID so the Maryland output uses the same key column name
# as the South Carolina outputs.
md_election_processed_df = md_election_processed_df.rename(columns={'NAME': 'UNIQUE_ID'})

In [835]:
# Write the processed Maryland congressional results as a JSON array, one object per precinct row.
with open('states/maryland/election/md_election_cd.json', 'w') as json_file:
    json.dump(md_election_processed_df.to_dict(orient='records'), json_file, indent=4)