In [106]:
import pandas as pd
import geopandas as gpd

## Preprocess Election result from Precincts

### Function and declaration

In [107]:
def process_precinct_votes(precinct_gdf, columns_to_use):
    """
    Args:
        precinct_gdf (pd.DataFrame): The DataFrame containing precinct vote data.
        columns_to_use (list): The list of columns to consider for vote calculations.

    Returns:
        pd.DataFrame: The updated DataFrame with calculated vote totals, lean, and filtered party-related columns.
    """
    precinct_gdf['TOT_REP'] = 0
    precinct_gdf['TOT_DEM'] = 0
    precinct_gdf['TOT_VOT'] = 0

    republican_columns = []
    democratic_columns = []

    for col in columns_to_use:
        if len(col) >= 7:
            party_code = col[6]
            if party_code == 'R':  # Republican
                precinct_gdf['TOT_REP'] += precinct_gdf[col]
                republican_columns.append(col)
            elif party_code == 'D':  # Democrat
                precinct_gdf['TOT_DEM'] += precinct_gdf[col]
                democratic_columns.append(col)  

    precinct_gdf['TOT_VOT'] = (
        precinct_gdf['TOT_REP'] +
        precinct_gdf['TOT_DEM']
    )

    precinct_gdf['LEAN'] = precinct_gdf.apply(
        lambda row: 'Unknown' if row['TOT_REP'] == 0 and row['TOT_DEM'] == 0 else
                    ('Republican' if row['TOT_REP'] > row['TOT_DEM'] else
                     ('Democratic' if row['TOT_DEM'] > row['TOT_REP'] else 'Unknown')),
        axis=1
    )
    filtered_columns = ['TOT_REP', 'TOT_DEM', 'TOT_VOT', 'LEAN'] + republican_columns + democratic_columns
    precinct_gdf = precinct_gdf[filtered_columns]

    return precinct_gdf


### Main script

In [127]:
sc_precincts_gdf = gpd.read_file('states/south_carolina/geodata/south_carolina_precincts.geojson')
sc_election_gdf = gpd.read_file('raw/precincts/sc_2022_gen_prec/sc_2022_gen_cong_prec/sc_2022_gen_cong_prec.shp')

In [128]:
print(sc_election_gdf.head())

                             UNIQUE_ID COUNTYFP      County      Precinct  \
0  DORCHESTER-:-BEECH HILL 2-(CONG-01)      035  DORCHESTER  BEECH HILL 2   
1       DORCHESTER-:-CYPRESS-(CONG-01)      035  DORCHESTER       CYPRESS   
2      DORCHESTER-:-DELEMARS-(CONG-01)      035  DORCHESTER      DELEMARS   
3       DORCHESTER-:-GIVHANS-(CONG-01)      035  DORCHESTER       GIVHANS   
4     DORCHESTER-:-GIVHANS 2-(CONG-01)      035  DORCHESTER     GIVHANS 2   

  Prec_Code CONG_DIST  GCON01AODD  GCON01DAND  GCON01OWRI  GCON01RMAC  ...  \
0       084        01          10         286           3         540  ...   
1       072        01          16         479           2         682  ...   
2       036        01           2          69           2          85  ...   
3       035        01           4         144           1         296  ...   
4       094        01           8         189           0         360  ...   

   GCON05GGAI  GCON05OWRI  GCON05RNOR  GCON06DCLY  GCON06OWRI  GCON0

In [129]:
print(sc_election_gdf.head())  

                             UNIQUE_ID COUNTYFP      County      Precinct  \
0  DORCHESTER-:-BEECH HILL 2-(CONG-01)      035  DORCHESTER  BEECH HILL 2   
1       DORCHESTER-:-CYPRESS-(CONG-01)      035  DORCHESTER       CYPRESS   
2      DORCHESTER-:-DELEMARS-(CONG-01)      035  DORCHESTER      DELEMARS   
3       DORCHESTER-:-GIVHANS-(CONG-01)      035  DORCHESTER       GIVHANS   
4     DORCHESTER-:-GIVHANS 2-(CONG-01)      035  DORCHESTER     GIVHANS 2   

  Prec_Code CONG_DIST  GCON01AODD  GCON01DAND  GCON01OWRI  GCON01RMAC  ...  \
0       084        01          10         286           3         540  ...   
1       072        01          16         479           2         682  ...   
2       036        01           2          69           2          85  ...   
3       035        01           4         144           1         296  ...   
4       094        01           8         189           0         360  ...   

   GCON05GGAI  GCON05OWRI  GCON05RNOR  GCON06DCLY  GCON06OWRI  GCON0

In [130]:
columns_to_ignore = ['UNIQUE_ID', 'CONG_DIST','COUNTYFP', 'County', 'Prec_Code', 'Precinct', 'geometry']

In [131]:
columns_to_use = [col for col in sc_election_gdf.columns if col not in columns_to_ignore]

In [132]:
print(columns_to_use)

['GCON01AODD', 'GCON01DAND', 'GCON01OWRI', 'GCON01RMAC', 'GCON02DLAR', 'GCON02OWRI', 'GCON02RWIL', 'GCON03OWRI', 'GCON03RDUN', 'GCON04OWRI', 'GCON04RTIM', 'GCON05DHUN', 'GCON05GGAI', 'GCON05OWRI', 'GCON05RNOR', 'GCON06DCLY', 'GCON06OWRI', 'GCON06RBUC', 'GCON07DSCO', 'GCON07OWRI', 'GCON07RFRY']


In [133]:
sc_election_processed_gdf = process_precinct_votes(sc_election_gdf, columns_to_use)

In [134]:
sc_election_processed_gdf = sc_election_processed_gdf.join(sc_election_gdf[columns_to_ignore])

In [135]:
print(sc_election_processed_gdf[['TOT_REP', 'TOT_DEM', 'TOT_VOT', 'LEAN']].head())

   TOT_REP  TOT_DEM  TOT_VOT        LEAN
0      540      286      826  Republican
1      682      479     1161  Republican
2       85       69      154  Republican
3      296      144      440  Republican
4      360      189      549  Republican


In [136]:
print(sc_election_processed_gdf[['LEAN']].value_counts())

LEAN      
Republican    1672
Democratic     597
Unknown          8
Name: count, dtype: int64


In [141]:
print(sc_election_processed_gdf.columns.values)

['TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'GCON01RMAC' 'GCON02RWIL'
 'GCON03RDUN' 'GCON04RTIM' 'GCON05RNOR' 'GCON06RBUC' 'GCON07RFRY'
 'GCON01DAND' 'GCON02DLAR' 'GCON05DHUN' 'GCON06DCLY' 'GCON07DSCO'
 'UNIQUE_ID' 'CONG_DIST' 'COUNTYFP' 'County' 'Prec_Code' 'Precinct'
 'geometry']


In [138]:
print(sc_election_processed_gdf.columns.values)

['TOT_REP' 'TOT_DEM' 'TOT_VOT' 'LEAN' 'GCON01RMAC' 'GCON02RWIL'
 'GCON03RDUN' 'GCON04RTIM' 'GCON05RNOR' 'GCON06RBUC' 'GCON07RFRY'
 'GCON01DAND' 'GCON02DLAR' 'GCON05DHUN' 'GCON06DCLY' 'GCON07DSCO'
 'UNIQUE_ID' 'CONG_DIST' 'COUNTYFP' 'County' 'Prec_Code' 'Precinct'
 'geometry']


In [139]:
sc_election_processed_gdf.to_file(
    "server/data/states/south_carolina/election/sc_election.geojson",
    driver="GeoJSON",
    drop_crs=True
)

AttributeError: 'DataFrame' object has no attribute 'to_file'

In [93]:
md_precincts_gdf = gpd.read_file('states/maryland/geodata/maryland_precincts.geojson')


In [94]:
print(md_precincts_gdf.columns.values)

['NAME' 'NUMBER' 'JURSCODE' 'VOTESPRE' 'G20PREDBID' 'G20PRERTRU'
 'G20PRELJOR' 'G20PREGHAW' 'G20PREBSEG' 'G20PREOWRI' 'MEDN_INC22'
 'TOT_HOUS22' '0_35K' '35K_60K' '60K-100K' '100K_125K' '125K_150K'
 '150K_MORE' 'TOT_POP22' 'NHSP_POP22' 'HSP_POP22' 'WHT_NHSP22'
 'BLK_NHSP22' 'AIA_NHSP22' 'ASN_NHSP22' 'HPI_NHSP22' 'OTH_NHSP22'
 'geometry']
