In [2]:

import pandas as pd 
import geopandas as gpd
import csv
from pathlib import Path

from collections import OrderedDict

In [3]:
# Inputs are read from ./data and generated artifacts are written to ./out-files,
# both resolved against the directory the notebook is launched from.
data_dir = Path.cwd() / "data"
out_dir = Path.cwd() / "out-files"
# Create the output directory up front so the later export does not fail on a
# fresh checkout where it is missing; exist_ok keeps re-running this cell harmless.
out_dir.mkdir(parents=True, exist_ok=True)

In [6]:
    # Load the processed moneyball table from the data directory.
    df = pd.read_csv(data_dir.joinpath('processed_data.csv'))

    # Split by the 'chamber' code: 'SD' rows form the upper-chamber frame,
    # 'HD' rows form the lower-chamber frame.
    upper_df = df.loc[df['chamber'].eq('SD')]
    lower_df = df.loc[df['chamber'].eq('HD')]

In [7]:
# Preview the first five upper-chamber rows.
upper_df.head(n=5)

Unnamed: 0,state,district,incumbent,favored,confidence,nom_R,nom_D,nom_I,turnout_cvap,VOTER_POWER,GEOID,chamber
151,CT,CT-SD-1,D,D,Safe,False,False,False,34015,-7.9e-07,9001,SD
152,CT,CT-SD-2,D,D,Safe,False,False,False,36836,-7.3e-07,9002,SD
153,CT,CT-SD-3,D,D,Safe,False,False,False,38453,-6.99e-07,9003,SD
154,CT,CT-SD-4,D,D,Likely,False,False,False,38997,-2.28e-06,9004,SD
155,CT,CT-SD-5,D,D,Safe,False,False,False,38559,-6.97e-07,9005,SD


In [8]:
# Preview the first five lower-chamber rows.
lower_df.head(n=5)

Unnamed: 0,state,district,incumbent,favored,confidence,nom_R,nom_D,nom_I,turnout_cvap,VOTER_POWER,GEOID,chamber
0,CT,CT-HD-1,D,D,Safe,False,False,False,8256,-7.48e-07,9001,HD
1,CT,CT-HD-2,D,D,Lean,False,False,False,9538,-2.77e-06,9002,HD
2,CT,CT-HD-3,D,D,Safe,False,False,False,7240,-8.53e-07,9003,HD
3,CT,CT-HD-4,D,D,Safe,False,False,False,8519,-7.25e-07,9004,HD
4,CT,CT-HD-5,D,D,Safe,False,False,False,8851,-6.98e-07,9005,HD


In [9]:
# Read the shapefile that the upper-chamber data is joined onto (by GEOID) below.
upper_shp = gpd.read_file(
    data_dir.joinpath('UPPER_cb_2019_us_sldu_500k/cb_2019_us_sldu_500k.shp')
)

In [10]:
# Preview the first five shapefile records.
upper_shp.head(n=5)

Unnamed: 0,STATEFP,SLDUST,AFFGEOID,GEOID,NAME,LSAD,LSY,ALAND,AWATER,geometry
0,34,16,610U600US34016,34016,16,LU,2018,763668910,7272222,"POLYGON ((-75.04435 40.41259, -75.02472 40.431..."
1,23,26,610U600US23026,23026,26,LU,2018,535557926,115675982,"POLYGON ((-70.79991 43.85805, -70.79743 43.858..."
2,31,23,610U600US31023,31023,23,LU,2018,4288368209,54838073,"POLYGON ((-97.36819 41.33874, -97.36822 41.358..."
3,31,34,610U600US31034,31034,34,LU,2018,4230994361,55826999,"POLYGON ((-98.49394 40.85621, -98.49370 40.856..."
4,30,5,610U600US30005,30005,5,LU,2018,835660316,95496113,"POLYGON ((-114.33053 48.22589, -114.32399 48.2..."


In [12]:
def get_voter_power(row, df):
    """Look up the VOTER_POWER value in ``df`` for the GEOID of ``row``.

    Args:
        row: Record supporting ``row['GEOID']`` lookup.
        df: DataFrame with 'GEOID' and 'VOTER_POWER' columns.

    Returns:
        The 'VOTER_POWER' of the first row of ``df`` whose GEOID equals
        ``row['GEOID']``, or 0 when no row matches.
    """
    same_geoid = df.loc[df['GEOID'] == row['GEOID']]
    if len(same_geoid.index) == 0:
        # No data for this GEOID: fall back to a neutral value.
        return 0
    return same_geoid.iloc[0]['VOTER_POWER']
    

In [13]:
def pandas_lambda_geolocate(row, df, df_columns, default_values):
    """Fetch selected columns from the row of ``df`` sharing ``row``'s GEOID.

    Intended for use with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: Record supporting ``row['GEOID']`` lookup.
        df: DataFrame with a 'GEOID' column plus every column in ``df_columns``.
        df_columns: Names of the columns of ``df`` to extract, in output order.
        default_values: Values returned (as a Series) when no GEOID matches.

    Returns:
        A ``pd.Series`` holding, in ``df_columns`` order, the values from the
        first matching row of ``df``; ``pd.Series(default_values)`` when there
        is no match. If several rows match, a notice is printed and the first
        one is used.
    """
    candidates = df.loc[df['GEOID'] == row['GEOID']]
    n_found = len(candidates.index)

    if n_found == 0:
        return pd.Series(default_values)
    if n_found > 1:
        print(f"More than one match found for GEOID: {row['GEOID']}")

    first = candidates.iloc[0]
    return pd.Series([first[name] for name in df_columns])

In [14]:
def get_lean(row, df):
    """Build the lean label for the GEOID of ``row`` from ``df``.

    Args:
        row: Record supporting ``row['GEOID']`` lookup.
        df: DataFrame with 'GEOID', 'confidence' and 'favored' columns.

    Returns:
        'no data' when no row of ``df`` matches the GEOID; otherwise, using the
        first matching row, 'Toss-Up' when its confidence is 'Toss-Up', else the
        confidence and favored values joined by a space (e.g. 'Safe D'). If
        several rows match, a notice is printed and the first one is used.
    """
    hits = df.loc[df['GEOID'] == row['GEOID']]
    hit_count = len(hits.index)

    if hit_count == 0:
        return 'no data'
    if hit_count > 1:
        print(f"More than one match found for GEOID: {row['GEOID']}")

    record = hits.iloc[0]
    rating, party = record['confidence'], record['favored']
    # A toss-up has no favored side, so the label is the rating alone.
    return rating if rating == 'Toss-Up' else rating + " " + party




In [15]:
# Columns pulled from the moneyball data, and the placeholders used for
# shapefile records whose GEOID has no moneyball row (same order as df_columns).
df_columns = ['district', 'nom_R', 'nom_D', 'incumbent', 'VOTER_POWER']
default_values = ['no data', 'no data', 'no data', 'no data', 0]

# Row-wise lookup by GEOID; the extra positional arguments are forwarded to the
# helper through `args`.
upper_shp[['DISTRICT', 'NOM_R', 'NOM_D', "INCUMBENT", 'VOTER_POWER']] = upper_shp.apply(
    pandas_lambda_geolocate,
    axis=1,
    args=(upper_df, df_columns, default_values),
)
upper_shp['LEAN'] = upper_shp.apply(get_lean, axis=1, args=(upper_df,))


More than one match found for GEOID: 13030
More than one match found for GEOID: 13031
More than one match found for GEOID: 13030
More than one match found for GEOID: 13031


In [16]:
# Preview the first five records whose VOTER_POWER is non-zero.
upper_shp.loc[upper_shp['VOTER_POWER'].ne(0)].head(n=5)

Unnamed: 0,STATEFP,SLDUST,AFFGEOID,GEOID,NAME,LSAD,LSY,ALAND,AWATER,geometry,DISTRICT,NOM_R,NOM_D,INCUMBENT,VOTER_POWER,LEAN
11,13,43,610U600US13043,13043,43,LU,2018,581735370,11760621,"POLYGON ((-84.18805 33.65409, -84.18391 33.655...",GA-SD-43,Melanie Williams,Tonya Anderson,D,1.01e-09,Safe D
12,13,17,610U600US13017,13017,17,LU,2018,1144431704,25560639,"POLYGON ((-84.35419 33.35336, -84.35418 33.377...",GA-SD-17,Brian Strickland,Kelly Rose,R,8.15e-08,Likely R
17,27,28,610U600US27028,27028,28,LU,2018,4738895114,72931748,"POLYGON ((-92.44957 43.67444, -92.44953 43.682...",MN-SD-28,Jeremy Miller,Sarah Kruger,R,-1.06e-06,Safe R
18,27,21,610U600US27021,27021,21,LU,2018,3804014578,138178830,"POLYGON ((-93.04090 44.25475, -93.04069 44.256...",MN-SD-21,Michael P. Goggin,Ralph Kaehler,R,-4.26e-06,Likely R
33,20,13,610U600US20013,20013,13,LU,2018,3667142751,27696376,"MULTIPOLYGON (((-94.72288 37.84116, -94.71885 ...",KS-SD-13,Richard Hilderbrand,Nancy Ingle,R,4.4e-07,Safe R


In [17]:
# Keep only the columns that should appear in the exported file.
upper_shp = upper_shp[[
    'STATEFP',
    'GEOID',
    'DISTRICT',
    'NOM_R',
    'NOM_D',
    'INCUMBENT',
    'LEAN',
    'VOTER_POWER',
    'geometry',
]]

In [18]:
# Show the records whose VOTER_POWER is non-zero, now with the trimmed column set.
upper_shp.loc[upper_shp['VOTER_POWER'].ne(0)]

Unnamed: 0,STATEFP,GEOID,DISTRICT,NOM_R,NOM_D,INCUMBENT,LEAN,VOTER_POWER,geometry
11,13,13043,GA-SD-43,Melanie Williams,Tonya Anderson,D,Safe D,1.010000e-09,"POLYGON ((-84.18805 33.65409, -84.18391 33.655..."
12,13,13017,GA-SD-17,Brian Strickland,Kelly Rose,R,Likely R,8.150000e-08,"POLYGON ((-84.35419 33.35336, -84.35418 33.377..."
17,27,27028,MN-SD-28,Jeremy Miller,Sarah Kruger,R,Safe R,-1.060000e-06,"POLYGON ((-92.44957 43.67444, -92.44953 43.682..."
18,27,27021,MN-SD-21,Michael P. Goggin,Ralph Kaehler,R,Likely R,-4.260000e-06,"POLYGON ((-93.04090 44.25475, -93.04069 44.256..."
33,20,20013,KS-SD-13,Richard Hilderbrand,Nancy Ingle,R,Safe R,4.400000e-07,"MULTIPOLYGON (((-94.72288 37.84116, -94.71885 ..."
...,...,...,...,...,...,...,...,...,...
1892,20,20005,KS-SD-5,Kevin Braun,Jeffrey Pittman,R,Toss-Up,5.920000e-06,"POLYGON ((-94.96504 39.29503, -94.96093 39.295..."
1900,27,27039,MN-SD-39,Karin Housley,TBA,R,Likely R,-4.070000e-06,"POLYGON ((-93.01951 45.25808, -93.01949 45.286..."
1905,27,27056,MN-SD-56,Dan Hall,TBA,R,Toss-Up,-1.580000e-05,"POLYGON ((-93.39901 44.75362, -93.39901 44.753..."
1934,37,37039,NC-SD-39,Joshua Niday,DeAndrea Salvador,FALSE,Safe D,1.550000e-07,"POLYGON ((-80.87733 35.09865, -80.87585 35.103..."


In [19]:
# Write the joined upper-chamber layer to the output directory as GeoJSON.
upper_shp.to_file(
    out_dir.joinpath("upper_state_moneyball.geojson"),
    driver="GeoJSON",
)

Previous lower GeoJSON size: 46.8 MB
Previous upper GeoJSON size: 32.7 MB