# Match 2018 NCSBE Senate Votes

Retrieve precinct-sorted 2018 general election results from [`2019-04-04 precinct_con_can_stats.zip`](https://dl.ncsbe.gov/index.html?prefix=ENRS/2018_11_06/precinct_sort/), match them to [parties from Ballotpedia 2018 races](https://docs.google.com/spreadsheets/d/1LmNoDfZH9lMtGh5kLwQG7Te7o3XugiarnK2zron72pE/edit#gid=0), and output [new 2018 North Carolina model data](https://docs.google.com/spreadsheets/d/1aMPAXJN7Km3fxglhHXPqsCl4TKGCE_JXooufT6KVom0/edit#gid=1429158271).

In [2]:
import os; os.environ['DYLD_LIBRARY_PATH'] = './.venv-NC/lib'
import pandas, editdistance, numpy, geopandas, re

## Import NCSBE Votes

In [5]:
# Load the 2018 precinct results with every column read as a string, then
# keep only the rows whose contest is a State Senate district race.
df_2018 = pandas.read_csv('results_pct_20181106.txt.gz', sep='\t', dtype=str)
is_state_senate = df_2018['Contest Name'].str.startswith('NC STATE SENATE DISTRICT ')
df_2018sldu = df_2018.loc[is_state_senate]

# Load the 2016 precinct-sorted results the same way and keep only the
# presidential contest.
df_2016 = pandas.read_csv('precinct_sort_20161108.txt.gz', sep='\t', dtype=str)
is_us_president = df_2016['contest_name'] == 'US PRESIDENT'
df_2016uspres = df_2016.loc[is_us_president]

df_2018sldu

Unnamed: 0,County,Election Date,Precinct,Contest Group ID,Contest Type,Contest Name,Choice,Choice Party,Vote For,Election Day,One Stop,Absentee by Mail,Provisional,Total Votes,Real Precinct
1462,BUNCOMBE,11/06/2018,07.1,1172,S,NC STATE SENATE DISTRICT 49,Terry Van Duyn,DEM,1,373,0,29,3,405,Y
1463,BUNCOMBE,11/06/2018,25.1,1172,S,NC STATE SENATE DISTRICT 49,Terry Van Duyn,DEM,1,299,0,23,1,323,Y
1464,BUNCOMBE,11/06/2018,64.1,1172,S,NC STATE SENATE DISTRICT 49,Terry Van Duyn,DEM,1,203,0,7,1,211,Y
1465,BUNCOMBE,11/06/2018,OS ECBL,1172,S,NC STATE SENATE DISTRICT 49,Terry Van Duyn,DEM,1,0,3186,0,0,3186,N
1466,BUNCOMBE,11/06/2018,CURBSIDE,1172,S,NC STATE SENATE DISTRICT 49,Terry Van Duyn,DEM,1,0,0,0,0,0,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182554,WASHINGTON,11/06/2018,P2,1124,S,NC STATE SENATE DISTRICT 01,D. Cole Phelps,DEM,1,174,0,0,0,174,Y
182555,WASHINGTON,11/06/2018,SC,1124,S,NC STATE SENATE DISTRICT 01,D. Cole Phelps,DEM,1,206,0,0,0,206,Y
182556,WASHINGTON,11/06/2018,LM,1124,S,NC STATE SENATE DISTRICT 01,D. Cole Phelps,DEM,1,342,0,0,0,342,Y
182557,WASHINGTON,11/06/2018,SK,1124,S,NC STATE SENATE DISTRICT 01,D. Cole Phelps,DEM,1,126,0,0,0,126,Y


## Import Ballotpedia Candidates

Match candidate names between Ballotpedia and NCSBE in each senate district to later determine party votes.

In [9]:
# Candidate sheet exported to CSV; the columns used below are
# 'State Senate District', 'Democrat' and 'Republican'.
candidates_csv = 'North Carolina State Lege Candidates - 2018 Senate.csv'
df_candidates = pandas.read_csv(candidates_csv)

def closest_name(name, names):
    """Return the entry of `names` closest to `name` by edit distance.

    Parameters
    ----------
    name : str or missing value
        Name to look up. Any missing value (None, NaN, pandas.NA) or the
        literal string 'No candidate' means there is nothing to match.
    names : iterable of str
        Candidate spellings to choose from.

    Returns
    -------
    str or None
        The member of `names` with the smallest edit distance to `name`;
        ties are broken by taking the alphabetically smallest name. None is
        returned when `name` is missing / 'No candidate' or when `names`
        is empty.
    """
    # pandas.isna recognizes every NaN/None/NA object, whereas a tuple
    # membership test only matches the numpy.nan singleton by identity.
    if name == 'No candidate' or pandas.isna(name):
        return None

    target = str(name)
    # Comparing (distance, name) pairs picks the smallest distance first and
    # falls back to name order on ties; default=None covers an empty `names`.
    best = min(
        ((editdistance.distance(candidate, target), candidate) for candidate in names),
        default=None,
    )
    return None if best is None else best[1]

# For every row of the candidate sheet, build the NCSBE contest name for its
# district and replace the sheet's Democrat/Republican spellings with the
# closest ballot name found in that contest.
DEMs, REPs, contests = [], [], []

candidate_columns = zip(df_candidates['State Senate District'],
                        df_candidates['Democrat'],
                        df_candidates['Republican'])

for district_label, democrat, republican in candidate_columns:
    # Drop the 9-character prefix of the district label and zero-pad the
    # remaining number to two digits, as the NCSBE contest names do.
    district_number = int(district_label[9:])
    contest = 'NC STATE SENATE DISTRICT {:02d}'.format(district_number)

    in_contest = df_2018sldu['Contest Name'] == contest
    ballot_names = set(df_2018sldu.loc[in_contest, 'Choice'])

    DEMs.append(closest_name(democrat, ballot_names))
    REPs.append(closest_name(republican, ballot_names))
    contests.append(contest)

df_candidates['Democrat'] = DEMs
df_candidates['Republican'] = REPs
df_candidates['Contest'] = contests

df_candidates

Unnamed: 0,State Senate District,Winning Party,Incumbent Party,Democrat,Republican,Other,Contest
0,District 1,,O,D. Cole Phelps,Bob Steinburg,,NC STATE SENATE DISTRICT 01
1,District 2,,R,Ginger Garner,Norman Sanderson,Tim Harris (Libertarian Party),NC STATE SENATE DISTRICT 02
2,District 3,,D,Erica D. Smith,"C. (Chuck) Earley, Jr.",,NC STATE SENATE DISTRICT 03
3,District 4,,D,"Milton F. (Toby) Fitch, Jr.",Richard Scott,Jesse Shearin (Libertarian Party),NC STATE SENATE DISTRICT 04
4,District 5,,D,Don Davis,Kimberly Robb,,NC STATE SENATE DISTRICT 05
5,District 6,,R,Joseph (Joe) Webb,Harry Brown,,NC STATE SENATE DISTRICT 06
6,District 7,,R,David B. Brantley,"Louis Milford Pate, Jr.",,NC STATE SENATE DISTRICT 07
7,District 8,,R,"David W. Sink, Jr.",Bill Rabon,Anthony Mascolo (Libertarian Party),NC STATE SENATE DISTRICT 08
8,District 9,,R,Harper Peterson,Michael Lee,Ethan Bickley (Libertarian Party),NC STATE SENATE DISTRICT 09
9,District 10,,R,Vernon R. Moore,Brent Jackson,,NC STATE SENATE DISTRICT 10


## Count Precinct Party Votes

Create a new `df_2018new` DataFrame with 2018 State Senate and 2016 U.S. President vote counts by party, one row per precinct and contest.

In [12]:
# Column-wise accumulator for df_2018new. Two kinds of rows are appended:
#   1. one per (county, precinct, State Senate contest) carrying the 2018
#      senate votes and zero presidential votes;
#   2. one per (county, precinct) carrying the 2016 presidential votes and
#      zero senate votes, with district/incumbent/winner left as None.
arrays = {column: [] for column in (
    'county_desc', 'precinct_code', 'district', 'incumbent', 'winner',
    'sldu_votes_D', 'sldu_votes_R', 'uspres_votes_D', 'uspres_votes_R',
)}


def _first_vote_count(rows, name_column, name, votes_column):
    """Return the votes of the first row of `rows` where `name_column` == `name`.

    The value in `votes_column` is converted with int(). Returns 0 when no
    row matches, which includes the case where `name` is None or NaN.
    """
    matches = rows.loc[rows[name_column] == name, votes_column]
    if matches.empty:
        return 0
    return int(matches.iloc[0])


# Pass 1: 2018 State Senate votes. Iterating the groupby hands over each
# group's rows directly instead of re-filtering the whole frame per key.
contest_groups = df_2018sldu.groupby(['County', 'Precinct', 'Contest Name'])

for (county, precinct, contest_name), contest_rows in contest_groups:
    # Candidate-sheet row for this contest; .iloc[0] raises IndexError if the
    # contest has no entry in df_candidates.
    row_candidates = df_candidates[df_candidates.Contest == contest_name].iloc[0]

    arrays['county_desc'].append(county)
    arrays['precinct_code'].append(precinct)

    # Keep whatever follows the 9-character prefix of the district label.
    arrays['district'].append(row_candidates['State Senate District'][9:])
    arrays['incumbent'].append(row_candidates['Incumbent Party'])
    arrays['winner'].append(row_candidates['Winning Party'])

    arrays['sldu_votes_D'].append(_first_vote_count(
        contest_rows, 'Choice', row_candidates['Democrat'], 'Total Votes'))
    arrays['sldu_votes_R'].append(_first_vote_count(
        contest_rows, 'Choice', row_candidates['Republican'], 'Total Votes'))

    arrays['uspres_votes_D'].append(0)
    arrays['uspres_votes_R'].append(0)


# Pass 2: 2016 presidential votes, looked up by the county and precinct codes
# that appear in the 2018 senate results. Precincts with no matching 2016
# rows get zero votes for both parties.
groups2 = df_2018sldu.groupby(['County', 'Precinct']).groups

uspres_by_precinct = {
    key: rows
    for key, rows in df_2016uspres.groupby(['county_desc', 'precinct_code'])
}
no_uspres_rows = df_2016uspres.iloc[0:0]

for (county, precinct) in groups2:
    subdf_2016uspres = uspres_by_precinct.get((county, precinct), no_uspres_rows)

    arrays['county_desc'].append(county)
    arrays['precinct_code'].append(precinct)

    arrays['district'].append(None)
    arrays['incumbent'].append(None)
    arrays['winner'].append(None)

    arrays['sldu_votes_D'].append(0)
    arrays['sldu_votes_R'].append(0)

    arrays['uspres_votes_D'].append(_first_vote_count(
        subdf_2016uspres, 'candidate_name', 'Hillary Clinton', 'votes'))
    arrays['uspres_votes_R'].append(_first_vote_count(
        subdf_2016uspres, 'candidate_name', 'Donald J. Trump', 'votes'))


df_2018new = pandas.DataFrame(arrays)
print(df_2018new.shape)
print('SLDU Votes:', df_2018new.sldu_votes_D.sum(), df_2018new.sldu_votes_R.sum())
print('US Pres Votes:', df_2018new.uspres_votes_D.sum(), df_2018new.uspres_votes_R.sum())
df_2018new

(6369, 9)
SLDU Votes: 1856678 1797125
US Pres Votes: 2126510 2321481


Unnamed: 0,county_desc,precinct_code,district,incumbent,winner,sldu_votes_D,sldu_votes_R,uspres_votes_D,uspres_votes_R
0,ALAMANCE,01,24,R,,358,1462,0,0
1,ALAMANCE,02,24,R,,409,1572,0,0
2,ALAMANCE,035,24,R,,835,1167,0,0
3,ALAMANCE,03C,24,R,,631,1012,0,0
4,ALAMANCE,03N,24,R,,763,963,0,0
...,...,...,...,...,...,...,...,...,...
6364,YANCEY,07 BRU,,,,0,0,91,163
6365,YANCEY,08 CRA,,,,0,0,522,1260
6366,YANCEY,09 SOU,,,,0,0,596,766
6367,YANCEY,10 PEN,,,,0,0,106,270


## Assign Precinct Geography PSIDs

Read precinct PSID values from `geogs_2018.shp`, merge by county name and precinct ID.

In [13]:
# Read the precinct layer from the shapefile.
geogs_2018 = geopandas.read_file('geogs_2018.shp', layer='geogs_2018')

# Prefix every PSID with "PSID:". Plain string concatenation is used rather
# than str.replace with a compiled '^' pattern: pandas >= 2.0 defaults
# str.replace to regex=False and rejects compiled patterns in that mode.
# Missing PSIDs stay missing either way.
geogs_2018['PSID'] = 'PSID:' + geogs_2018['PSID']

# Left merge keeps every vote row, including those whose county/precinct
# pair has no match in the shapefile (their geography columns come out NaN).
df_2018out = df_2018new.merge(geogs_2018, how='left',
                              left_on=['county_desc', 'precinct_code'],
                              right_on=['COUNTY_NAM', 'PREC_ID'])

df_2018out

Unnamed: 0,county_desc,precinct_code,district,incumbent,winner,sldu_votes_D,sldu_votes_R,uspres_votes_D,uspres_votes_R,PREC_ID,...,G18GHOR,G18LStHOR,G18LStSEN,G18LHOR,G18RStHOR,G18RStSEN,G18RHOR,G18UnaHOR,PSID,geometry
0,ALAMANCE,01,24,R,,358,1462,0,0,01,...,0.0,0.0,0.0,0.0,1474.0,1462.0,1475.0,0.0,PSID:1158854937,POLYGON ((-79.43197308657012 35.88929191508174...
1,ALAMANCE,02,24,R,,409,1572,0,0,02,...,0.0,0.0,0.0,0.0,1589.0,1572.0,1619.0,0.0,PSID:1158848961,POLYGON ((-79.53483899133799 36.04382100018476...
2,ALAMANCE,035,24,R,,835,1167,0,0,035,...,0.0,0.0,0.0,0.0,1159.0,1167.0,1185.0,0.0,PSID:1158850573,POLYGON ((-79.50051299120325 36.15426500013934...
3,ALAMANCE,03C,24,R,,631,1012,0,0,03C,...,0.0,0.0,0.0,0.0,1002.0,1012.0,1034.0,0.0,PSID:1158854377,POLYGON ((-79.52032399117273 36.08219700003401...
4,ALAMANCE,03N,24,R,,763,963,0,0,03N,...,0.0,0.0,0.0,0.0,964.0,963.0,979.0,0.0,PSID:1158852503,POLYGON ((-79.49082799096276 36.10568999961696...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,YANCEY,07 BRU,,,,0,0,91,163,07 BRU,...,0.0,0.0,0.0,4.0,143.0,135.0,145.0,0.0,PSID:1158854169,POLYGON ((-82.19135299166359 35.97501800047963...
6365,YANCEY,08 CRA,,,,0,0,522,1260,08 CRA,...,0.0,0.0,0.0,26.0,1096.0,997.0,1131.0,0.0,PSID:1158852683,POLYGON ((-82.18617999165116 35.94352800006673...
6366,YANCEY,09 SOU,,,,0,0,596,766,09 SOU,...,0.0,0.0,0.0,21.0,626.0,604.0,633.0,0.0,PSID:1158854577,POLYGON ((-82.22187599103364 35.86516199983036...
6367,YANCEY,10 PEN,,,,0,0,106,270,10 PEN,...,0.0,0.0,0.0,7.0,225.0,221.0,232.0,0.0,PSID:1158853255,POLYGON ((-82.26803599172993 35.76373800026749...


## Group Votes by Precinct

Add up all votes, grouping by precinct and concatenating multiple districts and incumbents.

In [15]:
# Bucket the merged vote rows by county and precinct code.
grouped = df_2018out.groupby(by=['county_desc', 'precinct_code'])

def semicolon(series):
    """Join the distinct, non-missing values of `series` with ';'.

    Parameters
    ----------
    series : pandas.Series (or any object with a `.values` iterable)
        Values to combine.

    Returns
    -------
    str
        Each distinct value converted with str() and joined by ';' in
        first-seen order. Missing values (None, NaN, pandas.NA) and falsy
        values such as '' are skipped; the result is '' when nothing is left.
    """
    # dict.fromkeys de-duplicates while preserving insertion order, so the
    # output is reproducible from run to run (joining a set is not).
    # pandas.isna is checked first so pandas.NA is never truth-tested.
    distinct = dict.fromkeys(
        str(value)
        for value in series.values
        if not pandas.isna(value) and value
    )
    return ';'.join(distinct)

def doit(df):
    """Summarize the rows of one group as a single-row DataFrame.

    The PSID, district, winner and incumbent columns are each collapsed with
    semicolon(); the four vote columns are summed. Columns of the result, in
    order: psid, district, winner, incumbent, sldu_votes_D, sldu_votes_R,
    uspres_votes_D, uspres_votes_R.
    """
    summary = {'psid': semicolon(df['PSID'])}

    for label_column in ('district', 'winner', 'incumbent'):
        summary[label_column] = semicolon(df[label_column])

    for votes_column in ('sldu_votes_D', 'sldu_votes_R',
                         'uspres_votes_D', 'uspres_votes_R'):
        summary[votes_column] = df[votes_column].sum()

    # Wrap each value in a one-element list so the frame has exactly one row.
    return pandas.DataFrame({column: [value] for column, value in summary.items()})

# Summarize every (county, precinct) group with doit. Because doit returns a
# one-row frame, the result carries an extra innermost index level.
df_2018final = grouped.apply(func=doit)

df_2018final

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,psid,district,winner,incumbent,sldu_votes_D,sldu_votes_R,uspres_votes_D,uspres_votes_R
county_desc,precinct_code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ALAMANCE,01,0,PSID:1158854937,24,,R,358,1462,411,1865
ALAMANCE,02,0,PSID:1158848961,24,,R,409,1572,403,2004
ALAMANCE,035,0,PSID:1158850573,24,,R,835,1167,995,1485
ALAMANCE,03C,0,PSID:1158854377,24,,R,631,1012,679,1059
ALAMANCE,03N,0,PSID:1158852503,24,,R,763,963,1036,1160
...,...,...,...,...,...,...,...,...,...,...
YANCEY,07 BRU,0,PSID:1158854169,47,,R,112,135,91,163
YANCEY,08 CRA,0,PSID:1158852683,47,,R,667,997,522,1260
YANCEY,09 SOU,0,PSID:1158854577,47,,R,649,604,596,766
YANCEY,10 PEN,0,PSID:1158853255,47,,R,130,221,106,270


In [16]:
# Write the per-precinct summary, index levels included, to CSV.
df_2018final.to_csv(path_or_buf='df_2018-senate.csv')