Created by [SmirkyGraphs](http://smirkygraphs.github.io/). Code: [GitHub](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [RI BOE](https://elections.ri.gov/elections/preresults/index.php).
<hr>

# RI Election Data Reports

This notebook contains code used to explore Rhode Island election data, including turnout aggregated statewide and by city, precinct, and individual contest, as well as some interesting details such as how people cast their ballots (mail/ballot), the closest elections, endorsed candidates who lost, and elections whose results were changed by mail-in ballots.

<hr>

In [1]:
import pandas as pd

In [2]:
# Load the cleaned election results. 'Precinct #' is read as text instead of
# being parsed as a number.
df = pd.read_csv(
    './data/clean/election_results.csv',
    dtype={'Precinct #': str},
)

In [3]:
# Split out the two summary "contests": total ballots cast (tv) and total
# registered voters (reg). Both keep every column of df.
tv = df.loc[df['Contest'].eq('BALLOTS CAST - TOTAL')]
reg = df.loc[df['Contest'].eq('REGISTERED VOTERS - TOTAL')]

In [4]:
# data grouped by party info (only available for primary)
rep = df[df['Contest']=='BALLOTS CAST - REPUBLICAN']
non = df[df['Contest']=='BALLOTS CAST - NONPARTISAN']
dem = df[df['Contest']=='BALLOTS CAST - DEMOCRAT']

party = pd.concat([rep, non, dem])

# numeric_only=True: sum only the vote-count columns. Older pandas dropped the
# text columns implicitly; pandas >= 2.0 would otherwise concatenate their
# strings into the report.
party = party.groupby(['election', 'Contest', 'year']).sum(numeric_only=True)

# share of each party's ballots that were cast by mail
party['% - Mail'] = party['Mail Ballots']/party['Total Votes']

party.to_csv('./data/reports/vote_by_party.csv')

In [5]:
# results by election cycle (2018 primary missing numbers)
group = ['election', 'year']

# numeric_only=True: sum only the vote-count columns. Older pandas dropped the
# text columns implicitly; pandas >= 2.0 would otherwise concatenate their
# strings and the turnout division below would fail.
ele = tv.groupby(group).sum(numeric_only=True)

# registered voters per election, aligned to `ele` through the shared index
registered = reg.groupby(group)['Total Votes'].sum()

ele['% - Mail'] = ele['Mail Ballots']/ele['Total Votes']
# turnout = ballots cast / registered voters
ele['% - Turnout'] = ele['Total Votes'] / registered

ele.to_csv('./data/reports/election_report.csv')

In [6]:
# results by city/town
group = ['City/Town', 'election', 'year']

# numeric_only=True: sum only the vote-count columns. Older pandas dropped the
# text columns implicitly; pandas >= 2.0 would otherwise concatenate their
# strings and the turnout division below would fail.
city = tv.groupby(group).sum(numeric_only=True)

# registered voters per city/election, aligned to `city` through the shared index
registered = reg.groupby(group)['Total Votes'].sum()

# remove federal precinct & state precinct
# (rows whose City/Town label contains 'STATERICT' or 'FEDERALRICT')
city = city[~city.index.get_level_values(0).str.contains('STATERICT')]
city = city[~city.index.get_level_values(0).str.contains('FEDERALRICT')]

city['% - Mail'] = city['Mail Ballots']/city['Total Votes']
# turnout = ballots cast / registered voters
city['% - Turnout'] = city['Total Votes'] / registered
# rank cities within each election; 1 = highest turnout
city['turnout_rank'] = city.groupby(['election', 'year'])['% - Turnout'].rank(ascending=False)

city.to_csv('./data/reports/city_report.csv')

In [7]:
# results by precinct (2010 removed due to redistricting)
group = ['Precinct Name', 'Precinct #', 'election', 'year']

# numeric_only=True: sum only the vote-count columns. Older pandas dropped the
# text columns implicitly; pandas >= 2.0 would otherwise concatenate their
# strings and the turnout division below would fail.
pre = tv[tv['year'] != 2010].groupby(group).sum(numeric_only=True)

# registered voters per precinct/election, aligned to `pre` through the shared index
registered = reg.groupby(group)['Total Votes'].sum()

# remove precincts whose name contains ' LIMITED' or ' PRESIDENT'
# (presumably limited / presidential-only ballot precincts - confirm against the data)
pre = pre[~pre.index.get_level_values(0).str.contains(' LIMITED')]
pre = pre[~pre.index.get_level_values(0).str.contains(' PRESIDENT')]

pre['% - Mail'] = pre['Mail Ballots']/pre['Total Votes']
# turnout = ballots cast / registered voters
pre['% - Turnout'] = pre['Total Votes'] / registered
# rank precincts within each election; 1 = highest turnout
pre['turnout_rank'] = pre.groupby(['election', 'year'])['% - Turnout'].rank(ascending=False)

pre.to_csv('./data/reports/precinct_report.csv')

In [8]:
# results by contest (2020 only)
ct = df[df['year'] == 2020]

# remove ballots cast & reg summary rows, and the under/over vote pseudo-candidates
can_remove = ['UNDER VOTES', 'OVER VOTES']

con_remove = [
    'REGISTERED VOTERS - TOTAL', 
    'BALLOTS CAST - TOTAL',
    'BALLOTS CAST - DEMOCRAT', 
    'BALLOTS CAST - REPUBLICAN',
    'BALLOTS CAST - NONPARTISAN'
]

ct = ct[(~ct['Contest'].isin(con_remove)) & (~ct['Candidate'].isin(can_remove))]

# turnout by contest (looks at both parties)
# Ballots cast and registered voters are totalled per election AND precinct, so
# a contest is only compared against the election it was actually held in.
# Keying on precinct alone would add together every 2020 election's totals.
precinct_keys = ['election', 'Precinct Name']

con_tv = (
    tv[tv['year'] == 2020]
    .groupby(precinct_keys)['Total Votes'].sum()
    .rename('Ballots Cast')
    .reset_index()
)
con_reg = (
    reg[reg['year'] == 2020]
    .groupby(precinct_keys)['Total Votes'].sum()
    .rename('Total Registered')
    .reset_index()
)

# votes per contest in each precinct, with that precinct's ballots cast and
# registered voters attached (inner joins: precincts missing either are dropped)
con_turnout = ct.groupby(['election', 'year', 'Contest', 'Precinct Name'])['Total Votes'].sum().reset_index()
con_turnout = con_turnout.merge(con_tv, on=precinct_keys).merge(con_reg, on=precinct_keys)

# roll the precincts up to one row per contest; numeric_only=True keeps the
# 'Precinct Name' text column from being concatenated on pandas >= 2.0
con_turnout = con_turnout.groupby(['election', 'year', 'Contest']).sum(numeric_only=True)
con_turnout['turnout - %'] = con_turnout['Ballots Cast'] / con_turnout['Total Registered']
con_turnout.to_csv('./data/reports/contest_report.csv')

In [9]:
# one race = one contest within one party, in one election
race_keys = ['election', 'year', 'Contest', 'Party']

# total each candidate's votes per race, then sort so the highest vote-getter
# comes first within every contest/party.
# numeric_only=True keeps the text columns from being concatenated on pandas >= 2.0
ct = ct.groupby(race_keys + ['Candidate']).sum(numeric_only=True).reset_index()
ct = ct.sort_values(by=['Contest', 'Party', 'Total Votes'], ascending=[True, True, False])

# add finishing place in the contest 1 = most votes
ct['finished'] = ct.groupby(race_keys).cumcount() + 1

# add endorsement (endorsed candidates are marked with a trailing '*')
ct['endorsed'] = ct['Candidate'].str.endswith('*')

# get total endorsed by contest -> party
tot_endorsed = ct.groupby(race_keys)['endorsed'].sum()
tot_endorsed.name = 'total_endorsed'

# get only contests with 1 person endorsed
# Merge on every race key: joining on Contest/Party alone would cross-match the
# same race across different elections and duplicate rows.
endorse_loss = ct.merge(tot_endorsed.reset_index(), on=race_keys)
endorse_loss = endorse_loss[endorse_loss['total_endorsed'] == 1]

# filter for "didnt come in first" and "endorsed"
endorse_loss = endorse_loss[(endorse_loss['finished'] > 1) & (endorse_loss['endorsed'] == True)]
endorse_loss.drop(columns=['total_endorsed', 'endorsed']).to_csv('./data/reports/endorsed_loss.csv', index=False)

In [10]:
# get run off of top 2 candidates by race
top_2 = ct.groupby(['election', 'year', 'Contest', 'Party']).head(2).copy()

# Label the two finishing places. head(2) leaves only places 1 and 2, so map()
# covers every row. It builds a new text column, avoiding the deprecated
# in-place write of strings into an integer column.
top_2['finished'] = top_2['finished'].map({1: 'winner', 2: 'runner up'})

# one row per race, with winner / runner up side by side for each value
top_2 = top_2.pivot(index=['election', 'year', 'Contest', 'Party'], 
              values=['Total Votes', 'Election Day', 'Candidate', 'endorsed'], columns='finished')

# order columns by value name, with 'winner' before 'runner up'
cols = top_2.columns.sortlevel([0, 1], [True, False])[0]
top_2 = top_2[cols].drop(columns='endorsed')

# get contests turned by mail-in/emergency voting: the overall winner did not
# lead (trailed or tied) on election-day votes alone
turned = top_2[top_2[('Election Day', 'winner')] <= top_2[('Election Day', 'runner up')]]
turned.to_csv('./data/reports/election_turned_by_mail.csv')

# get vote difference between top 2 candidates
top_2['vote_diff'] = top_2[('Total Votes', 'winner')] - top_2[('Total Votes', 'runner up')]
# dropna removes races with no runner up (uncontested) before taking the 25 closest
top_2 = top_2.sort_values(by='vote_diff').dropna(axis=0, how='any')
top_2.head(25).to_csv('./data/reports/closest_elections.csv')