Created by [SmirkyGraphs](http://smirkygraphs.github.io/). Code: [GitHub](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [RI BOE](https://elections.ri.gov/elections/preresults/index.php).
<hr>

# RI Election Data Cleaning

This notebook contains the code used to normalize all election results data from the Rhode Island Board of Elections since 2010. Column names are normalized using the "rename_cols" dictionary, along with some other minor tweaks such as uppercasing fields and collapsing excess whitespace. The data is then saved into the clean folder for use in the "reports" notebook, which filters the data and performs calculations for analysis.

<hr>

In [1]:
import re
import pandas as pd
import glob as glob

In [2]:
def shape_file(fname):
    """Load one raw results workbook and bring it to the common column layout.

    The spreadsheet at ``fname`` is read with ``pd.read_excel``; its headers
    are mapped onto a shared set of names, unwanted columns are dropped, and
    ``election`` / ``year`` columns are added from the path string.

    Args:
        fname: Path of the workbook. ``fname[14:26]`` is split on ``'_'``;
            the first piece is stored as ``election`` and the second as
            ``year``.
            NOTE(review): this assumes the election type and year always sit
            at that fixed offset in the path -- confirm against the raw
            file names.

    Returns:
        The reshaped ``pandas.DataFrame``.
    """
    header_map = {
        'Total': 'Total Votes',
        'E Day': 'Election Day',
        'Mail': 'Mail Ballots',
        'Contest Title': "Contest",
        "Office Title": "Contest",
        "Candidate Name": "Candidate",
        'City/Town': 'Precinct Name',
        "Electon Day": "Election Day",
        "Party Code": "Party"
    }
    frame = pd.read_excel(fname).rename(columns=header_map)

    # A bare 'Precinct' header is given whichever precinct role the file
    # does not already have: the name when a number column exists, the
    # number when only a name column exists.
    has_number = 'Precinct #' in frame.columns
    has_name = 'Precinct Name' in frame.columns
    if has_number:
        frame = frame.rename(columns={'Precinct': 'Precinct Name'})
    elif has_name:
        frame = frame.rename(columns={'Precinct': 'Precinct #'})

    # remove unwanted columns: first the 'Unnamed...' ones, then the
    # district / vote-for metadata
    unwanted = ('District Type', 'District Code', 'Vote for', 'Vote For')
    frame = frame[[c for c in frame.columns if not c.startswith('Unnamed')]]
    frame = frame[[c for c in frame.columns if c not in unwanted]]

    # add election info taken from the fixed slice of the path
    pieces = fname[14:26].split('_')
    return frame.assign(election=pieces[0], year=pieces[1])

def clean_name(name: str) -> str:
    """Return ``name`` uppercased with each whitespace run collapsed to a space.

    The value is passed through ``str()`` first, so non-string input is
    accepted (a float NaN, for example, comes back as ``'NAN'``). Leading
    and trailing whitespace is collapsed to a single space, not removed.

    Args:
        name: The value to normalize.

    Returns:
        The uppercased, whitespace-collapsed string.
    """
    # Raw string on purpose: '\s' inside a plain literal is an invalid
    # escape sequence, which newer Python versions warn about.
    return re.sub(r'\s+', ' ', str(name).upper())

def normalize_precinct(name: str) -> str:
    """Unify precinct wording inside ``name``.

    ``'PRECINCT #'`` is rewritten as ``'DISTRICT '`` and ``'PRESIDENTIAL'``
    as ``'PRESIDENT'``. Matching is case-sensitive.

    Args:
        name: The precinct label to rewrite.

    Returns:
        The label with both substitutions applied.
    """
    substitutions = (
        ('PRECINCT #', 'DISTRICT '),
        ('PRESIDENTIAL', 'PRESIDENT'),
    )
    for old, new in substitutions:
        name = name.replace(old, new)

    return name

def precinct_name(name: str) -> str:
    """Reduce a precinct label to its city/town portion.

    The fragments ``' LIMITED'``, ``' PRESIDENT'`` and ``' DIST'`` are
    removed (in that order), every run of digits is deleted, and surrounding
    whitespace is trimmed.

    Args:
        name: The precinct label.

    Returns:
        What is left of the label after the removals.
    """
    # NOTE(review): ' DIST' also matches the start of ' DISTRICT', leaving
    # 'RICT' behind -- confirm that is intended for the labels in the data.
    for fragment in (' LIMITED', ' PRESIDENT', ' DIST'):
        name = name.replace(fragment, '')

    return re.sub(r'\d+', '', name).strip()

In [3]:
# reshape every file found in the raw folder, then stack the results
files = glob.glob('./data/raw/*')
data = [shape_file(path) for path in files]

df = pd.concat(data)

In [4]:
# normalize names: uppercase and collapse whitespace in the text columns,
# then unify the precinct wording
df['Contest'] = df['Contest'].map(clean_name)
df['Candidate'] = df['Candidate'].map(clean_name)
df['Precinct Name'] = df['Precinct Name'].map(
    lambda raw: normalize_precinct(clean_name(raw))
)

In [5]:
# 0 pad precinct numbers to at least four characters
# NOTE(review): a float such as 101.0 stringifies as '101.0' and would not
# be padded -- confirm this column never arrives as floats.
df['Precinct #'] = df['Precinct #'].map(lambda num: str(num).zfill(4))

# add city/town, derived from the normalized precinct name
df['City/Town'] = df['Precinct Name'].map(precinct_name)

In [6]:
# save output: write the combined frame to the clean folder, leaving the
# row index out of the file
df.to_csv(path_or_buf='./data/clean/election_results.csv', index=False)