Created by: [SmirkyGraphs](https://smirkygraphs.github.io/). Code: [Github](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [sos.ri.gov](https://vote.sos.ri.gov/).
<hr>

# Rhode Island Candidate Cleaning

The code below is used to clean the raw information collected by `scraper.ipynb`. It currently only cleans the House and Senate General Assembly data. It combines the primary and general elections to get a count of total candidates by race, along with information regarding any challengers in the REP & DEM primaries, and whether there is another party/independent candidate in the general.

<hr>

In [1]:
import pandas as pd
import numpy as np

In [2]:
def get_challengers(df):
    """Add boolean candidate/challenger flags to a per-district count table.

    Reads the 'Democrat', 'Republican' and 'Independent' count columns and
    appends five boolean columns, in this order:

    - dem_cand / rep_cand: the party has at least one candidate.
    - dem_primary_chal / rep_primary_chal: the party has more than one
      candidate.
    - general_chal: more than one of the three count columns is non-zero.

    The frame is modified in place and the same object is returned.
    """
    dem_count = df['Democrat']
    rep_count = df['Republican']

    # a party is on the ballot when it has any candidate at all
    df['dem_cand'] = dem_count.gt(0)
    df['rep_cand'] = rep_count.gt(0)

    # two or more candidates from the same party means a contested primary
    df['dem_primary_chal'] = dem_count.gt(1)
    df['rep_primary_chal'] = rep_count.gt(1)

    # count how many of the three groups field a candidate; note that
    # several independents with no party candidate still count as one group
    groups_running = df[['Democrat', 'Republican', 'Independent']].gt(0).sum(axis=1)
    df['general_chal'] = groups_running.gt(1)

    return df

def combine_elections(df1, df2):
    """Build a per-district candidate count table with challenger flags.

    df1 must have 'Party' and 'District' columns; its rows are counted per
    party and spread into one column per party. df2 must have a 'District'
    column; every one of its rows is counted towards 'Independent'.
    (Callers in this notebook pass the primary file as df1 and the general
    file as df2.)

    Returns one row per district found in df1, with missing counts filled
    with 0 and the flag columns added by get_challengers.
    """
    # number of df2 rows per district, reported as the independent count
    independents = df2.groupby('District').size().reset_index(name='Independent')

    # number of df1 rows for each (party, district) pair
    party_counts = df1.groupby(['Party', 'District']).size().reset_index(name='n')

    # one row per district, one column per party
    by_district = party_counts.pivot_table(
        index='District', columns='Party', values='n'
    ).reset_index()

    # left merge: districts that appear only in df2 are not kept
    combined = by_district.merge(independents, how='left', on='District')
    combined = combined.fillna(0)

    # get challengers
    return get_challengers(combined)

In [3]:
# For each chamber: read the primary and general candidate files, combine
# them into per-district counts with challenger flags, and save the result.
chamber_files = [
    # state representatives
    ('./data/raw/primary/statewide/representative_in_general_assembly.csv',
     './data/raw/general/statewide/representative_in_general_assembly.csv',
     './data/clean/rep_gen_assem_challengers.csv'),
    # state senators
    ('./data/raw/primary/statewide/senator_in_general_assembly.csv',
     './data/raw/general/statewide/senator_in_general_assembly.csv',
     './data/clean/sen_gen_assem_challengers.csv'),
]

for primary_csv, general_csv, clean_csv in chamber_files:
    df1 = pd.read_csv(primary_csv)
    df2 = pd.read_csv(general_csv)
    df = combine_elections(df1, df2)
    df.to_csv(clean_csv, index=False)