Created by: [SmirkyGraphs](https://smirkygraphs.github.io/). Code: [Github](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [sos.ri.gov](https://vote.sos.ri.gov/).
<hr>

# Rhode Island Candidate Scraper

The code below collects all candidate-filing information from the Rhode Island Secretary of State website, for both primary and general elections. Primary election data shows the candidates challenging each other within each party, while general election data shows independent candidates for the chosen office. The data is saved in the `/data/raw/` folder for later cleaning & analysis.

<hr>

In [1]:
import time
import json
import pandas as pd
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from fake_useragent import UserAgent

In [2]:
def get_contest_table(con, election):
    """Return the candidate table for one office type in one election.

    Loads the `CandidateSearchSummary` page for the given office type and
    election through the module-level `driver` and parses the first
    `<table>` element on the page.

    Args:
        con: Value placed in the `OfficeType` query parameter.
        election: Value placed in the `Election` query parameter.

    Returns:
        A `pandas.DataFrame` built from the first table on the page, or
        `None` when the page contains no `<table>` element.
    """
    from io import StringIO

    url = f'{base_url}/CandidateSearchSummary?OfficeType={con}&Election={election}'
    driver.get(url)

    # find_elements returns an empty list instead of raising when nothing
    # matches, so "no table" is detected without a blanket except that would
    # also hide real driver failures.
    tables = driver.find_elements(By.TAG_NAME, "table")
    if not tables:
        return None

    # read_html expects a file-like object; passing the raw HTML string
    # directly is deprecated in recent pandas releases.
    html = tables[0].get_attribute('outerHTML')
    return pd.read_html(StringIO(html))[0]

def get_contests(elect_id):
    """Fetch the offices available for an election.

    Opens the `GetOfficesForElection` endpoint with the module-level
    `driver`, reads the text of the page's `<pre>` element and decodes it
    as JSON.

    Args:
        elect_id: Value placed in the `ElectionID` query parameter.

    Returns:
        The decoded JSON payload. The scraping loop in this notebook
        iterates over it and reads the `Text` and `Value` keys of each item.
    """
    driver.get(f'{base_url}/GetOfficesForElection/?ElectionID={elect_id}')

    raw_json = driver.find_element(By.TAG_NAME, value="pre").text
    offices = json.loads(raw_json)
    return offices

def get_city_table(city_id, election):
    """Return the candidate table for one city/town in one election.

    Loads the `CandidateSearchSummary` page for the given city/town and
    election through the module-level `driver` and parses the first
    `<table>` element on the page.

    Args:
        city_id: City/town identifier; it is converted to a string and
            left-padded with zeros to five characters before being placed
            in the `CityTown` query parameter.
        election: Value placed in the `Election` query parameter.

    Returns:
        A `pandas.DataFrame` built from the first table on the page, or
        `None` when the page contains no `<table>` element.
    """
    from io import StringIO

    city_id = str(city_id).zfill(5)
    url = f'{base_url}/CandidateSearchSummary?CityTown={city_id}&Election={election}'
    driver.get(url)

    # find_elements returns an empty list instead of raising when nothing
    # matches, so "no table" is detected without a blanket except that would
    # also hide real driver failures.
    tables = driver.find_elements(By.TAG_NAME, "table")
    if not tables:
        return None

    # read_html expects a file-like object; passing the raw HTML string
    # directly is deprecated in recent pandas releases.
    html = tables[0].get_attribute('outerHTML')
    return pd.read_html(StringIO(html))[0]

In [3]:
# Load the list of cities/towns to scrape; the scraping loop reads the
# `location` and `id` keys of each entry.
cities = json.loads(Path('./data/files/cities.json').read_text())

# Build Chrome options carrying a randomly chosen user-agent string.
options = Options()
ua = UserAgent()
options.add_argument(f'user-agent={ua.random}')

# Start Chrome through the chromedriver binary at the given path
# (replace the placeholder with the path to your own chromedriver).
s = Service('your-chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)

In [4]:
# Common URL prefix for every candidate page requested by the helpers above.
base_url = 'https://vote.sos.ri.gov/Candidates'

# Election identifiers to scrape; each one gets its own folder under
# ./data/raw/ when the tables are saved.
election_ids = [
    16997,
    18103,
]

In [5]:
# Seconds to pause after every page request so the site is not hit
# back-to-back, including when a page turned out to have no table.
REQUEST_DELAY = 5


def _save_table(df, out_dir, name):
    """Write `df` to `<out_dir>/<name>.csv`, creating the folder if needed."""
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f'{name}.csv', index=False)


for election in election_ids:
    raw_dir = Path(f'./data/raw/{election}')

    # One CSV per office, saved under <election>/statewide/.
    for con in get_contests(election):
        name = con['Text'].lower().replace(' ', '_')
        df = get_contest_table(con['Value'], election)

        if df is not None:
            _save_table(df, raw_dir / 'statewide', name)

        # Throttle after every request, not only after a successful save.
        time.sleep(REQUEST_DELAY)

    # One CSV per city/town, saved under <election>/location/.
    for city in cities:
        name = city['location'].lower().replace(' ', '_')
        df = get_city_table(city['id'], election)

        if df is not None:
            _save_table(df, raw_dir / 'location', name)

        # Throttle after every request, not only after a successful save.
        time.sleep(REQUEST_DELAY)