## Scraping [COVID-Related Election Litigation Tracker](https://healthyelections-case-tracker.stanford.edu/)

Use AI-assisted scraping code (Selenium + lxml) to build a pandas DataFrame of all cases in the litigation tracker and save it to `litigation.csv`.

In [3]:
import time
import pandas as pd
from lxml import html
import requests
import re
import time

from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


url = "https://healthyelections-case-tracker.stanford.edu/"

# Build the Chrome options used for this scrape, one command-line flag at a time.
options = Options()
for chrome_flag in ('--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(chrome_flag)

# No chromedriver path is passed here: Selenium is expected to locate a
# suitable driver on its own.
driver = webdriver.Chrome(options=options)

# Open the tracker and pause briefly so the page can finish loading
# before anything reads driver.page_source.
driver.get(url)
time.sleep(2)

#set up function to run for each page on site
def scrape_tracker_page(page_source=None):
    """Parse one page of the litigation tracker table into a DataFrame.

    Parameters
    ----------
    page_source : str, optional
        HTML to parse. When omitted, the HTML currently loaded in the
        module-level Selenium ``driver`` (``driver.page_source``) is used,
        which is the original behaviour. Passing a string allows saved pages
        to be parsed without a browser.

    Returns
    -------
    pandas.DataFrame
        Columns ``case_name``, ``state``, ``date_filed``, ``issue``,
        ``status`` and ``link``. Each column is collected with its own XPath
        query and the columns are then paired up by position.

    Warns
    -----
    UserWarning
        If the XPath queries return different numbers of nodes. Rows are then
        truncated to the shortest column and values in one row may belong to
        different cases.
    """
    # Local imports keep this cell self-contained without touching the
    # notebook's import block.
    import warnings
    from urllib.parse import urljoin

    if page_source is None:
        page_source = driver.page_source

    # Parse the HTML content with lxml.
    tree = html.fromstring(page_source)

    def _texts(xpath):
        # ``.text`` is only the text that precedes an element's first child
        # and is None for empty elements; that is what the table has always
        # stored, so it is kept as is.
        return [node.text for node in tree.xpath(xpath)]

    case_links_xpath = "//*//tbody//a[@class='case-name']"
    columns = {
        "case_name": _texts(case_links_xpath),
        "state": _texts("//*//tbody//tr[contains(@class,'clickable')]//td[3]"),
        "date_filed": _texts("//*//tbody//tr//td[4]"),
        "issue": _texts("//*//tbody//tr[@class='even']//td[3]"),
        "status": _texts("//*//tbody//tr[contains(@class,'clickable')]//td[6]"),
        # urljoin copes with root-relative ("/case/1") and absolute hrefs,
        # where plain concatenation would yield "//case/1" or a mangled URL.
        "link": [urljoin(url, href) for href in tree.xpath(case_links_xpath + "//@href")],
    }

    # zip() silently stops at the shortest list, so a mismatch means rows are
    # dropped and/or shifted against each other. Surface it instead of hiding it.
    lengths = {name: len(values) for name, values in columns.items()}
    if len(set(lengths.values())) > 1:
        warnings.warn(
            "scrape_tracker_page: columns have different lengths "
            f"{lengths}; rows are paired by position and truncated to the "
            "shortest column, so values may be misaligned.",
            stacklevel=2,
        )

    return pd.DataFrame(list(zip(*columns.values())), columns=list(columns))

# XPath of the pagination control that advances to the next page.
NEXT_BUTTON_XPATH = "//li[@class='paginate_button page-item'][@id='bottomBtn_btn9']/a"
# Fixed pause (seconds) that gives the table time to re-render after a click.
PAGE_LOAD_WAIT_SECONDS = 2

# Scrape all pages: collect one DataFrame per page and concatenate once at the
# end instead of re-copying a growing frame on every iteration.
page_frames = []

try:
    while True:
        # Scrape current page
        page_frames.append(scrape_tracker_page())

        # Find the "next" button and click it if it is available
        try:
            next_button = driver.find_element(By.XPATH, NEXT_BUTTON_XPATH)
            # get_attribute may return None when the attribute is absent;
            # treat that as "not disabled" rather than raising a TypeError.
            if 'disabled' in (next_button.get_attribute('class') or ''):
                break  # Exit loop if the "next" button is disabled
            next_button.click()
            time.sleep(PAGE_LOAD_WAIT_SECONDS)  # Wait for the next page to load
        except Exception as e:
            # Best effort: in the recorded run the loop ends here with a
            # "no such element" error once the XPath stops matching. Any other
            # Selenium error also stops paging, but the pages scraped so far
            # are still kept and saved below.
            print("No more pages to scrape or error occurred:", str(e)[:100])
            break
finally:
    # Close the Selenium driver even if scraping a page raised.
    driver.quit()

all_data = pd.concat(page_frames, ignore_index=True)
display(all_data)

# Save the data to a CSV file
all_data.to_csv("litigation.csv", index=False)

No more pages to scrape or error occurred: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//li[@class='pagin


Unnamed: 0,case_name,state,date_filed,issue,status,link
0,"Daugherty v. Fulton, No. 2021CV344953 (Ga. Sup...",Georgia,01/25/2021,"Contestant, a Georgia voter, alleges a range o...",Active,https://healthyelections-case-tracker.stanford...
1,"Gohmert v. Pence, No. 20A115 (S. Ct.)",Texas,01/06/2021,,Active,https://healthyelections-case-tracker.stanford...
2,"Trump v. Kemp, 1:20-cv-05310-MHC (N.D. Ga.)",Georgia,12/31/2020,Emergency Application for Stay to Justice Alito.,Closed,https://healthyelections-case-tracker.stanford...
3,"Trump v. Wisconsin Elections Commissions, No. ...",Wisconsin,12/30/2020,,Closed,https://healthyelections-case-tracker.stanford...
4,"Trump v. Biden, No. 20-882 (Sup. Ct.)",Wisconsin,12/29/2020,01/06/2021:,Closed,https://healthyelections-case-tracker.stanford...
...,...,...,...,...,...,...
643,"Democratic National Committee v. Reagan, No. 1...",Arizona,09/23/2016,03/31/2020:,Active,https://healthyelections-case-tracker.stanford...
644,"One Wisconsin Institute vs. Jacobs, No. 3:15-c...",Wisconsin,09/12/2016,"Reardon v. LaRose, No. 20-CV-2105 (Ohio Ct. Co...",Closed,https://healthyelections-case-tracker.stanford...
645,"One Wisconsin Institute vs. Jacobs, No. 3:15-c...",Wisconsin,09/12/2016,State of Wisconsin appealed the district court...,Closed,https://healthyelections-case-tracker.stanford...
646,Democratic National Committee v. Reagan (also ...,Arizona,04/15/2016,,Active,https://healthyelections-case-tracker.stanford...
