## Pennsylvania Senate Elections 2022  
This script scrapes the 2022 midterm election results of the Senate election in Pennsylvania. For now, it scrapes the winning party (i.e. the party with the highest win margin) per county from CNN.  

Source: https://edition.cnn.com/election/2022/results/pennsylvania/senate  

In [1]:
import requests as re
from bs4 import BeautifulSoup
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Defining function to scrape Wrappers with midterm senate election results in PA per county

def senate_election_results(soup):
    """Extract the county name and winning party from every result card in *soup*.

    Args:
        soup: Parsed page (e.g. a ``BeautifulSoup`` object) exposing ``find_all``.

    Returns:
        A list with one dict per result card, each with the keys ``"County"``
        and ``"Senate_Winning_Party"``. A field whose element is not found in
        the card is stored as an empty string.
    """
    # NOTE(review): these class names look auto-generated by the site build and
    # will presumably change over time -- verify against the live markup.
    result_cards = soup.find_all("article", class_="core-result cnn-pcl-1hnl3l7")

    # Initializing empty list to store results
    senate_win_list = []

    for card in result_cards:
        county_tag = card.find(class_="header-2-AOgLYo cnn-pcl-xk8c6r")
        party_tag = card.find(class_="party-label-239xt1 cnn-pcl-1me6450")

        # find() returns None when the element is absent; check for that
        # explicitly instead of hiding every possible error behind a bare except.
        county = county_tag.get_text() if county_tag is not None else ""
        winning_party = party_tag.get_text() if party_tag is not None else ""

        # Appending the scraped data to the previously initialised list as a dictionary
        senate_win_list.append({"County": county, "Senate_Winning_Party": winning_party})

    return senate_win_list

In [3]:
if __name__ == "__main__":
    # Drives a Chrome session through the paginated county results on CNN,
    # collects every page's rows via senate_election_results() and exports
    # them to a CSV file in the working directory.

    # Setting up Selenium
    driver = webdriver.Chrome()
    url = "https://edition.cnn.com/election/2022/results/pennsylvania/senate"

    # Upper bound on the number of result pages visited.
    # NOTE(review): presumably chosen to cover all county pages -- confirm.
    max_pages = 8

    # Initialising list to store data from all pages
    all_senate_win_list = []

    # try/finally guarantees the browser is closed even when loading the page,
    # scraping or the CSV export fails.
    try:
        driver.get(url)
        time.sleep(5)

        try:
            for _ in range(max_pages):
                # Wait up to 10 seconds for a result card to be present so the page is loaded before parsing
                # Note: Cookie pop-up must be accepted/rejected manually!
                # A compound class selector needs By.CSS_SELECTOR; By.CLASS_NAME is meant for a single class.
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ".core-result.cnn-pcl-1hnl3l7"))
                )

                # Parsing, extracting and storing relevant information from current web page
                current_soup = BeautifulSoup(driver.page_source, "html.parser")
                current_page_data = senate_election_results(current_soup)
                all_senate_win_list.extend(current_page_data)

                # Automatically clicking button to navigate to next page if it exists
                next_buttons = driver.find_elements(By.CSS_SELECTOR, ".rightButton.cnn-pcl-13b0kh1")
                if not next_buttons:
                    # End of pages, i.e. no more next button found
                    print("No more pages found.")
                    break

                next_buttons[0].click()

                # Wait time of 5 seconds for next page to load
                time.sleep(5)

        # Best effort: on a navigation error keep whatever was scraped so far and still export it
        except Exception as e:
            print("Navigation error or no more pages:", e)

        # Exporting aggregated data to csv
        csv_file_path = "senate_win_list.csv"
        # Explicit encoding so the output does not depend on the platform default
        with open(csv_file_path, mode='w', newline='', encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=["County", "Senate_Winning_Party"])
            writer.writeheader()
            writer.writerows(all_senate_win_list)

        print(f"Data has been written to {csv_file_path}.")
    finally:
        driver.quit()

No more pages found.
Data has been written to senate_win_list.csv.
