In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import json
import os
import logging

logging.basicConfig(level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s")


def setup_driver():
    """Build and return a Chrome WebDriver configured with the headless flag."""
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument("--headless")
    browser = webdriver.Chrome(options=chrome_opts)
    return browser

def click_show_more(driver, max_click=20, timeout=5):
    """Click the pagination ("Show More") button until it disappears or a cap is hit.

    Each click is retried through ``WebDriverWait`` so that a button which is
    briefly missing, stale, covered by another element or not yet
    interactable (e.g. while the next batch of results is rendering) does not
    end pagination early or crash the scrape.

    Args:
        driver: Selenium WebDriver with the results page already loaded.
        max_click: Maximum number of successful clicks to perform.
        timeout: Seconds to keep retrying for a clickable button before
            concluding that there is nothing more to load.
    """
    # Local imports: these names are not part of the file's top-level imports.
    from selenium.common.exceptions import (
        ElementClickInterceptedException,
        ElementNotInteractableException,
        StaleElementReferenceException,
        TimeoutException,
    )
    from selenium.webdriver.support.ui import WebDriverWait

    button_xpath = "//button[contains(@class, 'PaginationButton__StyledPaginationButton')]"

    def _click_button(drv):
        # Re-locate the button on every attempt so a re-rendered (stale)
        # element is never reused.
        drv.find_element(By.XPATH, button_xpath).click()
        return True

    wait = WebDriverWait(
        driver,
        timeout,
        ignored_exceptions=(
            NoSuchElementException,
            StaleElementReferenceException,
            ElementClickInterceptedException,
            ElementNotInteractableException,
        ),
    )

    clicks = 0
    while clicks < max_click:
        try:
            wait.until(_click_button)
        except TimeoutException:
            # No successful click within `timeout` seconds: treat as finished.
            logging.info("No more 'Show More' button found. Done loading content.")
            break
        clicks += 1
        logging.info(f"Clicked 'Show More' {clicks} times...")
    else:
        # The loop ended because of the cap, not because the button vanished,
        # so the page may still hold results that were never loaded.
        logging.warning(
            "Stopped after reaching max_click=%d; more results may remain unloaded.",
            max_click,
        )


def extract_professor_cards(driver):
    """Collect name, department and "would take again" text from each teacher card.

    Args:
        driver: Selenium WebDriver with the results page loaded.

    Returns:
        A list of dicts with the keys ``"professor"``, ``"department"`` and
        ``"would_take_again"``. A value is the stripped text of the first
        matching element inside the card, or ``None`` when the card has no
        such element.
    """
    field_xpaths = {
        "professor": ".//div[contains(@class, 'CardName')]",
        "department": ".//div[contains(@class, 'CardSchool__Department')]",
        "would_take_again": ".//div[contains(@class, 'CardFeedback__CardFeedbackNumber')]",
    }

    def _first_text(card, xpath):
        # find_elements returns an empty list instead of raising, so one
        # incomplete card cannot abort the whole extraction.
        matches = card.find_elements(By.XPATH, xpath)
        return matches[0].text.strip() if matches else None

    cards = driver.find_elements(By.XPATH, "//a[contains(@class, 'TeacherCard__StyledTeacherCard')]")

    res = []
    for card in cards:
        record = {key: _first_text(card, xpath) for key, xpath in field_xpaths.items()}
        missing = [key for key, value in record.items() if value is None]
        if missing:
            logging.warning("Card is missing field(s) %s: %r", missing, record)
        res.append(record)
    return res


# Search-results page to scrape (school id 1530, wildcard query).
url = "https://www.ratemyprofessors.com/search/professors/1530?q=*"

driver = setup_driver()
try:
    driver.get(url)
    click_show_more(driver)  # expand the result list (bounded by the default click cap)
    profs = extract_professor_cards(driver)
finally:
    # Always shut the browser down, even if a scraping step raises, so no
    # headless Chrome process is left running.
    driver.quit()

for r in profs:
    print(r)


# Build the path with os.path so it resolves on every OS, not just Windows.
# NOTE: this changes the working directory relative to the current one, so
# running this section twice in one session moves one level further each time.
os.chdir(os.path.join(os.pardir, "data"))
with open("prof_uw.json", "w", encoding="utf-8") as file:
    json.dump(profs, file, ensure_ascii=False, indent=4)



2025-04-26 01:04:14,458 - INFO - Clicked 'Show More' 1 times...
2025-04-26 01:04:20,511 - INFO - Clicked 'Show More' 2 times...
2025-04-26 01:04:23,877 - INFO - Clicked 'Show More' 3 times...
2025-04-26 01:04:28,261 - INFO - Clicked 'Show More' 4 times...
2025-04-26 01:04:30,653 - INFO - Clicked 'Show More' 5 times...
2025-04-26 01:04:32,877 - INFO - Clicked 'Show More' 6 times...
2025-04-26 01:04:36,541 - INFO - Clicked 'Show More' 7 times...
2025-04-26 01:04:40,198 - INFO - Clicked 'Show More' 8 times...
2025-04-26 01:04:42,518 - INFO - Clicked 'Show More' 9 times...
2025-04-26 01:04:47,082 - INFO - Clicked 'Show More' 10 times...
2025-04-26 01:04:48,739 - INFO - Clicked 'Show More' 11 times...
2025-04-26 01:04:49,706 - INFO - Clicked 'Show More' 12 times...
2025-04-26 01:04:51,148 - INFO - Clicked 'Show More' 13 times...
2025-04-26 01:04:53,097 - INFO - Clicked 'Show More' 14 times...
2025-04-26 01:04:54,848 - INFO - Clicked 'Show More' 15 times...
2025-04-26 01:04:58,080 - INFO - C

{'professor': 'Zoraida Rico', 'department': 'Statistics', 'would_take_again': '0%'}
{'professor': 'Kenneth Sebens', 'department': 'Biology', 'would_take_again': '38%'}
{'professor': 'Jennifer Taggart', 'department': 'Mathematics', 'would_take_again': '81%'}
{'professor': 'Elena Pezzoli', 'department': 'Mathematics', 'would_take_again': '49%'}
{'professor': 'Yael Jacobs', 'department': 'Economics', 'would_take_again': '38%'}
{'professor': 'James Zhang', 'department': 'Mathematics', 'would_take_again': '46%'}
{'professor': 'Sumit Roy', 'department': 'Engineering', 'would_take_again': '15%'}
{'professor': 'Bianca Viray', 'department': 'Mathematics', 'would_take_again': '35%'}
{'professor': 'Shiping Cao', 'department': 'Mathematics', 'would_take_again': '35%'}
{'professor': 'Sang-gyeun Ahn', 'department': 'Art', 'would_take_again': '43%'}
{'professor': 'Anastassiya Semenova', 'department': 'Applied Mathematics', 'would_take_again': '29%'}
{'professor': 'Mathilde Magga', 'department': 'Engl