In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import unicodedata
from IPython.display import clear_output
import time
import csv


In [2]:
# Function for name formatting

def to_ascii(text):
    """Return `text` reduced to plain ASCII characters.

    The string is first NFKD-normalized, which splits accented letters into a
    base letter plus combining marks; every code point that is still not ASCII
    is then discarded. Characters with no ASCII decomposition are therefore
    dropped entirely rather than transliterated.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('ascii')

In [3]:
# Get the list of names

# Setup: a headless Chrome session used only by this cell.
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# try/finally guarantees the browser process is shut down even when scraping fails.
try:
    # Open the site
    driver.get("https://collegeofcardinalsreport.com/cardinals/")
    time.sleep(2)  # fixed pause to let the page render before querying it

    # Optional: click 'Show full list'
    try:
        show_button = driver.find_element(By.XPATH, '//*[contains(text(), "Show full list")]')
        show_button.click()
        time.sleep(2)  # wait for content to load
    except Exception:
        # Best effort: carry on with whatever is already on the page.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop the cell.
        print("Show full list button not found or already loaded.")

    # Grab all name elements
    name_elements = driver.find_elements(By.XPATH, '//*[contains(text(), "Cardinal ")]')

    # Extract names by removing the "Cardinal " prefix
    names = [el.text.replace("Cardinal ", "").strip() for el in name_elements]
    # NOTE(review): the last match is discarded unconditionally - presumably it is
    # a non-name element that also contains "Cardinal "; confirm against the page.
    names.pop(-1)
finally:
    driver.quit()

print(names)
print(len(names))

# URL-style form of each name: "ł" mapped to "l" (to_ascii would drop it),
# accents stripped, lower-cased, spaces turned into hyphens, apostrophes removed.
names_formatted = [to_ascii(name.replace("ł", 'l')).strip().lower().replace(' ', '-').replace("'", '') for name in names]

['Santos Abril y Castelló', 'Angelo Acerbi', 'José Fuerte Advincula', 'Américo Manuel Aguiar Alves', 'Carlos Aguiar Retes', 'George Alencherry', 'Fridolin Ambongo Besungu', 'Ennio Antonelli', 'Celestino Aós Braco', 'Anders Arborelius', 'Francis Arinze', 'Felipe Arizmendi Esquivel', 'Raymundo Damasceno Assis', 'Jean-Marc Aveline', 'João Braz de Aviz', 'Audrys Juozas Bačkis', 'Fabio Baggio', 'Angelo Bagnasco', 'Lorenzo Baldisseri', 'Philippe Barbarin', 'Pedro Ricardo Barreto Jimeno', 'Cleemis Baselios', 'Gualtiero Bassetti', 'Domenico Battaglia', 'Giovanni Angelo Becciu', 'Giuseppe Bertello', 'Tarcisio Bertone', 'Ignace Bessi Dogbo', 'Giuseppe Betori', 'Ricardo Blázquez Pérez', 'Charles Maung Bo', 'Aquilino Bocos Merino', 'Vincente Bokalic Iglic', 'Josip Bozanić', 'Seán Baptist Brady', 'Walter Brandmüller', 'Leopoldo José Brenes Solórzano', 'Stephen Brislin', 'Raymond Leo Burke', 'François-Xavier Bustillo', 'Mykola Bychok', 'Luis Gerardo Cabrera Herrera', 'Domenico Calcagno', 'Antonio Ca

In [4]:
# Set up the information dictionary

# Both lists are paired positionally, so they must be the same length.
assert len(names) == len(names_formatted), "List length does not match"

# Map each display name to a record that starts out holding only its formatted name.
info = {
    name: {"formatted_name": formatted}
    for name, formatted in zip(names, names_formatted)
}

In [None]:
# Obtain the summaries

options = Options()
options.add_argument("--lang=en-US")
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# clear_output() below wipes everything printed so far, including error reports,
# so failures are remembered here and replayed after each progress update.
failure_messages = []

try:
    # start=1 makes `i` the count of cardinals visited so far (shown as progress).
    for i, name in enumerate(names, start=1):
        formatted_name = info[name]['formatted_name']
        try:
            driver.get(f"https://collegeofcardinalsreport.com/cardinals/{formatted_name}")

            try:
                summary_div = driver.find_element(By.CLASS_NAME, "cardinals-summary-block")
                summary_text = summary_div.text
            except Exception:
                # No summary block at the plain URL: retry with the "cardinal-" prefixed URL.
                # `Exception` (not a bare except) so Ctrl-C interrupts instead of
                # triggering this fallback request.
                driver.get(f"https://collegeofcardinalsreport.com/cardinals/cardinal-{formatted_name}")
                summary_div = driver.find_element(By.CLASS_NAME, "cardinals-summary-block")
                summary_text = summary_div.text

            clear_output(wait=True)
            print(i)
            # Replay earlier failures that the clear_output() call just erased.
            for failure_message in failure_messages:
                print(failure_message)

            info[name]["summary"] = summary_text

        except Exception as e:
            # Record the failure and move on to the next cardinal instead of quitting.
            failure_message = f"Error processing {name}: {e}\nsearched for: {formatted_name}"
            print(failure_message)
            failure_messages.append(failure_message)

finally:
    # Always release the browser, even if the loop is interrupted.
    driver.quit()

97


In [None]:
# Go through the names that have not worked

print("Cant find the following names, please add manually via: https://collegeofcardinalsreport.com/cardinals/")

# Collect (name, formatted_name) for every entry whose summary is absent
# or consists only of whitespace.
no_summary = []
for name, data in info.items():
    if data.get("summary", "").strip():
        continue  # this entry has a usable summary
    no_summary.append((name, data["formatted_name"]))

for name, formatted in no_summary:
    print(f"{name} ({formatted})")

In [None]:
# Save to CSV

with open('cardinals_info.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['name', 'formatted_name', 'summary'])
    # One row per entry in `info`; a missing summary becomes an empty string and
    # newlines inside a summary are replaced by spaces.
    writer.writerows(
        [name, data["formatted_name"], data.get("summary", "").replace('\n', ' ')]
        for name, data in info.items()
    )