In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import ElementClickInterceptedException
import time
import pandas as pd
import requests

# Select the representative profile

Considering the following parameters
- tipo de candidatura = Diputación mayoría relativa 
- entidad = all states, iterate through hard-coded list
-

In [8]:
#   FUNCTIONS
# Function to scroll and try clicking the button
def scroll_and_click(driver,button_selector):
    while True:
        try:
            # Attempt to click the button
            button = driver.find_element(By.CSS_SELECTOR, button_selector)
            button.click()
            break  # Exit the loop if the click is successful
        except ElementClickInterceptedException:
            # Scroll down 100 pixels
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.5)  # Optional: wait a little before trying again

def open_page(url):
    # Set up WebDriver (assuming you're using Chrome)
    options = webdriver.ChromeOptions()
    options.add_experimental_option('prefs', {
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    })

    driver = webdriver.Chrome(options=options)

    driver.get(url)

    return driver

def search_by_state(driver, state_name, search_button_pixel_scroll=500):
    # select only those that correspond to the tipo_de_candidatura == mayoria relativa
    candidacy_type = driver.find_element(By.CSS_SELECTOR, '#busqueda-form_tipoCandidatura')
    # Click the dropdown to reveal the options
    candidacy_type.click()

    # select the needed option
    mayoria_relativa = driver.find_element(By.CSS_SELECTOR, 'div[title="Diputación mayoría relativa"]')
    mayoria_relativa.click()

    # select only those that correspond to the tipo_de_candidatura == mayoria relativa
    state_menu = driver.find_element(By.CSS_SELECTOR, '#busqueda-form_entidad')
    # Click the dropdown to reveal the options
    state_menu.click()

    # select the needed state
    state = driver.find_element(By.CSS_SELECTOR, f'div[title="{state_name}"]')
    state.click()

    #execute search
    driver.execute_script(f"window.scrollBy(0, {search_button_pixel_scroll});")  # Scroll down by 1000 pixels
    buscar = driver.find_element(By.CSS_SELECTOR, 'button.ant-btn:nth-child(2)')
    # Click the dropdown to reveal the options
    time.sleep(1)
    buscar.click()
    time.sleep(2)


def get_candidate_table(driver):
    # select only those that correspond to the tipo_de_candidatura == mayoria relativa
    table = driver.find_element(By.CSS_SELECTOR, '#contenedor-resultados > div.ant-table-wrapper > div > div > div > div > div > table')

    # Find all rows in the table (usually inside <tr> tags)
    rows = table.find_elements(By.XPATH, './/tbody/tr')  

    return table, rows

def download_table_candidates(driver, output_dir, state_name, debug=False):
    # Loop through each row and find the url with the file to download
    n=1

    table, rows = get_candidate_table(driver)

    if debug:
        rows_to_eval=range(1,2)
    else: 
        rows_to_eval=range(1,len(rows))

    for i in rows_to_eval: #the table links begin on the 2nd body row, get the link and open, download file, and go back
        n=n+1
        table, rows = get_candidate_table(driver) #when you refresh the page, the old table variable expires, so a re-declaration is needed

        # Locate the 4th column (index 3 because it is 0-based)
        link_element = table.find_element(By.CSS_SELECTOR, f'tr.ant-table-row:nth-child({n}) > td:nth-child(4) > div:nth-child(1) > a:nth-child(2)')
        # Extract the link and print it (optional)
        link_url = link_element.get_attribute('href')
        print(f"Opening link: {link_url}")

        # Click the link
        link_element.click()
        # Allow the new page to load (adjust time as necessary)
        time.sleep(5)

        button_selector='#imprimir > div.ant-row > div:nth-child(2) > div > a'
        scroll_and_click(driver,button_selector)  # Scroll down by 1000 pixels
        time.sleep(3)

        # Switch to the new tab
        driver.switch_to.window(driver.window_handles[1])  # Switch to the new tab

        # Get the URL of the PDF file
        pdf_url = driver.current_url

        # Download the PDF
        response = requests.get(pdf_url)
        pdf_path = f'{output_dir}/{state_name}_{n-1}.pdf'  

        # Save the PDF file
        with open(pdf_path, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded PDF to: {pdf_path}")

        # Close the PDF tab
        driver.close()

        # Switch back to the original tab
        driver.switch_to.window(driver.window_handles[0])

        # Go back to the original table page
        driver.back()
        time.sleep(4)

    return len(rows)

def get_candidate_pdfs(driver, output_dir, state_name, debug=False):
    while True:
        page_number = 1
        try:
            # evaluate current table view
            download_table_candidates(driver, output_dir, state_name , page_number = page_number , debug=True)
            # scroll down to get the table page index in view
            scroll_and_click(driver,".ant-pagination-next > button:nth-child(1)")
            # Locate the next-page button using its selector
            button = driver.find_element(By.CSS_SELECTOR, ".ant-pagination-next > button:nth-child(1)")
            # Check if the 'disabled' attribute is present
            is_disabled = button.get_attribute('disabled')
            if is_disabled:
                print("Downloaded all candidates!")
                break
            else: 
                page_number=page_number+1
                time.sleep(0.1)

        except Exception as e:
            print(f"An error occurred: {e}")
            break


# Parameters

In [9]:
url = 'https://candidaturas2021.ine.mx/'
search_button_pixel_scroll = 500
output_dir = "C:/Users/luisf/Downloads/propuestas_politicas"

states = ["AGUASCALIENTES",
            "BAJA CALIFORNIA",
            "BAJA CALIFORNIA SUR",
            "CAMPECHE",
            "CHIAPAS",
            "CHIHUAHUA",
            "COAHUILA",
            "COLIMA",
            "DURANGO",
            "GUANAJUATO",
            "GUERRERO",
            "HIDALGO",
            "JALISCO",
            "MEXICO",
            "MICHOACAN",
            "MORELOS",
            "NAYARIT",
            "NUEVO LEON",
            "OAXACA",
            "PUEBLA",
            "QUERETARO",
            "QUINTANA ROO",
            "SAN LUIS POTOSI",
            "SINALOA",
            "SONORA",
            "TABASCO",
            "TAMAULIPAS",
            "TLAXCALA",
            "VERACRUZ",
            "YUCATAN",
            "ZACATECAS"]


# Processing

In [10]:
home_page = open_page(url)

time.sleep(5)

search_by_state(home_page, states[0], search_button_pixel_scroll)

time.sleep(2)

get_candidate_pdfs(home_page, output_dir, states[0], debug=True)

Opening link: https://candidaturas2021.ine.mx/detalleCandidato/24042/4
Downloaded PDF to: C:/Users/luisf/Downloads/propuestas_politicas/AGUASCALIENTES_1.pdf


In [16]:
scroll_and_click(home_page,".ant-pagination-next > button:nth-child(1)")