In [None]:
!pip install selenium

In [None]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Tabular view of the dataset resource, opened on page 1 with 20 rows per page.
TABLE_URL = "https://dane.gov.pl/pl/dataset/2582,statystyki-zakazen-i-zgonow-z-powodu-covid-19-z-uwzglednieniem-zaszczepienia-przeciw-covid-19/resource/36897/table?page=1&per_page=20&q=&sort="

# Start a Chrome session and load the table page; `driver` is shared by the
# rest of the notebook.
driver = webdriver.Chrome()
driver.get(TABLE_URL)

def get_all_elements():
    """Return a list with every element currently present in the page.

    Uses the module-level ``driver`` and the catch-all XPath ``//*``.
    """
    match_everything = "//*"
    return driver.find_elements(By.XPATH, match_everything)

def display_element_info(element):
    """Print the tag name, text, ``id`` and ``class`` of ``element``.

    Each value is read immediately before it is printed, so an exception
    raised while reading one value leaves the earlier lines already printed.
    The report ends with a dashed separator line. Returns ``None``.
    """
    print("Tag Name:", element.tag_name)
    print("Text:", element.text)
    labelled_attributes = (
        ("Attribute 'id':", "id"),
        ("Attribute 'class':", "class"),
    )
    for label, attribute_name in labelled_attributes:
        print(label, element.get_attribute(attribute_name))
    print("------------")

# Debug aid: print basic information about every element found on the page.
all_elements = get_all_elements()

stale_count = 0
for element in all_elements:
    try:
        display_element_info(element)
    except StaleElementReferenceException:
        # The element was detached from the DOM after it was located (the page
        # is still rendering). Re-querying the whole DOM and scanning it for the
        # same reference costs a full page query per stale element, and the
        # retry could itself raise unhandled, so stale elements are skipped.
        stale_count += 1

if stale_count:
    print(f"Skipped {stale_count} stale element(s).")

# Give the page a moment to finish loading before interacting with it.
time.sleep(5)

# Best-effort dismissal of the cookie-consent modal: wait up to 10 seconds for
# it to appear, then click its "footer-close" button.
try:
    cookie_popup = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "modal-content"))
    )
    close_button = cookie_popup.find_element(By.ID, "footer-close")
    close_button.click()
except WebDriverException as exc:
    # Catches Selenium failures only (timeout, missing button, failed click),
    # so the notebook carries on without the popup being dismissed, while
    # KeyboardInterrupt and genuine programming errors still surface.
    print("No cookie consent popup found or it took too long to appear.")
    print("Reason:", type(exc).__name__)

# Let the page settle after the modal is (possibly) closed.
time.sleep(5)

# Scrape every page of the paginated table into `data` (one list of cell texts
# per table row) and the header texts into `columns_names`.
LAST_PAGE = 497             # Feel free to lower this for testing
MAX_NEXT_BUTTON_INDEX = 11  # The "next page" arrow stops shifting once it is the 11th list item

data = []  # Scraped data goes here
wait = WebDriverWait(driver, 10)
table_page_number = 7  # Initial index of the "next page" arrow in the pagination list

try:
    for page_number in range(1, LAST_PAGE + 1):
        # Wait for the table to appear on the page
        table = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "datagrid")))

        if page_number == 1:  # Column names are read once, from the first page
            header_row = table.find_element(By.XPATH, "/html/body/app-root/app-main-layout/main/app-dataset-parent/div/app-dataset-resource/section/div[4]/div[2]/app-resource-table-no-filters/div/div[3]/div/table/thead/tr")
            columns_names = [th.text for th in header_row.find_elements(By.CLASS_NAME, "datagrid__heading")]

        for row in table.find_elements(By.XPATH, ".//tbody/tr"):
            cells = row.find_elements(By.XPATH, "./td")
            data.append([cell.text for cell in cells])

        if page_number == LAST_PAGE:
            # Nothing left to navigate to: skip the click and the wait.
            break

        # Wait until the "next page" arrow (a list item with a link, not a real
        # button) is present on the page.
        next_button = wait.until(EC.presence_of_element_located((By.XPATH, f"/html/body/app-root/app-main-layout/main/app-dataset-parent/div/app-dataset-resource/section/div[4]/div[2]/app-resource-table-no-filters/div/div[4]/div/app-pagination/nav/div/ul/li[{table_page_number}]/a")))

        # The arrow moves one position to the right on each of the first pages
        # and then stays put; cap the index so the XPath keeps matching.
        # NOTE(review): the cap of 11 comes from the original author's comment -
        # confirm against the live pagination widget.
        if table_page_number < MAX_NEXT_BUTTON_INDEX:
            table_page_number += 1

        # ActionChains instead of a plain click(), which proved less reliable here.
        ActionChains(driver).move_to_element(next_button).click().perform()

        # Let the page load after clicking
        time.sleep(5)
finally:
    # Always close the browser, even when a wait times out mid-scrape; rows
    # collected so far remain available in `data`.
    driver.quit()

In [None]:
# Lift pandas' row-display limit so the preview below is not truncated.
pd.set_option('display.max_rows', None)

# One DataFrame row per scraped table row, labelled with the header texts.
df = pd.DataFrame(data=data, columns=columns_names)

# Preview the first 100 rows.
df.head(n=100)

In [None]:
# Persist the scraped table next to the notebook. The row index is written as
# the first (unnamed) column, which is pandas' default.
df.to_csv('covid_dataset.csv', index=True)