In [1]:
import os
import time
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement

In [2]:
# Page the scraper opens. The route and date appear to be encoded in the path
# (mosc/ber/211123) -- TODO confirm the exact meaning of each segment.
# Alternative start page (site root): 'https://www.skyscanner.ru'
URL = 'https://www.skyscanner.ru/transport/flights/mosc/ber/211123'
# File that dump_HTML writes the page source to, resolved against the
# directory named by the PWD environment variable.
dump_path = Path(os.environ['PWD']).joinpath('app/notebook/dump.html')

In [3]:
def dump_HTML(driver: WebDriver) -> None:
    """Save the driver's current page source to ``dump_path``.

    Args:
        driver: Browser session whose ``page_source`` is written out.

    The file is always encoded as UTF-8 and any missing parent directories of
    ``dump_path`` are created first. An existing file is overwritten.
    """
    target = Path(dump_path)
    # This helper is called from the error handler of the scraping cell; a
    # missing directory here would bury the failure being diagnosed.
    target.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding: the scraped page contains Cyrillic text, which the
    # locale's default encoding is not guaranteed to be able to represent.
    target.write_text(driver.page_source, encoding='utf-8')

In [9]:
def check_and_handle_captcha(driver: WebDriver, cp: str) -> None:
    """Detect the ``px-captcha`` element and try to clear it by press-and-hold.

    Args:
        driver: Active browser session to inspect and drive.
        cp: Checkpoint label; it appears in the printed notice and in the
            name of the screenshot saved once the captcha is gone.

    After an initial 2-second pause the function returns immediately when no
    element with id ``px-captcha`` is present. Otherwise it waits for every
    ``px-loader-wrapper`` element to disappear, then repeatedly presses and
    holds the captcha element for 10 seconds until it is no longer found, and
    finally saves ``captcha_handled_at_checkpoint_<cp>.png``.

    Note: both wait loops are unbounded; if the loader or the captcha never
    goes away, this function does not return.
    """
    time.sleep(2)

    if not driver.find_elements(By.ID, 'px-captcha'):
        return

    print(f'--- CAPTCHA DETECTED at checkpoint {cp} ---')

    # Hold off until the captcha's loader element is gone.
    while driver.find_elements(By.CLASS_NAME, 'px-loader-wrapper'):
        time.sleep(2)

    while True:
        time.sleep(2)
        # One lookup per attempt. Checking with find_elements and then calling
        # find_element after a pause raises NoSuchElementException when the
        # captcha disappears in between (i.e. it was just solved).
        found = driver.find_elements(By.ID, 'px-captcha')
        if not found:
            break

        captcha: WebElement = found[0]
        time.sleep(2)
        ActionChains(driver) \
            .move_to_element(captcha) \
            .click_and_hold(captcha) \
            .pause(10) \
            .release() \
            .perform()

        # Give the page time to react to the gesture before re-checking.
        time.sleep(10)

    driver.save_screenshot(f'captcha_handled_at_checkpoint_{cp}.png')

In [5]:
# Chrome launch configuration used by the scraping cell.
options = Options()
# Headless mode is requested through the command-line switch rather than the
# `options.headless` property: the property is deprecated in later Selenium 4
# releases, while the switch works across versions.
options.add_argument('--headless')
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument('--disable-dev-shm-usage')
# NOTE(review): --disable-web-security and --no-sandbox weaken browser
# isolation; confirm they are really required in the target environment.
options.add_argument('--disable-web-security')
options.add_argument('--no-sandbox')
# No quotes around the value: arguments are not passed through a shell, so
# embedded quotes would become part of the user-agent string itself.
options.add_argument('--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36')
options.add_argument('--window-size=1920,1080')
# Drop the automation-related switch and extension that chromedriver enables.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
options.add_experimental_option('useAutomationExtension', False)

In [10]:
# Scrape run: open URL, wait for results to finish loading, expand the result
# list and scroll until no new result items appear. On any failure a
# screenshot and an HTML dump are saved before the exception propagates; the
# browser is always closed.
driver: WebDriver = webdriver.Chrome(
    options=options,
    service=Service('/usr/bin/chromedriver')
)

try:
    driver.get(URL)
    time.sleep(2)
    check_and_handle_captcha(driver, '1')

    # Wait until the "loading still in progress" indicator disappears.
    # NOTE: unbounded -- this never returns if the indicator stays on the page.
    while driver.execute_script('''
        return document.querySelector(
            "div[class^='SummaryInfo_progressTextContainer']"
        );
    ''') is not None:
        time.sleep(10)

    check_and_handle_captcha(driver, '2')

    # Jump to the bottom of the page.
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    check_and_handle_captcha(driver, '3')

    # Click the "Показать больше" ("Show more") button, if there is one.
    show_more_button = driver.execute_script('''
        const matches = [];
        document.querySelectorAll('button').forEach(
            e => e.textContent == 'Показать больше' ? matches.push(e) : null
        );
        
        return matches[0];
    ''')
    # The script yields None when no button matches; calling .click() on it
    # would fail with an unrelated-looking AttributeError.
    if show_more_button is not None:
        show_more_button.click()
    else:
        print('"Показать больше" button not found; continuing without clicking it')
    check_and_handle_captcha(driver, '4')

    # Keep scrolling down until the number of result items stops growing.
    element_count = 0

    while (current_count := driver.execute_script('''
        return document.querySelector(
            "div[class^='FlightsResults_dayViewItems']"
        ).childElementCount;
    ''')) > element_count:
        element_count = current_count
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        # Pause so newly requested items can be added before the next count.
        time.sleep(2)

    print(f'childElementCount = {element_count}')
except Exception:
    # Preserve evidence of the page state, then let the error surface.
    driver.save_screenshot('exception.png')
    dump_HTML(driver)
    raise
finally:
    driver.quit()

--- CAPTCHA DETECTED at checkpoint 2 ---
childElementCount = 361
