In [1]:
%%capture
!pip install --upgrade selenium
!apt update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from datetime import datetime
import csv
import time
import shutil


# Initialisation du WebDriver (Chrome)
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=chrome_options)

In [None]:
# Fonction pour extraire les liens des obligations et leurs informations principales
def extract_bond_links_and_data(soup):
    bond_table = soup.find("div", {"class": "table-responsive"})
    bonds = []

    if bond_table:
        rows = bond_table.find_all("tr")  # Trouve les lignes du tableau

        # Itère sur chaque ligne (en sautant l'en-tête)
        for row in rows[1:6]:
            cols = row.find_all("td")
            if len(cols) >= 2:  # Vérifie qu'il y a au moins 2 colonnes
                link = cols[0].find("a", href=True)  # Trouve le lien vers la page de détail
                bond_info = {
                    "Name": cols[0].text.strip(),
                    "WKN": cols[1].text.strip(),
                    "Detail Link": link["href"] if link else None
                }
                bonds.append(bond_info)
    return bonds

# Fonction pour extraire les static data depuis une page de détail
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def extract_static_data_from_detail(url, driver):
    driver.get(url)

    try:
        # Attendre que l'élément contenant "maturity" soit visible
        wait = WebDriverWait(driver, 10)  # Attente de 10 secondes
        maturity_element = wait.until(
            EC.presence_of_element_located((By.XPATH, "//td[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'maturity')]"))
        )
        # Extraire la valeur associée
        maturity_value = maturity_element.find_element(By.XPATH, "following-sibling::td/font").text.strip()
        print(f"Maturity trouvée via Selenium avec attente : {maturity_value}")
    except Exception as e:
        print(f"Erreur lors de l'extraction de Maturity avec Selenium : {e}")
        maturity_value = "N/A"

    return {
        "maturity": maturity_value
    }




# Script principal
try:
    # URL cible
    url = "https://www.boerse-frankfurt.de/anleihen/green-bonds"

    # Naviguer vers la page principale
    driver.get(url)

    # Afficher 100 lignes
    wait = WebDriverWait(driver, 5)
    hundred_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'page-bar-type-button btn btn-lg ng-star-inserted') and text()='100']"))
    )
    hundred_button.click()
    time.sleep(3)

    # Récupérer les obligations et leurs liens
    all_bonds = []
    page_buttons = driver.find_elements(By.XPATH, "//button[contains(@class, 'page-bar-type-button page-bar-type-button-width-auto btn btn-lg ng-star-inserted') and not(@disabled)]")
    total_pages = int(page_buttons[-1].text.strip())

    for page in range(1, total_pages + 1):
        try:
            if page != 1:
                page_button = wait.until(
                    EC.element_to_be_clickable((By.XPATH, f"//button[contains(@class, 'page-bar-type-button page-bar-type-button-width-auto btn btn-lg ng-star-inserted') and text()='{page}']"))
                )
                page_button.click()
                time.sleep(3)

            page_source = driver.page_source
            soup = BeautifulSoup(page_source, "html.parser")
            bonds = extract_bond_links_and_data(soup)
            all_bonds.extend(bonds)

        except Exception as e:
            print(f"Erreur à la page {page}: {e}")
            break

    # Collecter les static data pour chaque obligation
    detailed_bonds = []
    for bond in all_bonds:
        if bond["Detail Link"]:
            detail_url = "https://www.boerse-frankfurt.de" + bond["Detail Link"]
            static_data = extract_static_data_from_detail(detail_url, driver)
            bond.update(static_data)  # Ajoute les données à l'obligation
            detailed_bonds.append(bond)

    # Sauvegarder les données dans un fichier CSV
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"bonds_static_data_{timestamp}.csv"

    with open(file_name, "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["Name", "WKN", "Detail Link", "issue date", "maturity"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(detailed_bonds)

    print(f"Static data saved to {file_name}")

except Exception as e:
    print(f"Critical error: {e}")

finally:
    driver.quit()


Erreur lors de l'extraction de Maturity avec Selenium : Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7A9CCFB05+28789]
	(No symbol) [0x00007FF7A9C386E0]
	(No symbol) [0x00007FF7A9AD592A]
	(No symbol) [0x00007FF7A9B2930E]
	(No symbol) [0x00007FF7A9B295FC]
	(No symbol) [0x00007FF7A9B728A7]
	(No symbol) [0x00007FF7A9B4F47F]
	(No symbol) [0x00007FF7A9B6F654]
	(No symbol) [0x00007FF7A9B4F1E3]
	(No symbol) [0x00007FF7A9B1A938]
	(No symbol) [0x00007FF7A9B1BAA1]
	GetHandleVerifier [0x00007FF7AA00933D+3410093]
	GetHandleVerifier [0x00007FF7AA01E7DD+3497293]
	GetHandleVerifier [0x00007FF7AA012A73+3448803]
	GetHandleVerifier [0x00007FF7A9D97BBB+848171]
	(No symbol) [0x00007FF7A9C43C3F]
	(No symbol) [0x00007FF7A9C3F6E4]
	(No symbol) [0x00007FF7A9C3F87D]
	(No symbol) [0x00007FF7A9C2ED49]
	BaseThreadInitThunk [0x00007FF8292C259D+29]
	RtlUserThreadStart [0x00007FF8295AAF38+40]

Erreur lors de l'extraction de Maturity avec Selenium : Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7A9CCFB05+28