In [1]:
import asyncio
import re
import time

import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import async_playwright

# --- Official ticker list, as supplied (order preserved) ---
TICKERS = [
    "ABJC", "BICB", "BICC", "BNBC", "BOAB", "BOABF", "BOAC", "BOAM",
    "BOAN", "BOAS", "CABC", "CBIBF", "CFAC", "CIEC", "ECOC", "ETIT",
    "FTSC", "LNBB", "NEIC", "NSBC", "NTLC", "ONTBF", "ORAC", "ORGT",
    "PALC", "PRSC", "SAFC", "SCRC", "SDCC", "SDSC", "SEMC", "SGBC",
    "SHEC", "SIBC", "SICC", "SIVC", "SLBC", "SMBC", "SNTS", "SOGC",
    "SPHC", "STAC", "STBC", "SVOC", "TTLC", "TTLS", "UNLC", "UNXC",
]

# URL template of the per-company ratios page; `{ticker}` is filled in with
# str.format() for each entry of TICKERS.
BASE_URL = (
    "https://www.richbourse.com"
    "/investisseur/analyse-societe/ratios/{ticker}"
)

# Expected output columns, in CSV order (created empty when a page lacks them).
TARGET_COLS = [
    # Identifier filled in by the scraper itself.
    "Ticker",
    # Labels matched against the page's "Label : Value" text.
    "Secteur d'activité",
    "Symbole",
    "Quantité totale de titres",
    "Quantité de titres du flottant",
    "Capital social",
    "Cours de l'action",
    "Capitalisation boursière",
    "Volume moyen",
    "Book value (BV)",
    "Book value per share (BVPS)",
    "Price Earning Ratio (PER)",
    "PER relatif sectoriel",
    "Price-to-Book Ratio (PBR)",
    # NOTE(review): "Levrage" is the spelling the alias table maps onto;
    # presumably it mirrors the site's label — confirm before renaming.
    "Levrage",
    "Gearing",
    "PBR relatif sectoriel",
    "Price-to-Sales (PSR)",
    "Return on equity (ROE)",
    "Return on Asset (ROA)",
    "Bénéfice net par action (BNPA)",
    # Status message set by the scraper (empty when data was found).
    "Statut",
]

# Cleaning / normalization helpers
def clean_number(s: str) -> str:
    """Normalize a scraped numeric string to a compact, dot-decimal form.

    The value is cut at the first "(" (trailing annotation), percent signs
    are dropped, decimal commas become dots, and every whitespace character
    is removed.  The result is still a string; no numeric conversion is done.

    Args:
        s: Raw text found to the right of a "Label : Value" pair.  ``None``
            is accepted and treated as an empty value.

    Returns:
        The cleaned string, or "" when ``s`` is ``None``.
    """
    if s is None:
        return ""
    # Keep only what precedes an optional parenthesised annotation.
    s = s.split("(", 1)[0]
    # Drop the percent sign and switch the decimal comma to a dot.
    s = s.replace("%", "").replace(",", ".")
    # str.split() with no argument splits on *any* Unicode whitespace, so this
    # also removes no-break (U+00A0) and narrow no-break (U+202F) spaces used
    # as thousands separators, as well as tabs and newlines.
    return "".join(s.split())

# Short or variant labels mapped to the canonical column names.
# Built once at import time instead of on every call.
_KEY_ALIASES = {
    "Lev rage": "Levrage",
    "Leverage": "Levrage",
    "Book value": "Book value (BV)",
    "Book value (BVPS)": "Book value per share (BVPS)",
    "BVPS": "Book value per share (BVPS)",
    "PER": "Price Earning Ratio (PER)",
    "P/E": "Price Earning Ratio (PER)",
    "PBR": "Price-to-Book Ratio (PBR)",
    "ROE": "Return on equity (ROE)",
    "ROA": "Return on Asset (ROA)",
    "BNPA": "Bénéfice net par action (BNPA)",
}

# Typographic single quotes rewritten to the ASCII apostrophe used by the
# canonical column names.
_APOSTROPHE_FIXES = str.maketrans({"\u2019": "'", "\u2018": "'"})


def normalize_key(k: str) -> str:
    """Map a scraped label onto its canonical column name.

    Typographic apostrophes are replaced by "'" for every label (not just the
    sector label), runs of whitespace — including no-break spaces — collapse
    to a single space, and surrounding whitespace is stripped.  Known aliases
    are then translated to their canonical name.

    Args:
        k: Text found to the left of the ":" in a "Label : Value" pair.

    Returns:
        The canonical label when an alias is known, otherwise the normalized
        label itself.
    """
    k = " ".join(k.translate(_APOSTROPHE_FIXES).split())
    return _KEY_ALIASES.get(k, k)

async def scrape_ratios_for_ticker(page, ticker: str) -> dict:
    """Load one ticker's ratios page and extract its "Label : Value" pairs.

    Args:
        page: Playwright page (async API) used to navigate and read the HTML.
        ticker: Symbol substituted into BASE_URL.

    Returns:
        A dict with one string entry per TARGET_COLS column.  "Ticker" holds
        ``ticker``; "Statut" is "Aucune donnée" when no other column could be
        filled and "" otherwise.

    Raises:
        Errors raised by ``page.goto`` / ``page.wait_for_load_state`` (and any
        non-timeout error of the selector wait) propagate to the caller.
    """
    await page.goto(BASE_URL.format(ticker=ticker), timeout=60000)
    await page.wait_for_load_state("domcontentloaded", timeout=15000)
    try:
        # Best-effort wait for the main content containers.
        # NOTE(review): the selector list includes `body`, so this likely
        # returns as soon as the document exists — confirm whether a more
        # specific selector is intended.
        await page.wait_for_selector("div.tab-content, .panel, body", timeout=8000)
    except PlaywrightTimeoutError:
        # Pages without the expected containers are still parsed below.
        pass

    soup = BeautifulSoup(await page.content(), "html.parser")

    data = dict.fromkeys(TARGET_COLS, "")
    data["Ticker"] = ticker

    # Only these columns may be filled from page text; "Ticker" and "Statut"
    # are bookkeeping and must not be overwritten by scraped content.
    scraped_cols = [c for c in TARGET_COLS if c not in ("Ticker", "Statut")]
    scraped_set = set(scraped_cols)
    # Free-text columns: numeric cleaning would delete their spaces and turn
    # commas into dots, so they only get whitespace normalization.
    text_cols = {"Secteur d'activité", "Symbole"}

    # All list items first, then all paragraphs; a later match for the same
    # label overwrites an earlier one.
    for node in [*soup.find_all("li"), *soup.find_all("p")]:
        label, sep, value = node.get_text(" ", strip=True).partition(":")
        if not sep:
            continue
        key = normalize_key(label)
        if key not in scraped_set:
            continue
        if key in text_cols:
            data[key] = " ".join(value.split())
        else:
            data[key] = clean_number(value)

    # Nothing useful was found besides the ticker itself.
    if not any(data[k] for k in scraped_cols):
        data["Statut"] = "Aucune donnée"

    return data

async def run_all(
    *,
    headless: bool = False,
    storage_state: str = "cookies.json",
    output_path: str = "ratios.csv",
    delay: float = 0.5,
) -> None:
    """Scrape every ticker in TICKERS and write the results to a CSV file.

    Args:
        headless: Passed to ``chromium.launch``; ``True`` hides the browser.
        storage_state: Path of the saved Playwright storage state (logged-in
            session) used to create the browser context.
        output_path: Destination CSV file.
        delay: Seconds to pause after each ticker.

    A failure on one ticker is reported and recorded in that ticker's
    "Statut" column; it does not abort the run.
    """
    out_rows = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            context = await browser.new_context(storage_state=storage_state)
            page = await context.new_page()

            for t in TICKERS:
                print(f"🔎 {t}")
                try:
                    row = await scrape_ratios_for_ticker(page, t)
                except Exception as e:
                    # Top-level boundary: keep going with the other tickers.
                    print(f"❌ {t}: {e}")
                    row = {"Ticker": t, "Statut": f"Erreur: {e}"}
                out_rows.append(row)
                # asyncio.sleep yields to the event loop; time.sleep would
                # block it (and the notebook kernel) for the whole pause.
                await asyncio.sleep(delay)
        finally:
            # Close the browser even if context/page creation failed.
            await browser.close()

    # Guarantee every target column exists, in the expected order.
    df = pd.DataFrame(out_rows).reindex(columns=TARGET_COLS, fill_value="")
    df.to_csv(output_path, index=False)
    print(f"✅ Fichier généré : {output_path}")

# Run (Notebook/Jupyter): top-level `await` is meant for an interactive
# IPython/Jupyter cell, as the original note indicates.
# NOTE(review): in a plain script this line would presumably need to become
# `asyncio.run(run_all())` — confirm how the file is meant to be executed.
await run_all()


🔎 ABJC
🔎 BICB
🔎 BICC
🔎 BNBC
🔎 BOAB
🔎 BOABF
🔎 BOAC
🔎 BOAM
🔎 BOAN
🔎 BOAS
🔎 CABC
🔎 CBIBF
🔎 CFAC
🔎 CIEC
🔎 ECOC
🔎 ETIT
🔎 FTSC
🔎 LNBB
🔎 NEIC
🔎 NSBC
🔎 NTLC
🔎 ONTBF
🔎 ORAC
🔎 ORGT
🔎 PALC
🔎 PRSC
🔎 SAFC
🔎 SCRC
🔎 SDCC
🔎 SDSC
🔎 SEMC
🔎 SGBC
🔎 SHEC
🔎 SIBC
🔎 SICC
🔎 SIVC
🔎 SLBC
🔎 SMBC
🔎 SNTS
🔎 SOGC
🔎 SPHC
🔎 STAC
🔎 STBC
🔎 SVOC
🔎 TTLC
🔎 TTLS
🔎 UNLC
🔎 UNXC
✅ Fichier généré : ratios.csv
