In [1]:
import re
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright
import asyncio
import time

# --------- Liste officielle des tickers BRVM ---------
# Symbols are kept in a whitespace-separated block and split into a list,
# which is easier to extend than a quoted, comma-separated literal.
TICKERS = """
    ABJC BICB BICC BNBC BOAB BOABF BOAC BOAM BOAN BOAS
    CABC CBIBF CFAC CIEC ECOC ETIT FTSC LNBB NEIC NSBC
    NTLC ONTBF ORAC ORGT PALC PRSC SAFC SCRC SDCC SDSC
    SEMC SGBC SHEC SIBC SICC SIVC SLBC SMBC SNTS SOGC
    SPHC STAC STBC SVOC TTLC TTLS UNLC UNXC
""".split()

# Safety cap on the pagination loop: the highest ?page=N index requested.
MAX_PAGES = 2
# Courtesy pause between two page requests, in seconds.
PAGE_PAUSE = 0.5

# --------- Nettoyages ---------
def clean_number(val: str) -> str:
    """Normalise a scraped numeric cell into a compact string.

    Everything from the first "(" onwards is discarded, then every
    whitespace character is removed. This covers the regular space, the
    non-breaking space (U+00A0) and other Unicode separators such as the
    narrow no-break space (U+202F), any of which may be used to group
    thousands.

    Args:
        val: Raw cell text. May be empty or ``None``.

    Returns:
        The cleaned text, still as a string (no numeric conversion is
        done), or ``""`` when *val* is falsy.
    """
    if not val:
        return ""
    head = val.partition("(")[0]
    # str.split() with no argument splits on any Unicode whitespace, so
    # re-joining the pieces removes every kind of space in one pass.
    return "".join(head.split())

def clean_pct(val: str) -> str:
    """Convert a percentage cell such as ``"5,25 %"`` into ``"5.25"``.

    Percent signs are removed, decimal commas become dots, and surrounding
    whitespace is trimmed. The result stays a string.

    Args:
        val: Raw cell text. May be empty or ``None``.

    Returns:
        The cleaned text, or ``""`` when *val* is falsy.
    """
    if val:
        without_sign = val.replace("%", "")
        return without_sign.replace(",", ".").strip()
    return ""

# --------- Scraper dividendes avec pagination ---------
async def scrape_dividendes(page, ticker: str) -> pd.DataFrame:
    """Scrape the dividend table of one ticker, following pagination.

    The base URL is requested first, then ``?page=1`` up to
    ``?page=MAX_PAGES``. Pagination stops early as soon as a page fails to
    load, has no ``table.table`` element, lacks a ``<thead>`` or
    ``<tbody>``, or yields no row with at least nine cells.

    Args:
        page: Object exposing awaitable ``goto``, ``wait_for_selector`` and
            ``inner_html`` methods (``main`` passes a Playwright page).
        ticker: Symbol inserted into the URL and stored in the ``Ticker``
            column of every row.

    Returns:
        A DataFrame with the columns ``Ticker``, ``Exercice``, ``Symbole``,
        ``Nom``, ``Dividende``, ``Dividende ajusté``, ``Rendement``,
        ``Ex-dividende`` and ``Date paiement``, de-duplicated; or an empty
        DataFrame when no row was collected.
    """
    keep_cols = ["Exercice","Symbole","Nom","Dividende","Dividende ajusté","Rendement","Ex-dividende","Date paiement"]
    base_url = f"https://www.richbourse.com/investisseur/dividende/index/0/{ticker}"
    all_rows = []

    for p in range(MAX_PAGES + 1):
        url = base_url if p == 0 else f"{base_url}?page={p}"

        try:
            await page.goto(url, timeout=60000)
            await page.wait_for_selector("table.table", timeout=8000)
            html = await page.inner_html("table.table")
        except Exception:
            # Best effort: a missing table or an unreachable page simply
            # ends pagination; rows gathered so far are still returned.
            break

        soup = BeautifulSoup(html, "html.parser")
        tbody = soup.find("tbody")
        if not soup.find("thead") or not tbody:
            break

        page_rows = []
        for tr in tbody.find_all("tr"):
            tds = [td.get_text(" ", strip=True) for td in tr.find_all("td")]
            if len(tds) < 9:
                # Not a full data row (fewer than nine cells): skip it.
                continue
            # Cell 0 is not used; cells 1-8 map to the kept columns.
            page_rows.append({
                "Exercice": tds[1],
                "Symbole": tds[2],
                "Nom": tds[3],
                "Dividende": clean_number(tds[4]),
                "Dividende ajusté": clean_number(tds[5]),
                "Rendement": clean_pct(tds[6]),
                "Ex-dividende": tds[7],
                "Date paiement": tds[8],
                "Ticker": ticker,
            })

        # A page without any usable row marks the end of the data.
        if not page_rows:
            break
        all_rows.extend(page_rows)

        # Courtesy pause between pages. asyncio.sleep is awaited so the
        # event loop is not blocked, which time.sleep would do.
        await asyncio.sleep(PAGE_PAUSE)

    if not all_rows:
        return pd.DataFrame()

    df = pd.DataFrame(all_rows, columns=["Ticker"] + keep_cols)

    # Defensive de-duplication, in case a page repeats rows already seen.
    df = df.drop_duplicates(subset=["Ticker","Exercice","Symbole","Ex-dividende","Date paiement"], keep="first")

    return df

# --------- Main : boucle sur tous les tickers ---------
def _placeholder_row(ticker: str, statut: str) -> pd.DataFrame:
    """Build the one-row frame recorded for a ticker that produced no data."""
    return pd.DataFrame([{
        "Ticker": ticker,
        "Exercice": "",
        "Symbole": "",
        "Nom": "",
        "Dividende": "",
        "Dividende ajusté": "",
        "Rendement": "",
        "Ex-dividende": "",
        "Date paiement": "",
        "Statut": statut,
    }])


async def main():
    """Scrape dividends for every ticker in ``TICKERS`` and export one CSV.

    A Chromium browser is launched with the session stored in
    ``cookies.json``. Each ticker contributes either its scraped rows (with
    an empty ``Statut``) or a single placeholder row whose ``Statut`` is
    ``"Aucune donnée"`` or ``"Erreur: ..."``. The combined table is written
    to ``dividende_histo.csv`` with ``Ticker`` as the first column.

    A failure on one ticker is reported and recorded, then the loop
    continues. The browser is closed even if setup or scraping raises.
    """
    # Frames are collected and concatenated once at the end. This avoids
    # re-copying a growing DataFrame per ticker and concatenating onto an
    # initially empty frame.
    frames = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)  # True to run without a window
        try:
            # IMPORTANT: cookies.json (premium session) must already exist.
            context = await browser.new_context(storage_state="cookies.json")
            page = await context.new_page()

            for ticker in TICKERS:
                try:
                    print(f"📈 Dividendes {ticker} ...")
                    df = await scrape_dividendes(page, ticker)

                    if df.empty:
                        print(f"⚠️ Aucune donnée pour {ticker}")
                        frames.append(_placeholder_row(ticker, "Aucune donnée"))
                        # Awaited sleeps keep the event loop responsive.
                        await asyncio.sleep(0.3)
                        continue

                    # Same column set as the placeholder rows.
                    df["Statut"] = ""
                    frames.append(df)
                    await asyncio.sleep(0.4)

                except Exception as e:
                    # Keep going: one failing ticker must not abort the run.
                    print(f"❌ Erreur {ticker}: {e}")
                    frames.append(_placeholder_row(ticker, f"Erreur: {e}"))
        finally:
            await browser.close()

    # Put Ticker first and export.
    if frames:
        all_data = pd.concat(frames, ignore_index=True)
        cols = ["Ticker"] + [c for c in all_data.columns if c != "Ticker"]
        all_data = all_data[cols]
        all_data.to_csv("dividende_histo.csv", index=False)
        print("✅ Fichier généré : dividende_histo.csv")
    else:
        print("⚠️ Rien récupéré")

# Run the scraper. Top-level ``await`` is only accepted by IPython/Jupyter;
# from a plain Python script, call ``asyncio.run(main())`` instead.
await main()


📈 Dividendes ABJC ...
📈 Dividendes BICB ...
📈 Dividendes BICC ...
📈 Dividendes BNBC ...
📈 Dividendes BOAB ...
📈 Dividendes BOABF ...
📈 Dividendes BOAC ...
📈 Dividendes BOAM ...
📈 Dividendes BOAN ...
📈 Dividendes BOAS ...
📈 Dividendes CABC ...
📈 Dividendes CBIBF ...
📈 Dividendes CFAC ...
📈 Dividendes CIEC ...
📈 Dividendes ECOC ...
📈 Dividendes ETIT ...
📈 Dividendes FTSC ...
📈 Dividendes LNBB ...
📈 Dividendes NEIC ...
📈 Dividendes NSBC ...
📈 Dividendes NTLC ...
📈 Dividendes ONTBF ...
📈 Dividendes ORAC ...
📈 Dividendes ORGT ...
📈 Dividendes PALC ...
📈 Dividendes PRSC ...
📈 Dividendes SAFC ...
📈 Dividendes SCRC ...
📈 Dividendes SDCC ...
📈 Dividendes SDSC ...
📈 Dividendes SEMC ...
📈 Dividendes SGBC ...
📈 Dividendes SHEC ...
📈 Dividendes SIBC ...
📈 Dividendes SICC ...
📈 Dividendes SIVC ...
📈 Dividendes SLBC ...
📈 Dividendes SMBC ...
📈 Dividendes SNTS ...
📈 Dividendes SOGC ...
📈 Dividendes SPHC ...
📈 Dividendes STAC ...
📈 Dividendes STBC ...
📈 Dividendes SVOC ...
📈 Dividendes TTLC ...
📈 Divid