In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pathlib import Path
import pandas as pd
import re

# Dossier où sont tes CSV individuels
SRC_DIR = Path("/Users/jeanjoelgoli/Documents/FINANCE/Travaux BRVM/RICH")
OUT_CSV = SRC_DIR / "combined_journalier.csv"

EXPECTED_COLS = [
    "Date","Variation","Volume Devise Total","Cours Ajuste",
    "Volume Ajuste Total","Cours Normal","Volume Normal Total"
]
OUT_COLS = ["Ticker"] + EXPECTED_COLS

def to_float(x):
    if pd.isna(x):
        return None
    s = str(x).strip()
    if s in ("", "-", "None"):
        return None
    s = s.replace("%", "").replace("\xa0"," ").replace(" ", "").replace(",", ".")
    m = re.match(r"^[-+]?\d*\.?\d+$", s)
    if not m:
        return None
    try:
        return float(s)
    except:
        return None

def load_one(csv_path: Path) -> pd.DataFrame:
    ticker = csv_path.stem.upper() + " J"  # ajoute suffixe " J"

    df = pd.read_csv(csv_path)

    # Vérifie les colonnes
    missing = [c for c in EXPECTED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"{csv_path.name}: colonnes manquantes: {missing}")

    # Ajoute Ticker
    df.insert(0, "Ticker", ticker)

    # Reformate la date (YYYY-MM-DD -> DD/MM/YYYY)
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce").dt.strftime("%d/%m/%Y")

    # Nettoie Variation
    df["Variation"] = df["Variation"].apply(to_float)

    # Nettoie colonnes numériques
    for c in ["Volume Devise Total","Cours Ajuste","Volume Ajuste Total","Cours Normal","Volume Normal Total"]:
        df[c] = df[c].apply(to_float)
        if df[c].dropna().apply(float.is_integer).all():
            df[c] = df[c].astype("Int64")

    return df[OUT_COLS]

def main():
    files = sorted(SRC_DIR.glob("*.csv"))
    if not files:
        raise SystemExit(f"Aucun CSV trouvé dans {SRC_DIR}")

    frames = []
    for f in files:
        try:
            frames.append(load_one(f))
            print(f"OK  {f.name}")
        except Exception as e:
            print(f"SKIP {f.name} -> {e}")

    combined = pd.concat(frames, ignore_index=True)

    # Trie par Ticker puis Date
    combined["_d"] = pd.to_datetime(combined["Date"], format="%d/%m/%Y", errors="coerce")
    combined = combined.sort_values(["Ticker","_d"]).drop(columns=["_d"])

    combined.to_csv(OUT_CSV, index=False, encoding="utf-8")
    print(f"✔ Fichier combiné : {OUT_CSV} ({len(combined)} lignes)")

if __name__ == "__main__":
    main()


OK  ABJC.csv
OK  BICB.csv
OK  BICC.csv
OK  BNBC.csv
OK  BOAB.csv
OK  BOABF.csv
OK  BOAC.csv
OK  BOAM.csv
OK  BOAN.csv
OK  BOAS.csv
OK  CABC.csv
OK  CBIBF.csv
OK  CFAC.csv
OK  CIEC.csv
OK  ECOC.csv
OK  ETIT.csv
OK  FTSC.csv
OK  LNBB.csv
OK  NEIC.csv
OK  NSBC.csv
OK  NTLC.csv
OK  ONTBF.csv
OK  ORAC.csv
OK  ORGT.csv
OK  PALC.csv
OK  PRSC.csv
OK  SAFC.csv
OK  SCRC.csv
OK  SDCC.csv
OK  SDSC.csv
OK  SEMC.csv
OK  SGBC.csv
OK  SHEC.csv
OK  SIBC.csv
OK  SICC.csv
OK  SIVC.csv
OK  SLBC.csv
OK  SMBC.csv
OK  SNTS.csv
OK  SOGC.csv
OK  SPHC.csv
OK  STAC.csv
OK  STBC.csv
OK  SVOC.csv
OK  TTLC.csv
OK  TTLS.csv
OK  UNLC.csv
OK  UNXC.csv
SKIP combined_journalier.csv -> cannot insert Ticker, already exists
✔ Fichier combiné : /Users/jeanjoelgoli/Documents/FINANCE/Travaux BRVM/RICH/combined_journalier.csv (147584 lignes)
