# mon-compta25

import streamlit as st
from pdf2image import convert_from_bytes
import pytesseract
import pandas as pd
import io
import re

# --- 1) Chargement mapping
@st.cache_data
def load_mapping(path='mapping.csv'):
    """Read the keyword-to-account mapping table from a CSV file.

    The function is wrapped with ``st.cache_data``.

    Args:
        path: Location of the CSV file; defaults to ``mapping.csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv(path)``.
    """
    mapping_table = pd.read_csv(path)
    return mapping_table

# --- 2) OCR + extraction texte
def ocr_extract_text(pdf_bytes):
    """Run OCR over every page of a PDF and return the combined text.

    Args:
        pdf_bytes: Raw content of the PDF document.

    Returns:
        One string holding the recognised text of each page, with pages
        separated by a newline.
    """
    # Rasterise the pages at 300 dpi before handing them to Tesseract.
    page_images = convert_from_bytes(pdf_bytes, dpi=300)

    page_texts = []
    for page_image in page_images:
        # 'fra' selects Tesseract's French language data.
        page_texts.append(pytesseract.image_to_string(page_image, lang='fra'))

    return "\n".join(page_texts)

# --- 3) Extraction date / montants via regex
def extract_fields(text):
    """Extract the invoice date, pre-tax amount and VAT from OCR text.

    Args:
        text: Text of the invoice (typically the OCR output).

    Returns:
        A dict with three string values:
        ``'Date'`` - first ``dd/mm/yyyy`` occurrence, or ``''`` if none;
        ``'Montant HT'`` - number following "Montant HT", or ``'0'``;
        ``'TVA'`` - number following "TVA", or ``'0'``.
        Amounts keep their original decimal/thousands punctuation but have
        all whitespace removed.
    """
    # An amount must start with a digit and may then contain digits, '.' / ','
    # and *same-line* whitespace only.  Excluding line breaks keeps the match
    # from swallowing the newline (or numbers) that follow on the next line,
    # and requiring a leading digit avoids matching a lone space when the
    # label is not followed by a number.
    amount = r'(\d(?:[\d.,]|[^\S\r\n])*)'

    def _clean(match):
        """Return the captured amount without whitespace, or '0' if absent."""
        if match is None:
            return '0'
        # \s also covers the non-breaking spaces used as thousands separators.
        compact = re.sub(r'\s+', '', match.group(1))
        # Drop sentence punctuation that directly followed the number.
        return compact.rstrip('.,')

    date_match = re.search(r'(\d{2}/\d{2}/\d{4})', text)
    montant_match = re.search(r'Montant\s+HT[:\s]+' + amount, text)
    tva_match = re.search(r'TVA[:\s]+' + amount, text)

    return {
        'Date': date_match.group(1) if date_match else '',
        'Montant HT': _clean(montant_match),
        'TVA': _clean(tva_match),
    }

# --- 4) Mapping vers PCG
def get_code_pcg(text, mapping_df):
    """Find the first mapping row whose keyword occurs in the text.

    The comparison is case-insensitive and rows are tried in DataFrame order.

    Args:
        text: Text of the invoice to classify.
        mapping_df: DataFrame with a ``'Mot-clé'`` and a ``'Code PCG'``
            column, and optionally a ``'Libellé PCG'`` column.

    Returns:
        A ``(code, label)`` tuple taken from the first matching row; the
        label is ``''`` when the mapping has no ``'Libellé PCG'`` column.
        ``('À_CLASSER', '')`` when no keyword matches.
    """
    # Lower-case the document once instead of once per mapping row.
    haystack = text.lower()

    for _, row in mapping_df.iterrows():
        keyword = row['Mot-clé']
        # Empty CSV cells are read as NaN (a float), and a blank keyword
        # would be "found" in every text; neither is a usable rule.
        if not isinstance(keyword, str) or not keyword.strip():
            continue
        if keyword.lower() in haystack:
            return row['Code PCG'], row.get('Libellé PCG', '')

    return 'À_CLASSER', ''

# --- 5) Traitement de tous les PDF
def process_pdfs(files, mapping_df):
    """Build one accounting-entry row per uploaded PDF.

    Args:
        files: Iterable of file-like objects exposing ``read()`` and ``name``.
        mapping_df: Keyword mapping passed through to ``get_code_pcg``.

    Returns:
        A ``pandas.DataFrame`` with one row per file and the columns
        Fichier, Date, Compte, Libellé, Débit, Crédit and TVA.
    """
    def _entry_for(pdf):
        """OCR a single file and turn the extracted fields into a row dict."""
        ocr_text = ocr_extract_text(pdf.read())
        extracted = extract_fields(ocr_text)
        account, label = get_code_pcg(ocr_text, mapping_df)
        return {
            'Fichier': pdf.name,
            'Date': extracted['Date'],
            'Compte': account,
            'Libellé': label,
            # The pre-tax amount is booked on the debit side; credit stays 0.
            'Débit': extracted['Montant HT'],
            'Crédit': 0,
            'TVA': extracted['TVA'],
        }

    return pd.DataFrame([_entry_for(pdf) for pdf in files])

# --- Interface Streamlit ---
st.title("🧾 Générateur d'écritures comptables")
st.write("Importez jusqu'à 100 PDF, puis cliquez sur Générer.")

mapping_df = load_mapping()

uploaded = st.file_uploader(
    "Sélectionnez vos factures (PDF)",
    type='pdf',
    accept_multiple_files=True
)

# Streamlit reruns the whole script on every widget interaction and
# st.button() is only True during the run triggered by its own click.
# Rendering the results inside the button branch therefore made the table
# and the download button vanish as soon as "Télécharger CSV" was clicked,
# so the generated entries are kept in session state instead.
if st.button("Générer les écritures"):
    if not uploaded:
        # Drop any earlier result so it is not shown next to the warning.
        st.session_state.pop("ecritures_df", None)
        st.warning("⚠️ Importez au moins un PDF.")
    else:
        st.session_state["ecritures_df"] = process_pdfs(uploaded, mapping_df)

df = st.session_state.get("ecritures_df")
if df is not None:
    st.dataframe(df)
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Télécharger CSV",
        data=csv,
        file_name="ecritures.csv",
        mime="text/csv"
    )

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

mon-compta25 #18305

--- 1) Chargement mapping

--- 2) OCR + extraction texte

--- 3) Extraction date / montants via regex

--- 4) Mapping vers PCG

--- 5) Traitement de tous les PDF

--- Interface Streamlit ---

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

mon-compta25 #18305

Description

--- 1) Chargement mapping

--- 2) OCR + extraction texte

--- 3) Extraction date / montants via regex

--- 4) Mapping vers PCG

--- 5) Traitement de tous les PDF

--- Interface Streamlit ---

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions