Skip to content

mon-compta25 #18305

@elfilalii

Description

@elfilalii

import streamlit as st
from pdf2image import convert_from_bytes
import pytesseract
import pandas as pd
import io
import re

# --- 1) Chargement mapping

@st.cache_data
def load_mapping(path='mapping.csv'):
    """Read the keyword-to-account mapping table from a CSV file.

    The function is wrapped in ``st.cache_data``.

    Args:
        path: Location of the mapping CSV; defaults to ``mapping.csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv(path)``.
    """
    mapping_table = pd.read_csv(path)
    return mapping_table

# --- 2) OCR + extraction texte

def ocr_extract_text(pdf_bytes):
    """Run OCR over a PDF and return the recognised text as one string.

    The PDF is rasterised with ``convert_from_bytes`` at 300 DPI, every
    resulting image is passed to ``pytesseract.image_to_string`` with the
    ``fra`` language, and the per-image texts are joined with newlines.

    Args:
        pdf_bytes: Raw content of the PDF document.

    Returns:
        The concatenated OCR text of all images, newline-separated.
    """
    page_texts = []
    for page_image in convert_from_bytes(pdf_bytes, dpi=300):
        page_texts.append(pytesseract.image_to_string(page_image, lang='fra'))
    return "\n".join(page_texts)

# --- 3) Extraction date / montants via regex

# First run of "two digits / two digits / four digits" found in the text.
_DATE_RE = re.compile(r'(\d{2}/\d{2}/\d{4})')

# An amount must start with a digit and may continue with digits, '.', ','
# or *same-line* whitespace only. Excluding line breaks and form feeds keeps
# the capture from running into whatever follows on the next line or page.
_AMOUNT = r'(\d(?:[\d.,]|[^\S\r\n\f\v])*)'
_MONTANT_HT_RE = re.compile(r'Montant\s+HT[:\s]+' + _AMOUNT)
_TVA_RE = re.compile(r'TVA[:\s]+' + _AMOUNT)


def _clean_amount(match):
    """Return the captured amount without whitespace, or '0' if no match."""
    if match is None:
        return '0'
    # Remove every whitespace character (regular, tab, non-breaking, narrow
    # non-breaking) used as a thousands separator, then drop separators left
    # dangling at the end by sentence punctuation.
    return re.sub(r'\s+', '', match.group(1)).rstrip('.,')


def extract_fields(text):
    """Extract the date, pre-tax amount and VAT amount from invoice text.

    Args:
        text: Text of the invoice (for example OCR output).

    Returns:
        A dict with three string values:
        ``'Date'`` -- first ``NN/NN/NNNN`` pattern found, or ``''``;
        ``'Montant HT'`` -- amount following ``Montant HT``, or ``'0'``;
        ``'TVA'`` -- amount following ``TVA``, or ``'0'``.
        Amounts keep their original decimal separator but contain no
        whitespace.
    """
    date_match = _DATE_RE.search(text)
    return {
        'Date': date_match.group(1) if date_match else '',
        'Montant HT': _clean_amount(_MONTANT_HT_RE.search(text)),
        'TVA': _clean_amount(_TVA_RE.search(text)),
    }

# --- 4) Mapping vers PCG

def get_code_pcg(text, mapping_df):
    """Find the account code of the first mapping keyword present in text.

    Rows are scanned in DataFrame order and the comparison is
    case-insensitive. Rows whose ``Mot-clé`` cell is not a string (for
    example an empty CSV cell read as NaN) or is blank are skipped, so they
    neither raise nor match every document.

    Args:
        text: Text to search (for example the OCR output of an invoice).
        mapping_df: DataFrame with ``Mot-clé`` and ``Code PCG`` columns and
            an optional ``Libellé PCG`` column.

    Returns:
        A ``(code, label)`` tuple. ``label`` is ``''`` when the column is
        absent or the cell is empty. ``('À_CLASSER', '')`` is returned when
        no keyword matches.
    """
    # Lower-case the document once instead of once per mapping row.
    haystack = text.lower()
    for _, row in mapping_df.iterrows():
        keyword = row['Mot-clé']
        if not isinstance(keyword, str) or not keyword.strip():
            continue
        if keyword.lower() in haystack:
            label = row.get('Libellé PCG', '')
            return row['Code PCG'], '' if pd.isna(label) else label
    return 'À_CLASSER', ''

# --- 5) Traitement de tous les PDF

def process_pdfs(files, mapping_df):
    """Build one accounting-entry row per uploaded PDF.

    Each file is read, passed through ``ocr_extract_text``, and the
    resulting text is given to ``extract_fields`` and ``get_code_pcg``.

    Args:
        files: Iterable of file-like objects exposing ``read()`` and
            ``name``.
        mapping_df: Mapping table forwarded to ``get_code_pcg``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Fichier``, ``Date``,
        ``Compte``, ``Libellé``, ``Débit``, ``Crédit`` and ``TVA``.
    """

    def _build_entry(pdf_file):
        # OCR the document, then derive amounts and account from its text.
        ocr_text = ocr_extract_text(pdf_file.read())
        extracted = extract_fields(ocr_text)
        account_code, account_label = get_code_pcg(ocr_text, mapping_df)
        return {
            'Fichier': pdf_file.name,
            'Date': extracted['Date'],
            'Compte': account_code,
            'Libellé': account_label,
            'Débit': extracted['Montant HT'],
            'Crédit': 0,
            'TVA': extracted['TVA'],
        }

    return pd.DataFrame([_build_entry(pdf_file) for pdf_file in files])

# --- Interface Streamlit ---

# Page header and a short usage hint.
st.title("🧾 Générateur d'écritures comptables")
st.write("Importez jusqu'à 100 PDF, puis cliquez sur Générer.")

# Keyword -> account table used to classify every invoice.
mapping_df = load_mapping()

# Multi-file uploader restricted to PDF documents.
uploaded = st.file_uploader(
    "Sélectionnez vos factures (PDF)",
    type='pdf',
    accept_multiple_files=True,
)

# Nothing below runs until the button is clicked.
generate_clicked = st.button("Générer les écritures")
if generate_clicked and not uploaded:
    st.warning("⚠️ Importez au moins un PDF.")
elif generate_clicked:
    # Show the generated entries and offer them as a UTF-8 CSV download.
    df = process_pdfs(uploaded, mapping_df)
    st.dataframe(df)
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Télécharger CSV",
        data=csv,
        file_name="ecritures.csv",
        mime="text/csv",
    )

Metadata

Metadata

Assignees

No one assigned

    Labels

    not-this-repo: For any issues that are not for this repository

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions