-
Notifications
You must be signed in to change notification settings - Fork 1k
Description
import streamlit as st
from pdf2image import convert_from_bytes
import pytesseract
import pandas as pd
import io
import re
# --- 1) Load the mapping
@st.cache_data
def load_mapping(path='mapping.csv'):
    """Read the keyword-to-account mapping CSV into a DataFrame.

    The function is decorated with ``st.cache_data``, so Streamlit handles
    caching of the returned table.

    Args:
        path: Location of the CSV file. Other code in this file reads the
            columns ``'Mot-clé'``, ``'Code PCG'`` and (optionally)
            ``'Libellé PCG'`` from the result.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    mapping = pd.read_csv(path)
    return mapping
# --- 2) OCR + text extraction
def ocr_extract_text(pdf_bytes):
    """Rasterize a PDF and return the OCR'd text of all its pages.

    Args:
        pdf_bytes: Raw content of the PDF, passed to
            ``pdf2image.convert_from_bytes``.

    Returns:
        One string holding the text of every page, pages separated by a
        newline, in the order the rasterizer returned them.
    """
    # Rendered at 300 dpi; recognition uses Tesseract's French model.
    pages = convert_from_bytes(pdf_bytes, dpi=300)
    page_texts = []
    for page in pages:
        page_texts.append(pytesseract.image_to_string(page, lang='fra'))
    return "\n".join(page_texts)
# --- 3) Extract date / amounts via regex
def extract_fields(text):
    """Pull the invoice date, pre-tax amount and VAT amount out of OCR text.

    Args:
        text: Full text of the invoice.

    Returns:
        A dict with three string values:

        * ``'Date'``: first ``dd/mm/yyyy`` occurrence, or ``''`` if none.
        * ``'Montant HT'``: number following ``Montant HT``, or ``'0'``.
        * ``'TVA'``: number following ``TVA``, or ``'0'``.

        Amounts keep their original decimal separator; all whitespace
        (thousands separators, including non-breaking spaces) is removed.
    """
    # An amount must START with a digit and stay on one line. The previous
    # class `[\d\s.,]+` could match whitespace alone (turning
    # "TVA : non applicable" into '' instead of the '0' default) and could run
    # across a line break into unrelated digits on the next line.
    amount = r'(\d[\d \t\u00a0\u202f.,]*)'

    date_match = re.search(r'(\d{2}/\d{2}/\d{4})', text)
    montant_match = re.search(r'Montant\s+HT[:\s]+' + amount, text)
    tva_match = re.search(r'TVA[:\s]+' + amount, text)

    def clean(match):
        """Return the captured amount without whitespace or trailing punctuation."""
        if not match:
            return '0'
        # `\s` also covers the (narrow) no-break spaces used as French
        # thousands separators; a trailing '.' or ',' is sentence punctuation.
        return re.sub(r'\s+', '', match.group(1)).rstrip('.,')

    return {
        'Date': date_match.group(1) if date_match else '',
        'Montant HT': clean(montant_match),
        'TVA': clean(tva_match),
    }
# --- 4) Mapping to PCG
def get_code_pcg(text, mapping_df):
    """Find the first mapping row whose keyword occurs in the text.

    Matching is a case-insensitive substring test, evaluated in row order.

    Args:
        text: Text to classify.
        mapping_df: DataFrame with a ``'Mot-clé'`` column, a ``'Code PCG'``
            column and optionally a ``'Libellé PCG'`` column.

    Returns:
        ``(code, label)`` from the first matching row; ``label`` is ``''``
        when the column is absent or the cell is empty. Returns
        ``('À_CLASSER', '')`` when no keyword matches.
    """
    # Lower-case the document once instead of once per mapping row.
    haystack = text.lower()
    for _, row in mapping_df.iterrows():
        keyword = row['Mot-clé']
        # Empty CSV cells arrive as NaN/None (no .lower()), and a blank
        # keyword would be a substring of every document: skip both.
        if not isinstance(keyword, str) or not keyword.strip():
            continue
        if keyword.lower() in haystack:
            label = row.get('Libellé PCG', '')
            if pd.isna(label):
                label = ''
            return row['Code PCG'], label
    return 'À_CLASSER', ''
# --- 5) Process all PDFs
def process_pdfs(files, mapping_df):
    """Turn uploaded PDF invoices into one accounting-entry row each.

    For every file: read its bytes, OCR them, extract date and amounts, and
    look up the account code from the mapping.

    Args:
        files: Iterable of file-like objects exposing ``read()`` and ``name``.
        mapping_df: Keyword-to-account table passed to ``get_code_pcg``.

    Returns:
        A ``pandas.DataFrame`` with one row per file and the columns
        ``Fichier``, ``Date``, ``Compte``, ``Libellé``, ``Débit``, ``Crédit``
        and ``TVA``. ``Débit`` and ``TVA`` hold the strings produced by
        ``extract_fields``; ``Crédit`` is always ``0``.
    """
    rows = []
    for f in files:
        # Rewind first: if the stream's cursor is already at the end (e.g. the
        # same upload object was read before), read() would return b''.
        if hasattr(f, 'seek'):
            f.seek(0)
        data = f.read()

        text = ocr_extract_text(data)
        fields = extract_fields(text)
        code, libelle = get_code_pcg(text, mapping_df)
        rows.append({
            'Fichier': f.name,
            'Date': fields['Date'],
            'Compte': code,
            'Libellé': libelle,
            'Débit': fields['Montant HT'],
            'Crédit': 0,
            'TVA': fields['TVA'],
        })
    return pd.DataFrame(rows)
# --- Streamlit interface ---
# Page header and instructions.
# NOTE: the 100-file limit announced below is not enforced by this code.
st.title("🧾 Générateur d'écritures comptables")
st.write("Importez jusqu'à 100 PDF, puis cliquez sur Générer.")

# Keyword -> account table used to classify every processed invoice.
mapping_df = load_mapping()

uploaded = st.file_uploader(
    "Sélectionnez vos factures (PDF)",
    type='pdf',
    accept_multiple_files=True
)

if st.button("Générer les écritures"):
    if not uploaded:
        # The original message was cut off, leaving an unterminated string
        # literal (a syntax error); the wording here is a reconstruction.
        st.warning("Veuillez sélectionner au moins un fichier PDF.")
    else:
        df = process_pdfs(uploaded, mapping_df)
        st.dataframe(df)

        # Offer the same table as a UTF-8 encoded CSV download.
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            "Télécharger CSV",
            data=csv,
            file_name="ecritures.csv",
            mime="text/csv"
        )