In [18]:
import os
from conllu import parse

def renumeroter_conllu(inputfile, outputfile):
    """Rewrite a CoNLL-U file with sentence numbers restarted per paragraph.

    Each ``sent_id`` is split on ``_``. When it has at least five fields, the
    first four are read as book, chapter, section and paragraph, and
    everything after them is replaced by a running counter. The counter
    starts at 1 and restarts whenever any of those four fields differs from
    the previous renumbered sentence. Sentences whose ``sent_id`` has fewer
    than five fields (or is absent) are written out unchanged and do not
    affect the counter.

    Args:
        inputfile: Path of the UTF-8 CoNLL-U file to read.
        outputfile: Path of the UTF-8 file to write; it is opened in ``'w'``
            mode, so existing content is replaced.
    """
    with open(inputfile, 'r', encoding='utf-8') as source:
        raw_text = source.read()

    # The whole input is parsed before the output file is opened.
    parsed_sentences = parse(raw_text)

    # Location (book, chapter, section, paragraph) of the last renumbered
    # sentence; None until the first one is seen, which forces a restart.
    previous_location = None
    position = 1

    with open(outputfile, 'w', encoding='utf-8') as target:
        for sentence in parsed_sentences:
            fields = sentence.metadata.get("sent_id", "").split('_')

            if len(fields) >= 5:
                book, chapter, section, paragraph = fields[:4]
                location = (book, chapter, section, paragraph)

                if location == previous_location:
                    position += 1
                else:
                    # New paragraph (or first sentence): restart numbering.
                    previous_location = location
                    position = 1

                sentence.metadata["sent_id"] = (
                    f"{book}_{chapter}_{section}_{paragraph}_{position}"
                )

            target.write(sentence.serialize())

def process_directory(input_dir, output_dir):
    """Renumber every ``.conllu`` file found directly in ``input_dir``.

    Each matching file is passed to ``renumeroter_conllu`` and the result is
    written under the same file name in ``output_dir``. Sub-directories are
    not traversed.

    Args:
        input_dir: Directory containing the ``.conllu`` files to process.
        output_dir: Directory receiving the renumbered files; created
            (including missing parents) when it does not exist.
    """
    # Create the output directory if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # processing order (and the printed log) reproducible across runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.conllu'):
            continue

        inputfile = os.path.join(input_dir, filename)
        # A directory that happens to be named "*.conllu" cannot be opened
        # as a file, so skip anything that is not a regular file.
        if not os.path.isfile(inputfile):
            continue

        outputfile = os.path.join(output_dir, filename)

        renumeroter_conllu(inputfile, outputfile)
        print(f"Processed: {filename}")

# Example usage
input_directory = '/home/ziane212/Téléchargements/dump_Gascon_23/last'  # Replace with the path to your input directory
output_directory = '/home/ziane212/Téléchargements/dump_Gascon_23/last/renum_sent_id'  # Replace with the path to your output directory

process_directory(input_directory, output_directory)


Processed: 1214-1342_Coutumes_et_Privileges_de_l_Entre-deux-Mer.conllu
Processed: 1250_Coutumes_de_Bagnieres.conllu
Processed: 1251_Coutumes_de_Bagnieres.conllu
Processed: 1270_Charte_de_boucherie_dOrthez.conllu
Processed: 1278_Charte_dHerrere.conllu
