In [1]:
# %pip install bm25s
# %pip install spacy
# %pip install -U 'spacy[cuda12x]'
# %pip install rouge_score
# %pip install pysbd

In [1]:
import functions as fct
import time
import os
import pandas as pd
from tqdm import tqdm

# BM25

In [2]:
# Paths to the training and development JSON files
train_path_json = 'SCOTUS/train.json'
dev_path_json = 'SCOTUS/dev.json'

# Load the training JSON file through the project helper
train_json = fct.open_file(train_path_json, "json")

def save_txt(text, folder_name, file_name):
    """Write ``text`` to ``folder_name/file_name``, creating the folder if needed.

    An existing file with the same name is overwritten.

    Args:
        text: String content to write.
        folder_name: Destination directory. Missing directories (including
            parents) are created. An empty string means the current working
            directory.
        file_name: Name of the file to create inside ``folder_name``.
    """
    # exist_ok=True removes the check-then-create race of a separate
    # os.path.exists() test; the guard skips os.makedirs(""), which raises.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)

    file_path = os.path.join(folder_name, file_name)

    # Explicit UTF-8: the platform default encoding may be unable to encode
    # non-ASCII characters in the summaries.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(text)

# Evaluate the summarizer once per sentence-segmentation method
def evaluate_models(document, paragraph_target, file_type, only_f1=True,
                    segmentation_methods=None, model_name=None):
    """Summarize ``document`` with each segmentation method and score the result.

    For every method the document is segmented, a query is selected, the
    sentences are summarized with ``fct.bb25LegalSum`` and the summary is
    scored against ``paragraph_target`` with ``fct.evaluations``.

    Args:
        document: Source text to summarize.
        paragraph_target: Reference text the summary is scored against.
        file_type: Label prepended to the method name in the ``Method``
            column (the notebook uses ``"JSON"`` and ``"TXT"``).
        only_f1: Forwarded unchanged to ``fct.evaluations``.
        segmentation_methods: Iterable of method names passed to
            ``fct.sent_segmentation``. Defaults to the notebook-level
            ``methods`` variable, which keeps existing calls working.
        model_name: Model identifier passed to ``fct.bb25LegalSum``.
            Defaults to the notebook-level ``model`` variable.

    Returns:
        A ``(results, summaries)`` tuple. ``results`` holds one block of rows
        per method, with a leading ``Method`` column and a trailing
        ``Execution time`` column in seconds. ``summaries`` holds one
        newline-joined summary string per method, in the same order.
        With no methods, an empty DataFrame and an empty list are returned.
    """
    # Fall back to the notebook globals so existing three/four-argument
    # calls behave as before.
    if segmentation_methods is None:
        segmentation_methods = methods
    if model_name is None:
        model_name = model

    frames = []
    summaries = []

    for method in segmentation_methods:
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        # Split the source document into sentences
        sentences = fct.sent_segmentation(document, method=method)

        # Query selection stays inside the timed region so that
        # 'Execution time' keeps covering the same steps as before.
        query = fct.select_query(document)
        summary = fct.bb25LegalSum(sentences, model_name, query)

        # Score the summary (ROUGE and BERT-based metrics, per the original
        # comment). NOTE(review): assumes fct.evaluations returns a pandas
        # DataFrame, since it is used with .insert and pd.concat; confirm.
        bb25_evaluation = fct.evaluations(" ".join(summary), paragraph_target, only_f1)

        execution_time = time.perf_counter() - start_time

        # Label the rows and attach the timing for this method
        bb25_evaluation.insert(0, 'Method', file_type + '_' + method)
        bb25_evaluation['Execution time'] = execution_time

        frames.append(bb25_evaluation)
        summaries.append("\n".join(summary))

    # Concatenate once instead of growing a DataFrame inside the loop.
    results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    return results, summaries

In [3]:
import pandas as pd

def _shade_top3(styles, series, col, largest, rgb):
    """Shade the three largest (or smallest) values of ``series`` in ``styles[col]``.

    The best-ranked value gets alpha 1.0 and the alpha drops by 1/3 per rank.
    Rows are addressed by index label, so any unique index works.
    """
    top = series.nlargest(3) if largest else series.nsmallest(3)
    ranks = top.rank(ascending=not largest)

    for label, value in series.items():
        # Every row whose value equals one of the selected extremes is shaded;
        # NaN never compares equal, so NaN rows are left unstyled.
        matching = ranks[top == value]
        if matching.empty:
            continue
        alpha = 1 - (matching.iloc[0] - 1) / 3
        styles.loc[label, col] = f'background-color: rgba({rgb}, {alpha});'


def highlight_min_max(df, only_f1=True):
    """Build a CSS frame highlighting the top-3 and bottom-3 values per column.

    Intended for ``df.style.apply(highlight_min_max, axis=None)``.

    Score columns are shaded green for the three highest values and red for
    the three lowest. ``'Execution time'`` uses the opposite colours (lowest
    green, highest red). When a value is in both groups, the shade applied
    last wins: green for score columns, red for ``'Execution time'``.

    Args:
        df: Frame holding the score columns and an ``'Execution time'`` column.
        only_f1: Selects which score column names are styled:
            ``rouge1``/``rouge2``/``rougeL``/``bert_score`` when True,
            the same names suffixed with ``_F1`` when False.

    Returns:
        A DataFrame of CSS strings with the same index and columns as ``df``;
        cells that are not highlighted contain an empty string.

    Raises:
        KeyError: If an expected column is missing from ``df``.
    """
    red = '200, 50, 50'
    green = '50, 200, 50'

    styles = pd.DataFrame('', index=df.index, columns=df.columns)

    if only_f1:
        score_columns = ['rouge1', 'rouge2', 'rougeL', 'bert_score']
    else:
        score_columns = ['rouge1_F1', 'rouge2_F1', 'rougeL_F1', 'bert_score_F1']

    for col in score_columns:
        # Higher is better: lowest values in red first, then highest in green.
        _shade_top3(styles, df[col], col, largest=False, rgb=red)
        _shade_top3(styles, df[col], col, largest=True, rgb=green)

    # Lower is better for timings, so the colours are swapped.
    col = 'Execution time'
    _shade_top3(styles, df[col], col, largest=False, rgb=green)
    _shade_top3(styles, df[col], col, largest=True, rgb=red)

    return styles


Test on a single document

In [4]:
text_number = 0

# Source document and reference summary (facts, question, conclusion) from the JSON file.
# NOTE(review): the three target parts are concatenated without a separator, so
# the last word of one part can fuse with the first word of the next. Adding a
# space would slightly change the recorded scores; confirm before changing.
document_json = train_json[text_number]["raw_source"]
paragraph_target_json = (
    train_json[text_number]['raw_target']['facts_of_the_case'] +
    train_json[text_number]['raw_target']['question'] +
    train_json[text_number]['raw_target']['conclusion']
)

# Text-file version of the same document
text_path = f'SCOTUS_data/text/train_{text_number}.txt'
document_txt = fct.open_file(text_path, "txt")

# Settings read by evaluate_models
methods = ['nltk', 'spacy', 'pySBD', 'custom_spacy']  # Segmentation methods to compare
model = "bert-base-uncased"  # Model name passed to the summarizer
only_f1 = True

# Evaluate every segmentation method on the source taken from the JSON file
results_json, summaries_json = evaluate_models(document_json, paragraph_target_json, "JSON", only_f1)
# Evaluate every segmentation method on the text-file version
results_txt, summaries_txt = evaluate_models(document_txt, paragraph_target_json, "TXT", only_f1)

# One concat over both runs instead of repeatedly extending an empty DataFrame
results = pd.concat([results_json, results_txt], ignore_index=True)

# `summaries` previously ended up holding only the TXT summaries (the JSON ones
# were overwritten); keep that binding for compatibility. Both sets now remain
# available as summaries_json / summaries_txt.
summaries = summaries_txt

# Choosing the segmenter

In [5]:
# Wrap the per-method results gathered in the previous cell
df = pd.DataFrame(results)

# axis=None passes the whole frame to highlight_min_max in one call; it returns
# a frame of CSS strings with the same index and columns.
styled_df = df.style.apply(highlight_min_max, axis=None, only_f1=only_f1)

# Bare last expression so the notebook renders the styled table
styled_df

Unnamed: 0,Method,rouge1,rouge2,rougeL,bert_score,Execution time
0,JSON_nltk,0.485893,0.191824,0.194357,0.77412,16.312103
1,JSON_spacy,0.289086,0.130564,0.159292,0.814223,45.178729
2,JSON_pySBD,0.316667,0.106145,0.183333,0.800038,48.740111
3,JSON_custom_spacy,0.289086,0.130564,0.159292,0.813568,48.841163
4,TXT_nltk,0.505495,0.206612,0.225275,0.835671,8.814383
5,TXT_spacy,0.507586,0.207469,0.212414,0.831892,9.995616
6,TXT_pySBD,0.506829,0.207002,0.248862,0.831253,8.786521
7,TXT_custom_spacy,0.507163,0.212644,0.234957,0.841034,10.182218


Next, we evaluate the pySBD segmenter on the first 100 cleaned (TXT) training documents and report the mean scores.

In [None]:
train_path_json = 'SCOTUS/train.json'
train_json = fct.open_file(train_path_json, "json")

# Model name read by evaluate_models; the alternatives are kept for quick switching
# model = "bert-base-uncased"
model = "nlpaueb/legal-bert-base-uncased"
# model = "law-ai/InCaseLawBERT"

methods = ["pySBD"]

# Number of training documents to evaluate (train_0 .. train_{n_documents - 1})
n_documents = 100

# Output folder, e.g. "summaries/pySBD_legal-bert-base-uncased/".
# Loop-invariant, so it is built once; [-1] takes the last path component of the model id.
summary_folder = f"summaries/{methods[0]}_{model.split('/')[-1]}/"

# Collect per-document frames and concatenate once after the loop
# (extending a DataFrame inside the loop re-copies all previous rows each time).
per_document_results = []

for i in tqdm(range(n_documents)):

    # Reference summary: facts, question and conclusion of the case
    paragraph_target_json = (
        train_json[i]['raw_target']['facts_of_the_case'] +
        train_json[i]['raw_target']['question'] +
        train_json[i]['raw_target']['conclusion']
    )

    # Text-file version of document i
    text_path = f'SCOTUS_data/text/train_{i}.txt'
    document_txt = fct.open_file(text_path, "txt")

    r, summary = evaluate_models(document_txt, paragraph_target_json, "TXT")

    # evaluate_models returns one summary string per method; join them and save
    summary_conc = "\n".join(summary)
    save_txt(summary_conc, summary_folder, f"summary_train_{i}.txt")  # save train_n summary

    per_document_results.append(r)

results = pd.concat(per_document_results, ignore_index=True) if per_document_results else pd.DataFrame()

# Average only the metric columns that are actually present
metrics = [col for col in results.columns if col in ['rouge1', 'rouge2', 'rougeL', 'bert_score', 'Execution time']]
means = results[metrics].mean()

print("Means :")
print(means)

100%|██████████| 100/100 [21:08<00:00, 12.68s/it]

Means :
rouge1             0.438605
rouge2             0.172471
rougeL             0.219571
bert_score         0.829147
Execution time    12.664285
dtype: float64





In [5]:
# Highlight the three best and three worst values of each metric across all
# evaluated documents (only_f1 is left at its default, True).
styled_df = results.style.apply(highlight_min_max, axis=None)

# Bare last expression so the notebook renders the styled table
styled_df

Unnamed: 0,Method,rouge1,rouge2,rougeL,bert_score,Execution time
0,TXT_pySBD,0.56926,0.24,0.29222,0.850662,9.479026
1,TXT_pySBD,0.348409,0.171429,0.174204,0.781521,3.128317
2,TXT_pySBD,0.602219,0.356121,0.377179,0.878,9.36014
3,TXT_pySBD,0.386606,0.070229,0.158295,0.806829,7.796248
4,TXT_pySBD,0.250746,0.068862,0.134328,0.766226,3.34799
5,TXT_pySBD,0.602694,0.297297,0.343434,0.866006,7.217578
6,TXT_pySBD,0.447761,0.161677,0.247761,0.824734,10.031364
7,TXT_pySBD,0.410184,0.161702,0.175389,0.79004,7.374803
8,TXT_pySBD,0.251852,0.074349,0.133333,0.77545,4.564169
9,TXT_pySBD,0.475806,0.132075,0.188172,0.821989,6.20589
