In [11]:
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

import os
from tqdm import tqdm
import pandas as pd

In [12]:
# Language name handed to the sumy tokenizer and to the stop-word lookup.
LANGUAGE = "english"
# Number of sentences requested from the summarizer for each document.
SENTENCES_COUNT = 10
# Upper bound on how many entries are taken from each input directory listing.
TEXTS_COUNT = 100

In [13]:
# Directory whose files are read as the documents to summarize.
test_path_txt = '../SCOTUS_data/text'
# Directory whose files are read as the reference summaries.
test_path_sum = '../SCOTUS_data/summary'
# CSV file loaded into `df_target` with pandas.
target_path_csv = '../SCOTUS_data/paragraph_target_df.csv'

In [14]:
# Build one LexRank extractive summary per source document.
summary_gen = []  # generated summaries, in directory-listing order
texts = []        # raw document texts, same order as summary_gen

# The tokenizer, the summarizer and its stop-word list do not depend on the
# document, so they are created once instead of on every iteration.
tokenizer = Tokenizer(LANGUAGE)
summarizer = LexRankSummarizer()
summarizer.stop_words = get_stop_words(LANGUAGE)

# NOTE(review): os.listdir() returns entries in arbitrary order, and other cells
# pair these summaries by position with the files of `test_path_sum` and with
# the rows of `df_target` -- confirm that those orders really correspond.
for file_name in tqdm(os.listdir(test_path_txt)[:TEXTS_COUNT]):
    # Read the file once, inside a context manager so the handle is closed,
    # and reuse the text both for `texts` and for the parser.
    with open(os.path.join(test_path_txt, file_name), 'r') as f:
        text = f.read()
    texts.append(text)

    parser = PlaintextParser.from_string(text, tokenizer)

    # One selected sentence per line, each terminated by a newline.
    sentence_txt = "".join(
        str(sentence) + "\n"
        for sentence in summarizer(parser.document, SENTENCES_COUNT)
    )
    summary_gen.append(sentence_txt)

# Table whose columns are compared against the generated summaries.
df_target = pd.read_csv(target_path_csv)

100%|██████████| 100/100 [08:11<00:00,  4.91s/it]

                                   facts_of_the_case  \
0  William Packer was convicted in a California s...   
1  In 1984 Dow Chemical Co. negotiated a settleme...   
2  In 1995, the U.S. Supreme Court, in Adarand Co...   
3  After stopping him for speeding, an Iowa polic...   
4  To prevent "local media monopolies," Section 5...   

                                            question  \
0  Was a state court’s determination that encoura...   
1  Does the 1984 Agent Orange settlement preclude...   
2  Did the Court of Appeals misapply the strict s...   
3  Can a search of a stopped vehicle that occurs ...   
4  Does 47 U.S.C. 533(b), which bars local teleph...   

                                          conclusion  
0  Encouraging the jury to continue deliberations...  
1  An equally divided Court affirmed in part and ...  
2  In a per curiam opinion, the Court dismissed t...  
3  No. In a unanimous opinion, the Court held tha...  
4  Unanswered. After the Court heard oral argumen..




In [15]:
# Load the reference summaries: take the first TEXTS_COUNT entries of the
# summary directory listing and read each file completely.
summary_ref = []

ref_names = os.listdir(test_path_sum)[:TEXTS_COUNT]
for ref_name in tqdm(ref_names):
    ref_path = os.path.join(test_path_sum, ref_name)
    with open(ref_path, 'r') as handle:
        summary_ref.append(handle.read())

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:00<00:00, 5393.70it/s]


In [16]:
# Sanity check: the summaries are compared position by position, so there must
# be exactly one reference summary for every generated summary.
n_generated, n_reference = len(summary_gen), len(summary_ref)
print(n_generated, n_reference)
if n_reference != n_generated:
    raise ValueError("Les listes summary_ref et summary_gen doivent avoir la même longueur.")

100 100


In [17]:
from rouge_score import rouge_scorer
from bert_score import BERTScorer

# Score every generated summary against its reference summary with
# ROUGE-1/2/L (stemming enabled) and BERTScore, then average over documents.
ROUGE_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
BERT_scorer = BERTScorer(lang="en")
ROUGE_scores = []  # one dict of ROUGE results per document
BERT_scores = []   # one (precision, recall, F1) triple per document
for i in range(len(summary_gen)):
    # RougeScorer.score takes (target, prediction).
    score = ROUGE_scorer.score(summary_ref[i], summary_gen[i])
    ROUGE_scores.append(score)
    # BERTScorer.score takes (candidates, references): the generated summary is
    # the candidate. Passing them the other way round swaps the stored
    # precision and recall.
    BERT_scores.append(BERT_scorer.score([summary_gen[i]], [summary_ref[i]]))

# Mean F-measure of each ROUGE variant over all documents.
avg_scores = {
    metric: sum(s[metric].fmeasure for s in ROUGE_scores) / len(ROUGE_scores)
    for metric in ('rouge1', 'rouge2', 'rougeL')
}
# Index 2 of each BERTScore triple is the F1 tensor.
avg_scores['bert_score'] = sum(s[2].mean().item() for s in BERT_scores) / len(BERT_scores)

print("Scores ROUGE :", avg_scores)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Scores ROUGE : {'rouge1': 0.4427502395093806, 'rouge2': 0.15796302795858608, 'rougeL': 0.20809716197503245, 'bert_score': 0.8313004660606385}


In [18]:
# Score every generated summary against each target column of `df_target`
# and average the metrics per column.
target_columns = ['facts_of_the_case', 'question', 'conclusion']
rouge_metrics = ['rouge1', 'rouge2', 'rougeL']

# scores[column][metric] -> list with one value per document.
scores = {
    column: {metric: [] for metric in rouge_metrics + ['bert_score']}
    for column in target_columns
}

# Iterate over the explicit target columns rather than df_target.columns, so an
# extra column in the CSV (e.g. a saved index) cannot trigger a KeyError.
for column_name in target_columns:
    # Bound the loop by the number of summaries actually generated; the
    # directory may have held fewer than TEXTS_COUNT files.
    for i in range(len(summary_gen)):
        ref = df_target[column_name].iloc[i]
        gen = summary_gen[i]

        # ROUGE F-measures; RougeScorer.score takes (target, prediction).
        rouge_score = ROUGE_scorer.score(ref, gen)
        for metric in rouge_metrics:
            scores[column_name][metric].append(rouge_score[metric].fmeasure)

        # BERTScore F1; BERTScorer.score takes (candidates, references), so the
        # generated summary goes first.
        _, _, bert_f1 = BERT_scorer.score([gen], [ref])
        scores[column_name]['bert_score'].append(bert_f1.mean().item())

# Arithmetic mean of every metric, per target column.
avg_scores_target = {
    column: {metric: sum(values) / len(values) for metric, values in per_metric.items()}
    for column, per_metric in scores.items()
}

for col, metrics in avg_scores_target.items():
    print(f"\nScores moyens pour {col} :")
    print(metrics)



Scores moyens pour facts_of_the_case :
{'rouge1': 0.21444836455050267, 'rouge2': 0.028111137244745295, 'rougeL': 0.12013335297395639, 'bert_score': 0.7932844638824463}

Scores moyens pour question :
{'rouge1': 0.07259777337038266, 'rouge2': 0.007529909716499205, 'rougeL': 0.05349701536978904, 'bert_score': 0.7901528000831604}

Scores moyens pour conclusion :
{'rouge1': 0.18009157868819536, 'rouge2': 0.02828797872297114, 'rougeL': 0.10679586301441497, 'bert_score': 0.7971158695220947}


In [31]:
import pandas as pd

# Persist, for each document, its source text, reference summary and generated summary.
df_results = pd.DataFrame({"Text": texts, "Reference": summary_ref, "Generated": summary_gen})
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing.
os.makedirs("./output", exist_ok=True)
df_results.to_csv("./output/results_LexRank.csv", index=False)

In [29]:
# Stack the overall averages (row 'global') on top of the per-target-column
# averages: one row per comparison, one column per metric.
df_avg_scores = pd.DataFrame([avg_scores], index=['global'])
df_avg_scores_target = pd.DataFrame(avg_scores_target).transpose()

df_score = pd.concat([df_avg_scores, df_avg_scores_target])
print(df_score.head())

                     rouge1    rouge2    rougeL  bert_score
global             0.442750  0.157963  0.208097    0.831300
facts_of_the_case  0.214448  0.028111  0.120133    0.793284
question           0.072598  0.007530  0.053497    0.790153
conclusion         0.180092  0.028288  0.106796    0.797116


In [32]:
# Write the score table; DataFrame.to_csv does not create missing parent
# directories, so make sure the output folder exists first.
os.makedirs("./output", exist_ok=True)
df_score.to_csv("./output/scores_LexRank.csv")