In [1]:
!pip install transformers datasets sentencepiece rouge -qq

In [2]:
import torch
import pandas as pd
from rouge import Rouge
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [3]:
rouge = Rouge()
dataset = load_dataset("C:/Personal One drive/OneDrive - vit.ac.in/8th semester (Winter)23_24/legal-ease/Code/LED-new/dataset", split='test')

In [19]:
CasesText = dataset['Case'][:25]
GoldSummary = dataset['Summary'][:25]

In [20]:
len(CasesText), len(GoldSummary)

(25, 25)

In [21]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [22]:
def summarize(model, tokenizer, Cases):

  SystemSummaries = []
  for i, case in enumerate(Cases):
      
      input_ids = tokenizer(case, return_tensors="pt").input_ids.to(device)
      global_attention_mask = torch.zeros_like(input_ids)
      global_attention_mask[:, 0] = 1
      sequences = model.generate(input_ids, global_attention_mask=global_attention_mask)
      Summary = tokenizer.batch_decode(sequences, skip_special_tokens=True)

      SystemSummaries.append(Summary)
      print(i)

  return SystemSummaries

In [23]:
checkpoint = "checkpoint-10"

tokenizer_led = AutoTokenizer.from_pretrained(checkpoint)
model_led = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)

In [24]:
SystemSummary = summarize(model_led, tokenizer_led, CasesText)

0
1
2
3
4
5


Input ids are automatically padded from 1774 to 2048 to be a multiple of `config.attention_window`: 1024


6


Input ids are automatically padded from 2276 to 3072 to be a multiple of `config.attention_window`: 1024


7


Input ids are automatically padded from 2540 to 3072 to be a multiple of `config.attention_window`: 1024


8


Input ids are automatically padded from 1438 to 2048 to be a multiple of `config.attention_window`: 1024


9


Input ids are automatically padded from 2282 to 3072 to be a multiple of `config.attention_window`: 1024


10


Input ids are automatically padded from 4413 to 5120 to be a multiple of `config.attention_window`: 1024


11


Input ids are automatically padded from 2714 to 3072 to be a multiple of `config.attention_window`: 1024


12


Input ids are automatically padded from 553 to 1024 to be a multiple of `config.attention_window`: 1024


13


Input ids are automatically padded from 1197 to 2048 to be a multiple of `config.attention_window`: 1024


14


Input ids are automatically padded from 1246 to 2048 to be a multiple of `config.attention_window`: 1024


15


Input ids are automatically padded from 1363 to 2048 to be a multiple of `config.attention_window`: 1024


16
17


Input ids are automatically padded from 5364 to 6144 to be a multiple of `config.attention_window`: 1024


18


Input ids are automatically padded from 6298 to 7168 to be a multiple of `config.attention_window`: 1024


19


Input ids are automatically padded from 8269 to 9216 to be a multiple of `config.attention_window`: 1024
Input ids are automatically padded from 809 to 1024 to be a multiple of `config.attention_window`: 1024


20


Input ids are automatically padded from 3102 to 4096 to be a multiple of `config.attention_window`: 1024


21


Input ids are automatically padded from 2890 to 3072 to be a multiple of `config.attention_window`: 1024


22


Input ids are automatically padded from 1130 to 2048 to be a multiple of `config.attention_window`: 1024


23
24


In [52]:
def compute_rouge_scores(system_summaries, gold_summaries):
    # Join the lists of tokens into strings
    system_summaries = [' '.join(summary) for summary in system_summaries]
    gold_summaries = [' '.join(summary) for summary in gold_summaries]

    scores = rouge.get_scores(system_summaries, gold_summaries, avg=True)
    
    rouge_scores = {
        "ROUGE-1": {
            "Recall": round(scores["rouge-1"]["r"] * 100, 2),
            "Precision": round(scores["rouge-1"]["p"] * 100, 2),
            "F-Measure": round(scores["rouge-1"]["f"] * 100, 2),
        },
        "ROUGE-2": {
            "Recall": round(scores["rouge-2"]["r"] * 100, 2),
            "Precision": round(scores["rouge-2"]["p"] * 100, 2),
            "F-Measure": round(scores["rouge-2"]["f"] * 100, 2),
        },
        "ROUGE-L": {
            "Recall": round(scores["rouge-l"]["r"] * 100, 2),
            "Precision": round(scores["rouge-l"]["p"] * 100, 2),
            "F-Measure": round(scores["rouge-l"]["f"] * 100, 2),
        },
    }
    
    return pd.DataFrame(rouge_scores)


In [55]:
# Call the compute_rouge_scores function
rouge_scores = compute_rouge_scores(SystemSummary, GoldSummary)

# Print the rouge_scores DataFrame
print(rouge_scores)

           ROUGE-1  ROUGE-2  ROUGE-L
Recall        1.94     0.03     1.94
Precision     2.91     0.18     2.91
F-Measure     2.31     0.05     2.31


In [38]:
# Combine gold and system summaries into a DataFrame
Summaries = pd.DataFrame(list(zip(GoldSummary, SystemSummary)), columns=['GoldSummary', 'SystemSummary'])

In [39]:
# Save the summaries to a CSV file
dir_path = "./Generated_Summaries"
Summaries.to_csv(dir_path + "LEDSummaries.csv", index=False)


In [65]:
# Visualize 3 sample summaries
sample_indices = [0, 1, 2]  # Choose 3 random sample indices
for idx in sample_indices:
    print("="*50)
    print(f"Sample {idx + 1} Summary:")
    print(SystemSummary[idx])
    print("\nGold Summary:")
    print(GoldSummary[idx])
    print("="*50)

Sample 1 Summary:
['The judgment of the High Court of Madhya Pradesh in the case of the Electricity Dispute between the two parties was passed by a High Court in a judgment passed by the High court in a judgement passed in a matter between the parties. The judgment was passed in the judgment of a single bench of the Madhya High Court. The judgement was passed between the High Courts of the State and the State. The High Court held that the judgment passed in this case was passed under a single Bench of the']

Gold Summary:
Section 86(1)(f) vests a statutory jurisdiction with the State Electricity Commission to adjudicate upon disputes between licensees and generating companies and to refer any dispute for arbitration. therefore, the appointment of arbitrators by the commission overrides the appointment of arbitrators by the High Court. This judgment was passed in the case of Chief General Manager (IPC) M P Power Trading Co. Ltd. &amp; Anr. vs. Narmada Equipments Pvt. Ltd. [C.A.No.1051/2