In [1]:
import os
from generations_pipeline import LLMGenerator
from huggingface_hub import login

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Log into HuggingFace.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in (or shared together with) this notebook.
# If HF_TOKEN is unset, `hf_token` is None and `login` asks for the token itself.
# SECURITY: never paste a token literal into this cell; any token that has ever
# been saved inside a shared notebook must be revoked in the HuggingFace settings.
hf_token = os.environ.get("HF_TOKEN")
login(token=hf_token)

In [8]:
# Absolute path of the "output" folder inside the current working directory;
# every summary produced by this notebook is written below it.
OUTPUT_DIR = os.path.join(os.path.abspath(os.curdir), "output")

### Load dataset

In [3]:
# load UK supreme court cases
UK_CASES_PATH = "/import/nlp-datasets/court_case_summarisation/datasets/UK-Abs/test-data"


def load_text_files(directory):
    """Read every ``.txt`` file found below ``directory`` into a dictionary.

    Directories and files are visited in sorted order, so the insertion order
    of the returned dict (and therefore any positional index into its keys)
    is the same on every machine and every run.

    Args:
        directory: Root folder that is searched recursively.

    Returns:
        dict mapping each file name with its trailing ``.txt`` removed to the
        UTF-8 decoded content of that file. The dict is empty when nothing
        matches (``os.walk`` silently yields nothing for a missing folder).
    """
    suffix = ".txt"
    texts = {}
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend into sub-folders deterministically
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith(suffix):
                continue
            # Strip only the trailing extension; splitting on ".txt" would
            # truncate names that contain ".txt" somewhere in the middle.
            key = file_name[:-len(suffix)]
            with open(os.path.join(root, file_name), 'r', encoding='utf-8') as f:
                texts[key] = f.read()
    return texts


# the data that needs to be summarised
uk_cases_judgement = load_text_files(os.path.join(UK_CASES_PATH, "judgement"))

# gold summaries for reference (and later for evaluation)
uk_cases_gold_summaries = load_text_files(os.path.join(UK_CASES_PATH, "summary/full"))

In [4]:
# While testing, only a single case is processed.
# Change CASE_INDEX to run the notebook on a different case.
CASE_INDEX = 0
CASE_KEY = [*uk_cases_judgement][CASE_INDEX]

### Initialise generation pipeline

In [5]:
'''
initialise the LLM generator
you can either use a model from HuggingFace e.g. "meta-llama/Meta-Llama-3.1-8B-Instruct" 
or you can type "tulu" to use the temporal reasoning model that Jiayu developed
'''
# `summariser` is the single generator instance reused by every summarisation cell below.
summariser = LLMGenerator(
    model_name="tulu",
)

# NOTE: the tulu model does not appear to be trained for documents as long as the
# UK Supreme Court cases. Passing a full judgment produces the following message:
# Token indices sequence length is longer than the specified maximum sequence length for this model (16100 > 4096). Running this sequence through the model will result in indexing errors
# The resulting summary still seems decent, though.

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.27it/s]
Device set to use cuda:0


Using device: cuda


### Summarise in one go (zero-shot)

In [None]:
import os
import re

# Prompt used to summarise each individual chunk of the judgment.
# It stresses completeness and forbids hallucinated content.
# It is a one-element list because that is how it is handed to
# `summariser.run_summary(prompt=...)` later in this cell.
# NOTE(review): the prompt asks for ~700 words while the calls below pass
# max_tokens=[700]; if max_tokens caps the generated tokens, the output may be
# cut short before 700 words are reached - confirm against LLMGenerator.
prompt = [
    """
    You are a legal expert tasked with summarizing the UK Supreme Court case text provided.

    Please only summarize information explicitly contained in the text. Do NOT add any information, assumptions, or interpretations beyond what is clearly stated.

    Structure your summary in three clearly marked sections with headings:
    1. Background to the Appeal
    2. Judgement
    3. Reasons for Judgement

    Use plain English, avoid legal jargon where possible. Be concise and factual. Do not repeat information.

    IMPORTANT:
    - Do NOT hallucinate or add any unsupported details.
    - Only include facts present in the text.
    - Ensure all sentences are complete and the summary ends logically and clearly.
    - Do not cut off sentences abruptly or leave incomplete thoughts.

    Limit the length to approximately 700 words.
    """
]

# Prompt used for the final refinement step, which merges the per-chunk
# summaries into one summary with the same three-section structure.
refine_prompt = [
    """
    You are a legal expert tasked with combining multiple summaries of a UK Supreme Court case.

    Only use information explicitly present in the provided summaries. Do NOT add, infer, or assume any details not contained in the text.

    Structure the combined summary in three clearly marked sections with headings:
    1. Background to the Appeal
    2. Judgement
    3. Reasons for Judgement

    Use plain English, avoid legal jargon where possible. Be concise and factual. Avoid repetition.

    IMPORTANT:
    - Do NOT hallucinate or include unsupported information.
    - Ensure all sentences are complete and the summary ends logically and clearly.
    - Do not cut off sentences abruptly or leave incomplete thoughts.

    Limit the overall length to approximately 700 words.
    """
]

def chunk_text_by_chars(text, max_chunk_chars=14000):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_chars`` characters.

    Every chunk except the last one is shortened to end right after its last
    period, provided that period lies in the second half of the chunk window;
    otherwise the chunk is cut at the hard character limit.

    Args:
        text: The string to split.
        max_chunk_chars: Maximum number of characters per chunk (must be > 0).

    Returns:
        list of whitespace-stripped, non-empty chunks in document order.

    Raises:
        ValueError: If ``max_chunk_chars`` is not positive; the window could
            never advance and the loop would not terminate.
    """
    if max_chunk_chars <= 0:
        raise ValueError("max_chunk_chars must be a positive number of characters")

    chunks = []
    start = 0
    text_len = len(text)

    while start < text_len:
        end = min(start + max_chunk_chars, text_len)

        # Try to break at the last period to avoid splitting sentences
        if end < text_len:
            last_period = text.rfind('.', start, end)
            # Only accept the period if it keeps the chunk more than half full
            if last_period != -1 and last_period - start > max_chunk_chars * 0.5:
                end = last_period + 1

        chunk = text[start:end].strip()
        # Whitespace-only windows carry no content and are dropped
        if chunk:
            chunks.append(chunk)
        start = end
    return chunks

def trim_incomplete_sentence(text):
    """Cut a trailing incomplete sentence off ``text``.

    A sentence end is ``.``, ``!`` or ``?`` (optionally followed by closing
    quotes or brackets) that is followed by whitespace or by the end of the
    text. Everything after the last sentence end is removed; the kept part is
    returned unchanged, so spacing, line breaks and decimals such as ``1.5``
    are preserved.

    Args:
        text: The text to trim.

    Returns:
        The stripped text up to and including its last sentence end, or the
        whole stripped text when it contains no sentence end at all.
    """
    stripped = text.strip()
    last_end = None
    # Remember where the final sentence terminator stops
    for match in re.finditer(r'[.!?]["\')\]]*(?=\s|$)', stripped):
        last_end = match.end()
    if last_end is None:
        # No sentence ends found, return the (stripped) original text
        return stripped
    return stripped[:last_end]

# Full judgment text of the case selected via CASE_KEY
long_text = uk_cases_judgement[CASE_KEY]

# Model limits
max_model_tokens = 3500
max_chars_per_chunk = max_model_tokens * 4  # Approximate token to char ratio


def _unwrap_single(result):
    """Return the only element of a one-item list, otherwise ``str(result)``."""
    if isinstance(result, list) and len(result) == 1:
        return result[0]
    return str(result)


# Split into chunks
chunks = chunk_text_by_chars(long_text, max_chars_per_chunk)
if not chunks:
    # Summarising nothing would only make the model invent content
    raise ValueError(f"Case {CASE_KEY!r} has no text to summarise")

# Step 1: summarise every chunk on its own
chunk_summaries = [
    _unwrap_single(
        summariser.run_summary(
            prompt=prompt,
            text=[chunk],
            max_tokens=[700],  # limit tokens per chunk summary
            temperature=0,     # reduce hallucination by limiting creativity
        )
    )
    for chunk in chunks
]

combined_summary_text = "\n\n".join(chunk_summaries)

# Step 2: merge the chunk summaries into one final summary
final_summary = summariser.run_summary(
    prompt=refine_prompt,
    text=[combined_summary_text],
    max_tokens=[700],  # max tokens for final summary
    temperature=0,
)
final_summary_text = _unwrap_single(final_summary)

# Trim incomplete sentence at the end if any
final_summary_text = trim_incomplete_sentence(final_summary_text)

# Ensure output directory exists before saving
os.makedirs(OUTPUT_DIR, exist_ok=True)

output_path = os.path.join(OUTPUT_DIR, "uk_case_summary2.txt")

# Saving is best effort: a failure is reported but does not discard the
# summary, which stays available in `final_summary_text`.
try:
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_summary_text)
    print(f"Summary successfully saved to {output_path}")
except Exception as e:
    print(f"Error saving summary: {e}")


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NameError: name 'OUTPUT_DIR' is not defined

### Summarise the dataset in two rounds (intermediate summary)

Layer 1

In [11]:
import os
import re

# Layer 1 prompt (simple chunk summarization).
# Deliberately domain-neutral: it asks for a plain three-section summary
# (Background / Outcome / Explanation) of each chunk.
# It is a one-element list because that is how it is handed to
# `summariser.run_summary(prompt=...)` later in this cell.
# NOTE(review): the prompt asks for ~700 words while the call below passes
# max_tokens=[700]; if max_tokens caps the generated tokens, the output may be
# cut short before 700 words are reached - confirm against LLMGenerator.
prompt = [
    """
    You are an expert summarizer.

    Summarize the given text clearly and simply, focusing only on the facts stated in the text.

    Structure your summary in three clearly marked sections with these headings:
    1. Background
    2. Outcome
    3. Explanation

    Use plain English, avoid any jargon or technical terms. Be concise and factual. Do not repeat information.

    IMPORTANT:
    - Only include facts present in the text.
    - Do NOT add assumptions or extra information.
    - Ensure all sentences are complete and the summary ends clearly.
    - Limit the summary to approximately 700 words.
    """
]

def chunk_text_by_chars(text, max_chunk_chars=14000):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_chars`` characters.

    Every chunk except the last one is shortened to end right after its last
    period, provided that period lies in the second half of the chunk window;
    otherwise the chunk is cut at the hard character limit.

    Args:
        text: The string to split.
        max_chunk_chars: Maximum number of characters per chunk (must be > 0).

    Returns:
        list of whitespace-stripped, non-empty chunks in document order.

    Raises:
        ValueError: If ``max_chunk_chars`` is not positive; the window could
            never advance and the loop would not terminate.
    """
    if max_chunk_chars <= 0:
        raise ValueError("max_chunk_chars must be a positive number of characters")

    chunks = []
    start = 0
    text_len = len(text)

    while start < text_len:
        end = min(start + max_chunk_chars, text_len)

        # Try to break at the last period to avoid splitting sentences
        if end < text_len:
            last_period = text.rfind('.', start, end)
            # Only accept the period if it keeps the chunk more than half full
            if last_period != -1 and last_period - start > max_chunk_chars * 0.5:
                end = last_period + 1

        chunk = text[start:end].strip()
        # Whitespace-only windows carry no content and are dropped
        if chunk:
            chunks.append(chunk)
        start = end
    return chunks

# Full judgment text of the case selected via CASE_KEY
long_text = uk_cases_judgement[CASE_KEY]

# Character budget per chunk, derived from an approximate 4 characters per token
max_model_tokens = 3500
max_chars_per_chunk = 4 * max_model_tokens

chunks = chunk_text_by_chars(long_text, max_chars_per_chunk)

# One summary per chunk: a single-element list result is unwrapped,
# anything else is converted to its string form.
chunk_summaries = []
for chunk in chunks:
    summary = summariser.run_summary(prompt=prompt, text=[chunk], max_tokens=[700], temperature=0)
    is_single_item = isinstance(summary, list) and len(summary) == 1
    chunk_summaries.append(summary[0] if is_single_item else str(summary))

# Persist the chunk summaries so that layer 2 can pick them up from disk
output_dir = os.path.join(OUTPUT_DIR, "summaries_simple1")
layer1_output_path = os.path.join(output_dir, "chunk_summaries.txt")
os.makedirs(output_dir, exist_ok=True)

try:
    with open(layer1_output_path, "w", encoding="utf-8") as f:
        # Each summary is followed by a blank line as separator
        f.writelines(s + "\n\n" for s in chunk_summaries)
    print(f"Layer 1 summaries saved to {layer1_output_path}")
except Exception as e:
    print(f"Error saving layer 1 summaries: {e}")


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Layer 1 summaries saved to /homes/sl318/adsolve_evaluation_platform/models/output/summaries_simple1/chunk_summaries.txt


Layer 2

In [13]:
import os
import re

# Layer 2 prompt (legal expert refinement).
# Applied to the concatenated layer 1 chunk summaries further down in this cell.
# It is a one-element list because that is how it is handed to
# `summariser.run_summary(prompt=...)`.
# NOTE(review): the prompt asks for ~700 words while the calls below pass
# max_tokens=[700]; if max_tokens caps the generated tokens, the output may be
# cut short before 700 words are reached - confirm against LLMGenerator.
prompt = [
    """
    You are a legal expert tasked with summarizing the UK Supreme Court case text provided.

    Please only summarize information explicitly contained in the text. Do NOT add any information, assumptions, or interpretations beyond what is clearly stated.

    Structure your summary in three clearly marked sections with headings:
    1. Background to the Appeal
    2. Judgement
    3. Reasons for Judgement

    Use plain English, avoid legal jargon where possible. Be concise and factual. Do not repeat information.

    IMPORTANT:
    - Do NOT hallucinate or add any unsupported details.
    - Only include facts present in the text.
    - Ensure all sentences are complete and the summary ends logically and clearly.
    - Do not cut off sentences abruptly or leave incomplete thoughts.

    Limit the length to approximately 700 words.
    """
]

# Layer 2 final refinement prompt (combining chunk summaries).
# Used for the last `run_summary` call of this cell, which receives the
# output of the call made with `prompt` above.
refine_prompt = [
    """
    You are a legal expert tasked with combining multiple summaries of a UK Supreme Court case.

    Only use information explicitly present in the provided summaries. Do NOT add, infer, or assume any details not contained in the text.

    Structure the combined summary in three clearly marked sections with headings:
    1. Background to the Appeal
    2. Judgement
    3. Reasons for Judgement

    Use plain English, avoid legal jargon where possible. Be concise and factual. Avoid repetition.

    IMPORTANT:
    - Do NOT hallucinate or include unsupported information.
    - Ensure all sentences are complete and the summary ends logically and clearly.
    - Do not cut off sentences abruptly or leave incomplete thoughts.

    Limit the overall length to approximately 700 words.
    """
]

def trim_incomplete_sentence(text):
    """Cut a trailing incomplete sentence off ``text``.

    A sentence end is ``.``, ``!`` or ``?`` (optionally followed by closing
    quotes or brackets) that is followed by whitespace or by the end of the
    text. Everything after the last sentence end is removed; the kept part is
    returned unchanged, so spacing, line breaks and decimals such as ``1.5``
    are preserved.

    Args:
        text: The text to trim.

    Returns:
        The stripped text up to and including its last sentence end, or the
        whole stripped text when it contains no sentence end at all.
    """
    stripped = text.strip()
    last_end = None
    # Remember where the final sentence terminator stops
    for match in re.finditer(r'[.!?]["\')\]]*(?=\s|$)', stripped):
        last_end = match.end()
    if last_end is None:
        return stripped
    return stripped[:last_end]

# Layer 1 stored its chunk summaries in this file; load them back as one text
layer1_output_path = os.path.join(OUTPUT_DIR, "summaries_simple1/chunk_summaries.txt")

try:
    with open(layer1_output_path, "r", encoding="utf-8") as f:
        combined_summary_text = f.read()
except Exception as e:
    # A read failure is reported and handled like an empty layer 1 result
    print(f"Error reading layer 1 summaries: {e}")
    combined_summary_text = ""

if not combined_summary_text:
    print("No combined summaries found to process in Layer 2.")
else:
    # First pass: legal-style summary of the combined layer 1 summaries
    chunk_summary = summariser.run_summary(
        prompt=prompt, text=[combined_summary_text], max_tokens=[700], temperature=0
    )
    chunk_summary_text = (
        chunk_summary[0]
        if isinstance(chunk_summary, list) and len(chunk_summary) == 1
        else str(chunk_summary)
    )

    # Second pass: refinement of the first-pass result
    final_summary = summariser.run_summary(
        prompt=refine_prompt, text=[chunk_summary_text], max_tokens=[700], temperature=0
    )
    final_summary_text = (
        final_summary[0]
        if isinstance(final_summary, list) and len(final_summary) == 1
        else str(final_summary)
    )

    # Drop a dangling, unfinished sentence at the end
    final_summary_text = trim_incomplete_sentence(final_summary_text)

    # Write the result into its own sub-folder of OUTPUT_DIR
    output_dir = os.path.join(OUTPUT_DIR, "summaries_legal")
    output_path = os.path.join(output_dir, "uk_case_summary2.txt")
    os.makedirs(output_dir, exist_ok=True)

    try:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(final_summary_text)
        print(f"Layer 2 final summary saved to {output_path}")
    except Exception as e:
        print(f"Error saving final summary: {e}")


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Layer 2 final summary saved to /homes/sl318/adsolve_evaluation_platform/models/output/summaries_legal/uk_case_summary2.txt


### Load gold summary for reference

In [43]:
# Show the reference ("gold") summary of the selected case for comparison
gold_summary = uk_cases_gold_summaries[CASE_KEY]
print(gold_summary)

The Public Processions (Northern Ireland) Act 1998 (the 1998 Act) placed responsibility for the management of parades in Northern Ireland in the hands of an independent statutory body called the Parades Commission.
The Act placed a duty on anyone proposing to organise a public procession to give advance notice to the police and made it a criminal offence to organise, or take part in, a public procession of which notification had not been given.
On 3 December 2012 Belfast City Council decided to stop flying the Union flag over Belfast City Hall every day.
The flag was to be flown on certain designated days only.
The decision sparked a wave of protests by loyalists which continued for some months and became known as the flags protests.
The protesters marched from a meeting point in East Belfast to Belfast City Hall in the centre of the city and back again.
The route took the parade through the Short Strand, which is perceived to be a nationalist area, and where violence, disorder and sec