In [1]:
import ast
import json
import random
import re

import pandas as pd

# Fix the seed of the stdlib `random` module so runs are repeatable.
random.seed(42)

# Load the contents of summeval.json. The encoding is passed explicitly because
# open() otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8.
with open("../data/summeval.json", encoding="utf-8") as file:
    data = json.load(file)

# LLM Initialization

In [None]:
from transformers import pipeline

# Build the text-generation pipeline from the local Llama 3.1 8B Instruct
# checkpoint directory. device_map="auto" is passed through to the library,
# which then decides where the model weights are placed.
llama = pipeline(
    task="text-generation",
    model="../Meta-Llama-3.1-8B-Instruct/",
    device_map="auto",
)

# Dimension Data Preparation

In [2]:
# Quality dimension evaluated by this run; must be one of the DIMENSION_RUBRIC keys.
# It also selects the sample directory that is read later in the notebook.
DIMENSION = "coherence"

# Rubric text for each dimension; the entry for DIMENSION is interpolated into
# the user prompt.
DIMENSION_RUBRIC = {
    # Structure and organisation of the summary as a whole.
    "coherence": 'Coherence ( 1 - 5 ) - the collective quality of all sentences. We align this dimension with the DUC quality question of structure and coherence whereby "the summary should be well-structured and well-organized. The summary should not just be a heap of related information, but should build from sentence to a coherent body of information about a topic."',
    # Factual agreement between summary and source.
    "consistency": "Consistency ( 1 - 5 ) - the factual alignment between the summary and the summarized source. A factually consistent summary contains only statements that are entailed by the source document. Annotators were also asked to penalize summaries that contained hallucinated facts.",
    # Sentence-level writing quality.
    "fluency": 'Fluency ( 1 - 5 ) - the quality of individual sentences. Drawing again from the DUC quality guidelines, sentences in the summary "should have no formatting problems, capitalization errors or obviously ungrammatical sentences (e.g., fragments, missing components) that make the text difficult to read."',
    # Selection of important content.
    "relevance": "Relevance ( 1 - 5 ) - selection of important content from the source. The summary should include only important information from the source document. Annotators were instructed to penalize summaries which contained redundancies and excess information.",
}

# Sample Loading

In [6]:
# Load the pickled example records stored for the active DIMENSION.
sample_list = pd.read_pickle(f"../data/{DIMENSION}/sample.pkl")

# Split the records into three parallel lists: the "source" field, the
# "system_output" field, and the score recorded under DIMENSION.
sampled_source = [record["source"] for record in sample_list]
sampled_summary = [record["system_output"] for record in sample_list]
sampled_score = [record["scores"][DIMENSION] for record in sample_list]

In [7]:
# Display the scores collected from the sampled records for the chosen DIMENSION.
sampled_score

[1.3333333333333333, 2.6666666666666665, 3.6666666666666665, 4.0]

# Prompt Construction

In [4]:
# Read the user-prompt template; its placeholders are filled in later with
# str.format(). UTF-8 is requested explicitly so the read does not depend on
# the platform's default locale encoding.
with open("../prompts/think_aloud/llama/user_prompt.txt", "r", encoding="utf-8") as file:
    user_prompt = file.read()

In [22]:
# Positional arguments for the prompt template, in order: the dimension name
# five times, the rubric for that dimension, then (source, summary, score) for
# each of the first four sampled examples.
prompt_args = [DIMENSION] * 5 + [DIMENSION_RUBRIC[DIMENSION]]
for idx in range(4):
    prompt_args += [sampled_source[idx], sampled_summary[idx], sampled_score[idx]]

# Single-turn chat input: one user message holding the filled-in template.
messages = [{"role": "user", "content": user_prompt.format(*prompt_args)}]

llama_response = llama(messages, max_length=4096)

# Element 1 of the returned message list is read as the reply; presumably this
# is the assistant turn that follows the single user message at index 0.
reply_text = llama_response[0]["generated_text"][1]["content"]

# Keep only the brace-delimited spans of the reply, concatenated in order of
# appearance. `llama_response` is rebound to that string.
llama_response = ''.join(re.findall(r'\{[^}]*\}', reply_text))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [23]:
# Parse the extracted text as a Python literal and display the result.
# ast.literal_eval only accepts literal syntax (dicts, lists, strings, numbers,
# ...) and raises ValueError/SyntaxError on anything else, so model-generated
# text is never executed as code the way a bare eval() would execute it.
ast.literal_eval(llama_response)