In [2]:
import json
import pickle 
from src.gpt_model import Model
import ast

# Evaluation dimension this run targets; it is used below as a key into
# DIMENSION_RUBRIC and as part of the attribute file path.
DIMENSION = "coherence"

# Rubric text for each supported dimension (kept as single literals so the
# wording inserted into prompts stays exactly as written).
coherence_rubric = 'Coherence - the collective quality of all sentences. We align this dimension with the DUC quality question of structure and coherence whereby "the summary should be well-structured and well-organized. The summary should not just be a heap of related information, but should build from sentence to a coherent body of information about a topic."'
consistency_rubric = "Consistency - the factual alignment between the summary and the summarized source. A factually consistent summary contains only statements that are entailed by the source document. Annotators were also asked to penalize summaries that contained hallucinated facts. "
fluency_rubric = 'Fluency - the quality of individual sentences. Drawing again from the DUC quality guidelines, sentences in the summary "should have no formatting problems, capitalization errors or obviously ungrammatical sentences (e.g., fragments, missing components) that make the text difficult to read."'
relevance_rubric = "Relevance - selection of important content from the source. The summary should include only important information from the source document. Annotators were instructed to penalize summaries which contained redundancies and excess information."

# Lookup table: dimension name -> rubric text.
DIMENSION_RUBRIC = dict(
    coherence=coherence_rubric,
    consistency=consistency_rubric,
    fluency=fluency_rubric,
    relevance=relevance_rubric,
)

# Attribute file for the selected dimension; each line of the file becomes
# one entry of attributes_list.
attributes_path = f"./data/{DIMENSION}/human_llm_attributes.txt"
with open(attributes_path, "r") as file:
    attributes_list = file.read().splitlines()

# Newline-joined form of the same entries, used when filling prompt templates.
attributes = "\n".join(attributes_list)

# LLM Initialization

In [3]:
# API keys are read from a local JSON file rather than written into the notebook.
with open("./api_keys.json", "r") as file:
    api_keys = json.loads(file.read())
OPENAI_API_KEY = api_keys["openai"]

# GPT-4 client used for every generation step below; temperature is set to 0.0.
gpt4 = Model(
    model="gpt-4",
    temperature=0.0,
    api_key=OPENAI_API_KEY,
)

gpt-4


# Component Extraction

# Prompt Construction

In [4]:
# Prompt templates for the component-extraction step.
system_prompt_path = "./prompts/checklist_construction/component_extraction/system_prompt.txt"
user_prompt_path = "./prompts/checklist_construction/component_extraction/user_prompt.txt"

with open(system_prompt_path, "r") as file:
    sys_prompt = file.read()
with open(user_prompt_path, "r") as file:
    user_prompt = file.read()

# Component Generation

In [5]:
# The system template takes the dimension name twice followed by its rubric;
# the user template takes the newline-joined attribute list.
system_message = {
    "role": "system",
    "content": sys_prompt.format(DIMENSION, DIMENSION, DIMENSION_RUBRIC[DIMENSION]),
}
user_message = {
    "role": "user",
    "content": user_prompt.format(attributes),
}
prompt_list = [system_message, user_message]

gpt4_response = gpt4.ask_chatgpt(prompt_list)

# The reply is parsed as a Python literal (never executed).
# NOTE(review): assumes ask_chatgpt returns the reply text as a str - confirm.
components = ast.literal_eval(gpt4_response)

# Attribute Clustering

In [6]:
# Prompt templates for the attributes-clustering step.
system_prompt_path = "./prompts/checklist_construction/attributes_clustering/system_prompt.txt"
user_prompt_path = "./prompts/checklist_construction/attributes_clustering/user_prompt.txt"

with open(system_prompt_path, "r") as file:
    sys_prompt = file.read()
with open(user_prompt_path, "r") as file:
    user_prompt = file.read()

In [7]:
# Ask the model to group the attributes under the extracted components.
# The system prompt is used verbatim; the user template takes the components
# followed by the newline-joined attribute list.
prompt_list = [
    {"role":"system", "content": sys_prompt},
    {"role": "user", "content": user_prompt.format(components, attributes)}
]

gpt4_response = gpt4.ask_chatgpt(prompt_list)

# Parse the reply with ast.literal_eval instead of eval(): the text comes from
# an external model, and eval() would execute any expression it contains.
# literal_eval only accepts Python literals and raises ValueError/SyntaxError
# on anything else. The next cell calls .items() on the result, so the reply
# is expected to be a dict literal.
components_attributes_dic = ast.literal_eval(gpt4_response)

In [8]:
# Render every component followed by its value on the next line, with a blank
# line after each entry, as one text block for the next prompt.
components_attributes = "".join(
    f"{component_name}:\n{grouped_attributes}\n\n"
    for component_name, grouped_attributes in components_attributes_dic.items()
)
    

# Key Question Generation

In [9]:
# Prompt templates for the key-question generation step.
system_prompt_path = "./prompts/checklist_construction/question_generation/system_prompt.txt"
user_prompt_path = "./prompts/checklist_construction/question_generation/user_prompt.txt"

with open(system_prompt_path, "r") as file:
    sys_prompt = file.read()
with open(user_prompt_path, "r") as file:
    user_prompt = file.read()

In [10]:
# Ask the model for key questions. The system template takes the dimension
# name; the user template takes the dimension name twice, its rubric, and the
# rendered component/attribute text.
prompt_list = [
    {"role":"system", "content": sys_prompt.format(DIMENSION)},
    {"role": "user", "content": user_prompt.format(DIMENSION, DIMENSION, DIMENSION_RUBRIC[DIMENSION], components_attributes)}
]

generated_key_questions = gpt4.ask_chatgpt(prompt_list)

# Parse the reply with ast.literal_eval instead of eval(): the text comes from
# an external model, and eval() would execute any expression it contains.
# literal_eval only accepts Python literals and raises ValueError/SyntaxError
# on anything else. The next cell calls .items() on the result, so the reply
# is expected to be a dict literal.
generated_key_questions = ast.literal_eval(generated_key_questions)

In [11]:
# One "- <component>: <question>" bullet per entry, each ending in a newline.
# Plain "+" concatenation is intentional: both parts must already be strings.
key_questions = "".join(
    "- " + component + ": " + question + "\n"
    for component, question in generated_key_questions.items()
)

# Sub-question Generation

In [12]:
# Prompt templates for the sub-question generation step.
system_prompt_path = "./prompts/checklist_construction/sub_question_generation/system_prompt.txt"
user_prompt_path = "./prompts/checklist_construction/sub_question_generation/user_prompt.txt"

with open(system_prompt_path, "r") as file:
    sys_prompt = file.read()
with open(user_prompt_path, "r") as file:
    user_prompt = file.read()

In [13]:
# Ask the model to expand the key questions into sub-questions. The system
# prompt is used verbatim; the user template takes the dimension name three
# times, its rubric, and the bulleted key questions.
prompt_list = [
    {"role":"system", "content": sys_prompt},
    {"role": "user", "content": user_prompt.format(DIMENSION, DIMENSION, DIMENSION, DIMENSION_RUBRIC[DIMENSION], key_questions)}
]
generated_sub_questions = gpt4.ask_chatgpt(prompt_list)

# Parse the reply with ast.literal_eval instead of eval(): the text comes from
# an external model, and eval() would execute any expression it contains.
# literal_eval only accepts Python literals and raises ValueError/SyntaxError
# on anything else. The next cell iterates .items() and then each value, so
# the reply is expected to be a dict literal whose values are sequences.
generated_sub_questions = ast.literal_eval(generated_sub_questions)

In [14]:
# Flatten the per-component sub-question lists into one bulleted text block;
# the component keys themselves are not part of the output.
sub_questions = "".join(
    f"- {sub_question}\n"
    for sub_question_list in generated_sub_questions.values()
    for sub_question in sub_question_list
)

# Question Validation

In [15]:
# Prompt templates for the question-validation step.
system_prompt_path = "./prompts/checklist_construction/question_validation/system_prompt.txt"
user_prompt_path = "./prompts/checklist_construction/question_validation/user_prompt.txt"

with open(system_prompt_path, "r") as file:
    sys_prompt = file.read()
with open(user_prompt_path, "r") as file:
    user_prompt = file.read()

In [16]:
# The system prompt is used verbatim; the user template takes the dimension
# name twice, its rubric, the dimension name again, and the bulleted
# sub-questions.
system_message = {"role": "system", "content": sys_prompt}
user_message = {
    "role": "user",
    "content": user_prompt.format(
        DIMENSION,
        DIMENSION,
        DIMENSION_RUBRIC[DIMENSION],
        DIMENSION,
        sub_questions,
    ),
}
prompt_list = [system_message, user_message]

final_sub_questions = gpt4.ask_chatgpt(prompt_list)

# The reply is parsed as a Python literal (never executed); the next cell
# iterates over the result.
final_sub_questions_list = ast.literal_eval(final_sub_questions)

In [17]:
# Final checklist text: one "- <question>" bullet per validated sub-question,
# each terminated by a newline.
checklist = "".join(
    f"- {sub_question}\n" for sub_question in final_sub_questions_list
)

In [18]:
# print() rather than a bare expression, so the embedded newlines in the
# string are rendered as line breaks instead of being shown as "\n" in a repr.
print(checklist)

- Does the summary start with an introduction that sets the context?
- Does the summary have a body that elaborates on the main points?
- Does the summary conclude with a summary or wrap-up of the information presented?
- Does each sentence in the summary follow logically from the previous one?
- Are there effective transitions between ideas in the summary?
- Does the summary avoid abrupt shifts in topic or context?
- Does the summary clearly present the main points of the original document?
- Does the summary avoid including unnecessary details?
- Is the language used in the summary straightforward and easy to understand?
- Is the tone and style of the summary consistent throughout?
- Does the summary include all key details from the original document?
- Does the summary conclude logically, without leaving out essential information?
- Does the summary avoid ending abruptly or leaving the reader with unanswered questions?

