In [1]:
import ast
import json
import random
import re

import google.generativeai as genai
import pandas as pd


# Seed Python's RNG so any later use of `random` is repeatable across runs.
random.seed(42)

# Load the SummEval JSON file. The encoding is stated explicitly because open()
# otherwise falls back to a platform-dependent default, which can mis-decode
# (or fail on) non-ASCII text in a UTF-8 JSON file.
with open("../data/summeval.json", encoding="utf-8") as file:
    data = json.load(file)

# LLM Initialization

In [2]:
# Read the local credentials file and pick out the Google key.
with open("../api_keys.json", "r") as file:
    api_keys = json.loads(file.read())
GOOGLE_API_KEY = api_keys["google"]

# Configure the SDK with that key, then create the Gemini model handle
# that the generation cell calls.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-1.5-pro-latest')

# Dimension Data Preparation

In [3]:
# Evaluation dimension this run targets; it selects the rubric entry below and
# the per-dimension sample file read in the next section.
DIMENSION = "coherence"

# Rubric text for each dimension, keyed by dimension name. The selected entry
# is inserted verbatim into the prompt sent to the model.
# The coherence entry quotes the DUC guideline as "build from sentence to
# sentence to a coherent body"; the previous text had dropped "to sentence".
DIMENSION_RUBRIC = {
    "coherence": 'Coherence ( 1 - 5 ) - the collective quality of all sentences. We align this dimension with the DUC quality question of structure and coherence whereby "the summary should be well-structured and well-organized. The summary should not just be a heap of related information, but should build from sentence to sentence to a coherent body of information about a topic."',
    "consistency": "Consistency ( 1 - 5 ) - the factual alignment between the summary and the summarized source. A factually consistent summary contains only statements that are entailed by the source document. Annotators were also asked to penalize summaries that contained hallucinated facts.",
    "fluency": 'Fluency ( 1 - 5 ) - the quality of individual sentences. Drawing again from the DUC quality guidelines, sentences in the summary "should have no formatting problems, capitalization errors or obviously ungrammatical sentences (e.g., fragments, missing components) that make the text difficult to read."',
    "relevance": "Relevance ( 1 - 5 ) - selection of important content from the source. The summary should include only important information from the source document. Annotators were instructed to penalize summaries which contained redundancies and excess information.",
}

# Sample Loading

In [4]:
# Load the pickled samples stored for the selected dimension.
sample_list = pd.read_pickle(f"../data/{DIMENSION}/sample.pkl")

# Split each sample into three parallel lists, index-aligned with sample_list:
# the source text, the system summary, and the score for DIMENSION.
sampled_source = [entry["source"] for entry in sample_list]
sampled_summary = [entry["system_output"] for entry in sample_list]
sampled_score = [entry["scores"][DIMENSION] for entry in sample_list]

# Prompt Construction

In [5]:
# Read the prompt template; it is filled in with str.format before being sent
# to the model. The encoding is stated explicitly so the template decodes the
# same way on every platform instead of using open()'s locale default.
with open("../prompts/think_aloud/gemini/user_prompt.txt", "r", encoding="utf-8") as file:
    user_prompt = file.read()

In [6]:
# Positional arguments for the template, in slot order: five dimension-name
# slots, the rubric text, then (source, summary, score) for the first four samples.
prompt_args = [DIMENSION] * 5 + [DIMENSION_RUBRIC[DIMENSION]]
for shot in range(4):
    prompt_args += [sampled_source[shot], sampled_summary[shot], sampled_score[shot]]

raw_response = model.generate_content(user_prompt.format(*prompt_args))

# Keep only the brace-delimited span(s) of the reply text, concatenated in order.
brace_spans = re.findall(r'\{[^}]*\}', raw_response.text)
gemini_response = ''.join(brace_spans)

In [7]:
# Parse the extracted dict literal from the model's reply. The text is
# untrusted LLM output, so use ast.literal_eval (literals only) rather than
# eval, which would execute any expression the model happened to return.
ast.literal_eval(gemini_response)

{'1': "Does the summary read like a single, cohesive paragraph rather than a collection of disjointed sentences?  The rubric emphasizes a 'well-structured and well-organized' summary, indicating a need for clear connections between sentences.",
 '2': 'Are there clear transitions between sentences, or do they abruptly shift topics?  The flow from one sentence to the next is crucial for maintaining coherence.',
 '3': "Does each sentence logically follow from the one preceding it? The rubric emphasizes a summary that 'builds from sentence to sentence,' implying a logical progression of information.",
 '4': 'Does the summary maintain a consistent focus, or does it meander between unrelated aspects of the article? A coherent summary will maintain a clear focus throughout.',
 '5': 'Could the order of sentences be rearranged without affecting the clarity or flow of information? If so, this suggests a lack of inherent structure and coherence.'}