In [15]:
from openai import OpenAI
import os
import json
import time

from secret import OPENAI_API_KEY
# Shared OpenAI client used by the request helper below; the API key is
# imported from the local `secret` module rather than written in the notebook.
client = OpenAI(api_key=OPENAI_API_KEY)

In [16]:
# Instruction header of the distillation prompt: given a document and its
# ground-truth summary, ask for essential aspects, [ENTITY1 | RELATION | ENTITY2]
# triples, and a summary composed from those triples.
COT_PREFIX = "Given a document and its ground-truth summary, do the following tasks:\n(1) According to the ground-truth summary, extract essential aspects of the document.\n(2) For each essential aspect, retrieve detailed triples in the format [ENTITY1 | RELATION | ENTITY2] used to compose the ground-truth summary.\n(3) With the retrieved triples, compose a summary. The essential aspects, triples, and composed summary should be in the same response, separated by a new line.\n\nAll triples [ENTITY1 | RELATION | ENTITY2] should be in length 3 (separated by \"|\").\n\n"
# Output-format template placed after COT_PREFIX. Its section headers
# ("Essential Aspects:", "Triples:", "Generated Summary:") are the exact markers
# the response-parsing code in this notebook splits on, so keep them in sync.
EXAMPLE_PROMPT = "Example:\n================Example=================\nPrompt:\n[Document]: [document]\n[Ground-truth Summary]: [ground-truth summary]\nUpdate:\nEssential Aspects:\n[aspects]\nTriples:\n- [ENTITY1_1 | RELATION_1 | ENTITY1_2]\n- [ENTITY2_1 | RELATION_2 | ENTITY2_2]\n- [ENTITY3_1 | RELATION_3 | ENTITY3_2]\n- ...\nGenerated Summary:\n[summary]\n========================================\n\n"

An entity is like a node in a graph, and an event is the directed edge that connects two entities.

In [2]:
# Prompt variant that takes only a document (no ground-truth summary). It asks
# for per-entity relevance lists, the 5 most important triples, and a
# 3-sentence summary, followed by an example of the expected output layout.
# NOTE(review): not referenced by the other cells visible in this part of the
# notebook — confirm whether it is still needed.
CoT_SUMMARY_PREFIX = """Given a document, do the following tasks:
(1) According to the document, find the essential entities.
(2) For each entities, extract essential relevance to another entities in the format [TARGET_ENTITY | {EVENT1 | ENTITY1}, {EVENT2 | ENTITY2}, ...]
(3) Review the document, extract the 5 most important triples in the format [ENTITY1 | RELATION | ENTITY2] for ready to compose a summary.
(4) With the retrieved triples, compose a summary in 3 sentences.

Example: 
================Example=================
Prompt:
Document: [document]
Update:
Essential Relevance:
- [TARGET_ENTITY | {EVENT1 | ENTITY1}, {EVENT2 | ENTITY2}, ...]
- [TARGET_ENTITY | {EVENT1 | ENTITY1}, {EVENT2 | ENTITY2}, ...]
- ...

Triples:
- [ENTITY1_1 | RELATION | ENTITY1_2]
- [ENTITY2_1 | RELATION | ENTITY2_2]
- ...

Generated Summary:
[summary]
========================================
"""

In [17]:
def _distill_gpt(document, g_summary, mname="gpt-3.5-turbo", max_token=256, temperature=0):
    """Ask a chat model for the aspects/triples/summary rationale of one example.

    The prompt is COT_PREFIX, then EXAMPLE_PROMPT, then the concrete document
    and ground-truth summary, ending with an "Update:" cue for the model.

    Args:
        document: Source article text inserted after "[Document]:".
        g_summary: Reference summary inserted after "[Ground-truth Summary]:".
        mname: Model name forwarded to the chat completions endpoint.
        max_token: Forwarded as `max_tokens` (cap on the generated reply).
        temperature: Sampling temperature forwarded to the endpoint.

    Returns:
        The message content of the first returned choice.
    """
    prompt = f"{COT_PREFIX}\n{EXAMPLE_PROMPT}Prompt:\n[Document]: {document}\n[Ground-truth Summary]: {g_summary}\n\n Update:"

    # Single-turn conversation: the whole prompt goes in one user message.
    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=mname,
        messages=[user_turn],
        max_tokens=max_token,
        temperature=temperature,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Smoke test: run the distillation prompt on the first training example.
# JSONL is read as UTF-8 explicitly instead of relying on the platform default,
# and blank lines (e.g. a trailing newline) are skipped because json.loads
# rejects an empty string.
with open("data/cnndm_sumllm/gpt4/train.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# The file is closed before the API request is made, so the handle is not
# held open for the duration of the network round-trip.
document = data[0]["article"]
summary = data[0]["abstract"]
response = _distill_gpt(document, summary)
print(response)

In [38]:
# Scratch parse of the single reply held in `response`, split on the section
# headers that EXAMPLE_PROMPT asks the model to emit.
res = response

# Text between "Essential Aspects:" and "Triples:"; the first two characters of
# every line are dropped (the "- " bullet when the model follows the template).
Aspects = res.split("Essential Aspects:")[1].split("Triples:")[0].strip()
aspects = [row[2:] for row in Aspects.split("\n")]

# Text between "Triples:" and "Generated Summary:"; each line loses its first
# two characters and is then cut into fields on " | ".
after_triples_header = res.split("Triples:")[1]
Triples = after_triples_header.split("Generated Summary:")[0].strip()
triples = [row[2:].split(" | ") for row in Triples.split("\n")]

# Pair aspects and triples by position; zip stops at the shorter of the two.
rationale = [
    {"aspect": aspect, "triples": fields}
    for aspect, fields in zip(aspects, triples)
]

# Everything after the "Generated Summary:" header.
summary = res.split("Generated Summary:")[1].strip()

[['Tom Daley', 'had', 'disappointing outing'], ['Tom Daley', 'failed to qualify for', 'final'], ['Tom Daley', 'scored', "54 for 'firework' dive routine"], ['Jiang Yang', 'claimed', 'first place'], ['Tonia Couch', 'placed', "fourth in 10m women's platform"], ['Sarah Barrow', 'came in', "sixth in 10m women's platform"]]


In [43]:
def _extract_gpt_response(response: str): 
    res = response
    aspects.append([x[2:] for x in res.split("Essential Aspects:")[1].split("Triples:")[0].strip().split("\n")])
    triples.append([x[2:] for x in res.split("Triples:")[1].split("Generated Summary:")[0].strip().split("\n")])
    # rationale.append([a+tri for (a, tri) in zip(aspects, triples)])
    summary = res.split("Generated Summary:")[1].strip()

    return {
        "aspects": aspects, # "aspects": ["aspect1", "aspect2", ...]
        "triples": triples, # "triples": [["entity1", "relation1", "entity2"], ["entity1", "relation1", "entity2"], ...]
        # "rationale": rationale,
        "summary": summary
    }

In [None]:
# Parse the reply stored in `response` and display the resulting dict
# (keys "aspects", "triples", "summary") as the cell output.
_extract_gpt_response(response)