# Summarize Dialogue (GPT-4)

<a name='1'></a>
## 1 - Set up Kernel and Required Dependencies

In [None]:
# # Upgrade pip, setuptools, and wheel to prevent version conflicts
# %pip install --upgrade pip setuptools wheel --quiet

# # Install all required packages in a single command for efficiency
# %pip install --no-cache-dir --quiet \
#     tensorflow keras torchdata datasets evaluate rouge_score peft

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Load the datasets, Large Language Model (LLM), tokenizer, and configurator.

from datasets import load_dataset
import openai
import os
from dotenv import load_dotenv



In [None]:
import general_util
import os
import openai
from dotenv import load_dotenv
dataset = general_util.huggingface_dataset()


# sample_indices = [40, 80, 120, 200]
# general_util.display_dialogue_summaries(sample_indices, dataset)

list_index_to_train = []
index_to_summarize = 40
input_text, summary = general_util.make_prompt(dataset, "Summaries the above conversation:", index_to_summarize, list_index_to_train)

# Load OPENAI_API KEY model
load_dotenv()  # Load from .env file
openai.api_key = os.getenv("OPENAI_API_KEY")
response = openai.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": input_text}]
)
generated_summary = response.choices[0].message.content
general_util.output_text("ZERO SHOT", summary, generated_summary)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [75]:
def generate_summaries(example_indices: list, task: str, prompt_type, openai) -> None:
    dash_line = "-" * 80
    for i, index in enumerate(example_indices):
        dialogue = dataset["test"][index]["dialogue"]
        summary = dataset["test"][index]["summary"]

        # Select the appropriate prompt format
        if prompt_type == "summarize":
            input_text = f"""
Summarize the following conversation.

{dialogue}

Summary:
            """
        elif prompt_type == "summarize_with_context":
            context = f"Summarize the following conversation in 20 words or less."
            input_text = f"""


Dialogue:

{dialogue}

{context}
            """
        else:  # Default to raw dialogue
            input_text = dialogue
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": input_text}]
        )
        output = response.choices[0].message.content
        print(dash_line)
        print(f'Example {i + 1}')
        print(dash_line)
        print(f'INPUT PROMPT:\n{input_text}')
        print(dash_line)
        print(f'BASELINE HUMAN SUMMARY:\n{summary}')
        print(dash_line)
        print(f'MODEL GENERATION - {task}:\n{output}\n')


<a name='3'></a>
## 3 - Summarize Dialogue with an Instruction Prompt

<a name='3.1'></a>
### 3.1 - Zero Shot Inference from GPT-4

In [77]:
example_index_to_summarize = [40]
generate_summaries(example_index_to_summarize, "ZERO SHOT", "summarize", openai)

--------------------------------------------------------------------------------
Example 1
--------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

Summary:
            
--------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
--------------------------------------------------------------------------------
MODEL GENERATION - ZERO SHOT:
Person 1 asked Tom (Person 2) for the time and learned it was almost nine o'clock. Realizi

<a name='4'></a>
## 4 - Summarize Dialogue with One Shot and Few Shot Inference

In [79]:
def make_prompt(example_indices_full, example_index_to_summarize):
    prompt = ''
    for index in example_indices_full:
        dialogue = dataset['test'][index]['dialogue']
        summary = dataset['test'][index]['summary']

        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.
        prompt += f"""
Dialogue:

{dialogue}

What was going on?
{summary}


"""

    dialogue = dataset['test'][example_index_to_summarize]['dialogue']

    prompt += f"""
Dialogue:

{dialogue}

What was going on?
"""

    return prompt

In [86]:
def generate_summary(example_indices_full: list, example_index_to_summarize: int, task: str) -> None:
    dash_line = "-" * 80

    # Create the appropriate prompt based on the mode
    prompt = make_prompt(example_indices_full, example_index_to_summarize)
    summary = dataset["test"][example_index_to_summarize]["summary"]

    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}]
    )
    output = response.choices[0].message.content
    # Display results
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - {task}:\n{output}\n')



<a name='4.1'></a>
### 4.1 - One Shot Inference

In [90]:
example_indices_full = [120]

generate_summary(example_indices_full, example_index_to_summarize, "ONE SHOT")

--------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
['#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.']

--------------------------------------------------------------------------------
MODEL GENERATION - ONE SHOT:
#Person1# needs to leave in order to catch a train and is checking the time with #Person2#.



4.2 - Few Shot Inference

In [88]:
example_indices_full = [200, 80, 120]
generate_summary(example_indices_full, example_index_to_summarize, "FEW SHOT")

--------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
['#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.']

--------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
#Person1# is getting late and must leave to catch the nine-thirty train.

