In [1]:
import os
import sys
import json
from iconic_tools.langchain import InstructSonnet, InstructOpus3, InstructGPT4, InstructGeminiPro, InstructGPT35
from langchain_core.prompts import ChatPromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
# CONSTANTS AND INITIALISATION

# Base directory of the notebook; the "prompts/" and "transcripts/" folders are resolved relative to it.
PATH = os.path.abspath(os.getcwd())

# Model selection: exactly one DIALOGUE_MODEL / QUERY_MODEL pair should be uncommented.
# DIALOGUE_MODEL generates the next line of dialogue; QUERY_MODEL answers the yes/no end-of-scene queries.

# DIALOGUE_MODEL = InstructGeminiPro(temperature=1.0, max_tokens=3000)
# QUERY_MODEL = InstructGeminiPro(temperature=1.0, max_tokens=3000)

DIALOGUE_MODEL = InstructGPT4(temperature=1.0, max_tokens=3000)
QUERY_MODEL = InstructGPT4(temperature=1.0, max_tokens=3000)

# DIALOGUE_MODEL = InstructSonnet(temperature=1.0, max_tokens=3000)
# QUERY_MODEL = InstructSonnet(temperature=1.0, max_tokens=3000)

# Scene selection: alternative scene configurations are kept commented out below.
# GAME and SCENE are used to build the prompt file paths; GENRE, GAME and SCENE_IDX are
# passed to dataset_utils.load_scene(); SCENE_VERSION picks the scene description file.

# GAME = "troy"
# SCENE = "achilles"
# ACTORS = ["Achilles", "Odysseus"]

# GENRE = "drama"
# SCENE_IDX = 124
# GAME = "amadeus"
# SCENE = "constanze"
# ACTORS = ["Mozart", "Constanze", "Schikaneder"]
# SCENE_VERSION = 2

GENRE = "comedy"
SCENE_IDX = 116
GAME = "cedar_rapids"
SCENE = "car"
ACTORS = ["Tim", "Ronald", "Dean", "Joan"]  # ACTORS[0] speaks the scripted opening line
SCENE_VERSION = 0

# ANSI escape sequences used to colour printed output.
RED = "\033[91m"
GREEN = "\033[92m"
BLUE = "\033[94m"
YELLOW = "\033[93m"
WHITE = "\033[0m"  # NOTE: "\033[0m" is the ANSI reset code (terminal default), not an explicit white

In [3]:
# UTILITIES


def load_prompt(filename):
    """Reads a prompt file from the "prompts" directory under PATH.

    filename: Path of the file relative to PATH/prompts (may include sub-directories,
        e.g. "my_game/back_story.txt").

    Returns the entire contents of the file as a string.
    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so that prompts containing non-ASCII characters
    # (curly quotes, accents, dashes) load identically on every platform instead of
    # depending on the locale's default encoding.
    with open(PATH + f"/prompts/{filename}", encoding="utf-8") as f:
        return f.read()


def write_transcript(dialogue, filename):
    """Writes a dialogue transcript to the "transcripts" directory under PATH.

    dialogue: The transcript text to write.
    filename: Path of the output file relative to PATH/transcripts (may include
        sub-directories, e.g. "my_game/transcript_0_0.txt"). An existing file is overwritten.

    Raises OSError if the directory cannot be created or the file cannot be written.
    """
    target = PATH + f"/transcripts/{filename}"
    # Create the destination folder on demand so a finished (and expensive) run is not
    # lost just because e.g. "transcripts/<game>/" does not exist yet.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    # Encode explicitly as UTF-8: model output may contain characters that the
    # locale's default encoding cannot represent.
    with open(target, "w", encoding="utf-8") as f:
        f.write(dialogue)


def list_to_conjunction(L):
    """Takes a list of strings and returns them joined as an English enumeration.

    Examples:
        []              -> ""
        ["a"]           -> "a"
        ["a", "b"]      -> "a and b"
        ["a", "b", "c"] -> "a, b, and c"
    """
    # Any empty input (empty list, or the empty string accepted historically) gives "".
    # The previous check `L == ""` never matched an empty list, which then crashed on L[-1].
    if not L:
        return ""
    if len(L) == 1:
        return L[0]
    if len(L) == 2:
        return f"{L[0]} and {L[1]}"
    return ", ".join(L[:-1]) + f", and {L[-1]}"
    

def list_to_string(L):
    """Joins a list of strings into one string, placing a newline between consecutive elements."""
    line_break = "\n"
    return line_break.join(L)

In [4]:
# PROMPT TEMPLATES AND INSTRUCTION PROMPTS


# NOTE: these are ordinary (non-raw) triple-quoted strings, so every literal "\n" below is a
# newline escape that adds a line break on top of the physical line breaks in the source.

# Opening sentence of the prompt used when asking the model for the next line of dialogue.
dialogue_instruction_prefix = """
You are going to generate one line of dialogue for a scene in the middle of a computer game.
"""


# Shared preamble for dialogue and query prompts.
# Placeholders: instruction_prefix, back_story, scene_description, scene_supplement, actors.
preamble_template = """
{instruction_prefix}
This is the game back story. {back_story}\n
Here is a description of the scene in question. {scene_description}{scene_supplement}\n
The characters in the dialogue are {actors}.
"""

# Full prompt: preamble, then the dialogue so far, then a task-specific instruction.
# Placeholders: preamble, dialogue, instruction_suffix.
instruction_template = """
{preamble}
Here is the dialogue so far\n\n
{dialogue}
{instruction_suffix}
"""

# Format of a single dialogue line. Placeholders: actor, speech.
speech_template = '[{actor}]: {speech}\n'

# Closing instruction of the prompt used when asking for the next line of dialogue.
dialogue_instruction_suffix = """
Give me the next line in the dialogue in the same format. Don't provide stage directions, just the actor's words.\n
"""

# Closing instruction of a yes/no query prompt. Placeholder: query.
query_instruction_suffix_template = """
{query} Answer with a single word, yes or no.
"""

# Opening sentence of a yes/no query prompt.
query_instruction_prefix = """
You are going to answer a single yes/no question about the current state of the dialogue in a scene in the middle of a computer game.\n
"""

In [5]:
# BUILDING DIALOGUES


def prompt_llm(prompt, model):
    """Builds a runnable chain that sends `prompt` to `model`.

    prompt: The prompt text. It is parsed by ChatPromptTemplate.from_template, so
        NOTE(review): any literal curly braces left in the text will presumably be read
        as template variables - confirm before passing text that may contain them.
    model: The model the prompt is piped into.

    Returns the chain `template | model`; callers run it with `.invoke(...)`.
    """
    return ChatPromptTemplate.from_template(template=prompt) | model


def load_prompts(scene_version, supplement_version=-1):
    """Loads every prompt file needed to run the current scene.

    Files are read with load_prompt(). The back story comes from "<GAME>/back_story.txt";
    all other files live under "<GAME>/scenes/<SCENE>_scene/" and are prefixed with "<SCENE>_".

    scene_version: Number of the scene description file ("<SCENE>_scene_description_<n>.txt").
    supplement_version: Number of the scene supplement file ("<SCENE>_scene_supplement_<n>.txt"),
        or -1 (the default) for no supplementary scene text.

    Returns a tuple (back_story, scene_description, scene_supplement, opening_speech, end_queries).
    scene_supplement is "" when no supplement is requested, otherwise the supplement text
    preceded by two newlines. end_queries is a one-element list holding the contents of
    "<SCENE>_query1.txt".
    """
    scene_prefix = GAME + "/scenes/" + SCENE + "_scene/" + SCENE

    back_story = load_prompt(GAME + "/back_story.txt")
    scene_description = load_prompt(
        scene_prefix + "_scene_description_" + str(scene_version) + ".txt")
    if supplement_version == -1:  # no supplementary scene text
        scene_supplement = ""
    else:
        # Select the supplement by its own version number. Previously scene_version was
        # used here, so supplement_version only acted as an on/off switch.
        scene_supplement = "\n\n" + load_prompt(
            scene_prefix + "_scene_supplement_" + str(supplement_version) + ".txt")
    opening_speech = load_prompt(scene_prefix + "_opening_speech.txt")
    query = load_prompt(scene_prefix + "_query1.txt")
    end_queries = [query]
    return (back_story, scene_description, scene_supplement, opening_speech, end_queries)


def query_dialogue(
        dialogue, back_story, scene_description,
        actors, query):
    """Asks QUERY_MODEL a yes/no question about the dialogue so far.

    dialogue: The dialogue text generated so far.
    back_story: The backdrop to the whole game.
    scene_description: A description of the scene.
    actors: A list of the names of the actors in the scene.
    query: The natural language question to ask.

    Returns whatever the chain's invoke() call returns (the model's answer).
    """
    # The query prompt never includes a scene supplement.
    preamble = preamble_template.format(
        instruction_prefix=query_instruction_prefix,
        back_story=back_story,
        scene_description=scene_description,
        scene_supplement="",
        actors=list_to_conjunction(actors))
    full_prompt = instruction_template.format(
        preamble=preamble,
        dialogue=dialogue,
        instruction_suffix=query_instruction_suffix_template.format(query=query))
    return prompt_llm(full_prompt, QUERY_MODEL).invoke({})


def sim_mini_scene(
        back_story, scene_description, scene_supplement,
        actors, opening_speech, end_queries,
        player=False,
        max_turns=10):
    """Generates dialogue for a mini-scene.

    back_story: The backdrop to the whole game.
    scene_description: A description of this particular mini-scene. Should state the goals of the scene.
    scene_supplement: Extra text describing the scene. Can be used to simulate adversarial players, for example.
    actors: A list of the names of the actors involved in generating dialogue.
    opening_speech: The first words spoken by actors[0]. (These have to be scripted.) Ignored when player is True.
    end_queries: A list of natural language questions; the scene is terminated if all answers are yes.
    player: If True then the user plays actors[0], typing the opening line and every even-numbered turn;
        otherwise every line after the scripted opening is generated by the LLM.
    max_turns: The scene will terminate when this many turns have been taken whether or not goals have been reached.
        The opening line counts as the first turn.

    Returns the whole dialogue as a single string. The scene description and every line are also printed.
    """

    def _is_yes(answer):
        # Accept "yes" in any capitalisation and with leading whitespace ("YES", " Yes.").
        # The previous check only recognised answers starting with exactly "Yes" or "yes".
        return answer.strip().lower().startswith("yes")

    print(RED + scene_description)
    print()

    dialogue_model = DIALOGUE_MODEL

    dialogue_preamble = preamble_template.format(
        instruction_prefix=dialogue_instruction_prefix,
        back_story=back_story,
        scene_description=scene_description,
        scene_supplement=scene_supplement,
        actors=list_to_conjunction(actors))

    # The opening line always belongs to actors[0]: typed by the user or taken from the script.
    first_speech = input() if player else opening_speech
    response = speech_template.format(actor=actors[0], speech=first_speech)

    dialogue = response + "\n"

    print(GREEN + response)

    turn = 1
    success = False

    while turn < max_turns and not success:

        if player and (turn % 2 == 0):  # user is playing actor 1
            speech = input()
            response = speech_template.format(actor=actors[0], speech=speech)
        else:  # the line is generated by the LLM
            prompt = instruction_template.format(
                preamble=dialogue_preamble, dialogue=dialogue,
                instruction_suffix=dialogue_instruction_suffix)
            chain = prompt_llm(prompt, dialogue_model)
            response = chain.invoke({}) + "\n"

        dialogue += response

        print(GREEN + response)

        # Have the conditions for ending the scene been met?
        # all() stops at the first "no", so no further (paid) queries are issued once the
        # outcome is decided; an empty end_queries list still counts as success.
        success = all(
            _is_yes(query_dialogue(
                dialogue, back_story, scene_description,
                actors, query))
            for query in end_queries)

        turn += 1

    return dialogue

In [6]:
# # GENERATE DIALOGUES

# player = False
# supplement_version = -1
# n_versions = 3
# n_runs = 3

# for scene_version in range(n_versions):
#     print(WHITE + "VERSION {}".format(scene_version))
#     print()
#     print()
#     for run_no in range(n_runs):
#         print(WHITE + "Run {}".format(run_no))
#         print()

#         (back_story, scene_description, scene_supplement,
#          opening_speech, end_queries) = load_prompts(scene_version, supplement_version)
#         dialogue = sim_mini_scene(
#             back_story, scene_description, scene_supplement,
#             ACTORS, opening_speech, end_queries,
#             player=player, max_turns=20)
        
#         query = end_queries[0]
#         response = query_dialogue(
#             dialogue, back_story, scene_description,
#             ACTORS, query)
        
#         print(BLUE + query + ' ' + response)
#         print()
#         print()

#         write_transcript(dialogue, GAME + "/transcript_" + str(scene_version) + "_" + str(run_no) + ".txt")

In [7]:
# player = False
# scene_version = 2
# supplement_version = 2

# (back_story, scene_description, scene_supplement,
#  opening_speech, end_queries) = load_prompts(scene_version, supplement_version)
# dialogue = sim_mini_scene(
#     back_story, scene_description, scene_supplement,
#     ACTORS, opening_speech, end_queries,
#     player=player, max_turns=2)

In [8]:
import asyncio
import concurrent
import os
import sys
import re

# Allow loading dialogue middleware packages
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)

from benchmark import agent_interface
from benchmark import dataset_utils
from benchmark import dialogue_graph
from benchmark import task_runner
from tqdm.asyncio import tqdm

from iconic_tools import langchain
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

In [9]:
def extract_dialogue(dialogue_state):
    """Returns the text found between the chat-history tags of a dialogue state.

    The result is stripped of surrounding whitespace. None is returned when either
    "<chat_history_start>" or "<chat_history_end>" does not occur in the state.
    """
    open_tag, close_tag = "<chat_history_start>", "<chat_history_end>"
    open_at = dialogue_state.find(open_tag)
    close_at = dialogue_state.find(close_tag)
    if min(open_at, close_at) < 0:  # find() gives -1 for an absent tag
        return None
    return dialogue_state[open_at + len(open_tag):close_at].strip()


def extract_role(dialogue_line):
    """Returns the character cue of a dialogue line: the text before the first colon, stripped.

    Returns None when the line contains no colon.
    """
    cue, colon, _ = dialogue_line.partition(':')
    return cue.strip() if colon else None


def extract_speech(dialogue_line):
    """Returns a dialogue line without its character cue: the text after the first colon, stripped.

    Returns None when the line contains no colon.
    """
    _, colon, spoken = dialogue_line.partition(':')
    return spoken.strip() if colon else None


def strip_brackets(s):
    """Rewrites every '[X]:' in the string as 'X:', where X is a run of word characters."""
    bracketed_cue = re.compile(r'\[(\w+)\]:')
    return bracketed_cue.sub(r'\1:', s)


def generate_next_line(dialogue_state: str) -> agent_interface.VirtualActorResponse:
    """Agent callback: produces the next line of the scene for the given dialogue state.

    dialogue_state: Text containing the dialogue so far between the
        "<chat_history_start>" and "<chat_history_end>" tags.

    If the dialogue is empty, the scripted opening speech is returned for ACTORS[0];
    otherwise DIALOGUE_MODEL is asked for the next line. The end-of-scene queries are
    then evaluated on the dialogue including the new line.

    Returns a VirtualActorResponse with `role` (speaker), `text` (the spoken words) and
    `is_last` (True when every end query was answered yes). `role` and `text` are None
    if the generated line contains no colon.

    Raises ValueError if the chat-history tags are missing from dialogue_state.
    """

    (back_story, scene_description, scene_supplement,
     opening_speech, end_queries) = load_prompts(SCENE_VERSION)

    dialogue = extract_dialogue(dialogue_state)
    if dialogue is None:
        # Previously None was formatted into the prompt as the text "None" and the function
        # then failed with a TypeError after a wasted model call. Fail early and clearly.
        raise ValueError(
            "dialogue_state does not contain <chat_history_start>/<chat_history_end> tags")

    if dialogue == "":
        # Start of the scene: use the scripted opening line for the first actor.
        lines = strip_brackets(
            speech_template.format(actor=ACTORS[0], speech=opening_speech))
        response = extract_speech(lines)
        role = ACTORS[0]
        dialogue = lines + "\n"

    else:
        dialogue_preamble = preamble_template.format(
            instruction_prefix=dialogue_instruction_prefix,
            back_story=back_story,
            scene_description=scene_description,
            scene_supplement=scene_supplement,
            actors=list_to_conjunction(ACTORS))

        prompt = instruction_template.format(
            preamble=dialogue_preamble, dialogue=dialogue,
            instruction_suffix=dialogue_instruction_suffix)

        chain = prompt_llm(prompt, DIALOGUE_MODEL)
        lines = chain.invoke({}) + "\n"
        response = extract_speech(lines)
        role = extract_role(strip_brackets(lines))

        # extract_dialogue() strips the trailing newline of the history, so a separator is
        # needed; without it the new line was glued onto the end of the previous one.
        dialogue = dialogue + "\n" + lines + "\n"

    # Have the conditions for ending the scene been met?
    # Answers are matched case-insensitively and ignoring leading whitespace; all() stops
    # querying at the first "no".
    success = all(
        query_dialogue(
            dialogue, back_story, scene_description,
            ACTORS, query).strip().lower().startswith("yes")
        for query in end_queries)

    response_dict = {
        "role": role,
        "text": response,
        "is_last": success,
    }

    return agent_interface.VirtualActorResponse(**response_dict)

In [21]:
# Model handed to the benchmark as `metric_calc_llm`; created with temperature 0, separately from the dialogue models.
metric_llm = langchain.InstructGPT4(temperature=0, max_tokens=4096)

# initial_state = dialogue_graph.Dialogue(
#     facts = ["""
#         Constanze just received news from men from Salzburg that her husband's father is dead, Mozart still does not know.
#         The scene takes place in Mozart's apartment, in the living room. At night. It is the 1780s.
#         Explosive laughter as Mozart and Schikaneder enter the apartment.
#         They are very pleased with themselves and accompanied by the three actresses.
#         The front door opens, very gingerly.
#         Mozart, still rather drunk, sticks his head into the room, anxious not to make a noise.
#         He sees the strangers and breaks into a smile.
#     """],
#     comm_style = ["""
#         Mozart is drunk.
#         Mozart is eccentric and boisterous, often engaging in childish behavior that frustrates those around him.
#         Mozart is a musical genius.
#         Mozart is sick of Vienna.
#         Mozart has infantile tendencies.
#         Constanze is Mozart's wife; she calls him Wolfi.
#     """],
#     goals = ["""
#         Constanze tells Mozart about his father's death.
#         Create a stark contrast between Mozart's jubilant mood and the news of his father's death.
#     """],
# )

# Load the benchmark scene selected by GENRE / GAME / SCENE_IDX.
# NOTE(review): `dialogue_state` is not used again in this cell.
(dialogue, dialogue_state) = dataset_utils.load_scene(GENRE, GAME, SCENE_IDX)

# Collapse each field of the loaded scene into one newline-joined string, wrapped in a one-element list.
facts = [list_to_string(dialogue.facts)]
comm_style = [list_to_string(dialogue.comm_style)]
goals = [list_to_string(dialogue.goals)]

initial_state = dialogue_graph.Dialogue(facts=facts, comm_style=comm_style, goals=goals)

# Run the evaluation
# (top-level `await` relies on the notebook's running event loop; it is not valid in a plain script)
results = await task_runner.eval_agent(
    task_idx=0, 
    task=dataset_utils.Tasks.EMOTIONS, 
    initial_state=initial_state, 
    agent=generate_next_line, 
    metric_calc_llm=metric_llm)

# Print out the results
# Only the first element stored under each key is shown.
results_dict = dict(results)
for key in results_dict.keys():
    print(key + ": " + str(results_dict[key][0]))

  0%|          | 0/7 [00:00<?, ?it/s]                                  
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A










 14%|█▍        | 1/7 [01:40<10:04, 100.76s/it]

100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 



100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 




100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 






100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 







100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 








100%|██████████| 2/2 [02:32<00:00, 76.31s/it] 









100%|██████████| 3/3 [02:32<00:00, 50.87s/it] 










100%|██████████| 3/3 [01:28<00:00, 29.51s/it]











100%|██████████| 4/4 [01:28<00:00, 22.13s/it]
100%|██████████| 2/2 [02:42<00:00, 81.15s/it] 

100%|██████████| 2/2 [02:32<00:00, 76.31s/it] 





100%|██████████| 2/2 [02:32<00:00, 76.31s/it] 
100%|██████████| 7/7 [02

task_idx: 0
task: emotions
score: 0.4583333333333333
synergy_impersonation_score: 0.1875
exec_seconds: 165.6350929737091
precision: 1
synergy_Dean: 1
impersonation_Dean: 0.3333333333333333
synergy_Ronald: 0.6666666666666666
impersonation_Ronald: 0.125
synergy_Tim: 0.0
impersonation_Tim: 0.0





In [22]:
# Show the first value stored under every result key again, without re-running the evaluation.
results_dict = dict(results)
for key, values in results_dict.items():
    print(key + ": " + str(values[0]))

task_idx: 0
task: emotions
score: 0.4583333333333333
synergy_impersonation_score: 0.1875
exec_seconds: 165.6350929737091
precision: 1
synergy_Dean: 1
impersonation_Dean: 0.3333333333333333
synergy_Ronald: 0.6666666666666666
impersonation_Ronald: 0.125
synergy_Tim: 0.0
impersonation_Tim: 0.0
