In [82]:
import os
import sys
import json
from iconic_tools.langchain import InstructSonnet, InstructOpus3, InstructGPT4, InstructGeminiPro, InstructGPT35
from langchain_core.prompts import ChatPromptTemplate

In [83]:
# CONSTANTS AND INITIALISATION

PATH = os.path.abspath(os.getcwd())

# DIALOGUE_MODEL = InstructGeminiPro(temperature=1.0, max_tokens=3000)
# QUERY_MODEL = InstructGeminiPro(temperature=1.0, max_tokens=3000)

DIALOGUE_MODEL = InstructGPT4(temperature=1.0, max_tokens=3000)
QUERY_MODEL = InstructGPT4(temperature=1.0, max_tokens=3000)

# DIALOGUE_MODEL = InstructSonnet(temperature=1.0, max_tokens=3000)
# QUERY_MODEL = InstructSonnet(temperature=1.0, max_tokens=3000)

# GAME = "troy"
# SCENE = "achilles"
# ACTORS = ["Achilles", "Odysseus"]

# GENRE = "drama"
# SCENE_IDX = 124
# GAME = "amadeus"
# SCENE = "constanze"
# ACTORS = ["Mozart", "Constanze", "Schikaneder"]
# SCENE_VERSION = 2

# GENRE = "comedy"
# SCENE_IDX = 116
# GAME = "cedar_rapids"
# SCENE = "car"
# ACTORS = ["Tim", "Ronald", "Dean", "Joan"]
# SCENE_VERSION = 0

GENRE = "drama"
SCENE_IDX = 2
GAME = "bound"
SCENE = "coffee"
ACTORS = ["Violet", "Corky"]

RED = "\033[91m"
GREEN = "\033[92m"
BLUE = "\033[94m"
YELLOW = "\033[93m"
WHITE = "\033[0m"

In [84]:
# UTILITIES


def load_prompt(filename):
    with open(PATH + f"/prompts/{filename}") as f:
        return f.read()


def write_transcript(dialogue, filename):
    with open(PATH + f"/transcripts/{filename}", "w") as f:
        f.write(dialogue)


def list_to_conjunction(L):
    """Takes a list strings and returns a string with every element in the list separated by commas."""
    if L == "":
        return ""
    elif len(L) == 1:
        return L[0]
    elif len(L) == 2:
        return f"{L[0]} and {L[1]}"
    else:
        return ", ".join(L[:-1]) + f", and {L[-1]}"


def list_to_string(L):
    """Takes a list of strings and returns a string consisting of every element in the list separated by a newline."""
    return "\n".join(L)


def split_text(text):
    # Split the text by double newlines
    paragraphs = text.split('\n\n')
    # Remove leading or trailing whitespacen  and remove empty paragraphs
    paragraphs = [p.strip() for p in paragraphs]
    paragraphs = [p for p in paragraphs if p]
    return paragraphs


def read_queries(filename):
    queries = split_text(load_prompt(filename))
    queries = [s.strip() for s in queries if not s.strip().startswith('#')]
    return queries

In [85]:
# PROMPT TEMPLATES AND INSTRUCTION PROMPTS


dialogue_instruction_prefix = """
You are going to generate one line of dialogue for a scene in the middle of a computer game.
"""

preamble_template = """
{instruction_prefix}
This is the game back story. {back_story}\n
Here is a description of the scene in question. {scene_description}{scene_supplement}\n
The characters in the dialogue are {actors}.
"""

instruction_template = """
{preamble}
Here is the dialogue so far\n\n
{dialogue}
{instruction_suffix}
"""

speech_template = '[{actor}]: {speech}\n'

dialogue_instruction_suffix = """
Give me the next line in the dialogue in the same format. Don't provide stage directions, just the actor's words.\n
"""

query_preamble_template = """
{instruction_prefix}
This is the game back story. {back_story}\n
"""

query_instruction_prefix = """
You are going to answer a single question about the current state of the dialogue in a scene in the middle of a computer game.
"""

query_instruction_suffix_template = """
Now consider the following statement about this dialogue. {statement} Is this statement true or false? Answer with a single word, true or false.
"""

In [86]:
# BUILDING DIALOGUES


def prompt_llm(prompt, model):
    # print(prompt)
    # print()
    prompt = ChatPromptTemplate.from_template(template=prompt)
    chain = prompt | model
    return chain


def load_prompts(scene_version, supplement_version=-1):
    back_story = load_prompt(GAME + "/back_story.txt")
    scene_description = load_prompt(
        GAME + "/scenes/" + SCENE + "_scene/" + SCENE + "_scene_description_" + str(scene_version) + ".txt")
    if supplement_version == -1:  # no supplementary scene text
        scene_supplement = ""
    else:
        scene_supplement = "\n\n" + load_prompt(
            GAME + "/scenes/" + SCENE + "_scene/" + SCENE + "_scene_supplement_" + str(scene_version) + ".txt")
    opening_speech = load_prompt(
        GAME + "/scenes/" + SCENE + "_scene/" + SCENE + "_opening_speech.txt")
    queries = read_queries(GAME + "/scenes/" + SCENE + "_scene/" + SCENE + "_queries.txt")
    return (back_story, scene_description, scene_supplement, opening_speech, queries)




In [87]:
import asyncio
import concurrent
import os
import sys
import re

# Allow loading dialogue middleware packages
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)

from benchmark import agent_interface
from benchmark import dataset_utils
from benchmark import dialogue_graph
from benchmark import task_runner
from tqdm.asyncio import tqdm

from iconic_tools import langchain
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

In [88]:
def extract_dialogue(dialogue_state):
    """Takes a dialogue state and returns the dialogue itself."""
    start_tag = "<chat_history_start>"
    end_tag = "<chat_history_end>"
    start_index = dialogue_state.find(start_tag)
    end_index = dialogue_state.find(end_tag)
    if start_index == -1 or end_index == -1:  # tags absent
        return None
    start_index += len(start_tag)   
    dialogue = dialogue_state[start_index:end_index].strip()
    return dialogue


def extract_role(dialogue_line):
    """Takes one line of dialogue and returns the character cue."""
    parts = dialogue_line.split(':', 1)  # split line at the first colon
    if len(parts) < 2:  # no colon
        return None
    return parts[0].strip()


def extract_speech(dialogue_line):
    """Takes one line of dialogue and removes the character cue."""
    parts = dialogue_line.split(':', 1)  # split line at the first colon
    if len(parts) < 2:  # no colon
        return None
    return parts[1].strip()


def strip_brackets(s):
    """takes a string and replaces every substring of the form '[X]:' with 'X:'."""
    return re.sub(r'\[(\w+)\]:', r'\1:', s)


def generate_next_line(dialogue_state: str) -> agent_interface.VirtualActorResponse:

    (back_story, scene_description, scene_supplement,
     opening_speech, queries) = load_prompts(SCENE_VERSION)

    dialogue = extract_dialogue(dialogue_state)
    dialogue_model = DIALOGUE_MODEL

    if dialogue == "":
        lines = speech_template.format(actor=ACTORS[0], speech=opening_speech)
        lines = strip_brackets(lines)
        response = extract_speech(lines)
        role = ACTORS[0]

    else:

        dialogue_preamble = preamble_template.format(
            instruction_prefix=dialogue_instruction_prefix,
            back_story=back_story,
            scene_description=scene_description,
            scene_supplement=scene_supplement,
            actors=list_to_conjunction(ACTORS))
        
        prompt = instruction_template.format(
            preamble=dialogue_preamble, dialogue=dialogue,
            instruction_suffix=dialogue_instruction_suffix)
        
        chain = prompt_llm(prompt, dialogue_model)
        lines = chain.invoke({}) + "\n"
        response = extract_speech(lines)
        role = extract_role(strip_brackets(lines))
        
    dialogue += lines + "\n"

    # Have the conditions for ending the scene been met?
    query_preamble = preamble_template.format(
        instruction_prefix=query_instruction_prefix,
        back_story=back_story,
        scene_description="",
        scene_supplement="",
        actors=list_to_conjunction(ACTORS))
    fails = 0
    for statement in queries:                
        instruction = query_instruction_suffix_template.format(statement=statement)
        prompt = instruction_template.format(
            preamble=query_preamble, dialogue=dialogue,
            instruction_suffix=instruction)
        chain = prompt_llm(prompt, dialogue_model)
        response = chain.invoke({})
        # print(BLUE + "Statement: {}".format(statement))
        # print(BLUE + "Result: {}".format(response))
        # print()
        if response[0:4] != "True" and response[0:3] != "true":
            fails += 1
    success = (fails == 0)

    response_dict = {
        "role": role,
        "text": response,
        "is_last": success,
    }

    return agent_interface.VirtualActorResponse(**response_dict)

In [89]:
metric_llm = langchain.InstructGPT4(temperature=0, max_tokens=4096)

(dialogue, dialogue_state) = dataset_utils.load_scene(GENRE, GAME, SCENE_IDX)

facts = [list_to_string(dialogue.facts)]
comm_style = [list_to_string(dialogue.comm_style)]
goals = [list_to_string(dialogue.goals)]

initial_state = dialogue_graph.Dialogue(facts=facts, comm_style=comm_style, goals=goals)

# Run the evaluation
results = await task_runner.eval_agent(
    task_idx=0, 
    task=dataset_utils.Tasks.EMOTIONS, 
    initial_state=initial_state, 
    agent=generate_next_line, 
    metric_calc_llm=metric_llm)

# Print out the results
results_dict = dict(results)
for key in results_dict.keys():
    print(key + ": " + str(results_dict[key][0]))

  0%|          | 0/5 [00:00<?, ?it/s]                                   
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






100%|██████████| 3/3 [00:59<00:00, 19.74s/it]



100%|██████████| 3/3 [00:59<00:00, 19.74s/it]




100%|██████████| 3/3 [00:50<00:00, 16.88s/it]





100%|██████████| 3/3 [00:50<00:00, 16.88s/it]






100%|██████████| 6/6 [00:50<00:00,  8.44s/it]







100%|██████████| 3/3 [00:50<00:00, 16.88s/it]
 40%|████      | 2/5 [01:01<01:38, 32.85s/it]
100%|██████████| 2/2 [00:59<00:00, 29.62s/it]


100%|██████████| 2/2 [00:59<00:00, 29.62s/it]
100%|██████████| 5/5 [01:01<00:00, 12.37s/it]

task_idx: 0
task: emotions
score: 0.1111111111111111
synergy_impersonation_score: 0.16666666666666666
exec_seconds: 91.57588386535645
precision: 0
synergy_Corky: 0
impersonation_Corky: 0.0
synergy_Violet: 0.3333333333333333
impersonation_Violet: 0.3333333333333333





In [90]:
results_dict = dict(results)
for key in results_dict.keys():
    print(key + ": " + str(results_dict[key][0]))

task_idx: 0
task: emotions
score: 0.1111111111111111
synergy_impersonation_score: 0.16666666666666666
exec_seconds: 91.57588386535645
precision: 0
synergy_Corky: 0
impersonation_Corky: 0.0
synergy_Violet: 0.3333333333333333
impersonation_Violet: 0.3333333333333333
