# Imports and boilerplate

In [5]:
import asyncio
import concurrent
import os
import sys

# Allow loading dialogue middleware packages
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)

from benchmark import agent_interface
from benchmark import dataset_utils
from benchmark import dialogue_graph
from benchmark import task_runner
from tqdm.asyncio import tqdm


# TODO: Your agent implementation goes here

Template of the `dialogue_state` string that is passed to the agent:


```Python
"""<dialogue_state_start>
<facts_start>
Fact1
Fact2
<facts_end>

<style_start>
Style1
Style2
<style_end>

<goals_start>
Goal1
Goal2
<goals_end>

<chat_history_start>
Alice: Hey Bob
Bob: Hey Alice
Bob: I'm great, how are you?
Alice: Lousy
<chat_history_end>
<dialogue_state_end>"""
```

In [6]:
from iconic_tools import langchain
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate


# LLM that plays the agent. `langchain` here is the `iconic_tools.langchain`
# module imported above, not the upstream `langchain` package.
# NOTE(review): temperature=0 is presumably chosen for repeatable output — confirm.
agent_llm = langchain.InstructGPT4(temperature=0, max_tokens=4096)
# Parses the model's reply as JSON; VirtualActorResponse is handed to the parser
# as its pydantic_object, and the parser's format instructions are injected into
# the prompt by generate_next_line().
parser = JsonOutputParser(pydantic_object=agent_interface.VirtualActorResponse)
# Prompt template with two placeholders that must be supplied at invoke time:
#   {output_instructions} - the output format instructions for the model
#   {dialogue}            - the dialogue state the model should continue
prompt = ChatPromptTemplate.from_template(
"""
Given the following dialogue, predict the next line in that dialogue.

Do not return any text that is not the JSON. Follow these instructions:\n{output_instructions}

Here's the dialogue until now, along with the contextual information:
{dialogue}
""")


def generate_next_line(dialogue_state: str) -> agent_interface.VirtualActorResponse:
    """Ask the agent LLM for the next line of the given dialogue.

    Pipes the module-level ``prompt`` into ``agent_llm`` and then ``parser``,
    fills the prompt's ``{dialogue}`` and ``{output_instructions}`` slots, and
    wraps the parsed result in a ``VirtualActorResponse``.

    Args:
        dialogue_state: Text inserted into the ``{dialogue}`` slot of the prompt.

    Returns:
        A ``VirtualActorResponse`` constructed from the keyword arguments
        found in the parsed model output.
    """
    pipeline = prompt | agent_llm | parser
    prompt_inputs = {
        "dialogue": dialogue_state,
        # The parser describes the JSON shape it expects; pass that on to the model.
        "output_instructions": parser.get_format_instructions(),
    }
    parsed_reply = pipeline.invoke(prompt_inputs)
    return agent_interface.VirtualActorResponse(**parsed_reply)

# Evaluation of the agent on a custom dialogue scene

In [7]:

# TODO: select the llm to use for metric calculations
metric_llm = langchain.InstructGPT4(temperature=0, max_tokens=4096)

# TODO: write the scene you want to cast the agent in
# The scene is given as three lists of strings: facts, communication style
# and goals. The whitespace inside the triple-quoted strings is part of the
# text that is passed to Dialogue().
initial_state = dialogue_graph.Dialogue(
    facts = ["""
    Alice and Bob, old friends, are meeting in a coffee shop. Bob is very thirsty, but didn't bring any money. 
    """],
    comm_style = ["""
                Alice is a cheerful and helpful person.
                Bob is a little bit shy.
              """],
    goals = [
        "Alice and Bob say hello",
        "Bob says that he is thirsty.",
        "Alice offers to buy him coffee.",
    ],
)

# Run the evaluation
# Top-level `await` relies on the notebook's (IPython) autoawait support; a
# plain Python script would need to wrap this call, e.g. with asyncio.run().
# `generate_next_line` is passed as the agent under evaluation and
# `metric_llm` as the model used for metric calculation.
results = await task_runner.eval_agent(
    task_idx=0, 
    task=dataset_utils.Tasks.EMOTIONS, 
    initial_state=initial_state, 
    agent=generate_next_line, 
    metric_calc_llm=metric_llm)

print(results)

  0%|          | 0/4 [00:00<?, ?it/s]                                  
[A

[A[A


[A[A[A



 25%|██▌       | 1/4 [00:18<00:54, 18.10s/it]
[A

100%|██████████| 2/2 [00:14<00:00,  7.12s/it]



100%|██████████| 3/3 [00:14<00:00,  4.75s/it]




100%|██████████| 3/3 [00:14<00:00,  4.75s/it]
100%|██████████| 2/2 [00:18<00:00,  9.05s/it]
100%|██████████| 2/2 [00:18<00:00,  9.05s/it]
100%|██████████| 4/4 [00:18<00:00,  4.53s/it]

   task_idx      task  score  exec_seconds  precision  synergy  \
0         0  emotions    1.0      23.43347          1      1.0   

   impersonation_Alice  impersonation_Bob  
0                  1.0                1.0  



