In [1]:
from autogen import AssistantAgent, GroupChatManager, GroupChat, config_list_from_json
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent, TEXT_FORMATS

from CypherExecutor import CypherCodeExecutor

In [2]:
# This list will 'connect' to the llama3.1 model running on llm container
config_list = config_list_from_json(env_or_file="CONFIG_LIST", filter_dict={"model": "llama3.1"})


llm_config = {"config_list": config_list, "temperature": 0.2}

prompt = """You are a data scientist that works with Cypher queries.
Your work is to translate the given answer as Cypher queries.

You have some rules to respect:
- You must use cypher language.
- You can use the pdf files. 
- You must generate the easiest cypher query possible.
- Every query must be in a separate cypher format.
- You must be precise and explain everything, code outputs too. 
- If the code runs successfully you can send 'TERMINATE'.

Now i will give you some information about the database schema.
- nodes -
(:Movie), Describe a movie that has a title and a plot. It can also have the number of likes.
(:Person), Describe actors and directors. They have a name, a birthday.

- relationships -
(:Person) -[:ACTED_IN]-> (:Movie)

QUESTION IS:
{input_question}
"""

print(f'Accepted formats for "docs_path": \n{TEXT_FORMATS}')

Accepted formats for "docs_path": 
['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']


In [3]:
def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()

doc_retriever = RetrieveUserProxyAgent(
    name="doc_retriever",
    is_termination_msg=termination_msg,
    max_consecutive_auto_reply=3,
    human_input_mode="NEVER",
    retrieve_config={
        "task": "code",
        "docs_path": "https://s3.amazonaws.com/artifacts.opencypher.org/openCypher9.pdf",
        "model": config_list[0]["model"],
        "get_or_create": True,
        "customized_prompt": prompt
    },
    code_execution_config=False,
    description="Assistant who has extra content retrieval power for solving difficult problems."
)

coder      = AssistantAgent(
    name="coder",
    is_termination_msg=termination_msg,
    system_message=prompt,
    llm_config=llm_config,
)

executor   = AssistantAgent(
    name="executor",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    description= """Executor provides feedback based on the errors and warning generated by the query.""",
    code_execution_config={"executor": CypherCodeExecutor()}
    )


PROBLEM  = "Who directed Matrix?"

def _reset_agents():
    """
    This function reset all the agents used for the group chat.
    This should be used every time you start a new conversation.
    """
    doc_retriever.reset()
    coder.reset()
    executor.reset()

#TODO: find out why match/case not working 
def state_transition(last_speaker, groupchat):
    """
    This function simply return the next speaker based on the last one.
    In this case we decided that doc_retriever should speak only to initiate chat
    and then the chat is from coder to executor in a sort of loop.
    """
    if last_speaker is coder: return executor
    elif last_speaker is executor: return coder
    elif last_speaker is doc_retriever: return coder

def rag_chat():
    _reset_agents()
    groupchat = GroupChat(agents=[doc_retriever, coder, executor], messages=[], 
                          max_round=5, speaker_selection_method=state_transition)
    manager   = GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    doc_retriever.initiate_chat(manager,message=doc_retriever.message_generator, problem=PROBLEM,n_results=3)


  from tqdm.autonotebook import tqdm, trange


In [7]:
rag_chat()

2024-09-11 12:11:38,775 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - [32mUse the existing collection `autogen-docs`.[0m


Trying to create collection.


max_tokens is too small to fit a single line of text. Breaking this line:
	          ...
Failed to split docs with must_break_at_empty_line being True, set to False.
2024-09-11 12:11:46,815 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 287 chunks.[0m
Model llama3.1 not found. Using cl100k_base encoding.


VectorDB returns doc_ids:  [['bb85fb1c', 'fd79e570', 'c3fdf098']]
[32mAdding content of doc bb85fb1c to context.[0m


Model llama3.1 not found. Using cl100k_base encoding.


[32mAdding content of doc fd79e570 to context.[0m


Model llama3.1 not found. Using cl100k_base encoding.


[33mdoc_retriever[0m (to chat_manager):

You are a data scientist that works with Cypher queries.
Your work is to translate the given answer as Cypher queries.

You have some rules to respect:
- You must use cypher language.
- You can use the pdf files. 
- You must generate the easiest cypher query possible.
- Every query must be in a separate cypher format.
- You must be precise and explain everything, code outputs too. 
- If the code runs successfully you can send 'TERMINATE'.

Now i will give you some information about the database schema.
- nodes -
(:Movie), Describe a movie that has a title and a plot. It can also have the number of likes.
(:Person), Describe actors and directors. They have a name, a birthday.

- relationships -
(:Person) -[:ACTED_IN]-> (:Movie)

QUESTION IS:
Who directed Matrix?


--------------------------------------------------------------------------------
[32m
Next speaker: coder
[0m
