In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported local
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import autogen
from autogen import register_function, AssistantAgent, UserProxyAgent
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import QdrantRetrieveUserProxyAgent
from autogen.agentchat.contrib.web_surfer import WebSurferAgent
from qdrant_client import QdrantClient
from ARGO import ArgoWrapper
from CustomLLMAutogen2 import ARGO_LLM
from typing import Dict, List
import pandas as pd
from dotenv import load_dotenv

In [None]:
# Load variables from a .env file into the process environment; the API keys read
# through os.environ in later cells are presumably supplied this way.
load_dotenv()

In [None]:
# All available models. Each entry carries "tags" so a backend can be selected below.
config_list = [
    {
        "model": "gemini-pro",
        "api_key": os.environ["GOOGLE_API_KEY"],
        "api_type": "google",
        "tags": ["gemini", "api"],
    },
    # Models served by the local Ollama endpoint differ only in name and tag.
    *(
        {
            "model": model_name,
            "api_key": "ollama",
            "base_url": 'http://localhost:11434/v1',
            "tags": ["ollama", family],
        }
        for model_name, family in (
            ("phi3", "phi"),
            ("llama3", "llama"),
            ("mistral", "mistral"),
            ("gemma:7b", "gemma"),
        )
    ),
    {
        'model': 'gpt-3.5-turbo-16k',
        'api_key': os.environ["OPENAI_API_KEY"],
        'tags': ['gpt3.5'],
    },
    {
        'model': 'Argo',
        'api_type': 'argo',
        'argo_client': ARGO_LLM(argo=ArgoWrapper, model_type='gpt4', temperature=0.3),
        'tags': ['argo'],
    },
]

# Filter the model entries by tag.
filter_dict = {'tags': ['gpt3.5']}
config_list = autogen.filter_config(config_list, filter_dict)

In [None]:
# Sets up configuration for agents
llm_config = {
    "config_list": config_list,
    "cache_seed": None,  # Ensures differing responses
    "timeout": 600,
    "seed": 42,
    "temperature": 1,  # Temperature max is 2
}
# Preview the settings without "config_list": its entries hold "api_key" values, and
# echoing them here would store the credentials in the notebook's saved output.
{name: value for name, value in llm_config.items() if name != "config_list"}

In [None]:
from chromadb.utils import embedding_functions

# Embedding function from chromadb's Google Generative AI wrapper, keyed by GOOGLE_API_KEY.
googleai_embedding_function = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=os.environ["GOOGLE_API_KEY"],
)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with an explicit separator list; its split_text method is what
# gets passed to the retrieval agent as the custom text split function.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "\r", "\t"],
)

In [None]:
def termination_msg(x):
    """Tell whether a chat message ends with the word TERMINATE.

    Args:
        x: The message to inspect. Only dicts are considered; anything else
            yields False.

    Returns:
        True when the last nine characters of the message's "content"
        (converted to a string, compared case-insensitively) spell
        "TERMINATE"; False otherwise.
    """
    if not isinstance(x, dict):
        return False
    # Only the final nine characters matter, so trailing punctuation or
    # whitespace after the keyword prevents a match.
    tail = str(x.get("content", ""))[-9:]
    return tail.upper() == "TERMINATE"

In [None]:
# Template describing one generated question. The values are instructions for the
# model rather than data: the whole dict is formatted into the Expert agent's system
# prompt, and its keys match the parameters of add_question.
SCHEMA = {
  "question": "The generated question",
  "correct_answer": "The correct answer",
  "distractors": [
    "Incorrect answer #1", "Incorrect answer #2", "Incorrect Answer #3", "Incorrect Answer #4"
  ],
  "skills": '''Choose the necessary skills for answering the question using at least one of the following options 
  [Generalization, Basic comprehension, Summarization, Interpolation/extrapolation, Cross-domain application, Reasoning, General knowledge, Fundamental domain science concepts, Understanding identifiers/notation, Understanding evolution of ideas]''',
  "domains": '''Choose the most applicable domain of the question using at least one of the following options 
  [physics, material science, biology, chemistry, computer science, mathematics, climate]''',
  "difficulty": "Choose the difficulty of the question using one of the following options ['easy', 'medium', 'hard'] ",
  "doi": "Identify the digital object identifier (DOI) of the paper and provide it here. It will be the link of the form doi.org",
  # The remaining fields are fixed metadata rather than instructions.
  "author": "Jose A. Tandoc",
  "comments": "generated question",
  "affiliation": "Argonne",
  "position": "Student",
}

In [None]:
# Task prompt handed to the retrieval agent and embedded in the Expert's system prompt.
# It contains no placeholders, so it is a plain string rather than an f-string; the
# text itself is unchanged.
problem = '''Generate 3 unique 5 choice multiple choice question that are extremely difficult. Use the paper as context, but do not directly reference the paper in the question.
These questions should be general knowledge, such that the question statement cannot have 'paper', 'experiment', or 'study'.
All context required to answer the question must be provided within the question statement.
This question MUST be answerable without reading the paper.
There should be exactly one correct answer.
The incorrect answers must be difficult to distinguish from the correct answer, however they cannot be correct under the context of the paper.
The incorrect answers are 'distractors' that are designed to be confuse the large language model that is answering the question.
'''

# Path of the source paper. Set QUESTION_DOC_PATH (for example in the .env file) to
# run against a different file or on another machine; when it is unset, the original
# machine-specific location is used, so existing behavior is preserved.
doc = os.environ.get(
    "QUESTION_DOC_PATH",
    r"C:\Users\judoc\Documents\Work\Argonne\papers\Mechanism Design for Large Language Models.pdf",
)
# Name of the vector-store collection used for this paper.
collection = 'mechDesign3'

In [None]:
# Retrieval agent that starts the chat: it indexes `doc` into the Qdrant collection
# and supplies retrieved context for the problem.
retrieval_assistant = QdrantRetrieveUserProxyAgent(
    name="assistant",
    human_input_mode="NEVER",
    default_auto_reply="Reply `TERMINATE` if the task is done.",
    # max_consecutive_auto_reply=10,
    retrieve_config={
        "task": "qa",
        "docs_path": [
            doc,
        ],
        "custom_text_split_function": text_splitter.split_text,
        # This key was previously misspelled "embedding_funcion", so nothing could read
        # it under the documented name.
        # NOTE(review): the Qdrant agent may choose its embeddings through
        # "embedding_model" instead; confirm this setting is honored, and that the
        # existing collection was built with matching embeddings.
        "embedding_function": googleai_embedding_function,
        "client": QdrantClient(url="https://7f9bbc68-cbea-48e0-9841-2dd23f878d28.us-east4-0.gcp.cloud.qdrant.io", api_key= os.environ["QDRANT_API_KEY"]),
        "collection_name": collection,
        "get_or_create": True,
    },
    code_execution_config=False,
    description="Assistant who has extra content retrieval power for solving difficult problems.",
)

# Question writer: its system prompt embeds both the task text (`problem`) and the
# output template (`SCHEMA`).
expert = AssistantAgent(
    "Expert",
    system_message=f'''You are an expert on {problem} Assist in answering the problem. Then, put the information in a list using the following schema: {SCHEMA}. Follow the instructions of the SCHEMA.
    You should return a list: [SCHEMA, SCHEMA, SCHEMA].
    Reply 'TERMINATE' in the end when everything is done.''',
    llm_config=llm_config,
    human_input_mode="NEVER",  # Never ask for human input.
    description="Expert in question generation.",
)

# Reviewer that sends questions back to the expert when they refer to the paper.
# NOTE(review): this agent is reset in _reset_agents and appears in
# allowed_transitions, but the GroupChat built in rag_chat does not list it among
# its agents — confirm whether it is meant to take part in the chat.
generality_verifier = autogen.ConversableAgent(
    "generality_verifier",
    system_message=f'''You are a generality verifier that ensures that the question is general enough to be answered without reading the paper.
    If the question mentions 'in the paper' or 'in this study', then it is too specific and you prompt the expert to formulate new questions.
    Reply 'TERMINATE' in the end when everything is done.''',
    llm_config=llm_config,
    human_input_mode="NEVER",
    description="Generality Verifier who ensures that each question is general enough to be answered without reading the given paper.",
)

# Disabled: correctness verifier agent.
# verifier = autogen.ConversableAgent(
#     "Verifier",
#     system_message=f'''You are a verifier that uses the provided context for {problem} to ensure that the expert has concluded a correct answer. 
#     Reply 'VALID' if the question and answers are correct and if the format is correct.
#     Otherwise, reply 'INVALID' and provide feedback on how to improve the question and answers.''',
#     llm_config=llm_config,
#     human_input_mode="NEVER",  # Never ask for human input.
#     description="Verifier who can verify the correctness of expert's questions.",
# )
# Agent that turns the generated questions into add_question tool calls; it is the
# `caller` passed to register_function.
automator = AssistantAgent(
    'question_generation_automator',
    system_message=f'''You are an automator that takes the generated question and pipes it into a pandas dataframe using the add_question function.
    Reply 'TERMINATE' in the end when everything is done.''',
    llm_config=llm_config,
    human_input_mode="NEVER",
    description="Automator utilizes functions to automate the process of generating questions.",
)

# Agent that executes the add_question tool calls (the `executor` passed to
# register_function). It is created without an llm_config and stops on messages
# accepted by termination_msg.
user_proxy = autogen.ConversableAgent(
    name="User",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    code_execution_config=False,
    description="The proxy that adds the question to the CSV file using add_question function"
)

# Disabled: web-browsing agent intended to look up a paper's DOI from its title.
# web_surfer = WebSurferAgent(
#     "web_surfer",
#     system_message=f'''You are a web surfer that can find the DOI of a paper using the paper's title.
#     Reply 'TERMINATE' in the end when everything is done''',
#     llm_config=llm_config,
#     summarizer_llm_config=llm_config,
#     browser_config={"viewport_size": 4096, "bing_api_key": os.environ["BING_API_KEY"]},
#     description="Web Surfer who can search the web for information.",
# )

In [None]:
def add_question(question: str, correct_answer: str, distractors: List[str], skills: str, domains: str, difficulty: str, doi: str, author: str, comments: str, affiliation: str, position: str) -> str:
    """Append one generated question to generatedQuestions/generated_questions.csv.

    Each argument becomes the column of the same name in a single new row. If the
    CSV already exists, the row is appended to its current contents; otherwise the
    file (and its folder, if missing) is created.

    Args:
        question: The question statement.
        correct_answer: The correct answer.
        distractors: The incorrect answers; stored in one cell as the list's text form.
        skills, domains, difficulty, doi, author, comments, affiliation, position:
            Metadata stored verbatim in the matching columns.

    Returns:
        The confirmation string "Question added to the CSV file."
    """
    csv_path = r"generatedQuestions/generated_questions.csv"
    cur = pd.DataFrame([{
        "question": question,
        "correct_answer": correct_answer,
        "distractors": distractors,
        "skills": skills,
        "domains": domains,
        "difficulty": difficulty,
        "doi": doi,
        "author": author,
        "comments": comments,
        "affiliation": affiliation,
        "position": position,
    }])
    # to_csv does not create missing folders, so a first run used to fail when the
    # output directory had not been made by hand.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    if os.path.exists(csv_path):
        df = pd.concat([pd.read_csv(csv_path), cur], ignore_index=True)
        df.to_csv(csv_path, index=False)
    else:
        cur.to_csv(csv_path, index=False)
    return "Question added to the CSV file."

In [None]:
def add_questions(questions: List[Dict]) -> str:
    """Append several generated questions to generatedQuestions/generated_questions.csv.

    Args:
        questions: One dict per question; the dict keys become the CSV columns.
            An empty list leaves the file untouched.

    Returns:
        The string 'Finished'.
    """
    csv_path = r"generatedQuestions/generated_questions.csv"
    if not questions:
        # Nothing to write; avoids building an empty frame or touching the file.
        return 'Finished'
    # Build the frame in one step. The earlier loop compared a DataFrame with
    # `== None` (which raises on the second item) and concatenated onto `df`
    # before it was assigned.
    cur = pd.DataFrame(list(questions))
    # to_csv does not create missing folders.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    if os.path.exists(csv_path):
        df = pd.concat([pd.read_csv(csv_path), cur], ignore_index=True)
        df.to_csv(csv_path, index=False)
    else:
        cur.to_csv(csv_path, index=False)
    return 'Finished'

In [None]:
# Expose add_question as a tool, with the automator as caller and user_proxy as executor.
# NOTE(review): the description says "all generated questions", but add_question takes
# the fields of a single question per call — confirm this is the wording the model
# should see.
register_function(
    add_question,
    caller=automator,
    executor=user_proxy,
    name="add_question",
    description="Adds all generated questions to the CSV file.",
)

In [None]:
def _reset_agents():
    """Call reset() on each agent defined for the chat, in a fixed order."""
    active_agents = (
        retrieval_assistant,
        automator,
        generality_verifier,
        expert,
        user_proxy,
    )
    for agent in active_agents:
        agent.reset()
    #verifier.reset()
    #web_surfer.reset()

In [None]:
# Speaker graph: each key is an agent and its value lists the agents allowed to
# speak next.
# NOTE(review): this mapping is currently unused — the GroupChat arguments that
# would consume it are commented out in rag_chat, which uses round-robin instead.
allowed_transitions = {
    retrieval_assistant: [expert],
    expert: [generality_verifier],
    generality_verifier: [expert, automator],
    #verifier: [user_proxy, expert],
    automator: [user_proxy],
    user_proxy: [],
}

In [None]:
def rag_chat():
    """Reset the agents and run one round-robin group chat on `problem`.

    The chat is made of the retrieval agent, the expert, the automator and the
    user proxy, and is capped at 10 rounds. The retrieval agent opens the
    conversation with the group chat manager, asking for 3 retrieved results.
    Nothing is returned.
    """
    _reset_agents()

    participants = [retrieval_assistant, expert, automator, user_proxy]
    chat = autogen.GroupChat(
        agents=participants,
        messages=[],
        max_round=10,
        #allowed_or_disallowed_speaker_transitions=allowed_transitions,
        #speaker_transitions_type="allowed",
        speaker_selection_method="round_robin",
        send_introductions=True,  # Provides information on each agent in the group chat to the manager.
    )

    # Same settings as the agents' llm_config, minus the temperature entry.
    manager_llm_config = {
        "config_list": config_list,
        "cache_seed": None,  # Ensures differing responses
        "timeout": 600,
        "seed": 42,
    }
    manager = autogen.GroupChatManager(groupchat=chat, llm_config=manager_llm_config)

    # The retrieval agent initiates the chat and builds the first message itself.
    retrieval_assistant.initiate_chat(
        manager,
        message=retrieval_assistant.message_generator,
        problem=problem,
        n_results=3,
    )

In [None]:
# Run the question-generation chat; questions are saved through the add_question tool.
rag_chat()

In [None]:
# user_proxy = autogen.ConversableAgent(
#     name="User",
#     llm_config=False,
#     is_termination_msg=lambda msg: msg.get("content") is not None and "TERMINATE" in msg["content"],
#     human_input_mode="NEVER",
# )

# assistant = autogen.ConversableAgent(
#     name="Assistant",
#     system_message="You are a helpful AI assistant. "
#     "You help with JSON Formatting"
#     "Return 'TERMINATE' when the task is done.",
#     llm_config=llm_config,
# )
# test = []
# def json_return(first_name: str, last_name: str, email: str) -> Dict:
#     form = {
#         'first_name': first_name,
#         'last_name': last_name,
#         'email': email,
#     }
#     test.append(form)
#     return form

# register_function(
#     json_return,
#     caller=assistant,  # The assistant agent can suggest calls to json_return.
#     executor=user_proxy,  # The user proxy agent can execute the json_return calls.
#     name="json_return",  # By default, the function name is used as the tool name.
#     description="A json object returner",  # A description of the tool.
# )
# form = {
#     'first_name': 'first name',
#     'last_name': 'last name',
#     'email': 'email',
# }
# chat_result = user_proxy.initiate_chat(assistant, message= f'''Produce a json object using the following format: {form} given this information: Alec Tandoc, metandoc@gmail.com''', max_turns=3)