A Jupyter notebook that generates multiple-choice questions from the provided papers, filters them, and submits them to the Aurora API.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import autogen
from autogen import register_function, AssistantAgent, UserProxyAgent, ConversableAgent
from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import QdrantRetrieveUserProxyAgent
from chromadb.utils import embedding_functions
from langchain.text_splitter import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from ARGO import ArgoWrapper
from CustomLLMAutogen2 import ARGO_LLM
from typing import Dict, List, Any, Optional
import pandas as pd
from dotenv import load_dotenv
import glob
import ast

In [None]:
# Environment variables expected in the local .env file, which load_dotenv() reads below.
# Required:
# OPEN_AI_API_KEY
#   NOTE(review): the OpenAI client conventionally reads OPENAI_API_KEY (no underscore
#   after OPEN) -- confirm which name the .env file has to define.
# GOOGLE_API_KEY, for free, found in Google AI Studio: https://ai.google.dev/gemini-api
# ID, the author id used when submitting; must be obtained from the Aurora API, found here https://web.cels.anl.gov/projects/auroragptquestions/docs#/default/store_author_api_author_post
# Optional (only for a persistent Qdrant store instead of the in-memory client):
# QDRANT_URL
# QDRANT_API_KEY
load_dotenv()

In [None]:
# All available models. Each entry carries a tag so it can be selected below.
# Note that the ARGO_LLM client is constructed here even when the 'argo' entry
# is filtered out afterwards.
config_list = [
        {
            'model': 'gpt-3.5-turbo-16k',
            'tags': ['gpt3.5']
        },
        {
            'model': 'Argo',
            'api_type': 'argo',
            'argo_client': ARGO_LLM(argo=ArgoWrapper,model_type='gpt4', temperature = 0.3),
            'tags': ['argo']
        },
]
# Keep only the models whose tags match filter_dict (currently the 'gpt3.5' entry).
filter_dict = {'tags': ['gpt3.5']}
config_list = autogen.filter_config(config_list, filter_dict)

In [None]:
# Sets up configuration for agents.
# llm_config is used by the automator and the analyzer; it runs on the filtered
# config_list with a low temperature.
# NOTE(review): both "cache_seed" and "seed" are set. Confirm how "seed" is
# interpreted by the installed autogen version; a fixed sampling seed would work
# against the "differing responses" goal stated next to cache_seed.
llm_config = {
    "config_list": config_list, 
    "cache_seed": None, # Ensures differing responses
    "timeout": 600,
    "seed": 44,
    "temperature": 0.2, # Temperature max is 2
}
# GPT4 for better question generation. Used only by the expert agent; a higher
# temperature is set and a JSON object is requested as the response format.
llm_config_gen = {
    "config_list": 
    [
        {
            'model': 'gpt-4-turbo',
        }
    ], 
    "cache_seed": None, # Ensures differing responses
    "timeout": 600,
    "seed": 44,
    "temperature": 1.0, # Temperature max is 2
    "response_format": { "type": "json_object" },
}
# Display the effective configuration as the cell output.
llm_config

In [None]:
# Necessary embedding function for the vector database.
# Raises KeyError at this point if GOOGLE_API_KEY is not present in the environment.
googleai_embedding_function= embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key = os.environ["GOOGLE_API_KEY"])

In [None]:
# Chunks text for vectorization. Splits on blank lines first, then single
# newlines, carriage returns and tabs; chunk size and overlap are left at the
# library defaults because none are passed here.
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", "\r", "\t"])

In [None]:
def termination_msg(x):
    """
    Tell whether a chat message asks for the conversation to end.

    A message terminates the chat when it is a dict whose "content", ignoring
    case and any trailing whitespace, ends with the word TERMINATE. Models often
    append a newline after the keyword, which an exact comparison of the last
    nine characters would miss.

    Args:
        x: The message to inspect. Anything that is not a dict never terminates.

    Returns:
        True when the message ends with TERMINATE, otherwise False.
    """
    if not isinstance(x, dict):
        return False
    # str() keeps a missing or None content from raising; "None" never matches.
    content = str(x.get("content", "")).rstrip()
    return content.upper().endswith("TERMINATE")

In [None]:
# Number of questions requested per paper in the `problem` prompt.
# IF EDITED, change the add_questions function as well: it takes exactly five
# question arguments. The expert's system message (question1..question5) and the
# register_function description also hard-code five.
QUESTION_COUNT = 5

In [None]:
# Task statement for question generation. It is embedded in the expert's system
# message and also passed as the retrieval problem when the chat is initiated.
# NOTE(review): the last sentence reads "designed to be confuse"; left as is
# because changing the prompt text changes what the model receives.
problem = f'''For each paper, generate {QUESTION_COUNT} unique and extremely difficult to answer multiple choice questions with 5 choices each.
The answerer does not have access to the paper, you cannot require context for the question. Do not include author names, dates, or any other identifying information.
These should be general knowledge questions with supporting evidence from the paper.
All context required to answer the question must be provided within the question statement.
The question statement cannot include 'in this study', 'in this paper', 'according to the paper', etc.
There should be exactly one correct answer.
The incorrect answers must be difficult to distinguish from the correct answer, however they cannot be correct.
The incorrect answers are 'distractors' that are designed to be confuse the large language model that is answering the question.
'''

In [None]:
def schema(filename: str):
    """
    Build the per-question template shown to the expert agent.

    Most values are instructions for the model rather than data; only ``doi``
    is filled in here, from ``filename``. The key order is the order in which
    the template is rendered inside the prompt.

    Args:
        filename: Identifier of the paper, stored under ``doi``.

    Returns:
        A dict with one entry per schema field.
    """
    skill_options = [
        "Generalization",
        "Basic comprehension",
        "Summarization",
        "Interpolation/extrapolation",
        "Cross-domain application",
        "Reasoning",
        "General knowledge",
        "Fundamental domain science concepts",
        "Understanding identifiers/notation",
        "Understanding evolution of ideas",
    ]
    domain_options = [
        "physics",
        "material science",
        "biology",
        "chemistry",
        "computer science",
        "mathematics",
        "climate",
    ]
    # Line breaks and the two-space indent are spelled out so the rendered text
    # stays exactly as before (note the space before the first newline).
    skills_instruction = (
        "Choose the necessary skills for answering the question using at least one of the following options \n"
        f"  {skill_options}\n"
        f"  Must be in the form of a {list}"
    )
    domains_instruction = (
        "Choose the most applicable domains for the question using at least one of the following options\n"
        f"  {domain_options}\n"
        f"  Must be in the form of a {list}"
    )
    template = {}
    template["question"] = "The generated question"
    template["correct_answer"] = "The correct answer"
    template["distractors"] = [
        "Incorrect answer #1",
        "Incorrect answer #2",
        "Incorrect Answer #3",
        "Incorrect Answer #4",
    ]
    template["skills"] = skills_instruction
    template["domains"] = domains_instruction
    template["difficulty"] = "Choose the difficulty of the question using one of the following options ['easy', 'medium', 'hard'] "
    template["doi"] = filename
    template["author"] = "BLANK"
    template["support"] = "NA"
    template["comments"] = "generated question"
    return template

In [None]:
# Column names of the question tables, in the same order as the keys returned by
# schema(). Used to create the WORKING and FINAL DataFrames and to select and
# order WORKING's columns before testing.
SCHEMA_ENTRIES = [
        "question",
        "correct_answer",
        "distractors",
        "skills",
        "domains",
        "difficulty",
        "doi",
        "author",
        "support",
        "comments"
    ]

In [None]:
def initiate_RAG_and_expert(docs: List[str], collection: str, filename: str):
    """
    Build the retrieval agent and the question-writing expert for one paper.

    Args:
        docs: Paths of the documents to index; handed to the retrieval agent as
            ``docs_path``.
        collection: Name of the Qdrant collection to use for this paper.
        filename: Identifier of the paper; it ends up in the ``doi`` field of
            the schema embedded in the expert's system message.

    Returns:
        A ``(retrieval_assistant, expert)`` tuple.
    """
    retrieval_assistant = QdrantRetrieveUserProxyAgent(
        name="assistant",
        human_input_mode="NEVER",
        default_auto_reply="Reply `TERMINATE` if the task is done.",
        # max_consecutive_auto_reply=10,
        retrieve_config={
            "task": "qa",
            "docs_path": docs,
            "custom_text_split_function": text_splitter.split_text,
            # This key used to be misspelled "embedding_funcion", a name nothing
            # reads, so the Google embedding function was silently dropped.
            # NOTE(review): confirm the Qdrant agent honours this key; it may
            # embed through its own "embedding_model" setting instead.
            "embedding_function": googleai_embedding_function,
            "client": QdrantClient(":memory:"),
            # "client": QdrantClient(os.environ["QDRANT_URL"], os.environ["QDRANT_API_KEY"]),
            # Memory option functional for one-time use, otherwise use the commented out line above for persistent storage
            "collection_name": collection,
            "get_or_create": True,
        },
        code_execution_config=False,
        description="Assistant who has extra content retrieval power for solving difficult problems.",
    )
    # The expert gets the task statement and the schema baked into its system
    # message and runs on the GPT-4 generation config.
    expert = AssistantAgent(
        "Expert",
        system_message=f'''You are an expert on {problem} Assist in answering the problem. Then, put the information in a list using the following schema: {schema(filename)}. Follow the instructions of the SCHEMA.
    You should return a python {Dict}: {{'question1': SCHEMA, 'question2': SCHEMA, 'question3': SCHEMA, 'question4': SCHEMA, 'question5': SCHEMA}}. Do not change the author, question, affiliation, or comments fields. This response must be a JSON object.
    Reply 'TERMINATE' in the end when everything is done.''',
        llm_config=llm_config_gen,
        human_input_mode="NEVER",  # Never ask for human input.
        description="Expert in question generation.",
    )
    return retrieval_assistant, expert

# Shared across all papers: the automator turns the expert's JSON into an
# add_questions tool call (it is registered as the caller of that function).
automator = AssistantAgent(
    'question_generation_automator',
    system_message=f'''DO NOT UPDATE CONTEXT. You are an automator that takes the JSON object produced by the expert and makes a function call to the add_questions function.
    Reply 'TERMINATE' in the end when everything is done.''',
    llm_config=llm_config,
    human_input_mode="NEVER",
    description="Automator utilizes functions to automate the process of generating questions.",
)

# Shared across all papers: registered as the executor of add_questions. It has
# no llm_config and ends its chat when termination_msg matches.
user_proxy = autogen.ConversableAgent(
    name="User",
    system_message=f'''You are a user that can execute the add_questions function to add the generated questions to the CSV file.''',
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    code_execution_config=False,
    description="The proxy that adds the question to the CSV file using the add_questions function"
)

In [None]:
# In-memory table of the questions generated in this session; add_questions
# appends to it. Starts empty with one column per schema field.
WORKING = pd.DataFrame(
    columns=SCHEMA_ENTRIES
)

In [None]:
# Show the (still empty) working table as the cell output.
WORKING

In [None]:
# CSV file that accumulates every generated question; add_questions appends to
# it, and it is read back in full before the filtering step.
QUESTION_CSV = r"generatedQuestions/generated_questions.csv"

In [None]:
def add_questions(question1: Dict, question2: Dict, question3: Dict, question4: Dict, question5: Dict) -> str:
    """
    Adds questions to the WORKING dataframe and saves them in the CSV file.
    Agents cannot interpret dynamic arguments like **kwargs, so we have to pass each question individually.

    Args:
        question1..question5: One dict per generated question, keyed by the
            schema field names.

    Returns:
        The string 'Finished', reported back to the calling agent.
    """
    global WORKING
    questions = [question1, question2, question3, question4, question5]
    # Rows are appended to the CSV without a header, i.e. by position. Pinning
    # the columns to SCHEMA_ENTRIES keeps every append aligned with the header
    # even when the model emits keys in another order, omits one (left empty)
    # or adds extras (dropped).
    cur = pd.DataFrame(questions).reindex(columns=SCHEMA_ENTRIES)
    # Create the output folder on first use instead of failing on a fresh checkout.
    target_dir = os.path.dirname(QUESTION_CSV)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    # Append mode creates the file when missing; only then is a header written.
    write_header = not os.path.exists(QUESTION_CSV)
    cur.to_csv(QUESTION_CSV, mode='a', header=write_header, index=False)
    WORKING = pd.concat([WORKING, cur], ignore_index=True)
    return 'Finished'

In [None]:
# Expose add_questions as a tool: the automator proposes the call and the user
# proxy executes it. The description repeats the five-argument signature.
register_function(
    add_questions,
    caller=automator,
    executor=user_proxy,
    name="add_questions",
    description="Adds all generated questions to the CSV file. Arguments required: question1=Dict, question2=Dict, question3=Dict, question4=Dict, question5=Dict",
)

In [None]:
FILE_PATH = '../genQuestionPapers/*.pdf'
USED_FILE_PATH = '../genQuestionPapers/usedPapers/*.pdf'
docs = glob.glob(FILE_PATH)
used = glob.glob(USED_FILE_PATH)
filenames = [doc[doc.rfind('\\')+1:doc.rfind('.pdf')] for doc in docs]
used_filenames = [doc[doc.rfind('\\')+1:doc.rfind('.pdf')] for doc in used]
idx = 0
while idx < len(filenames):
    if filenames[idx] in used_filenames:
        docs.pop(idx)
        filenames.pop(idx)
    else:
        idx += 1
doc_count = len(docs)
filenames

In [None]:
def _reset_agents(retrieval_assistant, expert):
    """
    Reset every participant of the generation chat before the next document.

    The two per-document agents are passed in; the automator and the user proxy
    are the shared module-level agents.
    """
    for agent in (retrieval_assistant, expert, automator, user_proxy):
        agent.reset()

In [None]:
# Generation loop: one group chat per paper.
# NOTE: docs[:1] limits the run to the first paper only; widen the slice to
# process more of the discovered papers.
for idx, doc in enumerate(docs[:1]):
    """
    Initiates a new groupchat and for each new expert and retrieval assistant.
    """
    # Fresh retrieval agent and expert per paper, each with its own collection.
    retrieval_assistant, expert = initiate_RAG_and_expert([doc], f'doc{idx}', filenames[idx])
    def state_transition(last_speaker, groupchat):
        """
        Pick the next speaker of the group chat from the previous one.

        Order: retrieval assistant -> expert -> automator -> user proxy. If the
        expert's last message contains "UPDATE CONTEXT", control goes back to
        the retrieval assistant instead. Returns None after the user proxy
        (presumably ending the chat -- confirm against the autogen docs), and
        implicitly for any other speaker.
        """
        messages = groupchat.messages
        if last_speaker is retrieval_assistant:
            return expert
        elif last_speaker is expert:
            # Assumes the expert's message content is a string.
            if "UPDATE CONTEXT" in messages[-1]["content"]:
                return retrieval_assistant
            return automator
        elif last_speaker is automator:
            return user_proxy
        elif last_speaker is user_proxy:
            return None
    def rag_chat(retrieval_assistant):
        """
        Reset all agents, build the group chat and its manager, and start the
        conversation from the retrieval assistant with the `problem` prompt.
        """
        _reset_agents(retrieval_assistant, expert)
        groupchat = autogen.GroupChat(
            agents=[retrieval_assistant, expert, automator, user_proxy], messages=[], max_round=20,
            speaker_selection_method=state_transition,
            send_introductions=True, # Provides information on each agent in the group chat to the manager.
        )
        
        manager = autogen.GroupChatManager(groupchat=groupchat, llm_config={
            "config_list": config_list, 
            "cache_seed": None, # Ensures differing responses
            "timeout": 600,
            "seed": 42,
            }
        )

        # Start chatting with retrieval_assistant as this is the user proxy agent.
        retrieval_assistant.initiate_chat(
            manager,
            message=retrieval_assistant.message_generator,
            problem=problem,
            n_results=3,
        )
    rag_chat(retrieval_assistant)


In [None]:
# Number of questions generated in this session, taken before WORKING is
# replaced by the full contents of the CSV file.
WORKING_COUNT = WORKING.shape[0]

In [None]:
# Reload every question stored in the CSV, including rows appended by earlier
# runs. Malformed lines are skipped rather than raising.
WORKING = pd.read_csv(QUESTION_CSV, on_bad_lines='skip')

In [None]:
# Show the questions loaded from the CSV as the cell output.
WORKING

In [None]:
# Sender for the context check: never asks for human input and never
# auto-replies (max_consecutive_auto_reply=0).
user = UserProxyAgent(
        name="user",
        human_input_mode="NEVER",
        #is_termination_msg=lambda x: True if "TERMINATE" in x.get("content") else False,
        max_consecutive_auto_reply=0,
        code_execution_config=False
    )

# Judges whether a question is too tied to its source paper.
# NOTE(review): the system message asks for a final 'VALID', while the prompt
# sent in the filter loop asks for FALSITY or MAGICALITY, and only FALSITY is
# checked there -- confirm which wording is intended.
analyzer = ConversableAgent(
    name="Analyzer",
    llm_config=llm_config,
    system_message="""
    You are an analyzer who is experienced in determining if a question is too specific if it mentions a specific publication, author, or requires context from the origin paper to answer. Provide a step-by-step thought process, and finally reply with
    'VALID' 
    """,
)

# function_caller = UserProxyAgent(
#     name="function_caller",
#     human_input_mode="NEVER",
#     max_consecutive_auto_reply=2,
#     code_execution_config=False
# )


In [None]:
def dataframe_function(context_required: bool, index: Optional[int] = None) -> bool:
    """
    Remove a question from WORKING when it needs context from its paper.

    Args:
        context_required: Whether the question was judged to need context.
        index: Index label of the row to drop; only used when a row is dropped.

    Returns:
        True if the row was dropped, False if nothing was changed.
    """
    if not context_required:
        return False
    WORKING.drop(index, inplace=True)
    return True

### Optional function registration mode, malfunctions when used repeatedly by LLM
# register_function(
#     dataframe_function,
#     caller=analyzer,
#     executor=function_caller,
#     name="dataframe_function",
#     description="Removes the question from the dataframe. Arguments required: context_required=bool, index=int",
# )

# def speaker_selection(last_speaker, groupchat):
#     messages = groupchat.messages
#     if last_speaker is user:
#         return analyzer
#     elif last_speaker is analyzer:
#         return function_caller
#     else:
#         return None

In [None]:
# This was taken from Autogen's ReAct demonstration: https://microsoft.github.io/autogen/docs/topics/prompting-and-reasoning/react/
# NOTE: this ReAct prompt is adapted from Langchain's ReAct agent: https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py#L79
ReAct_prompt = """
Answer the following questions as best you can. You have access to tools provided.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take
Action Input: the input to the action
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!
Question: {input}
"""


def react_prompt_message(sender, recipient, context):
    """
    Render the ReAct prompt for one chat.

    Used as the ``message`` callable of ``initiate_chat``; ``sender`` and
    ``recipient`` are accepted for that signature but not used. ``context``
    must contain a "question" entry, which replaces the ``{input}`` slot.
    """
    question = context["question"]
    rendered = ReAct_prompt.format(input=question)
    return rendered

In [None]:
# Context filter: ask the analyzer about every question in WORKING and drop the
# ones it marks with FALSITY.
# NOTE(review): groupchat and manager are created here, but the chat below is
# started directly with the analyzer, so they do not appear to be used.
groupchat = autogen.GroupChat(
        agents=[user, analyzer], messages=[], max_round=3,
        #speaker_selection_method=speaker_selection,
    )
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
for i, row in WORKING.iterrows():
    """
    Throws out questions that require context to answer.
    """
    # Start every check from a clean conversation.
    user.reset()
    analyzer.reset()
    #function_caller.reset()
    # The text passed as `question` is inserted into the ReAct prompt by
    # react_prompt_message.
    # NOTE(review): the prompt contains a stray double quote after the question
    # text, and WORKING is modified while it is being iterated.
    response = user.initiate_chat(analyzer, 
                       message=react_prompt_message,
                       question=f"""Analyze if the question can be answered without reading the origin paper and without searching up anything.
                       If it requires context, respond FALSITY.
                       Otherwise, response MAGICALITY.
                       question: {row['question']}"
                       """, n_results=1, send_introductions=True, max_round=1)
    # The last message of the chat history is checked case-insensitively.
    if 'FALSITY' in response.chat_history[-1]['content'].upper():
        dataframe_function(context_required=True, index=i)

In [None]:
# Number of questions left after the context filter.
FILTER_COUNT = WORKING.shape[0]

In [None]:
# Show the questions that survived the context filter as the cell output.
WORKING

In [None]:
def prep_data(cur: pd.Series) -> pd.Series:
    """
    Prepares one question row for the Aurora API.

    List-valued fields come back from the CSV as strings, so they are parsed
    back into Python objects. The author, support and comments fields are then
    overwritten with the values used for submission.

    Args:
        cur: One question row, indexed by the schema field names.

    Returns:
        A modified copy of ``cur``; the input row is left untouched.

    Raises:
        KeyError: If the ``ID`` environment variable is not set.
        ValueError, SyntaxError: If a list-valued field holds a string that is
            not a valid Python literal.
    """
    data = cur.copy()
    for field in ('distractors', 'skills', 'domains'):
        # Only strings need parsing; real lists (and missing values) pass through.
        if isinstance(data[field], str):
            data[field] = ast.literal_eval(data[field])
    data['author'] = int(os.environ["ID"])
    data['support'] = "NA"
    data['comments'] = "generated question"
    return data

In [None]:
# Keep only the schema columns, in schema order; raises KeyError if the loaded
# CSV lacks any of them.
WORKING = WORKING[SCHEMA_ENTRIES]
# Collects the questions that pass the model test below.
FINAL = pd.DataFrame(
    columns=SCHEMA_ENTRIES
)

In [None]:
# Tests each question against Mistral7b, Llama2-7b, Llama3-8b through the Aurora
# test endpoint. If at least two models fail the question (at most one answers
# correctly), the question is saved in FINAL.
from json import loads, dumps
import requests
import pandas as pd
length = WORKING.shape[0]
headers = {
            "Content-Type": "application/json"
        }
TEST_URL = 'https://web.cels.anl.gov/projects/auroragptquestions/api/test_question'
SUBMIT_URL = 'https://web.cels.anl.gov/projects/auroragptquestions/api/question'
# requests waits forever by default; cap each call so a stalled server cannot
# hang the notebook. Matches the timeout used for the LLM configs.
REQUEST_TIMEOUT = 600
for idx in range(length):
    cur = WORKING.iloc[idx]
    edited_data = prep_data(cur)
    # Rows with any missing field cannot be tested; leave them out.
    if edited_data.isna().any():
        continue
    data = dumps(edited_data.to_dict())
    response = requests.post(TEST_URL, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Error in testing question {idx}, exited with status code: {response.status_code}")
    # The response holds one entry per model with a "correct" flag.
    content = loads(response.text)
    correct = sum(1 for model in content if model["correct"])
    if correct <= 1:
        # Store the original row; prep_data is applied again at submission time.
        FINAL = pd.concat([FINAL, cur.to_frame().T], ignore_index=True)

In [None]:
# Number of questions that passed the model test.
FINAL_COUNT = FINAL.shape[0]

In [None]:
# Persist the validated questions. Append mode creates the file when it does
# not exist yet, so only the header flag depends on whether it is already there.
VALID_CSV = r"generatedQuestions/valid_questions.csv"
FINAL.to_csv(VALID_CSV, mode='a', header=not os.path.exists(VALID_CSV), index=False)

In [None]:
# List every validated question next to its index label.
for label, entry in FINAL.iterrows():
    print(f"{label}: {entry['question']}")

In [None]:
# Show the validated questions as the cell output.
FINAL

In [None]:
# Submits every validated question to the Aurora API and counts the successes.
# Stops at the first rejected submission by raising.
submission_count = 0
for idx in range(FINAL.shape[0]):
    submission = prep_data(FINAL.iloc[idx])
    submission = submission.to_json()
    # requests waits forever by default; cap the call so a stalled server
    # cannot hang the notebook.
    response = requests.post(SUBMIT_URL, headers=headers, data=submission, timeout=600)
    if response.status_code != 200:
        raise Exception(f"Error in submitting question {idx}, exited with status code: {response.status_code}")
    submission_count += 1

In [None]:
# Summary of the run. WORKING_COUNT covers only the questions generated in this
# session, while FINAL_COUNT is computed from the whole CSV, so the two numbers
# are not directly comparable. FILTER_COUNT is not reported.
print(f"{WORKING_COUNT} questions were generated, {FINAL_COUNT} were valid, and {submission_count} were submitted.")