In [17]:
import json
import os
import time

from openai import OpenAI
from pydantic import BaseModel

# Directory that is walked to collect the paper files uploaded to the vector store.
# Set the LITQA2_PAPERS_DIR environment variable to point somewhere else; the
# fallback is the original author's local path, so existing setups keep working.
assistant_data = os.environ.get(
    "LITQA2_PAPERS_DIR",
    "/home/adrian/Documents/University Work/Part III Project/PaperQA2/LitQA2_Papers",
)

# The API key is read from the environment (raises KeyError if it is not set),
# so no credential is stored in the notebook.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Structured-output schema for the question-classification assistant: its
# `model_json_schema()` is sent as that assistant's JSON `response_format`.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated schema, which would change what is sent to the API.
class category_format(BaseModel):
    # Complexity/length of the context required to answer the question.
    Classification: str
    # Sub-questions derived from the original question, returned as one string.
    Sub_Questions: str

# Structured-output schema for the retrieval (file search) assistant: its
# `model_json_schema()` is sent as that assistant's JSON `response_format`.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated schema, which would change what is sent to the API.
class rag_format(BaseModel):
    # Retrieved pieces of information, ranked from most to least relevant.
    Ranked_Relevant_Information: str
    # Filenames of the files the information was retrieved from.
    File_Sources: str

# Structured-output schema for the answering assistant: its
# `model_json_schema()` is sent as that assistant's JSON `response_format`.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated schema, which would change what is sent to the API.
class answer_format(BaseModel):
    # Answer/summary for the main question.
    Response: str
    # Filenames of the sources used to produce the answer.
    File_Sources: str

# Structured-output schema for the evaluation assistant: its
# `model_json_schema()` is sent as that assistant's JSON `response_format`.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated schema, which would change what is sent to the API.
class eval_format(BaseModel):
    # Verdict requested by the evaluation prompt: "Same", "Similar",
    # "Different" or "Unsure".
    Evaluation: str

def _last_json_payload(messages) -> dict:
    """Return the last JSON object emitted by the assistant, or ``{}`` if none.

    Only assistant messages are inspected, so a user prompt that happens to
    start with ``{`` is never mistaken for the structured answer. Content
    parts without text and text that is not valid JSON are skipped.
    """
    payload = {}
    for message in messages:
        if getattr(message, "role", "assistant") != "assistant":
            continue
        for part in message.content:
            text = getattr(part, "text", None)
            if text is None:  # non-text content part
                continue
            raw = text.value
            if not raw.lstrip().startswith("{"):
                continue
            try:
                decoded = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if isinstance(decoded, dict):
                payload = decoded
    return payload


def _run_json_agent(name, instructions, user_content, schema_model,
                    poll_interval=1.0, **assistant_options) -> dict:
    """Run a throw-away assistant on one user message and return its JSON reply.

    Args:
        name: Name given to the temporary assistant.
        instructions: System instructions, used for both the assistant and the run.
        user_content: Text of the single user message placed in a new thread.
        schema_model: Pydantic model whose JSON schema constrains the reply.
        poll_interval: Seconds to wait between run status checks.
        **assistant_options: Extra keyword arguments for assistant creation
            (for example ``tools`` and ``tool_resources``).

    Returns:
        The decoded JSON object, or ``{}`` when the run did not complete or
        produced no JSON. The assistant is always deleted afterwards.
    """
    # Any of these states means the run will never become "completed" on its own.
    dead_end_states = ("failed", "cancelled", "expired", "incomplete", "requires_action")

    assistant = client.beta.assistants.create(
        name=name,
        instructions=instructions,
        model="gpt-4o-mini",
        temperature=0,
        top_p=0.2,
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "answer",
                "schema": schema_model.model_json_schema(),
            },
        },
        **assistant_options,
    )
    try:
        thread = client.beta.threads.create(messages=[])
        client.beta.threads.messages.create(
            thread_id=thread.id,
            content=user_content,
            role="user",
        )
        run = client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant.id,
            # pass the latest system message as instructions
            instructions=instructions,
        )
        run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
        while run.status != "completed" and run.status not in dead_end_states:
            # Sleep between polls instead of hammering the API in a tight loop.
            time.sleep(poll_interval)
            run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
        if run.status != "completed":
            return {}
        listing = client.beta.threads.messages.list(thread.id, order="asc")
        return _last_json_payload(listing.data)
    finally:
        # Clean up even when a request raised, so assistants do not accumulate.
        client.beta.assistants.delete(assistant_id=assistant.id)


def rag_eval_agent(question, vector_store, ideal) -> str:
    """Answer ``question`` with a four-stage assistant pipeline and grade the answer.

    Stages: (1) break the question into sub-questions, (2) retrieve ranked
    information via file search on ``vector_store``, (3) answer from that
    information only, (4) compare the answer with ``ideal``.

    Args:
        question: The question text.
        vector_store: Vector store object; only its ``id`` is used.
        ideal: Reference answer; converted with ``str()`` before use.

    Returns:
        ``"<simple_eval> <evaluation> <answer> <sources>"`` where
        ``simple_eval`` is ``"Simple_Same"`` when the alphanumeric, upper-cased
        ideal answer occurs inside the alphanumeric, upper-cased answer and
        ``"Simple_Different"`` otherwise.

    A stage whose run fails or returns no JSON falls back to a default
    (empty sub-questions, "No information.", "No relevant sources.",
    "Unsure") instead of hanging or raising.
    """
    category_message = """You are a classification agent tasked with classifying the type of question presented to you, by complexity of the context required to answer the question.
        Break the question down into sub-questions, where the answers to the sub-questions will be necessary to answer the original question.
        The subquestions must be easier to answer than the original question.
        
        Your answer must have the following structure:
        
        Classification:
        
        <give the complexity/length of the context required to answer the question> 
        
        Sub_Questions:
        
        <give a series of sub-questions, where the answers will be required to answer the original question>
        """
    category_payload = _run_json_agent(
        "category_test",
        category_message,
        question,
        category_format,
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )
    # The "Classification" field is requested from the model but not used downstream.
    sub_questions = str(category_payload.get("Sub_Questions", ""))

    rag_message = """You are a retrieval agent tasked with performing file searches to find information for the purpose of providing answers to a series of queries.
        Find pieces of information that will be directly relevant for answering the queries and rank these pieces of information from most relevant to least relevan.
        You must give sources for the information you give, via filenames.
        You must not include information unless you have a source for that piece of information. 
        If no information is relevant, you must return a single piece of information, where you state "No information found". 
        You must search in all of the files presented to you before concluding that no information is relevant.
        Do not exceed 10 pieces of information and ensure that you also include the filenames of the files you retrieved the information from. 
        Ideally, these pieces of information will be sentences, phrases, data points or sets of data points, but you have limited flexiblility to include other pieces of information if you think they are appropriate.
        
        You must use tool call (i.e., file search).
        
        You know about the content of the code-base.
        
        Your answer must have the following structure:
        
        Ranked_Relevant_Information:
        
        <the relevant pieces of information, ranked from most to least relevant> 
        
        File_Sources:
        
        <include the associated filenames of the files you retrieved the information from, format "{...}.pdf">
        """
    rag_payload = _run_json_agent(
        "rag_test",
        rag_message,
        question + " " + sub_questions,
        rag_format,
        tools=[
            {"type": "file_search",
                "file_search": {
                    'max_num_results': 10,
                    "ranking_options": {
                        "ranker": "auto",
                        "score_threshold": 0.8
                    }
                }
            }
        ],
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )
    retrieved_info = str(rag_payload.get("Ranked_Relevant_Information", "No information."))
    retrieved_sources = str(rag_payload.get("File_Sources", "No relevant sources."))

    answer_message = """
    You are an answering agent tasked with answering a main question or providing a summary only using the relevant information or prompts that are given to you, via the "Ranked Relevant Information".
    You may use answer the sub-questions first to help with answering the main question, if you judge some of the sub-questions to be useful to answering the main question.
    You must only provide a response to answer the main question.
    Generate a logical and reasoned response to the main question or prompt only using the ranked relevant information and the sub-questions. If no file sources are given, you must answer "No information.".
    If you judge pieces of information or sub-questions to be redundant or irrelevant, you may choose to not consider them further in the formulation of your answer, but you must consider the given pieces of information at least once each.
    You may say that you do not have enough information to answer the question, if it is appropriate.
    Your response must be concise and to-the-point.
    
    Your answer must have the following structure and no other sections:
    
    Response:
    
    <answer/summary to the main question only goes here>

    File_Sources:

    <the full series of file sources used to provide the answer/summary. Give full filenames here, format "{...}.pdf">
    """
    # Label and separate the retrieved context; it used to be appended to the
    # prompt with no delimiter between the information and the file names.
    answer_instructions = (
        answer_message
        + "\nRanked Relevant Information:\n" + retrieved_info
        + "\nFile Sources:\n" + retrieved_sources
    )
    answer_payload = _run_json_agent(
        "answer_test",
        answer_instructions,
        "Main Question: " + question + "\nSub-Questions: " + sub_questions,
        answer_format,
    )
    answer = str(answer_payload.get("Response", "No information."))
    # Without a parsed answer, report the sources found by the retrieval stage.
    sources = str(answer_payload.get("File_Sources", retrieved_sources))

    eval_message = """
    You are an evaluation agent tasked with comparing the two given passages of text. 
    Focus on the meaning of both passages, and whether the points conveyed in both have the same meaning.
    A "point" is not a sample of text, but rather a single piece of information being conveyed to the reader.
    If a point in both passages conveys the same meaning, output "Same".
    If a similar point in both passages conveys the same meaning and no point in both passages conveys the same meaning, output "Similar".
    If all of the points in both passages conveys different meanings, output "Different".
    If you are unsure about the above criteria for passages, output "Unsure".
    Ensure that differences between numerical values and results between the two passages are emphasised in your analysis, unless the question in the passage specifically allows for approximations/inexact numerical values. 
    Then, if the question specifically allows for approximations/inexact numerical values, only compare the numerical values approximately.

    Your response must have the following structure and no other sections:

    Evaluation:
    <either the word "Same", the word "Similar, "the word "Different" or the word "Unsure"> 
    """
    # Newlines keep "Passage 2:" and the ideal answer from being glued onto
    # the preceding text.
    eval_content = (
        "Passage 1:\n" + question + "\n" + answer
        + "\nPassage 2:\n" + question + "\n" + str(ideal)
    )
    eval_payload = _run_json_agent("eval_test", eval_message, eval_content, eval_format)
    # "Unsure" is the prompt's own verdict for an undecidable comparison.
    evaluation = str(eval_payload.get("Evaluation", "Unsure"))

    ideal_clean = "".join(ch for ch in str(ideal) if ch.isalnum()).upper()
    answer_clean = "".join(ch for ch in answer if ch.isalnum()).upper()
    # An empty reference is a substring of everything, so it must not count as a match.
    if ideal_clean and ideal_clean in answer_clean:
        simple_eval = "Simple_Same"
    else:
        simple_eval = "Simple_Different"
    return simple_eval + " " + evaluation + " " + answer + " " + sources

In [11]:
import pandas as pd

# Question/ideal-answer table used by the evaluation loop.
lit = pd.read_csv('../PaperQA2/LitQA2_edit.csv')

# Static chunking configuration for the vector store.
# NOTE(review): the previous inline comments said "reduce size" and "increase
# overlap", which does not match these values relative to the file-search
# defaults (believed to be 800 / 400 tokens - confirm against the API docs).
chunking_strategy = {
    "type": "static",
    "static": {
        "max_chunk_size_tokens": 4000,
        "chunk_overlap_tokens": 100,
    },
}

# Collect every uploadable file under `assistant_data`, skipping hidden
# entries, notebooks, YAML/text files and names without an extension.
skipped_suffixes = ('.ipynb', '.yaml', '.txt')
file_paths = []
for root, dirs, files in os.walk(assistant_data):
    # Prune hidden directories (e.g. .ipynb_checkpoints) in place so os.walk skips them.
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    for file in files:
        if file.startswith('.') or '.' not in file or file.endswith(skipped_suffixes):
            continue
        file_paths.append(os.path.join(root, file))

# Fail before creating a remote vector store that would stay empty.
if not file_paths:
    raise FileNotFoundError(f"No uploadable files found under {assistant_data!r}")

vector_store = client.beta.vector_stores.create(name="rag_eval_test", chunking_strategy=chunking_strategy)

file_streams = []
try:
    for path in file_paths:
        file_streams.append(open(path, "rb"))
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=file_streams,
    )
finally:
    # Close every handle that was opened, even if the upload (or a later open) failed.
    for stream in file_streams:
        stream.close()

# Show how many files were processed so failed uploads are visible.
file_batch.file_counts

In [3]:
import pandas as pd

# Load the question/ideal-answer table.
lit = pd.read_csv("../PaperQA2/LitQA2_edit.csv")

# Re-attach to a previously created vector store by its hard-coded id,
# instead of rebuilding and re-uploading it.
vector_store = client.beta.vector_stores.retrieve(
    vector_store_id="vs_67c78fa003cc8191bf83063419802921",
)
vector_store_id = vector_store.id

In [19]:
# Run the pipeline on every row of `lit` and append one record per question to
# output_categories.txt: the pipeline output with newlines removed, a tab, the
# ideal answer, then a blank line. The file is reopened in append mode for each
# row, so records already written remain on disk if the loop is interrupted.
for row in range(len(lit)):
    ideal_answer = lit.loc[row, "ideal"]
    result = rag_eval_agent(lit.loc[row, "question"], vector_store, ideal_answer)
    record = result.replace("\n", "") + "\t" + str(ideal_answer) + "\n\n"
    with open("output_categories.txt", "a") as handle:
        handle.write(record)

KeyboardInterrupt: 

In [None]:
# Detach every file from the vector store.
# Iterating the list result itself (rather than its `.data` attribute, which
# holds only the first page) lets the SDK fetch all pages. The ids are gathered
# completely before deleting so the listing is not modified while it is paged.
attached_file_ids = [
    attached.id
    for attached in client.beta.vector_stores.files.list(
        vector_store_id=vector_store.id,
        limit=100,
    )
]
for attached_file_id in attached_file_ids:
    client.beta.vector_stores.files.delete(
        vector_store_id=vector_store.id,
        file_id=attached_file_id,
    )

In [7]:
# Delete uploaded PDF files whose purpose is "assistants", then delete the
# vector store itself.
# NOTE(review): the listing is not restricted to this notebook's vector store,
# so every matching file returned by `client.files.list()` is removed.
# This cell needs `vector_store` from an earlier cell; run one of the cells
# that defines it first.
files = client.files.list()
for remote_file in files.data:
    is_assistant_pdf = remote_file.purpose == "assistants" and ".pdf" in remote_file.filename
    if not is_assistant_pdf:
        continue
    client.files.delete(file_id=remote_file.id)
    print(f"Deleted file: {remote_file.id}")
client.beta.vector_stores.delete(vector_store_id=vector_store.id)

Deleted file: file-4974tk7hacVFHBGtbaAjyQ
Deleted file: file-XZcTQabVWJFEnHKk1v8jiw
Deleted file: file-UJ192E3Ah5L2hgbrsianP8
Deleted file: file-Qsq2nz8zha31iM89kcZtgd
Deleted file: file-6sSwjkw7H9dAhYxtb1aDcx
Deleted file: file-4AJVFq9gzo5D8AfhDVRnqy
Deleted file: file-GszvqFhCBBJRbJ2Hq3KULu
Deleted file: file-CAiQxq5rhd25GtNYDAuVJV
Deleted file: file-8khMHifL2SRnukSARyuLhb
Deleted file: file-EbkenDCB8yoiD2XpsX9Gkt
Deleted file: file-5Gc5upjMfCcYJJukY1wYHa
Deleted file: file-1CxDCNuRDsg8bJqzwk6Lip
Deleted file: file-QtkJBcta6UTtBJrtg5ygjC
Deleted file: file-VKm2qZod4zJ29riGzoDNv5
Deleted file: file-CwzBGVz1qw4ErK5RFjarHP
Deleted file: file-TGFTfDBnkNeqSJe3rPf7yo
Deleted file: file-GFFehGKrPFx2JgmNVvKWrc
Deleted file: file-P3rcz4WufidH1QEP8ZLtvG
Deleted file: file-1tfQRFFc2dxXXquAjoZw2Z
Deleted file: file-N8Dhe3tNKvTnN5euLGUFr5
Deleted file: file-DVaUt23zKyKf7zKVMADZBw
Deleted file: file-LQFLnBxUq3fu5x152UNNj6
Deleted file: file-86Ut6KYmDkqs6BUkrK7WDc
Deleted file: file-3YmBHpNqsXiD2XW

NameError: name 'vector_store' is not defined