In [7]:
import os
from openai import OpenAI
from pydantic import BaseModel, Field
import time

client = OpenAI(api_key = os.environ["OPENAI_API_KEY"])

client = OpenAI(api_key = os.environ["OPENAI_API_KEY"])

class rag_format(BaseModel):
    Ranked_Relevant_Information: str = Field(description="The ranked pieces of information that will be directly relevant for answering the query.")
    File_Sources: str = Field(description="The filenames of the files from which the information was retrieved, with the format '{...}.pdf'.")

class answer_format(BaseModel):
    Response: str = Field(description="The answer to the question/prompt using the given information only.")

class eval_format(BaseModel):
    Evaluation: str = Field(description="The evaluation of whether the two answers to the given question are the same or not.")

def rag_eval_agent(question, vector_store, ideal, rag_model) -> str:
    rag_message="""You are a retrieval agent tasked with performing file searches to find information for the purpose of providing answers.
        Find pieces of information that will be directly relevant for answering the query and rank these pieces of information from most relevant to least relevant
        You must quote the passages from the files directly. Do not paraphrase or change the text in any way.
        Do not include information unless you have a source for that piece of information. 
        If no information is relevant, you must return a single piece of information, where you state "No information found".
        Ideally, these pieces of information will be sentences, phrases, data points or sets of data points, but you have limited flexiblility to include other pieces of information if you think they are appropriate.
        
        You must use tool call (i.e., file search).
        
        You know about the content of the code-base.
        """
    rag_assistant = client.beta.assistants.create(
        name="rag_test",
        instructions=rag_message,
        tools=[
            {"type": "file_search",
                "file_search":{
                    'max_num_results': 10,
                    "ranking_options": {
                        "ranker": "auto",
                        "score_threshold": 0.6
                    }
                }
            }
        ],
        tool_resources={"file_search": {"vector_store_ids":[vector_store.id]}},
        model=rag_model, 
        temperature = 0,
        top_p = 0.2,
        response_format= {
            "type": "json_schema",
            "json_schema": {
                "name": "answer",
                "schema": rag_format.model_json_schema()
            },
        }
    )
    
    thread = client.beta.threads.create(
                    messages=[],
                )
    
    parsed = client.beta.threads.messages.create(
                    thread_id=thread.id,
                    content=question,
                    role='user',
                )
    
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=rag_assistant.id,
        # pass the latest system message as instructions
        instructions=rag_message,
    )
    run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    while run.status!="completed":
        run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    response_messages = client.beta.threads.messages.list(thread.id, order="asc")
    for message in response_messages.data:
        for content in message.content:
            output=content.text.value
            if output.startswith("{"):
                data=json.loads(output)
                try:
                    answer=data["Ranked_Relevant_Information"]
                except:
                    answer=data["Ranked Relevant Information"]
                try:
                    sources=data["File_Sources"]
                except:
                    sources=data["File Sources"]
    if not ("answer" in locals()):
        answer="No relevant information."
    if not ("sources" in locals()):
        sources="No relevant sources."
    client.beta.assistants.delete(assistant_id=rag_assistant.id)
    answer_message="""
    You are an answering agent tasked with answering a question or providing a summary only using the relevant information or prompts that are given to you, via the "Ranked Relevant Information".
    Generate a logical and reasoned response to the question or prompts only using the ranked relevant information.
    Use the question to provide context to the information before deciding if the information is relevant or not.
    If no file sources are given, you must answer "No information.".
    If you judge pieces of information to be redundant or irrelevant, you may choose to not consider them further in the formulation of your answer, but you must consider the given pieces of information at least once each.
    If you do not have any information to answer the question, you must say "No information given".
    You may say that you do not have enough information to answer the question, if it is appropriate.
    """
    answer_assistant = client.beta.assistants.create(
        name="answer_test",
        instructions=answer_message,
        model=rag_model, 
        temperature = 0.0,
        top_p = 0.2,
        response_format= {
            "type": "json_schema",
            "json_schema": {
                "name": "answer",
                "schema": answer_format.model_json_schema()
            },
        }
    )
    thread = client.beta.threads.create(
                    messages=[],
                )
    
    parsed = client.beta.threads.messages.create(
                    thread_id=thread.id,
                    content="Question: "+question+"\nRanked Relevant Information: "+answer,
                    role='user',
                )
    
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=answer_assistant.id,
        # pass the latest system message as instructions
        instructions=answer_message,
    )
    run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    while run.status!="completed":
        run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    response_messages = client.beta.threads.messages.list(thread.id, order="asc")
    del answer
    for message in response_messages.data:
        for content in message.content:
            output=content.text.value
            if output.startswith("{"):
                data=json.loads(output)
                answer=data["Response"]
    if not ("answer" in locals()):
        answer="No information."
    client.beta.assistants.delete(assistant_id=answer_assistant.id)
    eval_message="""
    You are an evaluation agent tasked with comparing the given two different answers to the same question. 
    Focus on the meaning of both answers, in the context of the question, when formulating your evaluation.
    If a point is conveyed in both answers, as responses to the associated question, output "Same".
    If a similar points is conveyed in both answers, as responses to the associated question, output "Similar".
    If all of the points are different in both answers, as responses to the associated question, output "Different".
    If you are unsure about the above criteria for the answers to the associated question, output "Unsure".
    Ensure that differences between numerical values and results between the two answers are emphasised in your analysis, unless the question specifically allows for approximations/inexact numerical values. 
    Then, if the question specifically allows for approximations/inexact numerical values, only compare the numerical values approximately.
    """
    eval_assistant = client.beta.assistants.create(
        name="eval_test",
        instructions=eval_message,
        model="gpt-4o-mini", 
        temperature = 0.0,
        top_p = 0.2,
        response_format= {
            "type": "json_schema",
            "json_schema": {
                "name": "answer",
                "schema": eval_format.model_json_schema()
            },
        }
    )
    
    thread = client.beta.threads.create(
                    messages=[],
                )
    
    parsed = client.beta.threads.messages.create(
                    thread_id=thread.id,
                    content=question+answer+str(ideal),
                    role='user',
                )
    
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=eval_assistant.id,
        # pass the latest system message as instructions
        instructions=eval_message,
    )
    run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    while run.status!="completed":
        run = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    response_messages = client.beta.threads.messages.list(thread.id, order="asc")
    for message in response_messages.data:
        for content in message.content:
            output=content.text.value
            if output.startswith("{"):
                data=json.loads(output)
                evaluation=data["Evaluation"]
    client.beta.assistants.delete(assistant_id=eval_assistant.id)
    ideal_clean= ''.join(char for char in str(ideal) if char.isalnum())
    ideal_clean=ideal_clean.upper()
    answer_clean=''.join(char for char in answer if char.isalnum())
    answer_clean=answer_clean.upper()
    if ideal_clean in answer_clean:
        simple_eval="Simple_Same"
    else:
        simple_eval="Simple_Different"
    return simple_eval+" "+evaluation+" "+answer+" "+sources

In [None]:
import pandas as pd

lit = pd.read_csv('../cmbagent_dataset/cmbagent_dataset.csv')
chunking_strategy =  {
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 4000, # reduce size to ensure better context integrity
            "chunk_overlap_tokens": 100 # increase overlap to maintain context across chunks
        }}
    
vector_store = client.vector_stores.create(name="rag_eval_test", chunking_strategy=chunking_strategy)
file_paths = []
for root, dirs, files in os.walk(assistant_data):
    # Filter out unwanted directories like .ipynb_checkpoints
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    for file in files:
        if file.startswith('.') or file.endswith('.ipynb')  or file.endswith('.yaml') or file.endswith('.txt') or (not '.' in file):
            continue
        file_paths.append(os.path.join(root, file))
    
file_streams = [open(path, "rb") for path in file_paths]
    
file_batch = client.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=file_streams
        )

In [9]:
import pandas as pd

lit = pd.read_csv('../cmbagent_dataset/cmbagent_dataset.csv')
vector_store=client.vector_stores.retrieve(vector_store_id="vs_67da9f09a6b48191a32189befe73c49e")
vector_store_id=vector_store.id

In [15]:
for i in range(lit.shape[0]):
    print(i/lit.shape[0]*100, end="")
    print("\r", end="")
    query=lit.loc[i, "question"]
    output=rag_eval_agent(query, vector_store, lit.loc[i, "ideal"], "gpt-4o-mini")
    with open("output_cmbagent1.txt", "a") as file:
        file.write(str(output.replace("\n", ""))+"\t"+str(lit.loc[i, "ideal"])+"\n\n")

99.047619047619054

In [None]:
files = client.beta.vector_stores.files.list(vector_store_id=vector_store.id)
for file in files.data:
    client.beta.vector_stores.files.delete(vector_store_id=vector_store.id, file_id=file.id)

In [7]:
files = client.files.list()
for file in files.data:
    if file.purpose == "assistants" and ".pdf" in file.filename:
        client.files.delete(file_id=file.id)
        print(f"Deleted file: {file.id}")
client.beta.vector_stores.delete(vector_store_id=vector_store.id)

Deleted file: file-4974tk7hacVFHBGtbaAjyQ
Deleted file: file-XZcTQabVWJFEnHKk1v8jiw
Deleted file: file-UJ192E3Ah5L2hgbrsianP8
Deleted file: file-Qsq2nz8zha31iM89kcZtgd
Deleted file: file-6sSwjkw7H9dAhYxtb1aDcx
Deleted file: file-4AJVFq9gzo5D8AfhDVRnqy
Deleted file: file-GszvqFhCBBJRbJ2Hq3KULu
Deleted file: file-CAiQxq5rhd25GtNYDAuVJV
Deleted file: file-8khMHifL2SRnukSARyuLhb
Deleted file: file-EbkenDCB8yoiD2XpsX9Gkt
Deleted file: file-5Gc5upjMfCcYJJukY1wYHa
Deleted file: file-1CxDCNuRDsg8bJqzwk6Lip
Deleted file: file-QtkJBcta6UTtBJrtg5ygjC
Deleted file: file-VKm2qZod4zJ29riGzoDNv5
Deleted file: file-CwzBGVz1qw4ErK5RFjarHP
Deleted file: file-TGFTfDBnkNeqSJe3rPf7yo
Deleted file: file-GFFehGKrPFx2JgmNVvKWrc
Deleted file: file-P3rcz4WufidH1QEP8ZLtvG
Deleted file: file-1tfQRFFc2dxXXquAjoZw2Z
Deleted file: file-N8Dhe3tNKvTnN5euLGUFr5
Deleted file: file-DVaUt23zKyKf7zKVMADZBw
Deleted file: file-LQFLnBxUq3fu5x152UNNj6
Deleted file: file-86Ut6KYmDkqs6BUkrK7WDc
Deleted file: file-3YmBHpNqsXiD2XW

NameError: name 'vector_store' is not defined