In [2]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then read the OpenAI
# key from it. The value is None when the variable is not set.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
from openai import OpenAI

# Single API client shared by every OpenAI call made in this notebook.
client = OpenAI(api_key=OPENAI_API_KEY)

# Register the assistant. The file_search tool is enabled so that runs can
# query files attached through a vector store.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Math professor",
    tools=[{"type": "file_search"}],
    instructions="You are an Math professor. Use you knowledge base to answer questions about math lectures based on the provided files.",
)

In [5]:
from contextlib import ExitStack

# Vector store that holds the lecture scripts made available to file_search.
vector_store = client.beta.vector_stores.create(name="Math 201 lecture")

# Lecture PDFs to upload to OpenAI. Raw strings keep the Windows backslashes
# literal: in a normal string "\p", "\S", "\s" and "\L" are invalid escape
# sequences, which newer Python versions warn about.
file_paths = [
    r"C:\projects\Sports-Buddy\support_material\Lecture01_Script.pdf",
    r"C:\projects\Sports-Buddy\support_material\Lecture02_Script.pdf",
]

# Open the files through an ExitStack so every handle is closed once the
# upload is over, including when a later open() or the upload itself raises.
with ExitStack() as open_files:
    file_streams = [
        open_files.enter_context(open(path, "rb")) for path in file_paths
    ]

    # The upload-and-poll SDK helper uploads the files, adds them to the
    # vector store, and polls the status of the file batch for completion.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )

print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=2, failed=0, in_progress=0, total=2)


In [6]:
from typing_extensions import override
from openai import AssistantEventHandler


class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints assistant output with file citations.

    Relies on the module-level ``client`` to look up the names of cited files.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the "assistant > " prefix, without a trailing newline."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        """Print the type of the tool call that was just created."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message text followed by its file citations.

        Each annotation's text in the message is replaced by an ``[index]``
        marker (the message's text value is updated in place), and one
        ``[index] filename`` line is printed per file citation.
        """
        # Nothing to print when the message carries no content blocks, or when
        # the first block has no text payload; indexing or attribute access
        # would otherwise raise.
        if not message.content:
            return
        message_content = getattr(message.content[0], "text", None)
        if message_content is None:
            return

        annotations = message_content.annotations
        citations = []
        for index, annotation in enumerate(annotations):
            # Swap the raw annotation text for a compact numbered marker.
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            # Only annotations carrying a file citation get a footnote line.
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(message_content.value)
        print("\n".join(citations))


student_question = "Can you explain what functions are?"

# Start a thread that holds the question and can search the lecture vector store.
thread = client.beta.threads.create(
    messages=[{"role": "user", "content": student_question}],
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store.id]
        }
    }
)

current_lecture = "Lecture 1"
instruction_for_search = "Search in which lecture the requested concept is explained. If it appears on another Lecture that in not the current lecture, reply explaining that concept is out of the scope of this class (in a friendly manner), since it will be explained in the lecture X, where X is the lecture where it appears."
length_limitation = "Keep your answers short, no longer than 2 sentences."
tone_instructions = "Answer in a friendly and formal manner."

# Every fragment already ends with a period, so the parts are joined with a
# single space. Appending ". " after each one put doubled periods ("..") into
# the instructions sent to the model.
instruction_text = " ".join(
    [
        f"We are currently on {current_lecture}.",
        instruction_for_search,
        length_limitation,
        tone_instructions,
    ]
)

# Run the assistant on the thread with the per-run instructions and stream
# events to EventHandler until the run is done.
with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        instructions=instruction_text,
        event_handler=EventHandler(),
) as stream:
    stream.until_done()


assistant > file_search


assistant > The concept of functions is out of the scope of this class, as it will be explained in Lecture 2. We will cover what a function is, how to recognize one, and function notation in that lecture[0].
[0] Lecture02_Script.pdf


# All Combined

In [None]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then read the OpenAI
# key from it. The value is None when the variable is not set.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

from openai import OpenAI

# Single API client shared by every OpenAI call made in this cell.
client = OpenAI(api_key=OPENAI_API_KEY)

# Register the assistant. The file_search tool is enabled so that runs can
# query files attached through a vector store.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Math professor",
    tools=[{"type": "file_search"}],
    instructions="You are an Math professor. Use you knowledge base to answer questions about math lectures based on the provided files.",
)

from contextlib import ExitStack

# Vector store that holds the lecture scripts made available to file_search.
vector_store = client.beta.vector_stores.create(name="Math 201 lecture")

# Lecture PDFs to upload to OpenAI. Raw strings keep the Windows backslashes
# literal: in a normal string "\p", "\S", "\s" and "\L" are invalid escape
# sequences, which newer Python versions warn about.
file_paths = [
    r"C:\projects\Sports-Buddy\support_material\Lecture01_Script.pdf",
    r"C:\projects\Sports-Buddy\support_material\Lecture02_Script.pdf",
]

# Open the files through an ExitStack so every handle is closed once the
# upload is over, including when a later open() or the upload itself raises.
with ExitStack() as open_files:
    file_streams = [
        open_files.enter_context(open(path, "rb")) for path in file_paths
    ]

    # The upload-and-poll SDK helper uploads the files, adds them to the
    # vector store, and polls the status of the file batch for completion.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )

print(file_batch.status)
print(file_batch.file_counts)

from typing_extensions import override
from openai import AssistantEventHandler


class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints assistant output with file citations.

    Relies on the module-level ``client`` to look up the names of cited files.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the "assistant > " prefix, without a trailing newline."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        """Print the type of the tool call that was just created."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message text followed by its file citations.

        Each annotation's text in the message is replaced by an ``[index]``
        marker (the message's text value is updated in place), and one
        ``[index] filename`` line is printed per file citation.
        """
        # Nothing to print when the message carries no content blocks, or when
        # the first block has no text payload; indexing or attribute access
        # would otherwise raise.
        if not message.content:
            return
        message_content = getattr(message.content[0], "text", None)
        if message_content is None:
            return

        annotations = message_content.annotations
        citations = []
        for index, annotation in enumerate(annotations):
            # Swap the raw annotation text for a compact numbered marker.
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            # Only annotations carrying a file citation get a footnote line.
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(message_content.value)
        print("\n".join(citations))


student_question = "Can you explain what functions are?"

# Start a thread that holds the question and can search the lecture vector store.
thread = client.beta.threads.create(
    messages=[{"role": "user", "content": student_question}],
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store.id]
        }
    }
)

current_lecture = "Lecture 1"
instruction_for_search = "Search in which lecture the requested concept is explained. If it appears on another Lecture that in not the current lecture, reply explaining that concept is out of the scope of this class (in a friendly manner), since it will be explained in the lecture X, where X is the lecture where it appears."
length_limitation = "Keep your answers short, no longer than 2 sentences."
tone_instructions = "Answer in a friendly and formal manner."

# Every fragment already ends with a period, so the parts are joined with a
# single space. Appending ". " after each one put doubled periods ("..") into
# the instructions sent to the model.
instruction_text = " ".join(
    [
        f"We are currently on {current_lecture}.",
        instruction_for_search,
        length_limitation,
        tone_instructions,
    ]
)

# Run the assistant on the thread with the per-run instructions and stream
# events to EventHandler until the run is done.
with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        instructions=instruction_text,
        event_handler=EventHandler(),
) as stream:
    stream.until_done()