In [7]:
from typing_extensions import override
import openai as client
from openai.lib.streaming import AssistantEventHandler

# open ai assistant 생성
# assistant = client.beta.assistants.create(
#     name="Book Assistant",
#     instructions="You help users with their questions on the files they upload",
#     model="gpt-4-turbo",
#     tools=[{"type": "file_search"}]
# )

assistant_id = 'asst_vSYfxk144i8kWlJbqn7Cb22i'

# thread 에 파일을 직접 붙이면 thread 별로 vectorstore 가 별도로 생성 된다 (기본 적으로 7일 이 지나면 만료됨) 
message_file = client.files.create(
    file=open("../files/sample.txt", "rb"), purpose="assistants"
)

'''
# vector store 를 고정 으로 사용 하는 방법
# vector_store 에 파일을 업로드
vector_store = client.beta.vector_stores.create(name="Financial Statements")
 
file_paths = ["edgar/goog-10k.pdf", "edgar/brka-10k.txt"]
file_streams = [open(path, "rb") for path in file_paths]
 
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)
 
print(file_batch.status)
print(file_batch.file_counts)

# thread 에 file_search 수행 시 기존 vector store 를 가져 오도록 설정
thread = client.beta.threads.create(
  messages=[ { "role": "user", "content": "How do I cancel my subscription?"} ],
  tool_resources={
    "file_search": {
      "vector_store_ids": ["vs_2"]
    }
  }
)
'''
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "I want to know where does Winston live and how his apartment looks like.",
            # "content": "I want you to help me with this file",
            "attachments": [
                {"file_id": message_file.id, "tools": [{"type": "file_search"}]}
            ],
        }
    ]
)


class EventHandler(AssistantEventHandler):
    @override
    def on_text_created(self, text) -> None:
        print(f"\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        # print a citation to the file searched
        message_content = message.content[0].text
        annotations = message_content.annotations
        citations = []
        for index, annotation in enumerate(annotations):
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(message_content.value)
        print("\n".join(citations))


# Then, we use the stream SDK helper
# with the EventHandler class to create the Run
# and stream the response.

with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant_id,
        # instructions="Please address the user as Jane Doe. The user has a premium account.",
        event_handler=EventHandler(),
) as stream:
    stream.until_done()


assistant > file_search


assistant > Winston lives in a dilapidated apartment building called Victory Mansions[0]. His apartment is described as having a hallway that smells of boiled cabbage and old rag mats. The flat is located seven flights up, and the building's lift seldom works, forcing residents to use the stairs. Inside his apartment, there is a telescreen that cannot be completely shut off, forming part of the surface of the wall. Winston's living conditions are further highlighted by the cold and meager furnishings, emphasizing the oppressive and bleak nature of his environment[0].
[0] sample.txt
[1] sample.txt


In [8]:
aaa = client.beta.threads.messages.create(
    thread_id=thread.id, role="user", content="Where does he work?"
)

aaa

Message(id='msg_pmlZDDCCmpFEmOXd9OLwbLM1', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='Where does he work?'), type='text')], created_at=1720232709, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_hBWqcirVGh8w80Bpx26aNn4f')

In [9]:
with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant_id,
        # instructions="Please address the user as Jane Doe. The user has a premium account.",
        event_handler=EventHandler(),
) as stream:
    stream.until_done()


assistant > file_search


assistant > Winston works at the Ministry of Truth, where he is involved in the alteration and adjustment of historical records to fit the narratives and doctrines of the Party[0].
[0] sample.txt
