# **Assistants API: Knowledge Retrieval**

In [1]:
%pip install --upgrade openai

Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
from openai import OpenAI
from dotenv import find_dotenv, load_dotenv

In [4]:
import warnings

# Silence DeprecationWarning only; every other warning category is left untouched
warnings.simplefilter("ignore", DeprecationWarning)

In [5]:
# Read the local .env file; the path is resolved by find_dotenv()
dotenv_path = find_dotenv()
_: bool = load_dotenv(dotenv_path)

# OpenAI client constructed with default settings
client: OpenAI = OpenAI()

In [6]:
# Display the boolean returned by load_dotenv(), which the previous cell stored in `_`
_

True

In [7]:
# Read the key from the process environment
api_key = os.getenv("OPENAI_API_KEY")

# Fail fast when the key is missing or empty; otherwise confirm that it was loaded
if not api_key:
    raise EnvironmentError("❌ OPENAI_API_KEY not found in .env file!")
print("🔐 OpenAI Key Loaded:", bool(api_key))

🔐 OpenAI Key Loaded: True


In [8]:
import json

def show_json(obj):
    """Display an API object as plain JSON-compatible Python data.

    The object is serialized with its ``model_dump_json()`` method, parsed
    back with ``json.loads`` and handed to the notebook's ``display``.

    Args:
        obj: Any object exposing a ``model_dump_json()`` method.
    """
    serialized = obj.model_dump_json()
    payload = json.loads(serialized)
    display(payload)

# **1. Create a new Assistant with File Search Enabled**

In [9]:
from openai import OpenAI

client = OpenAI()

# Settings for the document-search assistant, collected in one place
assistant_settings = {
    "name": "Document Search Assistant",
    "instructions": "You are an expert in document search. You are required to retrieve information relative to user query when you find it suitable.",
    "model": "gpt-3.5-turbo-1106",
    # Enable the file_search tool so the assistant can query attached vector stores
    "tools": [{"type": "file_search"}],
}
assistant = client.beta.assistants.create(**assistant_settings)

show_json(assistant)

{'id': 'asst_8iTCijvudm5Ks5nJdbQy76Bg',
 'created_at': 1753086848,
 'description': None,
 'instructions': 'You are an expert in document search. You are required to retrieve information relative to user query when you find it suitable.',
 'metadata': {},
 'model': 'gpt-3.5-turbo-1106',
 'name': 'Document Search Assistant',
 'object': 'assistant',
 'tools': [{'type': 'file_search',
   'file_search': {'max_num_results': None,
    'ranking_options': {'score_threshold': 0.0,
     'ranker': 'default_2024_08_21'}}}],
 'response_format': 'auto',
 'temperature': 1.0,
 'tool_resources': {'code_interpreter': None,
  'file_search': {'vector_store_ids': []}},
 'top_p': 1.0,
 'reasoning_effort': None}

## **Upload files and add them to a Vector Store**

In [10]:
from contextlib import ExitStack

# Create a vector store called "Document Search"
vector_store = client.vector_stores.create(name="Document Search")

# Local files to upload to OpenAI
file_paths = ["zia_profile.pdf"]

# Open every file inside an ExitStack so all handles are closed once the
# upload finishes, even if the upload raises.
with ExitStack() as stack:
    file_streams = [stack.enter_context(open(path, "rb")) for path in file_paths]

    # Use the upload and poll SDK helper to upload the files, add them to the
    # vector store, and poll the status of the file batch for completion.
    file_batch = client.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )

# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

# NOTE: `vector_store` was returned by the create call above, before the upload,
# so the file counts it shows are those from creation time, not after the batch.
show_json(vector_store)
show_json(file_batch)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


{'id': 'vs_687dfb8140948191b5339bf141e90176',
 'created_at': 1753086849,
 'file_counts': {'cancelled': 0,
  'completed': 0,
  'failed': 0,
  'in_progress': 0,
  'total': 0},
 'last_active_at': 1753086849,
 'metadata': {},
 'name': 'Document Search',
 'object': 'vector_store',
 'status': 'completed',
 'usage_bytes': 0,
 'expires_after': None,
 'expires_at': None}

{'id': 'vsfb_ac96aa39e6204ef39230b75869452fa3',
 'created_at': 1753086852,
 'file_counts': {'cancelled': 0,
  'completed': 1,
  'failed': 0,
  'in_progress': 0,
  'total': 1},
 'object': 'vector_store.file_batch',
 'status': 'completed',
 'vector_store_id': 'vs_687dfb8140948191b5339bf141e90176'}

## **Update the assistant to use the new Vector Store**

In [11]:
# Point the assistant's file_search tool at the vector store created earlier
file_search_resources = {"file_search": {"vector_store_ids": [vector_store.id]}}

assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources=file_search_resources,
)

show_json(assistant)

{'id': 'asst_8iTCijvudm5Ks5nJdbQy76Bg',
 'created_at': 1753086848,
 'description': None,
 'instructions': 'You are an expert in document search. You are required to retrieve information relative to user query when you find it suitable.',
 'metadata': {},
 'model': 'gpt-3.5-turbo-1106',
 'name': 'Document Search Assistant',
 'object': 'assistant',
 'tools': [{'type': 'file_search',
   'file_search': {'max_num_results': None,
    'ranking_options': {'score_threshold': 0.0,
     'ranker': 'default_2024_08_21'}}}],
 'response_format': 'auto',
 'temperature': 1.0,
 'tool_resources': {'code_interpreter': None,
  'file_search': {'vector_store_ids': ['vs_687dfb8140948191b5339bf141e90176']}},
 'top_p': 1.0,
 'reasoning_effort': None}

## **Create a thread**

In [12]:
# Upload the user provided file to OpenAI.
# The context manager closes the local file handle as soon as the upload returns.
with open("zia_profile.pdf", "rb") as profile_pdf:
    message_file = client.files.create(file=profile_pdf, purpose="assistants")

# Create a thread and attach the file to the message
thread = client.beta.threads.create(
  messages=[
    {
      "role": "user",
      "content": "What are the qualifications of Zia Khan?",
      # Attach the new file to the message.
      "attachments": [
        { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
      ],
    }
  ]
)

# The thread now has a vector store with that file in its tool resources.
print(thread.tool_resources.file_search)

show_json(message_file)
show_json(thread)

ToolResourcesFileSearch(vector_store_ids=['vs_687dfb8815448191b4c3925c7ea9922b'])


{'id': 'file-MHqBp7qshRiaXStnsk72HP',
 'bytes': 48802,
 'created_at': 1753086854,
 'filename': 'zia_profile.pdf',
 'object': 'file',
 'purpose': 'assistants',
 'status': 'processed',
 'expires_at': None,
 'status_details': None}

{'id': 'thread_HvR08t9o45HYlKsLjFxyxgaS',
 'created_at': 1753086855,
 'metadata': {},
 'object': 'thread',
 'tool_resources': {'code_interpreter': None,
  'file_search': {'vector_store_ids': ['vs_687dfb8815448191b4c3925c7ea9922b']}}}

## **Create a run and check the output**

In [13]:
from typing_extensions import override
from openai import AssistantEventHandler, OpenAI

client = OpenAI()

class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints run progress and file citations.

    Uses the module-level ``client`` to look up the file name behind each
    file citation.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the assistant prefix when a text message is created."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Print the type of the tool call that was just created."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message with numbered citation markers.

        Each annotation's text is replaced by ``[index]``; for annotations
        carrying a ``file_citation`` the cited file name is listed below the
        message.
        """
        # Guard against a message with no content blocks, or whose first block
        # has no `text` attribute, instead of raising IndexError/AttributeError.
        first_block = message.content[0] if message.content else None
        message_content = getattr(first_block, "text", None)
        if message_content is None:
            return

        # Work on a local copy so the message object itself is not modified.
        rendered = message_content.value
        citations = []
        for index, annotation in enumerate(message_content.annotations):
            rendered = rendered.replace(annotation.text, f"[{index}]")
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(rendered)
        print("\n".join(citations))

# Create the Run through the stream SDK helper; the EventHandler
# instance receives the streamed events.
handler = EventHandler()

with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions=None,
    event_handler=handler,
) as stream:
    # Block until the stream has finished
    stream.until_done()


assistant > file_search


assistant > Zia Khan holds a Master's in Economics from Karachi, and triple master's degrees (MBA, MS, and MAC) from Arizona State University (ASU). He is also a Certified Public Accountant (CPA) and a Certified Management Accountant (CMA) in the USA  .



The following summarizes how file access is scoped, based on the [OpenAI Assistants documentation for File Search](https://platform.openai.com/docs/assistants/tools/file-search):

---

##### **File Access Scope in OpenAI Assistants**

##### 1. **Thread-Level File Uploads (User uploads during a thread)**

* **Access Scope**: **Private to that user and thread**.
* **Use Case**: When a user uploads a file during a conversation (thread), it is only available **within that thread** and **for that specific user**.
* **Example**: If User A uploads a PDF in their conversation, the assistant can access it **only in that thread** for **that user** — it’s **not shared** with other users or assistants.

##### 2. **Assistant-Level File Uploads**

* **Access Scope**: **Shared across all users** of that assistant.
* **Use Case**: Files uploaded and attached directly to the **assistant** (via the API or dashboard) are available to **every user** interacting with that assistant.
* **Example**: If you upload a CSV or knowledge base file to an assistant via the OpenAI dashboard or API, **any user** talking to that assistant can use that file via file search.

---

##### Summary Table

| Upload Method               | File Scope          | Accessible To                 |
| --------------------------- | ------------------- | ----------------------------- |
| User upload (in thread)     | **Thread-level**    | Only that user in that thread |
| Assistant file (via API/UI) | **Assistant-level** | All users of the assistant    |




# **2. Create a new Assistant with File Search Enabled**

##### **Important**: *When a user uploads a file at the thread level, it is accessible only to that particular user. When a file is uploaded at the assistant level, it is accessible to all users who can access that particular assistant.*

In [14]:
from openai import OpenAI

client = OpenAI()

# Adjacent string literals are joined without a separator, so the first
# sentence of the instructions carries its own trailing space.
assistant = client.beta.assistants.create(
  name="Financial Analyst Assistant",
  instructions="You are an expert financial analyst. Use your knowledge base to answer questions about audited financial statements. "
    "At the end, list the source file name(s) you used.",
  model="gpt-3.5-turbo-1106",
  # Enable the file_search tool for this assistant
  tools=[{"type": "file_search"}],
)

show_json(assistant)

{'id': 'asst_HuBIXlUPYmSwOYBdGWwYu96x',
 'created_at': 1753086871,
 'description': None,
 'instructions': 'You are an expert financial analyst. Use you knowledge base to answer questions about audited financial statements.At the end, list the source file name(s) you used.',
 'metadata': {},
 'model': 'gpt-3.5-turbo-1106',
 'name': 'Financial Analyst Assistant',
 'object': 'assistant',
 'tools': [{'type': 'file_search',
   'file_search': {'max_num_results': None,
    'ranking_options': {'score_threshold': 0.0,
     'ranker': 'default_2024_08_21'}}}],
 'response_format': 'auto',
 'temperature': 1.0,
 'tool_resources': {'code_interpreter': None,
  'file_search': {'vector_store_ids': []}},
 'top_p': 1.0,
 'reasoning_effort': None}

In [15]:
# Upload the user provided file to OpenAI (it will only be accessible to a particular user).
# The context manager closes the local file handle as soon as the upload returns.
with open("Blue-Finance.pdf", "rb") as finance_pdf:
    message_file = client.files.create(file=finance_pdf, purpose="assistants")

# Create a thread and attach the file to the message
thread = client.beta.threads.create(
  messages=[
    {
      "role": "user",
      "content": "Based on the document, the concept of blue finance gained momentum in tenure of which pakistani Prime minister and what is the GMP of pakistan?",
      # Attach the new file to the message.
      "attachments": [
        { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
      ],
    }
  ]
)

# The thread now has a vector store with that file in its tool resources.
print(thread.tool_resources.file_search)

show_json(message_file)
show_json(thread)

ToolResourcesFileSearch(vector_store_ids=['vs_687dfb9ab00c8191aeef44ee627071e3'])


{'id': 'file-9PRz4ptpPv6VSsAg5UzqRU',
 'bytes': 1453278,
 'created_at': 1753086873,
 'filename': 'Blue-Finance.pdf',
 'object': 'file',
 'purpose': 'assistants',
 'status': 'processed',
 'expires_at': None,
 'status_details': None}

{'id': 'thread_Axgfhq8AonevkxVtjYli9RsU',
 'created_at': 1753086874,
 'metadata': {},
 'object': 'thread',
 'tool_resources': {'code_interpreter': None,
  'file_search': {'vector_store_ids': ['vs_687dfb9ab00c8191aeef44ee627071e3']}}}

In [16]:
from typing_extensions import override
from openai import AssistantEventHandler, OpenAI

client = OpenAI()

class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints run progress and file citations.

    Uses the module-level ``client`` to look up the file name behind each
    file citation.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the assistant prefix when a text message is created."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Print the type of the tool call that was just created."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message with numbered citation markers.

        Each annotation's text is replaced by ``[index]``; for annotations
        carrying a ``file_citation`` the cited file name is listed below the
        message.
        """
        # Guard against a message with no content blocks, or whose first block
        # has no `text` attribute, instead of raising IndexError/AttributeError.
        first_block = message.content[0] if message.content else None
        message_content = getattr(first_block, "text", None)
        if message_content is None:
            return

        # Work on a local copy so the message object itself is not modified.
        rendered = message_content.value
        citations = []
        for index, annotation in enumerate(message_content.annotations):
            rendered = rendered.replace(annotation.text, f"[{index}]")
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(rendered)
        print("\n".join(citations))

# Create the Run through the stream SDK helper; the EventHandler
# instance receives the streamed events.
handler = EventHandler()

with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions=None,
    event_handler=handler,
) as stream:
    # Block until the stream has finished
    stream.until_done()


assistant > file_search


assistant > The concept of blue finance gained momentum in Pakistan during the tenure of the former Prime Minister, Imran Khan, who declared 2020 as the year of the blue economy in Pakistan  . 

As for the Gross Domestic Product (GDP) of Pakistan, the document mentions that the blue economy of Pakistan currently contributes an estimated US$ 1 billion or around 0.4% of the national GDP .

The information was found in the document "Policy Brief Blue Finance: What is it and why does it matter for Pakistan?" .

