In [1]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os
import json
import time
from IPython.display import clear_output

## Create the assistant

In [2]:
# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# Read the connection settings first, then build the client from them.
azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

client = AzureOpenAI(
    api_key=azure_api_key,
    api_version="2024-05-01-preview",
    azure_endpoint=azure_endpoint,
)
client

<openai.lib.azure.AzureOpenAI at 0x7fd029067d00>

In [5]:
# Describe the assistant up front, then create it in a single call.
# The model/deployment name is read from the AZURE_OPENAI_MODEL environment
# variable, and file_search is the only tool enabled.
assistant_settings = {
    "name": "Financial Analyst Assistant",
    "instructions": "You are an expert financial analyst. Use your knowledge base to answer questions about audited financial statements.",
    "model": os.getenv("AZURE_OPENAI_MODEL"),
    "tools": [{"type": "file_search"}],
}
assistant = client.beta.assistants.create(**assistant_settings)
assistant

Assistant(id='asst_ljAyFQXl1LLgYIn0LhvAqWC7', created_at=1725632638, description=None, instructions='You are an expert financial analyst. Use your knowledge base to answer questions about audited financial statements.', metadata={}, model='gpt4', name='Financial Analyst Assistant', object='assistant', tools=[FileSearchTool(type='file_search', file_search=None)], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=ToolResourcesFileSearch(vector_store_ids=[])), top_p=1.0)

In [8]:
# Create a vector store called "Financial Statements"
vector_store = client.beta.vector_stores.create(name="Financial Statements")

# Files to upload into the vector store.
# NOTE(review): these are absolute paths on one machine — consider a configurable data directory.
file_paths = ["/home/azureuser/cloudfiles/code/Users/paolt/okr_spike/bert_paper.pdf", "/home/azureuser/cloudfiles/code/Users/paolt/okr_spike/gpt_2_paper.pdf"]

# Open the files one by one so that, if any open() or the upload itself fails,
# every handle opened so far is still closed in the `finally` clause.
file_streams = []
try:
    for path in file_paths:
        file_streams.append(open(path, "rb"))

    # Use the upload and poll SDK helper to upload the files, add them to the vector store,
    # and poll the status of the file batch for completion.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    # The original cell never closed these handles; release them explicitly.
    for stream in file_streams:
        stream.close()

# Print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=2, failed=0, in_progress=0, total=2)


In [10]:
# Attach the vector store to the assistant's file_search tool resources.
file_search_resources = {"file_search": {"vector_store_ids": [vector_store.id]}}

assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources=file_search_resources,
)
assistant

Assistant(id='asst_ljAyFQXl1LLgYIn0LhvAqWC7', created_at=1725632638, description=None, instructions='You are an expert financial analyst. Use your knowledge base to answer questions about audited financial statements.', metadata={}, model='gpt4', name='Financial Analyst Assistant', object='assistant', tools=[FileSearchTool(type='file_search', file_search=None)], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=ToolResourcesFileSearch(vector_store_ids=['vs_TQVndlpf3E3hbBjMiGewzhuw'])), top_p=1.0)

## Chat with it

In [16]:
# Open a new conversation thread and print the object that comes back.
threads_api = client.beta.threads
thread = threads_api.create()
print(thread)

Thread(id='thread_ABu9VjDWROiJnpHL6ljFGwx1', created_at=1725633275, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


In [17]:
# Add the user's question to the thread and print the stored message.
user_question = "what is gpt2?"

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=user_question,
)
print(message)

Message(id='msg_WFyeyxSEFN3guVtcSXwb5Gmm', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='what is gpt2?'), type='text')], created_at=1725633277, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_ABu9VjDWROiJnpHL6ljFGwx1')


In [18]:
# List the messages on the thread and print the listing as indented JSON.
thread_messages = client.beta.threads.messages.list(thread.id)
thread_messages_json = thread_messages.model_dump_json(indent=2)
print(thread_messages_json)

{
  "data": [
    {
      "id": "msg_WFyeyxSEFN3guVtcSXwb5Gmm",
      "assistant_id": null,
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "what is gpt2?"
          },
          "type": "text"
        }
      ],
      "created_at": 1725633277,
      "incomplete_at": null,
      "incomplete_details": null,
      "metadata": {},
      "object": "thread.message",
      "role": "user",
      "run_id": null,
      "status": null,
      "thread_id": "thread_ABu9VjDWROiJnpHL6ljFGwx1"
    }
  ],
  "object": "list",
  "first_id": "msg_WFyeyxSEFN3guVtcSXwb5Gmm",
  "last_id": "msg_WFyeyxSEFN3guVtcSXwb5Gmm",
  "has_more": false
}


In [19]:
# Create a run that pairs this thread with the assistant.
run_request = {"thread_id": thread.id, "assistant_id": assistant.id}
run = client.beta.threads.runs.create(**run_request)

In [40]:
# Poll the run every 5 seconds until it reaches a terminal status.
# "incomplete" is also a terminal run status in the Assistants API; without it
# in the list a run that ends that way would be polled forever.
# NOTE(review): "requires_action" is not handled — fine while file_search is the
# only tool, but function tools would need it.
terminal_statuses = ["completed", "cancelled", "expired", "failed", "incomplete"]
status = run.status

while status not in terminal_statuses:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(thread_id=thread.id,run_id=run.id)
    # Refresh the loop variable from the freshly retrieved run. The original
    # never reassigned `status`, so the loop could not exit and printed a stale value.
    status = run.status
    print(f'Status: {status}')

Status: completed
Elapsed time: 0 minutes 0 seconds


In [45]:
# Fetch the thread's messages and pull out the assistant's reply text.
messages = client.beta.threads.messages.list(thread_id=thread.id)
data = json.loads(messages.model_dump_json(indent=2))

# The listing is newest-first by default, so the first assistant message is the
# latest reply. Filtering on role avoids printing the user's own question when
# the run produced no reply, and filtering on type skips non-text content parts.
answer = next(
    (
        part['text']['value']
        for msg in data['data']
        if msg['role'] == 'assistant'
        for part in msg['content']
        if part['type'] == 'text'
    ),
    None,
)

if answer is None:
    print("No assistant text reply found on this thread.")
else:
    print(answer)

'GPT-2 stands for "Generative Pretrained Transformer 2". It is a large-scale transformer-based language model developed by OpenAI. GPT-2 was trained on a variety of internet texts, and has the ability to generate coherent and contextually relevant sentences by predicting subsequent words within a given text. However, it\'s worth mentioning that your question does not seem related to audited financial statements. Can I assist with any queries about financial statements specifically?'