# OpenAI Assistants Examples with Code Interpreter

Reference: https://platform.openai.com/docs/assistants/tools/code-interpreter

## Get Started with Requirements

In [None]:
import os
from typing_extensions import override

from openai import AssistantEventHandler, OpenAI
from openai import OpenAI


# Create the API client used by every cell below.
client = OpenAI()


# NOTE(review): this value is read but never passed to the client in this
# file; presumably the SDK picks the key up from the environment on its
# own — confirm before removing.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Upload a file with an "assistants" purpose.
# The CSV is opened in a `with` block so the local file handle is closed
# as soon as the upload call returns instead of being left open.
with open("data/titanic.csv", "rb") as csv_file:
    file = client.files.create(
        file=csv_file,
        purpose='assistants'
    )

## OpenAI Assistant Example With Threads and Streaming

In [None]:


# Step 1: Create an assistant
# Registers the Code Interpreter tool and lists the uploaded file's id
# under that tool's resources.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Titanic Analyst Assistant",
    tools=[{"type": "code_interpreter"}],
    tool_resources={"code_interpreter": {"file_ids": [file.id]}},
    instructions="""
    Configure the assistant to function as an interactive data analysis tool, utilizing Python code to process and analyze user-uploaded data. Below are the guidelines for setting up and directing the assistant:
    Python Code Execution: Enable the code interpreter functionality to allow execution of Python code for data analysis tasks.
    Data Handling Capabilities: Ensure the assistant can handle various data formats like CSV, Excel, and JSON. It should read, process, and perform operations on this data.
    User Queries Interpretation: Program the assistant to understand and respond to specific data-related inquiries, such as statistical analysis, data visualization, and predictive modeling.
    Response Formatting: Configure the assistant to format responses appropriately, including tables, charts, and descriptive summaries, based on the analysis performed.
    Security and Privacy: Implement measures to ensure data privacy and security, handling user data responsibly and allowing for data to be stored transiently during the session.
    Error Handling: Equip the assistant with robust error handling capabilities to manage and respond to errors in data processing or code execution effectively.
    User Guidance and Support: Provide built-in support and guidance for users to refine their data queries or address common issues encountered during data analysis.
    Enable the code interpreter to manage data analysis tasks. Here’s how to handle user requests for analyzing CSV files and creating visualizations:
    Data Loading: Configure the assistant to use pandas for reading CSV files into DataFrames. This will allow handling of data in a structured format.
    Histogram Generation: Implement functionality using matplotlib or seaborn to create histograms. Ensure the assistant can take parameters such as column names for which the histogram is needed and any other styling preferences.
    Security Considerations: Ensure data privacy by handling the user's data only within the session and not storing any uploaded data permanently.
    Output Display: Configure the system to display images (charts, plots) generated during the session directly to the user.
  """,
)

# Step 2: Create a thread
# The thread is seeded with one user message that attaches the uploaded
# file for the Code Interpreter tool.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": (
                "I need to know how many males and females purchased tickets "
                "on the Titanic. Can you help me?"
            ),
            "attachments": [
                {"file_id": file.id, "tools": [{"type": "code_interpreter"}]},
            ],
        },
    ],
)

# Step 3: Add a message to the thread
# Appends a second user message to the thread created above, then prints
# the object returned by the API.
message = client.beta.threads.messages.create(
    role="user",
    thread_id=thread.id,
    content=(
        "I need to see the relationship of male vs. female ticket holders "
        "for the titanic data set. Can you help me?"
    ),
)

print(message)

# Step 4: Create a run
# First, we create an EventHandler class to define
# how we want to handle the events in the response stream.
class EventHandler(AssistantEventHandler):
    """Print streamed assistant text and Code Interpreter activity to stdout."""

    @override
    def on_text_created(self, text) -> None:
        """Print the "assistant > " prefix when a new text block starts."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Print each incremental piece of text without a trailing newline."""
        print(delta.value, end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Print the type of the tool call the assistant has started."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_tool_call_delta(self, delta, snapshot) -> None:
        """Print Code Interpreter input and log output as it streams in.

        Deltas for other tool types are ignored.
        """
        if delta.type != 'code_interpreter':
            return

        # The payload is typed as optional in the SDK, so a delta may arrive
        # without it; skip such deltas instead of raising AttributeError.
        code_interpreter = delta.code_interpreter
        if code_interpreter is None:
            return

        if code_interpreter.input:
            print(code_interpreter.input, end="", flush=True)
        if code_interpreter.outputs:
            print("\n\noutput >", flush=True)
            for output in code_interpreter.outputs:
                # Only textual logs are printed; other output types are skipped.
                if output.type == "logs":
                    print(f"\n{output.logs}", flush=True)


# Then, we use the `stream` SDK helper with an `EventHandler` instance
# to create the Run and stream the response; `until_done()` is called
# on the resulting stream inside the `with` block.
with client.beta.threads.runs.stream(
    assistant_id=assistant.id,
    thread_id=thread.id,
    event_handler=EventHandler(),
    instructions=(
        "Please address the user as Jane Doe. "
        "The user has a premium account."
    ),
) as stream:
    stream.until_done()

## Without Streaming

In [None]:
# Step 1: Create an assistant
# Registers the Code Interpreter tool and lists the uploaded file's id
# under that tool's resources.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Titanic Analyst Assistant",
    tools=[{"type": "code_interpreter"}],
    tool_resources={"code_interpreter": {"file_ids": [file.id]}},
    instructions="""
    Configure the assistant to function as an interactive data analysis tool, utilizing Python code to process and analyze user-uploaded data. Below are the guidelines for setting up and directing the assistant:
    Python Code Execution: Enable the code interpreter functionality to allow execution of Python code for data analysis tasks.
    Data Handling Capabilities: Ensure the assistant can handle various data formats like CSV, Excel, and JSON. It should read, process, and perform operations on this data.
    User Queries Interpretation: Program the assistant to understand and respond to specific data-related inquiries, such as statistical analysis, data visualization, and predictive modeling.
    Response Formatting: Configure the assistant to format responses appropriately, including tables, charts, and descriptive summaries, based on the analysis performed.
    Security and Privacy: Implement measures to ensure data privacy and security, handling user data responsibly and allowing for data to be stored transiently during the session.
    Error Handling: Equip the assistant with robust error handling capabilities to manage and respond to errors in data processing or code execution effectively.
    User Guidance and Support: Provide built-in support and guidance for users to refine their data queries or address common issues encountered during data analysis.
    Enable the code interpreter to manage data analysis tasks. Here’s how to handle user requests for analyzing CSV files and creating visualizations:
    Data Loading: Configure the assistant to use pandas for reading CSV files into DataFrames. This will allow handling of data in a structured format.
    Histogram Generation: Implement functionality using matplotlib or seaborn to create histograms. Ensure the assistant can take parameters such as column names for which the histogram is needed and any other styling preferences.
    Security Considerations: Ensure data privacy by handling the user's data only within the session and not storing any uploaded data permanently.
    Output Display: Configure the system to display images (charts, plots) generated during the session directly to the user.
  """,
)

# Step 2: Create a thread
# The thread is seeded with one user message that attaches the uploaded
# file for the Code Interpreter tool.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": (
                "I need to know how many males and females purchased tickets "
                "on the Titanic. Can you help me?"
            ),
            "attachments": [
                {"file_id": file.id, "tools": [{"type": "code_interpreter"}]},
            ],
        },
    ],
)

# Step 3: Add a message to the thread
# Appends a second user message to the thread created above.
message = client.beta.threads.messages.create(
    role="user",
    thread_id=thread.id,
    content=(
        "I need to see the relationship of male vs. female ticket holders "
        "for the titanic data set. Can you help me?"
    ),
)

# Step 4: Create a run
# Uses the `create_and_poll` helper instead of streaming; the returned
# run's `status` is inspected by the code that follows.
run = client.beta.threads.runs.create_and_poll(
    assistant_id=assistant.id,
    thread_id=thread.id,
    instructions=(
        "Please address the user as Jane Doe. "
        "The user has a premium account."
    ),
)

# List the messages added to the Thread by the Assistant and save any
# images that Code Interpreter generated.
if run.status == 'completed':
    messages = client.beta.threads.messages.list(
        thread_id=thread.id
    )
    print(messages)

    # Messages are SDK model objects, not dicts, so fields are read with
    # attribute access. Images produced by Code Interpreter are returned as
    # `image_file` content blocks on the message, so those are what we scan
    # for (attachments on this thread are the files the user uploaded).
    saved_image_count = 0
    for thread_message in messages:
        for content_block in thread_message.content:
            if content_block.type != "image_file":
                continue

            generated_file_id = content_block.image_file.file_id
            # `files.content` downloads the file's bytes; `files.retrieve`
            # only returns the file's metadata and cannot be read.
            image_bytes = client.files.content(generated_file_id).read()

            # The first image keeps the original output path; later images
            # get a numeric suffix so they do not overwrite each other.
            saved_image_count += 1
            if saved_image_count == 1:
                output_path = "./titanic.png"
            else:
                output_path = f"./titanic_{saved_image_count}.png"

            with open(output_path, "wb") as f:
                f.write(image_bytes)
else:
    # The run did not complete (e.g. failed or still requires action);
    # show its status so the reader can see why nothing was saved.
    print(run.status)
