# Part 16

# Using Code Interpreter

Universal code for the entire notebook

In [None]:
# Uncomment the line below to make sure you have all the packages needed
# %pip install -r requirements.txt

In [22]:
# Standard library imports
import time  # Used for time-related functions
import threading  # Used for creating and managing threads
import os

# Third-party library imports
from openai import OpenAI  # Used for interacting with OpenAI's API
from openai import AssistantEventHandler  # Used for handling events related to OpenAI assistants
from typing_extensions import override  # Used for overriding methods in subclasses
from IPython.display import display, Markdown, clear_output  # Used for displaying content in Jupyter Notebooks

import base64  # Used for encoding and decoding binary data
import requests  # Used for making HTTP requests
import markdown2  # Used for converting Markdown to HTML
from IPython.display import display, HTML  # Used for displaying HTML content in Jupyter Notebooks


In [2]:
# Create the shared OpenAI client used by every cell below.
# No arguments are passed, so credentials must come from the environment;
# this assumes you have set the OPENAI_API_KEY environment variable.
client = OpenAI() 

In [3]:
# Create our custom event handler class that inherits from AssistantEventHandler for streaming assistant output.
class EventHandler(AssistantEventHandler):
    """Streaming handler that echoes assistant activity to stdout.

    Text objects, text deltas, code-interpreter input and code-interpreter
    log output are additionally collected, in arrival order, in ``self.results``.
    """

    def __init__(self):
        super().__init__()
        # Accumulates whatever the callbacks below choose to record.
        self.results = []

    @override
    def on_text_created(self, text) -> None:
        """Called when a new text block starts: print a label and record ``text``."""
        print("\nassistant text > ", end="", flush=True)
        # Note: the ``text`` object itself is stored, not a plain string.
        self.results.append(text)

    @override
    def on_text_delta(self, delta, snapshot):
        """Called for each partial piece of text: echo it and record it."""
        fragment = delta.value
        print(fragment, end="", flush=True)
        self.results.append(fragment)

    def on_tool_call_created(self, tool_call):
        """Called when a tool call starts: print which tool type is running."""
        print(f"\nassistant tool > {tool_call.type}\n", flush=True)

    def on_tool_call_delta(self, delta, snapshot):
        """Called for each tool-call update; only code interpreter deltas are handled."""
        if delta.type != 'code_interpreter':
            return

        interpreter = delta.code_interpreter

        # Source code the interpreter is about to run arrives piecewise.
        code_fragment = interpreter.input
        if code_fragment:
            print(code_fragment, end="", flush=True)
            self.results.append(code_fragment)

        produced = interpreter.outputs
        if not produced:
            return

        # The label is printed for any non-empty outputs list; only "logs"
        # entries are echoed and recorded.
        print("\n\noutput >", flush=True)
        for item in produced:
            if item.type == "logs":
                print(f"\n{item.logs}", flush=True)
                self.results.append(item.logs)


## Creating an Assistant with Code Interpreter Enabled

Our first step is to create an Assistant that can use Code Interpreter.

In [4]:
# Register an assistant that has the Code Interpreter tool switched on.
assistant = client.beta.assistants.create(
    name="Code Interpreter Assistant",  # Display name of the assistant.
    model="gpt-4o",  # Model that backs the assistant.
    # The whitespace is written out explicitly because it is part of the
    # instructions text that gets stored with the assistant.
    instructions=" \n        You are a helpful assistant.\n    ",
    tools=[{"type": "code_interpreter"}],  # Enables Code Interpreter.
    # Free-form string metadata describing what the assistant is for.
    metadata=dict(
        can_be_used_for_code_analysis="True",
        can_do_python="True",
    ),
    temperature=1,  # Sampling temperature.
    top_p=1,  # Nucleus-sampling cutoff.
)

# Inspect what the API returned: the whole object first, then two fields.
print(assistant)
print("\n\n")
print(assistant.name)
print(assistant.metadata)


Assistant(id='asst_o7FHJXlxiQfXhrCZnPn9gV20', created_at=1719389052, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=[]), file_search=None), top_p=1.0)



Code Interpreter Assistant
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


## Passing Files to Code Interpreter

There are a variety of ways to get files for Code Interpreter to use. 
- Assistant files - viewable by all runs that use the assistant.
- Thread files - only viewable by runs that use the thread. 

Let's review the code for the two main approaches.

### Getting Files to the Assistant

First, we need to upload a file so that we can pass it to our assistant.

In [5]:
# Upload a file with an "assistants" purpose.
# The `with` block closes the local file handle as soon as the upload call
# returns, instead of leaving it open for the lifetime of the kernel.
with open("./artifacts/penguins_size.csv", "rb") as penguins_csv:  # Binary read mode.
    assistant_file = client.files.create(
        file=penguins_csv,
        purpose='assistants'  # Specify the purpose of the file upload.
    )

# Print the details of the uploaded file to check its properties.
print(assistant_file)


FileObject(id='file-42G4LXGi4lK6qIWiWXzuTnQu', bytes=13519, created_at=1719389232, filename='penguins_size.csv', object='file', purpose='assistants', status='processed', status_details=None)


Next, we need to modify our Assistant with the new file information. 

In [6]:
# Attach the uploaded file to the existing assistant.
# Only `tool_resources` is sent; the returned object replaces the `assistant`
# variable so later cells see the attached file ID.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,  # Use the assistant's ID.
    tool_resources={
        "code_interpreter": {
            "file_ids": [assistant_file.id]  # Link the uploaded file to the code interpreter tool.
        }
    }
)

# Print the details of the updated assistant to check its properties.
print(assistant)


Assistant(id='asst_o7FHJXlxiQfXhrCZnPn9gV20', created_at=1719389052, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-42G4LXGi4lK6qIWiWXzuTnQu']), file_search=None), top_p=1.0)


Finally, let's run a message and see if it is working.

In [7]:
# Create a new assistant thread with an initial user message.
# No files are attached to the thread itself; the CSV is attached to the assistant.
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the file penguins_size.csv."
        },
    ]
)

# Stream the assistant's response to the thread.
# A fresh EventHandler instance is created for this run and is not kept afterwards,
# so its `results` list is only reachable while the stream is running.
with client.beta.threads.runs.stream(
    thread_id=assistant_thread.id,  # Use the thread's ID.
    assistant_id=assistant.id,  # Use the assistant's ID.
    # NOTE(review): presumably these run-level instructions take the place of the
    # assistant's stored instructions for this run - confirm against the API docs.
    instructions="""
    You are a helpful assistant.
    """,  # Provide instructions to the assistant.
    event_handler=EventHandler(),  # Use the custom event handler for processing events.
) as stream:
    stream.until_done()  # Continue streaming until the assistant has finished responding.



assistant tool > code_interpreter

import pandas as pd

# Load the CSV file
file_path = '/mnt/data/file-42G4LXGi4lK6qIWiWXzuTnQu'
penguins_data = pd.read_csv(file_path)

# Display first few rows and summary information
penguins_data_summary = penguins_data.describe(include='all')
penguins_data_head = penguins_data.head()

penguins_data_summary, penguins_data_head
assistant text > The data file `penguins_size.csv` contains measurements of penguins. Below is a summary of the data and a snapshot of the first few rows:

### Summary Statistics:

| Column             | Count | Unique | Top    | Freq | Mean      | Std Dev   | Min      | 25%      | 50%      | 75%      | Max      |
|--------------------|-------|--------|--------|------|-----------|-----------|----------|----------|----------|----------|----------|
| species            | 344   | 3      | Adelie | 152  |           |           |          |          |          |          |          |
| island             | 344   | 3      | Biscoe 

## Formatting the Output
What if we want to format the markdown output? There are two ways to do it. The "easy" way is to just let the output render without streaming and format it afterward.

In [8]:
# Create a new assistant thread with an initial user message.
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the file penguins_size.csv."
        },
    ]
)

# Create a run and block until it reaches a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=assistant_thread.id,  # Specify the thread ID.
    assistant_id=assistant.id  # Specify the assistant ID.
)

# Retrieve the messages that this particular run added to the thread.
messages = list(client.beta.threads.messages.list(
    thread_id=assistant_thread.id,  # Specify the thread ID.
    run_id=run.id  # Specify the run ID.
))

# Pick the first *text* block. A run can also return image blocks (or nothing
# at all if it did not complete), so blindly indexing [0][0] is not safe.
message_content = next(
    (block.text for message in messages for block in message.content if block.type == "text"),
    None,
)
citations = []  # "[index] filename" footnotes gathered from the annotations.

if message_content is None:
    print(f"The run returned no text content (run status: {run.status}).")
else:
    annotations = message_content.annotations
    # Work on a copy of the text so the message object from the API stays untouched
    # and re-running the formatting below is repeatable.
    rendered_text = message_content.value

    for index, annotation in enumerate(annotations):
        # Replace the annotated span with an indexed reference.
        rendered_text = rendered_text.replace(annotation.text, f"[{index}]")
        # Annotations reference a file either as a citation or as a generated
        # file path; both carry a file_id.
        file_reference = (
            getattr(annotation, "file_citation", None)
            or getattr(annotation, "file_path", None)
        )
        if file_reference:
            cited_file = client.files.retrieve(file_reference.file_id)
            citations.append(f"[{index}] {cited_file.filename}")

    # Show the footnotes under the text; previously they were collected but never displayed.
    if citations:
        rendered_text += "\n\n" + "  \n".join(citations)

    # Display the processed message content with indexed references using Markdown.
    display(Markdown(rendered_text))


The file `penguins_size.csv` contains information about penguins and has the following characteristics:

- **Number of Entries:** 344
- **Number of Columns:** 7

### Columns and their Details:
1. **species**: A categorical column indicating the species of the penguin. This column has no missing values.
2. **island**: A categorical column indicating the island where the penguin was found. This column also has no missing values.
3. **culmen_length_mm**: A numerical column representing the culmen (bill) length of the penguin in millimeters. It has 342 non-null values (2 missing values).
4. **culmen_depth_mm**: A numerical column representing the culmen (bill) depth of the penguin in millimeters. It has 342 non-null values (2 missing values).
5. **flipper_length_mm**: A numerical column representing the flipper length of the penguin in millimeters. It has 342 non-null values (2 missing values).
6. **body_mass_g**: A numerical column representing the body mass of the penguin in grams. It has 342 non-null values (2 missing values).
7. **sex**: A categorical column indicating the sex of the penguin. This column has 334 non-null values (10 missing values).

### Summary:
The dataset contains information on different physical attributes of penguins such as culmen length, culmen depth, flipper length, body mass, along with the species, island, and sex of the penguins. There are some missing values in the numerical columns (culmen length, culmen depth, flipper length, and body mass) and the categorical column for sex.

The "hard" (but more user-friendly) way is to stream the output and update the display while streaming, so that the formatted text appears as it arrives. This is what ChatGPT does when you use it. It requires modifying our event handler to be more streamlined, to format the output, and to update our display.

In [9]:
class EventHandler(AssistantEventHandler):
    """Event handler that re-renders the streamed text as Markdown on every delta.

    This definition reuses the name ``EventHandler``, so once this cell runs it
    replaces the print-based handler defined earlier in the notebook. Only text
    deltas are handled here.
    """

    def __init__(self):
        super().__init__()
        self.results = []  # Text fragments received so far, in arrival order.

    @override
    def on_text_delta(self, delta, snapshot):
        """Record a partial piece of text and refresh the rendered output.

        Deltas without text are skipped: ``delta.value`` may be ``None``, and a
        ``None`` entry in ``self.results`` would make the ``"".join`` in
        ``update_output`` raise ``TypeError``.
        """
        if not delta.value:
            return
        self.results.append(delta.value)
        self.update_output()

    def update_output(self):
        """Replace the cell output with the Markdown rendering of all text so far."""
        # wait=True delays clearing until new output is ready, which avoids flicker.
        clear_output(wait=True)
        markdown_content = "".join(self.results)
        display(Markdown(markdown_content))

# Create a new assistant thread with an initial user message.
# The prompt explicitly asks for a table so the Markdown rendering has something to show.
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the file penguins_size.csv. With at least one small table of data. Make the information well formatted and easy to read."
        },
    ]
)

# Stream the assistant's response to the thread.
# EventHandler here is the Markdown-rendering class defined at the top of this cell.
with client.beta.threads.runs.stream(
    thread_id=assistant_thread.id,  # Specify the thread ID.
    assistant_id=assistant.id,  # Specify the assistant ID.
    instructions="""
    You are a helpful assistant.
    """,  # Provide instructions to the assistant.
    event_handler=EventHandler(),  # Use the custom event handler for processing events.
) as stream:
    stream.until_done()  # Continue streaming until the assistant has finished responding.


### Summary of the `penguins_size.csv` File

The dataset consists of measurements for 344 penguins with 7 columns. The columns and their data types are as follows:

1. **species**: The species of the penguin (344 non-null, object).
2. **island**: The island where the penguin was observed (344 non-null, object).
3. **culmen_length_mm**: The culmen length in millimeters (342 non-null, float64).
4. **culmen_depth_mm**: The culmen depth in millimeters (342 non-null, float64).
5. **flipper_length_mm**: The flipper length in millimeters (342 non-null, float64).
6. **body_mass_g**: The body mass in grams (342 non-null, float64).
7. **sex**: The sex of the penguin (334 non-null, object).

### Sample Data

| species | island    | culmen_length_mm | culmen_depth_mm | flipper_length_mm | body_mass_g | sex   |
|---------|-----------|------------------|-----------------|-------------------|-------------|-------|
| Adelie  | Torgersen | 39.1             | 18.7            | 181.0             | 3750.0      | MALE  |
| Adelie  | Torgersen | 39.5             | 17.4            | 186.0             | 3800.0      | FEMALE|
| Adelie  | Torgersen | 40.3             | 18.0            | 195.0             | 3250.0      | FEMALE|
| Adelie  | Torgersen | NaN              | NaN             | NaN               | NaN         | NaN   |
| Adelie  | Torgersen | 36.7             | 19.3            | 193.0             | 3450.0      | FEMALE|

This table presents a sample of the first 5 rows in the dataset, displaying various attributes of the penguins.

- Note that there are some missing values in the dataset, particularly in the `culmen_length_mm`, `culmen_depth_mm`, `flipper_length_mm`, `body_mass_g`, and `sex` columns.

### Getting Files to the Thread

First, we need a file uploaded.


In [10]:
# Upload a file with an "assistants" purpose.
# The `with` block closes the local file handle as soon as the upload call
# returns, instead of leaving it open for the lifetime of the kernel.
with open("./artifacts/daily-bike-share.csv", "rb") as bike_share_csv:  # Binary read mode.
    thread_file = client.files.create(
        file=bike_share_csv,
        purpose='assistants'  # Specify the purpose of the file upload.
    )

# Print the details of the uploaded file to check its properties.
print(thread_file)


FileObject(id='file-HqNa4YHRQP9RwRUahMH9jiLa', bytes=43599, created_at=1719389889, filename='daily-bike-share.csv', object='file', purpose='assistants', status='processed', status_details=None)


Second, we need a thread to attach the file to.

In [11]:
# Create a new thread with an initial user message requesting a summary of the file.
# The thread is created without any tool resources; the file is attached in the next step.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the daily-bike-share.csv file."
        },
    ]
)

# Print the details of the created thread to check its properties.
print(thread)


Thread(id='thread_DrVnZi2AmQxpjkwZr8KPsenE', created_at=1719389910, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


Third, we can update the thread with the file information

In [12]:
# Update the existing thread to add tool resources, specifically linking the uploaded file.
# The result is stored under a new name, so `thread` still holds the object
# returned at creation time.
updated_thread = client.beta.threads.update(
    thread_id=thread.id,  # Use the ID of the existing thread.
    tool_resources={
        "code_interpreter": {
            "file_ids": [thread_file.id]  # Link the uploaded file to the code interpreter tool.
        }
    }
)

# Print the details of the updated thread to check its properties.
print(updated_thread)


Thread(id='thread_DrVnZi2AmQxpjkwZr8KPsenE', created_at=1719389910, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-HqNa4YHRQP9RwRUahMH9jiLa']), file_search=None))


Finally, let's run it against a new assistant and see the results.

In [13]:
# Register a second Code Interpreter assistant; no files are attached to it here.
thread_assistant = client.beta.assistants.create(
    name="Code Interpreter Assistant Using Thread Data",  # Display name of the assistant.
    model="gpt-4o",  # Model that backs the assistant.
    # The whitespace is written out explicitly because it is part of the
    # instructions text that gets stored with the assistant.
    instructions=" \n        You are a helpful assistant.\n    ",
    tools=[{"type": "code_interpreter"}],  # Enables Code Interpreter.
    # Free-form string metadata describing what the assistant is for.
    metadata=dict(
        can_be_used_for_code_analysis="True",
        can_do_python="True",
    ),
    temperature=1,  # Sampling temperature.
    top_p=1,  # Nucleus-sampling cutoff.
)

# Inspect what the API returned: the whole object first, then two fields.
print(thread_assistant)
print("\n\n")
print(thread_assistant.name)
print(thread_assistant.metadata)


Assistant(id='asst_DnLi3WbeKVR4qU9UaE33WpWR', created_at=1719389985, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant Using Thread Data', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=[]), file_search=None), top_p=1.0)



Code Interpreter Assistant Using Thread Data
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


In [14]:
# Stream the output from the assistant.
# `thread_assistant` was created without files, so the bike-share CSV is available
# to this run only through the thread's tool resources.
with client.beta.threads.runs.stream(
    thread_id=updated_thread.id,  # Use the ID of the updated thread.
    assistant_id=thread_assistant.id,  # Use the ID of the newly created assistant.
    event_handler=EventHandler(),  # Use the custom event handler to process events.
) as stream:
    stream.until_done()  # Continue streaming until the assistant has finished responding.


The dataset `daily-bike-share.csv` contains information about daily bike rentals. Here are the summaries for each column:

1. **day**:
   - **Count**: 731
   - **Mean**: 15.74
   - **Standard Deviation**: 8.81
   - **Min**: 1
   - **25%**: 8
   - **50% (Median)**: 16
   - **75%**: 23
   - **Max**: 31

2. **mnth** (Month):
   - **Count**: 731
   - **Mean**: 6.52
   - **Standard Deviation**: 3.45
   - **Min**: 1
   - **25%**: 4
   - **50% (Median)**: 7
   - **75%**: 10
   - **Max**: 12

3. **year**:
   - **Count**: 731
   - **Mean**: 2011.50
   - **Standard Deviation**: 0.50
   - **Min**: 2011
   - **25%**: 2011
   - **50% (Median)**: 2012
   - **75%**: 2012
   - **Max**: 2012

4. **season**:
   - **Count**: 731
   - **Mean**: 2.50
   - **Standard Deviation**: 1.11
   - **Min**: 1
   - **25%**: 2
   - **50% (Median)**: 3
   - **75%**: 3
   - **Max**: 4

5. **holiday** (Is it a holiday?):
   - **Count**: 731
   - **Mean**: 0.03
   - **Standard Deviation**: 0.17
   - **Min**: 0
   - **25%**: 0
   - **50% (Median)**: 0
   - **75%**: 0
   - **Max**: 1

6. **weekday** (Day of the week):
   - **Count**: 731
   - **Mean**: 2.99
   - **Standard Deviation**: 2.00
   - **Min**: 0
   - **25%**: 1
   - **50% (Median)**: 3
   - **75%**: 5
   - **Max**: 6

7. **workingday** (Is it a working day?):
   - **Count**: 731
   - **Mean**: 0.68
   - **Standard Deviation**: 0.47
   - **Min**: 0
   - **25%**: 0
   - **50% (Median)**: 1
   - **75%**: 1
   - **Max**: 1

8. **weathersit** (Weather situation):
   - **Count**: 731
   - **Mean**: 1.40
   - **Standard Deviation**: 0.54
   - **Min**: 1
   - **25%**: 1
   - **50% (Median)**: 1
   - **75%**: 2
   - **Max**: 3

9. **temp** (Normalized temperature in Celsius):
   - **Count**: 731
   - **Mean**: 0.50
   - **Standard Deviation**: 0.18
   - **Min**: 0.06
   - **25%**: 0.34
   - **50% (Median)**: 0.50
   - **75%**: 0.66
   - **Max**: 0.86

10. **atemp** (Normalized feeling temperature in Celsius):
    - **Count**: 731
    - **Mean**: 0.47
    - **Standard Deviation**: 0.16
    - **Min**: 0.08
    - **25%**: 0.34
    - **50% (Median)**: 0.49
    - **75%**: 0.61
    - **Max**: 0.84

11. **hum** (Normalized humidity):
    - **Count**: 731
    - **Mean**: 0.63
    - **Standard Deviation**: 0.14
    - **Min**: 0.00
    - **25%**: 0.52
    - **50% (Median)**: 0.63
    - **75%**: 0.73
    - **Max**: 0.97

12. **windspeed** (Normalized wind speed):
    - **Count**: 731
    - **Mean**: 0.19
    - **Standard Deviation**: 0.08
    - **Min**: 0.02
    - **25%**: 0.13
    - **50% (Median)**: 0.18
    - **75%**: 0.23
    - **Max**: 0.51

13. **rentals** (Total bike rentals):
    - **Count**: 731
    - **Mean**: 848.18
    - **Standard Deviation**: 686.62
    - **Min**: 2
    - **25%**: 315.50
    - **50% (Median)**: 713
    - **75%**: 1096
    - **Max**: 3410


### Text Results from files in Assistants and Threads

Let's see what happens if we use an assistant with a file and a thread with a file together.

In [15]:
# Create a new thread with an initial user message requesting a summary of two files.
# NOTE(review): the prompt says "penguins-size.csv" (hyphen) while the uploaded
# file is named "penguins_size.csv" (underscore) - confirm whether this is intended.
super_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": (
                "Give me a summary of the penguins-size.csv and daily-bike-share.csv files. "
                "Make the information well formatted and easy to read."
            )
        },
    ]
)

# Print the details of the created thread to check its properties.
print(super_thread)


Thread(id='thread_aMEi8kRdv4QTBvNkoOKHQZSc', created_at=1719390055, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


In [16]:
# Update the newly created thread to add tool resources, specifically linking the uploaded file.
# This reuses `thread_file` (the bike-share upload), so the same file ID ends up
# attached to two different threads.
super_updated_thread = client.beta.threads.update(
    thread_id=super_thread.id,  # Use the ID of the newly created thread.
    tool_resources={
        "code_interpreter": {
            "file_ids": [thread_file.id]  # Link the uploaded file to the code interpreter tool.
        }
    }
)

# Print the details of the updated thread to check its properties.
print(super_updated_thread)


Thread(id='thread_aMEi8kRdv4QTBvNkoOKHQZSc', created_at=1719390055, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-HqNa4YHRQP9RwRUahMH9jiLa']), file_search=None))


In [17]:
# Stream the output from the assistant.
# This run pairs `assistant` (which has the penguins CSV attached) with a thread
# that has the bike-share CSV attached, so both files are in play.
with client.beta.threads.runs.stream(
    thread_id=super_updated_thread.id,  # Use the ID of the updated thread.
    assistant_id=assistant.id,  # Use the ID of the assistant.
    event_handler=EventHandler(),  # Use the custom event handler to process events.
) as stream:
    stream.until_done()  # Continue streaming until the assistant has finished responding.


### Summary of `penguins-size.csv`

- **Number of Rows**: 344
- **Number of Missing Values**: 18
- **Columns and Data Types**:
  - `species`: Object (Categorical)
  - `island`: Object (Categorical)
  - `culmen_length_mm`: Float64
  - `culmen_depth_mm`: Float64
  - `flipper_length_mm`: Float64
  - `body_mass_g`: Float64
  - `sex`: Object (Categorical)

#### Summary Statistics:
- **species**:
  - Unique: 3
  - Most common: Adelie (152 times)
- **island**:
  - Unique: 3
  - Most common: Biscoe (168 times)
- **culmen_length_mm**:
  - Count: 342
  - Mean: 43.92 mm
  - Std Dev: 5.46 mm
  - Min: 32.1 mm
  - 25%: 39.23 mm
  - 50%: 44.45 mm
  - 75%: 48.5 mm
  - Max: 59.6 mm
- **culmen_depth_mm**:
  - Count: 342
  - Mean: 17.15 mm
  - Std Dev: 1.97 mm
  - Min: 13.1 mm
  - 25%: 15.6 mm
  - 50%: 17.3 mm
  - 75%: 18.7 mm
  - Max: 21.5 mm
- **flipper_length_mm**:
  - Count: 342
  - Mean: 200.92 mm
  - Std Dev: 14.06 mm
  - Min: 172.0 mm
  - 25%: 190.0 mm
  - 50%: 197.0 mm
  - 75%: 213.0 mm
  - Max: 231.0 mm
- **body_mass_g**:
  - Count: 342
  - Mean: 4201.75 g
  - Std Dev: 801.95 g
  - Min: 2700.0 g
  - 25%: 3550.0 g
  - 50%: 4050.0 g
  - 75%: 4750.0 g
  - Max: 6300.0 g
- **sex**:
  - Unique: 3
  - Most common: MALE (168 times)

---

### Summary of `daily-bike-share.csv`

- **Number of Rows**: 731
- **Number of Missing Values**: 0
- **Columns and Data Types**:
  - `day`: int64
  - `mnth`: int64
  - `year`: int64
  - `season`: int64
  - `holiday`: int64
  - `weekday`: int64
  - `workingday`: int64
  - `weathersit`: int64
  - `temp`: float64
  - `atemp`: float64
  - `hum`: float64
  - `windspeed`: float64
  - `rentals`: int64

#### Summary Statistics:
- **day**:
  - Count: 731
  - Mean: 15.73
  - Std Dev: 8.81
  - Min: 1
  - 25%: 8
  - 50%: 16
  - 75%: 23
  - Max: 31
- **mnth**:
  - Count: 731
  - Mean: 6.53
  - Std Dev: 3.44
  - Min: 1
  - 25%: 4
  - 50%: 7
  - 75%: 10
  - Max: 12
- **year**:
  - Count: 731
  - Mean: 2011.50
  - Std Dev: 0.50
  - Min: 2011
  - 25%: 2011
  - 50%: 2012
  - 75%: 2012
  - Max: 2012
- **season**:
  - Count: 731
  - Mean: 2.50
  - Std Dev: 1.11
  - Min: 1
  - 25%: 2
  - 50%: 3
  - 75%: 3
  - Max: 4
- **holiday**:
  - Count: 731
  - Mean: 0.03
  - Std Dev: 0.17
  - Min: 0
  - 25%: 0
  - 50%: 0
  - 75%: 0
  - Max: 1
- **weekday**:
  - Count: 731
  - Mean: 3.00
  - Std Dev: 2.00
  - Min: 0
  - 25%: 1
  - 50%: 3
  - 75%: 5
  - Max: 6
- **workingday**:
  - Count: 731
  - Mean: 0.68
  - Std Dev: 0.47
  - Min: 0
  - 25%: 0
  - 50%: 1
  - 75%: 1
  - Max: 1
- **weathersit**:
  - Count: 731
  - Mean: 1.40
  - Std Dev: 0.54
  - Min: 1
  - 25%: 1
  - 50%: 1
  - 75%: 2
  - Max: 3
- **temp**:
  - Count: 731
  - Mean: 0.50
  - Std Dev: 0.18
  - Min: 0.06
  - 25%: 0.34
  - 50%: 0.50
  - 75%: 0.66
  - Max: 0.86
- **atemp**:
  - Count: 731
  - Mean: 0.47
  - Std Dev: 0.16
  - Min: 0.08
  - 25%: 0.34
  - 50%: 0.49
  - 75%: 0.61
  - Max: 0.84
- **hum**:
  - Count: 731
  - Mean: 0.63
  - Std Dev: 0.14
  - Min: 0.00
  - 25%: 0.52
  - 50%: 0.63
  - 75%: 0.73
  - Max: 0.97
- **windspeed**:
  - Count: 731
  - Mean: 0.19
  - Std Dev: 0.08
  - Min: 0.02
  - 25%: 0.13
  - 50%: 0.18
  - 75%: 0.23
  - Max: 0.51
- **rentals**:
  - Count: 731
  - Mean: 848.18
  - Std Dev: 686.62
  - Min: 2
  - 25%: 315.5
  - 50%: 713
  - 75%: 1096
  - Max: 3410

### Full Results Output

Code Interpreter may also produce images as part of its output. Handling them can be tricky, and getting the text and images into the right sequence is pretty difficult. Here is some sample code that will help, but I don't pretend to be good at the interface stuff. :)

In [26]:
import os
import requests
import tkinter as tk
from tkinter import filedialog

# Function to download a file and prompt user to select a save location
def download_file(file_id, filename, timeout=60):
    """Download an OpenAI file and save it where the user chooses.

    Args:
        file_id: ID of the file to fetch from the OpenAI files endpoint.
        filename: Suggested file name; any directory part (for example a
            "/mnt/data/" prefix) is stripped before it is offered to the user.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the cell forever.

    Returns:
        None. The outcome is reported with a printed message.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection error or timeout).
    """
    # Strip the "/mnt/data/" prefix if present
    filename = os.path.basename(filename)

    url = f"https://api.openai.com/v1/files/{file_id}/content"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {filename}: {response.status_code}")
        return

    # Prompt user to select save location
    root = tk.Tk()
    try:
        root.withdraw()  # Hide the root window
        save_path = filedialog.asksaveasfilename(initialfile=filename, title="Save File As")
    finally:
        # Release the hidden Tk window; otherwise one leaks per download.
        root.destroy()

    if save_path:
        with open(save_path, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded {filename} to {save_path}")
    else:
        print("Download canceled.")

# Create a thread to send a message and get output
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a one paragraph summary of the file penguins_size.csv. With at least one small table of data and one visualization. Put the result into a well formatted word document."
        },
    ]
)

# Run the assistant and block until the run reaches a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=assistant_thread.id, assistant_id=assistant.id
)

# Ask for oldest-first ordering so that, when the run produced several messages,
# the content blocks are assembled in the order they were written.
messages = list(client.beta.threads.messages.list(
    thread_id=assistant_thread.id, run_id=run.id, order="asc"
))

content_blocks = []  # HTML fragments, in display order.

# Process each message
for message in messages:
    # Process each content block in the message
    for content_block in message.content:
        if content_block.type == 'text':
            text_content = content_block.text
            annotations = text_content.annotations
            citations = []
            # Work on a copy so the message object returned by the API is not modified.
            rendered_text = text_content.value
            for index, annotation in enumerate(annotations):
                # Replace the text with a footnote
                rendered_text = rendered_text.replace(annotation.text, f' [{index}]')
                # Gather citations based on annotation attributes
                if hasattr(annotation, 'file_citation'):
                    file_citation = annotation.file_citation
                    cited_file = client.files.retrieve(file_citation.file_id)
                    citations.append(f'[{index}] {file_citation.quote} from {cited_file.filename}')
                    # Automatically download the file
                    download_file(cited_file.id, cited_file.filename)
                elif hasattr(annotation, 'file_path'):
                    file_path = annotation.file_path
                    cited_file = client.files.retrieve(file_path.file_id)
                    citations.append(f'[{index}] {cited_file.filename}')
                    # Automatically download the file
                    download_file(cited_file.id, cited_file.filename)
            # Add footnotes to the end of the message before displaying to user.
            # A blank line starts a new paragraph and the two trailing spaces force
            # a line break, so each footnote is rendered on its own line.
            if citations:
                rendered_text += '\n\n' + '  \n'.join(citations)
            # Convert Markdown to HTML and append to content_blocks
            content_blocks.append(markdown2.markdown(rendered_text, extras=["tables"]))

        elif content_block.type == 'image_file':
            # The content endpoint only needs the file ID, so no metadata lookup is required.
            image_content = client.files.content(content_block.image_file.file_id).content
            image_base64 = base64.b64encode(image_content).decode('utf-8')
            # Only the width is fixed so the browser keeps the image's aspect ratio.
            content_blocks.append(f'<img src="data:image/png;base64,{image_base64}" width="700"><br>')

# Join all content blocks into a single HTML string
html_content = ''.join(content_blocks)

# Display the combined content
display(HTML(html_content))


Downloaded Penguins_Size_Summary.docx to E:/Downloads/Penguins_Size_Summary.docx


species,island,culmenlengthmm,culmendepthmm,flipperlengthmm,bodymassg,sex
Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
Adelie,Torgersen,,,,,
Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


### Streaming Formatted Output
I tried everything I could think of to get the images to stream inline with the text, but it appears that doing so with Jupyter Notebook cells is very difficult. I finally had to throw in the towel. I did post to the forums to see if anyone had an answer but, apparently, no one did. You can see if someone finally answered here: https://community.openai.com/t/streaming-markdown-text-and-images-from-assistant-using-code-interpreter/823042/9
<br>
<br>
