# OpenAI API: Code Interpreter and Advanced Data Analysis
## 03_02 - Review and optimize Python code

### Install the necessary libraries. 

In [None]:
pip install openai

In [None]:
pip install openai[datalib]

In [None]:
pip install urllib3==1.26.6 

In [None]:
pip install python-dotenv

### Import the libraries and the environment file to gain access to the OpenAI API key
#### The key can be generated here: https://platform.openai.com/account/api-keys

In [46]:
#import OS, OpenAI, and time modules
import os
import time

import openai
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

### Authenticate to the API using the API Key
#### Pull the key from environment variables (recommended). You can instead pass api_key="your_key_here" to hardcode it, but never share or commit a notebook that contains a real key

In [47]:
# Build the API client from the key held in the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

### Helper function to upload Python example files

In [48]:
def upload_file(path):
    """Upload a local file to OpenAI with purpose 'assistants'.

    Args:
        path: Path of the file to upload; it is opened in binary mode.

    Returns:
        The id of the created file on success. On any failure the exception
        is printed and the exception object itself is returned.
    """
    try:
        # The context manager closes the handle even when the upload raises.
        with open(path, "rb") as source:
            uploaded = client.files.create(
                file=source,
                purpose='assistants'
            )

        return uploaded.id
    except Exception as e:
        print(e)
        return e

### Helper function to setup an Assistant
#### The Assistant is a Coding Bot
Code Interpreter allows the Assistants API to write and run Python code in a sandboxed execution environment.

In [49]:
def create_assistant(file_id):
    """Create the "Coding Bot" Assistant with the Code Interpreter tool enabled.

    Args:
        file_id: Id of an uploaded file to attach to the Assistant.

    Returns:
        The new Assistant's id. If the API call fails, the error details are
        printed and the error response body is returned instead (None when
        the API supplied no body).
    """
    try:
        assistant = client.beta.assistants.create(
            name = "Coding Bot",
            instructions='''Generate a file, always. You are an expert Python developer.
                           You perform code review and optimization, including bug 
                           identification, code explanation, translation, and 
                           prediction of code outputs. When asked to review code, you
                           also make the improved code available for download.''',
            model="gpt-4-1106-preview",
            tools=[{"type": "code_interpreter"}],
            file_ids=[file_id]
        )

        return assistant.id
    except openai.APIError as e:
        # The 1.x client's APIError has no http_status/error attributes;
        # status_code exists only on errors that carry an HTTP response.
        print(getattr(e, "status_code", None))
        print(e.message)
        return e.body

### Helper function to create a thread to query an Assistant

In [50]:
def query_assistant(assistant_id, user_query):
    """Create a thread whose first message is the user's query.

    Args:
        assistant_id: Not used when creating the thread; kept so existing
            callers that pass it keep working.
        user_query: Text sent as the initial "user" message.

    Returns:
        The new thread's id. If the API call fails, the error details are
        printed and the error response body is returned instead (None when
        the API supplied no body).
    """
    try:
        first_message = {
            "role": "user",
            "content": user_query
        }
        thread = client.beta.threads.create(messages=[first_message])

        return thread.id
    except openai.APIError as e:
        # The 1.x client's APIError has no http_status/error attributes;
        # status_code exists only on errors that carry an HTTP response.
        print(getattr(e, "status_code", None))
        print(e.message)
        return e.body

### Helper function to create a run object to execute the thread

In [51]:
def run(assistant_id, thread_id, poll_interval=10):
    """Execute a thread with an Assistant and return the thread's messages.

    Starts a run, then polls it every ``poll_interval`` seconds, printing the
    run id and status on each pass, until the run stops making progress.

    Args:
        assistant_id: Id of the Assistant that should process the thread.
        thread_id: Id of the thread to execute.
        poll_interval: Seconds to wait between status checks (default 10).

    Returns:
        The message list of the thread. It is returned even when the run did
        not complete; in that case a notice with the run's last error is
        printed first. If an API call fails, the error details are printed
        and the error response body is returned instead.
    """
    # Statuses from which polling alone can never reach 'completed'.
    # Waiting only for 'completed' would loop forever on any of the others.
    stop_states = {
        'completed', 'failed', 'cancelled', 'expired',
        'incomplete', 'requires_action',
    }

    try:
        # Named active_run so the local does not shadow this function's name.
        active_run = client.beta.threads.runs.create(
            thread_id=thread_id,
            assistant_id=assistant_id
        )

        time.sleep(poll_interval)

        while True:
            print(f'run.id={active_run.id!r} run.status={active_run.status!r}')

            active_run = client.beta.threads.runs.retrieve(
                thread_id=thread_id,
                run_id=active_run.id
            )

            if active_run.status in stop_states:
                break

            time.sleep(poll_interval)

        print(f'run.id={active_run.id!r} run.status={active_run.status!r}')

        if active_run.status != 'completed':
            failure = getattr(active_run, 'last_error', None)
            print(f'Run stopped without completing: last_error={failure!r}')

        #retrieve the messages
        messages = client.beta.threads.messages.list(
            thread_id=thread_id
        )

        return messages

    except openai.APIError as e:
        # The 1.x client's APIError has no http_status/error attributes;
        # status_code exists only on errors that carry an HTTP response.
        print(getattr(e, "status_code", None))
        print(e.message)
        return e.body

### Read in the Python example file

In [92]:
# Upload the script that the Assistant will be asked to review, then show its id
file_id = upload_file(path='code_to_improve.py')
print(file_id)

file-Tm37C3jMY7icMp0zQ5VV2Mtu


### Create the Assistant, start a thread with the query, and run it

In [93]:
# Create the Coding Bot Assistant with the uploaded file attached
assistant_id = create_assistant(file_id=file_id)

In [94]:
# Show the id returned by create_assistant
print(assistant_id)

asst_VydaDsJ5rYbl50w2muGVfRzV


In [95]:
# Prompt sent as the first user message of the thread
user_query = '''Can you improve, optimize, and remove bugs from this Python code? 
                Supply the improved Python code as a downloadable file. Also, explain 
                what the code is doing.'''

# Start a thread containing the prompt and show the thread id
thread_id = query_assistant(assistant_id, user_query)
print(thread_id)

thread_cajKzKluEut1jJWHmejx7YsQ


In [96]:
# Execute the thread with the Assistant and dump the raw message page it returns
messages = run(assistant_id=assistant_id, thread_id=thread_id)
print(messages)

run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='queued'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='in_progress'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='in_progress'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='in_progress'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='in_progress'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='in_progress'
run.id='run_rjg1Vzxiz9aJp2tmJJ9SarpF' run.status='completed'
SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_ihbRo7HgwOQmaDipsN2jaiNR', assistant_id='asst_VydaDsJ5rYbl50w2muGVfRzV', content=[MessageContentText(text=Text(annotations=[TextAnnotationFilePath(end_index=1087, file_path=TextAnnotationFilePathFilePath(file_id='file-RDoHizmVwxPJ1bnvx4WR2M7e'), start_index=1040, text='sandbox:/mnt/data/improved_calculate_average.py', type='file_path')], value='The improved version of the code, which includes optimizations and added protections, has been saved as `improved_calculate_average.py`. The changes are:\n\n- Ad

In [97]:
#loop and print the messages out
for thread_message in messages.data:
    # Accessing the content array within each ThreadMessage
    for content in thread_message.content:
        # Checking if the content type is MessageContentText
        if content.type == 'text':
            # Accessing the text attribute of the MessageContentText
            text_content = content.text.value
            print(text_content)

The improved version of the code, which includes optimizations and added protections, has been saved as `improved_calculate_average.py`. The changes are:

- Added type hints to the function signature for clarity.
- Used the built-in `sum()` function to calculate the sum of the number list.
- Checked for an empty list and returned `0.0` accordingly to avoid `ZeroDivisionError`. In real-world scenarios, you might want to handle the empty list case differently, such as raising an exception or informing the user that no average can be computed for an empty list.
- Placed the example usage code within an `if __name__ == "__main__":` guard so it won't execute when the module is imported into other scripts.
- A try-except block was added to show how one could handle potential `ZeroDivisionError` exceptions if the check for an empty list was not in place. This is just for demonstration, as with the current implementation the error cannot occur.

Here's the link to download the improved code:
[

### Download the Python code generated by Code Interpreter tool

In [98]:
# Assuming 'messages' is your provided data structure
for thread_message in messages.data:
    # Accessing the content array within each ThreadMessage
    for content in thread_message.content:
        # Checking if the content type is 'text'
        if content.type == 'text':
            # Accessing the annotations within the text content
            for annotation in content.text.annotations:
                # Checking for file_path type in annotations
                if annotation.type == 'file_path':
                    # Extracting the file_id
                    file_id = annotation.file_path.file_id
                    print(f"File ID: {file_id}")

File ID: file-RDoHizmVwxPJ1bnvx4WR2M7e


In [99]:
# Pick out the generated image and Python files from the message annotations.
# Both ids start as None so later cells can tell "nothing was generated"
# apart from a stale or undefined variable.
image_file_id = None
code_file_id = None

for thread_message in messages.data:
    for content in thread_message.content:
        # Only text content carries annotations
        if content.type != 'text':
            continue
        for annotation in content.text.annotations:
            if annotation.type != 'file_path':
                continue

            # Dedicated names keep the uploaded input file's id (file_id) intact
            generated_file_id = annotation.file_path.file_id
            sandbox_path = annotation.text

            # Match on the extension itself rather than any '.py'/'.png' substring
            if sandbox_path.endswith('.png'):
                image_file_id = generated_file_id
            elif sandbox_path.endswith('.py'):
                code_file_id = generated_file_id
            else:
                continue

            print(f"File ID: {generated_file_id}, File Path: {sandbox_path}")

if code_file_id is None:
    print("No generated .py file was found in the thread messages")

File ID: file-RDoHizmVwxPJ1bnvx4WR2M7e, File Path: sandbox:/mnt/data/improved_calculate_average.py


In [100]:
# Show the id of the generated Python file selected above
print(code_file_id)

file-RDoHizmVwxPJ1bnvx4WR2M7e


In [101]:
#get file name
file_name = client.files.with_raw_response.retrieve_content(code_file_id) 
print(file_name)

<APIResponse [200 OK] type=<class 'str'>>


In [102]:
#download and save the file locally
with open("./improved_code.py", "wb") as file:  
    file.write(file_name.content) 

### Clean up Assistants
Delete Assistants, threads, and files once you are finished so they do not linger on your account.

In [103]:
# Remove the file generated by Code Interpreter
client.files.delete(file_id=code_file_id)

FileDeleted(id='file-RDoHizmVwxPJ1bnvx4WR2M7e', deleted=True, object='file')

In [104]:
# Remove the Coding Bot Assistant
client.beta.assistants.delete(assistant_id=assistant_id)

AssistantDeleted(id='asst_VydaDsJ5rYbl50w2muGVfRzV', deleted=True, object='assistant.deleted')

In [105]:
# Remove the conversation thread
client.beta.threads.delete(thread_id=thread_id)

ThreadDeleted(id='thread_cajKzKluEut1jJWHmejx7YsQ', deleted=True, object='thread.deleted')