# Semantic Kernel OpenAI Assistant Agent Code Interpreter

## Prepare the paths of the files to be added to the agent

In [7]:
import os

# Build the paths of the two population CSV files that are later uploaded for the assistant.
# Both files live in the same directory, so the join is written once and unpacked.
csv_file_path_1, csv_file_path_2 = (
    os.path.join("../Data/population/", csv_name)
    for csv_name in ("PopulationByAdmin1.csv", "PopulationByCountry.csv")
)

## Create an Agent and Thread

In [8]:
from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent
from semantic_kernel.agents import  AssistantAgentThread
from semantic_kernel.contents import StreamingFileReferenceContent

# Create the client using Azure OpenAI resources and configuration
client, model = AzureAssistantAgent.setup_resources()

# Upload the files to the client
file_ids: list[str] = []
for path in [csv_file_path_1, csv_file_path_2]:
    with open(path, "rb") as file:
        file = await client.files.create(file=file, purpose="assistants")
        file_ids.append(file.id)

# Get the code interpreter tool and resources
code_interpreter_tools, code_interpreter_tool_resources = AzureAssistantAgent.configure_code_interpreter_tool(
    file_ids=file_ids
)

# Create the assistant definition
definition = await client.beta.assistants.create(
    model=model,
    instructions="""
        Analyze the available data to provide an answer to the user's question.
        Always format response using markdown.
        Always include a numerical index that starts at 1 for any lists or tables.
        Always sort lists in ascending order.
        """,
    name="SampleAssistantAgent",
    tools=code_interpreter_tools,
    tool_resources=code_interpreter_tool_resources,
)

# Create the agent using the client and the assistant definition
agent = AzureAssistantAgent(
    client=client,
    definition=definition,
)

# Create a thread
thread: AssistantAgentThread = None    

## Helper Methods
These helpers download any files created by the agent and save them to `../Data/skagents-output/`.

In [9]:
import os

async def download_file_content(agent, file_id: str, output_dir: str = "../Data/skagents-output/"):
    """Download one file through the agent's client and save it as ``<file_id>.png``.

    Args:
        agent: Agent whose ``client.files.content`` coroutine is awaited to fetch the file.
        file_id (str): ID of the file to download; also used as the file name.
        output_dir (str): Directory the file is written to. It is created if it
            does not exist yet. Defaults to the notebook's output folder.

    Any exception (download or write) is reported with ``print`` instead of being
    raised, so a single failed file does not interrupt the caller.
    """
    try:
        # Fetch the content of the file using the provided method
        response_content = await agent.client.files.content(file_id)

        # Make sure the target directory exists; open() below would otherwise
        # fail whenever the output folder has not been created beforehand.
        os.makedirs(output_dir, exist_ok=True)

        # The extension is fixed to .png; adjust here if other file types are expected
        file_path = os.path.join(output_dir, f"{file_id}.png")

        # Write the downloaded bytes to disk (plain blocking write)
        with open(file_path, "wb") as output_file:
            output_file.write(response_content.content)

        print(f"File saved to: {file_path}")
    except Exception as e:
        print(f"An error occurred while downloading file {file_id}: {str(e)}")

async def download_response_image(agent, file_ids: list[str]):
    """Download every file listed in ``file_ids``, one after another.

    Args:
        agent: Agent passed through to ``download_file_content``.
        file_ids (list[str]): IDs of the files to download. Nothing happens
            when the list is empty (or otherwise falsy).
    """
    # Guard clause: nothing to download
    if not file_ids:
        return

    for current_id in file_ids:
        await download_file_content(agent, current_id)

## Helper function to run the agent

In [10]:
async def run_agent(user_question, thread):
    """
    Executes the assistant agent to process a user question and handle the response.

    Args:
        user_question (str): The question or command provided by the user.
        thread (AssistantAgentThread): The thread object to maintain the conversation
            context (the notebook starts with None before the first question).

    Returns:
        The thread reported by the last streamed response, or the `thread`
        argument unchanged if nothing was streamed. Rebinding `thread` inside
        this function is not visible to the caller, so assign the return value
        and pass it to the next call to continue the same conversation.

    This function streams the agent's response, formats code blocks if present,
    and downloads any files generated by the agent.
    """
    is_code = False  # Tracks whether the current response is a code block
    last_role = None  # Tracks the last role (e.g., system, user, assistant) in the response

    # File IDs generated during this run. Kept separate from the notebook-level
    # `file_ids` (the uploaded input files) so those are neither downloaded as
    # images nor cleared before the cleanup cell can delete them.
    generated_file_ids: list[str] = []

    # Stream the agent's response
    async for response in agent.invoke_stream(messages=user_question, thread=thread):
        current_is_code = response.metadata.get("code", False)  # Check if the response contains code

        if current_is_code:
            # Start a new code block if not already in one
            if not is_code:
                print("\n\n```python")
                is_code = True
            # Print the code content
            print(response.content, end="", flush=True)
        else:
            # Close the code block if transitioning out of code
            if is_code:
                print("\n```")
                is_code = False
                last_role = None
            # Print the role (if it changes) and the response content
            if hasattr(response, "role") and response.role is not None and last_role != response.role:
                print(f"\n# {response.role}: ", end="", flush=True)
                last_role = response.role
            print(response.content, end="", flush=True)

        # Collect file IDs from the response for downloading
        generated_file_ids.extend(
            item.file_id for item in response.items if isinstance(item, StreamingFileReferenceContent)
        )
        # Remember the thread for the next interaction
        thread = response.thread

    # Close any open code block at the end. Code chunks are printed without a
    # trailing newline, so the fence needs its own line (same as in the loop).
    if is_code:
        print("\n```")
    print()

    # Download any files generated by the agent during this run
    await download_response_image(agent, generated_file_ids)

    return thread

In [11]:
user_question = "What is the population of the country with the highest population?"
await run_agent(user_question,thread)


# AuthorRole.ASSISTANT: Let's begin by examining the contents of the uploaded files to identify the relevant data for determining the population of the country with the highest population. We'll start by loading the files.

```python
import pandas as pd

# Load the contents of the uploaded files
file_path_1 = '/mnt/data/assistant-7KMtptgBdizN8q3F9FSK81'
file_path_2 = '/mnt/data/assistant-875nQwXDjJ4QcX4yU4hSLs'

data_1 = pd.read_csv(file_path_1)
data_2 = pd.read_csv(file_path_2)

# Display the first few rows of each dataset to understand their structure and content
data_1.head(), data_2.head()
```

# AuthorRole.ASSISTANT: The first dataset (data_1) contains population information for various regions within countries, while the second dataset (data_2) contains population information for entire countries.

To find the country with the highest population, we will use the second dataset (data_2). We'll identify the country with the highest population and provide that information.

```pyth

## Appending Messages to the Thread

In [12]:
user_question = "What country has the third highest population?"
await run_agent(user_question, thread)


# AuthorRole.ASSISTANT: To determine the country with the third highest population, we will need to examine the data in the uploaded files. Let's start by inspecting the contents of each file to understand the available information.

```python
import pandas as pd

# Load the data from the uploaded files
file_path_1 = '/mnt/data/assistant-7KMtptgBdizN8q3F9FSK81'
file_path_2 = '/mnt/data/assistant-875nQwXDjJ4QcX4yU4hSLs'

# Try loading both files to inspect the contents
try:
    data_1 = pd.read_csv(file_path_1)
except Exception as e:
    data_1 = str(e)

try:
    data_2 = pd.read_csv(file_path_2)
except Exception as e:
    data_2 = str(e)

data_1, data_2
```

# AuthorRole.ASSISTANT: Both uploaded files contain population data for different regions and countries. Specifically:

1. The first file provides population data for different provinces/states within countries (e.g., each province in Belgium, each state in the US).
2. The second file provides population data for countries directl

In [14]:
user_question = "Give me a column chart of the top 10 countries by population. Add values at the top of each column. Make this in yellow color."
await run_agent(user_question, thread)


# AuthorRole.ASSISTANT: Let's start by loading and examining the provided files to determine which one contains the population data.

```python
import pandas as pd

# Load the files
file1_path = '/mnt/data/assistant-7KMtptgBdizN8q3F9FSK81'
file2_path = '/mnt/data/assistant-875nQwXDjJ4QcX4yU4hSLs'

file1 = pd.read_excel(file1_path)
file2 = pd.read_excel(file2_path)

# Display the first few rows of each file to understand their structure
file1_head = file1.head()
file2_head = file2.head()

file1_head, file2_head
```

# AuthorRole.ASSISTANT: It looks like the files couldn't be read automatically. Let's try loading them by specifying the engine explicitly. This should help us determine whether they are Excel files or something else.

```python
# Attempt to read the files with openpyxl engine
try:
    file1 = pd.read_excel(file1_path, engine='openpyxl')
    file2 = pd.read_excel(file2_path, engine='openpyxl')
except Exception as e:
    file1_error = str(e)
    file2_error = str(e)

file1_e

In [61]:
user_question = "Provide a column chart for countries whose names start with the same letter and sort the x axis by highest count to lowest (include all countries). Add values on top of each column."
await run_agent(user_question, thread)


# AuthorRole.ASSISTANT: I will begin by loading the uploaded files to check the data and identify the relevant information for the analysis.

```python
import pandas as pd

# Load the uploaded files
file_path1 = '/mnt/data/assistant-76KyK1ZmHT5kUrNvGwSwWE'
file_path2 = '/mnt/data/assistant-3zZpUPqZtYojA4JJRd33wf'

# Check the type of files (assume CSV as a common format)
try:
    data1 = pd.read_csv(file_path1)
    data2 = pd.read_csv(file_path2)
    file_type = 'csv'
except Exception:
    try:
        data1 = pd.read_excel(file_path1)
        data2 = pd.read_excel(file_path2)
        file_type = 'excel'
    except Exception:
        file_type = 'unknown'

data1.head(), data2.head(), file_type
```

# AuthorRole.ASSISTANT: The first file contains data about countries and territories, and the second file appears to be more fine-grained, detailing provinces and states within countries.

I'll use the first file to count the number of countries whose names start with the same letter and cr

## Deleting Files, Thread, Agent

In [15]:
if agent is not None:
    [await client.files.delete(file_id) for file_id in file_ids]
    await thread.delete() if thread else None
    await client.beta.assistants.delete(agent.id)