## OpenAI calls

In [None]:
import os
import base64
from openai import AzureOpenAI

# Connection settings are read from the environment; the second argument of
# each lookup is the fallback used when the variable is not set.
endpoint = os.environ.get("ENDPOINT_URL", "https://oai-ms-2025.openai.azure.com/")
deployment = os.environ.get("DEPLOYMENT_NAME", "gpt-4.1-nano")
subscription_key = os.environ.get("AZURE_OPENAI_API_KEY", "")

# Azure OpenAI client using key-based authentication.
client = AzureOpenAI(
    api_version="2025-01-01-preview",
    api_key=subscription_key,
    azure_endpoint=endpoint,
)

In [6]:
# IMAGE_PATH = "YOUR_IMAGE_PATH"
# encoded_image = base64.b64encode(open(IMAGE_PATH, 'rb').read()).decode('ascii')

# Conversation turns as (role, text) pairs; each message carries exactly one
# text part.
chat_prompt = [
    {"role": role, "content": [{"type": "text", "text": text}]}
    for role, text in (
        ("system", "You are an AI assistant that helps people find information."),
        ("user", "What is 1 + 1?"),
        ("assistant", "1 + 1 equals 2."),
    )
]

# The prompt is sent as-is; nothing else is appended to it.
messages = chat_prompt

# Request a single, non-streamed completion from the configured deployment.
completion = client.chat.completions.create(
    messages=messages,
    model=deployment,
    stream=False,
    stop=None,
    max_tokens=13107,
    temperature=0.7,
    top_p=0.95,
    presence_penalty=0,
    frequency_penalty=0,
)

# Dump the full response object as JSON text.
print(completion.to_json())

{
  "id": "chatcmpl-C5zcO9Pq9IXOJPXWWqTRpoEbW6yKR",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "1 + 1 equals 2.",
        "refusal": null,
        "role": "assistant",
        "annotations": []
      },
      "content_filter_results": {
        "hate": {
          "filtered": false,
          "severity": "safe"
        },
        "protected_material_code": {
          "filtered": false,
          "detected": false
        },
        "protected_material_text": {
          "filtered": false,
          "detected": false
        },
        "self_harm": {
          "filtered": false,
          "severity": "safe"
        },
        "sexual": {
          "filtered": false,
          "severity": "safe"
        },
        "violence": {
          "filtered": false,
          "severity": "safe"
        }
      }
    }
  ],
  "created": 1755544536,
  "model": "gpt-4.1-nano-2025-04-14",
  "object": "chat.comple

## Exploring InfiAgent data

In [4]:
import pandas as pd

# Questions and labels are stored as JSON-lines files (one record per line).
questions_df = pd.read_json(
    'examples/DA-Agent/data/da-dev-questions.jsonl',
    lines=True,
)
labels_df = pd.read_json(
    'examples/DA-Agent/data/da-dev-labels.jsonl',
    lines=True,
)

# Put the reference answers next to the questions. The concat is column-wise,
# so rows are matched on the row index of the two frames.
data_df = pd.concat([questions_df, labels_df.loc[:, ['common_answers']]], axis=1)
print(data_df.shape)
data_df.head()

(257, 8)


Unnamed: 0,id,question,concepts,constraints,format,file_name,level,common_answers
0,0,Calculate the mean fare paid by the passengers.,[Summary Statistics],Calculate the mean fare using Python's built-i...,"@mean_fare[mean_fare_value] where ""mean_fare_v...",test_ave.csv,easy,"[[mean_fare, 34.65]]"
1,5,"Generate a new feature called ""FamilySize"" by ...","[Feature Engineering, Correlation Analysis]",Create a new column 'FamilySize' that is the s...,"@correlation_coefficient[r_value]\nwhere ""r_va...",test_ave.csv,medium,"[[correlation_coefficient, 0.21]]"
2,6,"Create a new column called ""AgeGroup"" that cat...","[Feature Engineering, Summary Statistics]",Make sure to round the mean fare of each group...,"@mean_fare_child[mean_fare], @mean_fare_teenag...",test_ave.csv,medium,"[[mean_fare_elderly, 43.47], [mean_fare_teenag..."
3,7,Apply the linear regression algorithm from the...,[Machine Learning],Use one-hot encoding for the 'Sex' and 'Embark...,"@prediction_accuracy[accuracy], where ""accurac...",test_ave.csv,hard,"[[prediction_accuracy, 0.78]]"
4,8,Perform a distribution analysis on the 'Fare' ...,"[Distribution Analysis, Summary Statistics]",Keep all numerical values rounded to 2 decimal...,"@mean_fare_class1[mean_fare], @median_fare_cla...",test_ave.csv,medium,"[[median_fare_class1, 69.30], [median_fare_cla..."


In [5]:
# Number of questions per difficulty level.
data_df.level.value_counts()

level
hard      88
medium    87
easy      82
Name: count, dtype: int64

In [6]:
from collections import Counter

# Count how often each concept tag occurs across all questions.
# The per-row tag lists are flattened lazily here. The previous
# `data_df.concepts.values.sum()` concatenated the lists pairwise (a new list
# per row, i.e. quadratic work) and yields 0 rather than a list when the
# column is empty, which makes Counter raise.
Counter(concept for concepts in data_df['concepts'] for concept in concepts)

Counter({'Summary Statistics': 90,
         'Correlation Analysis': 72,
         'Distribution Analysis': 64,
         'Feature Engineering': 50,
         'Comprehensive Data Preprocessing': 45,
         'Outlier Detection': 35,
         'Machine Learning': 19})

In [7]:
# Keep the questions whose concept list is exactly ['Summary Statistics'],
# then renumber the rows from zero.
sum_stats_df = (
    data_df
    .loc[data_df['concepts'].apply(lambda tags: ['Summary Statistics'] == tags)]
    .reset_index(drop=True)
)
sum_stats_df['level'].value_counts()

level
easy      36
medium     1
Name: count, dtype: int64

In [8]:
# Pick the example to preview.
i = 0
ex = sum_stats_df.iloc[i]

# Prompt skeleton; the four placeholders are filled from the example record.
prompt_template = """# Task
{question}

# Instructions
 - Analyze the data in `{file_name}`.
 - {constraints}
 - Do **not** write any code. Rely on your tools instead.
 - **Only use your tools** and ignore Python-specific constraints.

# Formatting
Format your answer as follows:
{format}"""


# Render the prompt for the selected example and show it.
print(prompt_template.format(**{
    field: ex[field]
    for field in ("question", "file_name", "constraints", "format")
}))

# Task
Calculate the mean fare paid by the passengers.

# Instructions
 - Analyze the data in `test_ave.csv`.
 - Calculate the mean fare using Python's built-in statistics module or appropriate statistical method in pandas. Rounding off the answer to two decimal places.
 - Do **not** write any code. Rely on your tools instead.
 - **Only use your tools** and ignore Python-specific constraints.

# Formatting
Format your answer as follows:
@mean_fare[mean_fare_value] where "mean_fare_value" is a floating-point number rounded to two decimal places.


In [9]:
# Show the reference answer of the selected example under a heading line.
print("# Answer", ex.common_answers, sep="\n")

# Answer
[['mean_fare', '34.65']]


In [11]:
def load_data(file_name: str) -> pd.DataFrame:
    """Read a table from the dev-tables directory into a DataFrame.

    `file_name` may be given with or without the `.csv` suffix and with or
    without the `examples/DA-Agent/data/da-dev-tables/` prefix; whichever part
    is missing is added before the file is read.
    Must be executed before all other tools.
    """
    table_dir = "examples/DA-Agent/data/da-dev-tables/"
    csv_name = file_name if file_name.endswith('.csv') else file_name + '.csv'
    csv_path = csv_name if csv_name.startswith(table_dir) else table_dir + csv_name
    return pd.read_csv(csv_path)

# Load the table that the currently selected example refers to.
question_df = load_data(ex["file_name"])
# question_df['indicator_of_free_agency_eligibility'].describe().to_dict()

## Testing our custom MCP client

In [12]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
from mcp_client import MCPClient

# Create the MCP client and connect it to the server script `mcp_server.py`.
# The top-level `await` relies on the notebook kernel running the cell in an
# event loop.
mcp_client = MCPClient()
await mcp_client.connect_to_server("mcp_server.py")

Connected! Discovered 8 tools: ['load_data', 'get_column_names', 'describe_column', 'get_value_counts', 'filter', 'remove_outliers', 'compute_mean', 'compute_standard_deviation']


In [19]:
i = 4
ex = sum_stats_df.iloc[i]

prompt_template = """# Task
{question}

# Instructions
 - Analyze the data in `{file_name}`.
 - {constraints}
 - Do **not** write any code. Rely on your tools instead.
 - **Only use your tools** and ignore Python-specific constraints.

# Formatting
Format your answer as follows:
{format}"""


prompt = prompt_template.format(
    question=ex.question,
    file_name=ex.file_name,
    constraints=ex.constraints,
    format=ex.format
)

try:
    response = await mcp_client.process_query(prompt)
except Exception as e:
    print(f"\nError: {str(e)}")

print(prompt)
print()
print(response)
print()
print(ex.common_answers)

# Task
Calculate the mean and standard deviation of the "importance.score" column.

# Instructions
 - Analyze the data in `imp.score.ldlr.metabolome.csv`.
 - Calculate the mean and standard deviation to two decimal places for the "importance.score" column. Ignore any null or missing values in the calculations. The calculations are to be done using standard statistical methods without applying any transformations or filters to the data.
 - Do **not** write any code. Rely on your tools instead.
 - **Only use your tools** and ignore Python-specific constraints.

# Formatting
Format your answer as follows:
@importance_score_mean[mean] @importance_score_std[std_dev] where "mean" and "std_dev" are non-negative numbers rounded to two decimal places.

[Calling tool load_data with args {'file_name': 'imp.score.ldlr.metabolome.csv'}]
[Tool response: [TextContent(type='text', text='{\n  "success": true,\n  "information": {\n    "data size": 377\n  }\n}', annotations=None, meta=None)]]
[Calling to

## Simple evaluation

In [20]:
from tqdm import trange

results = []
for i in trange(10):
    ex = sum_stats_df.iloc[i]

    prompt_template = """# Task
    {question}

    # Instructions
    - Analyze the data in `{file_name}`.
    - {constraints}
    - Do **not** write any code. Rely on your tools instead.
    - **Only use your tools** and ignore Python-specific constraints.

    # Formatting
    Format your answer as follows:
    {format}"""


    prompt = prompt_template.format(
        question=ex.question,
        file_name=ex.file_name,
        constraints=ex.constraints,
        format=ex.format
    )

    try:
        response = await mcp_client.process_query(prompt)
    except Exception as e:
        print(f"\nError: {str(e)}")

    results.append({
        "response": response.split('\n')[-1],
        "target": ' '.join(f"@{t[0]}[{t[1]}]" for t in ex.common_answers)
    })

In [21]:
# Display the collected response/target pairs from the evaluation loop.
results

[{'response': '@mean_fare[34.65]', 'target': '@mean_fare[34.65]'},
 {'response': '@mean_close_price[570.68]',
  'target': '@mean_close_price[570.68]'},
 {'response': "[Calling tool compute_standard_deviation with args {'column_name': 'Mar.2019'}]",
  'target': '@mean_mar_2019[171.44] @sd_mar_2019[188.25]'},
 {'response': '@mean_age[39.21]', 'target': '@mean_age[39.21]'},
 {'response': '@importance_score_mean[0.00] @importance_score_std[0.01]',
  'target': '@importance_score_std[0.01] @importance_score_mean[0.0]'},
 {'response': "[Calling tool get_value_counts with args {'column_name': 'No. of cases'}]",
  'target': '@mean_cases[2081990]'},
 {'response': '@mean_wage[6.31] @std_wage[4.66]',
  'target': '@std_wage[4.66] @mean_wage[6.31]'},
 {'response': '@std_dev_volume[8254791.71]',
  'target': '@mean_volume[22607406.19] @std_dev_volume[8254791.71]'},
 {'response': '@country_with_highest_score[Denmark]',
  'target': '@country_with_highest_score[Switzerland]'},
 {'response': '@mean_fare[3

## Generating our own example