# Tool Description Evaluation Framework (TDEF) Demo

In [None]:
import json
import os

import requests

In [None]:
# Read the OpenRouter API key from the environment so the secret is never written
# into the notebook. Indexing os.environ raises KeyError if the variable is unset.
OPENROUTER_API_KEY = os.environ["OPENROUTER_API_KEY"]


def print_json(data, title="JSON Response"):
    """Pretty-print ``data`` as indented JSON under a titled banner.

    Args:
        data: Any object that ``json.dumps`` can serialize.
        title: Heading printed between the two banner rules.

    The output is a blank line, a 50-character ``=`` rule, the title, another
    rule, the JSON text (2-space indent, non-ASCII characters left unescaped),
    and a closing rule.
    """
    rule = "=" * 50
    print("\n" + rule)
    print(f"{title}")
    print(rule)
    # Serialize only after the banner has been printed, so the title is already
    # on screen if serialization fails.
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    print(rendered)
    print(rule)

### Assess Quality of Tool Descriptions

In [None]:
# Request a quality assessment of the tool descriptions exposed by one MCP server.
url = "https://localhost:8000/mcp-servers/tools/quality"
params = {
    "server_urls": "http://127.0.0.1:52601/sse",
    "model_provider": "openrouter",
    "model_name": "anthropic/claude-3.5-sonnet",
}
# The model provider's API key travels in a request header, not in the URL.
headers = {"X-Model-API-Key": OPENROUTER_API_KEY}

# NOTE(review): verify=False turns off TLS certificate checks. Presumably the local
# service uses a self-signed certificate; do not reuse this against a remote host.
# requests applies no timeout unless one is given, so without it this cell could
# hang forever if the service stalls. The limit is generous because the endpoint
# presumably waits on an LLM call (it takes a model name and key) - TODO confirm.
quality_response = requests.get(
    url, params=params, headers=headers, verify=False, timeout=300
)
print("Status Code:", quality_response.status_code)



Status Code: 200


In [97]:
# Pretty-print the parsed JSON body of the quality assessment response.
print_json(data=quality_response.json(), title="Quality of Tool Descriptions")


Quality of Tool Descriptions
{
  "results": [
    {
      "tool_name": "resolve-library-id",
      "tool_description": "Resolves a package/product name to a Context7-compatible library ID and returns a list of matching libraries.\n\nYou MUST call this function before 'get-library-docs' to obtain a valid Context7-compatible library ID UNLESS the user explicitly provides a library ID in the format '/org/project' or '/org/project/version' in their query.\n\nSelection Process:\n1. Analyze the query to understand what library/package the user is looking for\n2. Return the most relevant match based on:\n- Name similarity to the query (exact matches prioritized)\n- Description relevance to the query's intent\n- Documentation coverage (prioritize libraries with higher Code Snippet counts)\n- Trust score (consider libraries with scores of 7-10 more authoritative)\n\nResponse Format:\n- Return the selected library ID in a clearly marked section\n- Provide a brief explanation for why this librar

### Analyze Similarity Between Tools

In [98]:
# Request pairwise similarity between the tools of the listed MCP servers.
url = "https://localhost:8000/similarity/analyze"
params = {
    "mcp_server_urls": ["http://127.0.0.1:52601/sse"],
    "similarity_threshold": 0.7,
    "compute_full_similarity": False,
}

# NOTE(review): verify=False turns off TLS certificate checks. Presumably the local
# service uses a self-signed certificate; do not reuse this against a remote host.
# requests applies no timeout unless one is given, so an explicit limit keeps the
# cell from hanging forever if the service stalls.
similarity_response = requests.post(url, json=params, verify=False, timeout=120)
print("Status Code:", similarity_response.status_code)



Status Code: 200


In [100]:
# Pretty-print the parsed JSON body of the similarity analysis response.
print_json(data=similarity_response.json(), title="Analyze Similarity Between Tools")


Analyze Similarity Between Tools
{
  "tool_ids": [
    "http://127.0.0.1:52601/sse:resolve-library-id",
    "http://127.0.0.1:52601/sse:get-library-docs"
  ],
  "matrix": [
    [
      1.0,
      0.8968597013011386
    ],
    [
      0.8968597013011386,
      1.0
    ]
  ],
  "threshold": 0.7,
  "flagged_pairs": [
    {
      "tool_a_id": "http://127.0.0.1:52601/sse:resolve-library-id",
      "tool_b_id": "http://127.0.0.1:52601/sse:get-library-docs",
      "similarity_score": 0.8968597013011386
    }
  ],
  "generated_at": "2025-11-14T03:02:39.769480+00:00",
  "recommendations": null
}


### Analyze Query-Tool Alignment

#### 1. Create a test case

In [None]:
# Register a test case: a user query plus the tool and parameters it should select.
url = "https://localhost:8000/test-cases"
params = {
    "name": "Demo query-tool alignment",
    "query": "How can I use requests library?",
    "expected_mcp_server_url": "http://127.0.0.1:52601/sse",
    "expected_tool_name": "resolve-library-id",
    "expected_parameters": {"libraryName": "requests"},
    "available_mcp_servers": ["http://127.0.0.1:52601/sse", "http://127.0.0.1:41321/sse"],
}

# NOTE(review): verify=False turns off TLS certificate checks. Presumably the local
# service uses a self-signed certificate; do not reuse this against a remote host.
# requests applies no timeout unless one is given, so an explicit limit keeps the
# cell from hanging forever if the service stalls.
test_case_creation_response = requests.post(url, json=params, verify=False, timeout=30)
print("Status Code:", test_case_creation_response.status_code)

Status Code: 201




In [109]:
# Show the created test case as the cell's output. Its "id" field is what the
# run step uses to address this test case.
test_case_creation_response.json()

{'id': '2d6fab8e-86f9-4f03-97cf-73f922a3d293',
 'name': 'Demo query-tool alignment',
 'query': 'How can I use requests library?',
 'expected_mcp_server_url': 'http://127.0.0.1:52601/sse',
 'expected_tool_name': 'resolve-library-id',
 'expected_parameters': {'libraryName': 'requests'},
 'available_mcp_servers': ['http://127.0.0.1:41321/sse',
  'http://127.0.0.1:52601/sse'],
 'created_at': '2025-11-14T03:21:23',
 'updated_at': '2025-11-14T03:21:23',
 'available_tools': None}

#### 2. Run the test case

In [None]:
# Get the test case ID from the creation response
test_case_id = test_case_creation_response.json()["id"]

# Run the test case with the chosen model; the provider key is sent as a header.
url = f"https://localhost:8000/test-cases/{test_case_id}/run"
headers = {"X-Model-API-Key": OPENROUTER_API_KEY}
run_params = {"model_settings": {"provider": "openrouter", "model": "anthropic/claude-3.5-sonnet"}}

# NOTE(review): verify=False turns off TLS certificate checks. Presumably the local
# service uses a self-signed certificate; do not reuse this against a remote host.
# requests applies no timeout unless one is given, so an explicit limit keeps the
# cell from hanging forever if the service stalls.
test_run_response = requests.post(
    url, json=run_params, headers=headers, verify=False, timeout=60
)
print("Status Code:", test_run_response.status_code)

# Print the test run response (201 means the run was created).
if test_run_response.status_code == 201:
    print_json(test_run_response.json(), "Test Run Created")
else:
    print("Error running test case:")
    try:
        print_json(test_run_response.json(), "Error Response")
    except ValueError:
        # The error body is not JSON (e.g. a plain-text or HTML error page);
        # show it raw instead of hiding the failure behind a decode error.
        print(test_run_response.text)

Status Code: 201

Test Run Created
{
  "id": "71ca2873-86a9-4def-beb5-8fe8f1f56e29",
  "test_case_id": "2d6fab8e-86f9-4f03-97cf-73f922a3d293",
  "model_settings": {
    "id": "3f63e280-bd7a-4362-8c19-524bd2a22c31",
    "provider": "openrouter",
    "model": "anthropic/claude-3.5-sonnet",
    "timeout": 30,
    "temperature": 0.4,
    "max_retries": 3,
    "base_url": null,
    "system_prompt": null,
    "created_at": "2025-11-14T03:25:38"
  },
  "status": "pending",
  "llm_response_raw": null,
  "selected_tool": null,
  "expected_tool": {
    "id": null,
    "name": "resolve-library-id",
    "mcp_server_url": "http://127.0.0.1:52601/sse",
    "parameters": {
      "libraryName": "requests"
    }
  },
  "extracted_parameters": null,
  "llm_confidence": null,
  "parameter_correctness": null,
  "confidence_score": null,
  "classification": null,
  "execution_time_ms": null,
  "error_message": null,
  "created_at": "2025-11-14T03:25:38",
  "completed_at": null,
  "tools": [
    {
      "id



#### 3. Get the test run results

In [121]:
# Get the test run ID from the run response
test_run_id = test_run_response.json()["id"]

# Get the test run results
url = f"https://localhost:8000/test-runs/{test_run_id}"
headers = {"X-Model-API-Key": OPENROUTER_API_KEY}

# NOTE(review): verify=False turns off TLS certificate checks. Presumably the local
# service uses a self-signed certificate; do not reuse this against a remote host.
# requests applies no timeout unless one is given, so an explicit limit keeps the
# cell from hanging forever if the service stalls.
test_results_response = requests.get(url, headers=headers, verify=False, timeout=30)
print("Status Code:", test_results_response.status_code)

if test_results_response.status_code == 200:
    result_data = test_results_response.json()

    # Only the fields needed to compare the selected tool with the expected one.
    filtered_keys = [
        "test_case_id",
        "selected_tool",
        "expected_tool",
        "extracted_parameters",
        "parameter_correctness",
    ]

    # Iterate over the response so the printed keys keep the server's ordering.
    filtered_data = {k: v for k, v in result_data.items() if k in filtered_keys}

    print_json(filtered_data, "Test Run Results (Filtered)")
else:
    print("Error getting test results:")
    try:
        print_json(test_results_response.json(), "Error Response")
    except ValueError:
        # The error body is not JSON (e.g. a plain-text or HTML error page);
        # show it raw instead of hiding the failure behind a decode error.
        print(test_results_response.text)

Status Code: 200

Test Run Results (Filtered)
{
  "test_case_id": "2d6fab8e-86f9-4f03-97cf-73f922a3d293",
  "selected_tool": {
    "id": "1b18df3a-9416-403c-be28-21bbc45a5c2b",
    "name": "resolve-library-id",
    "mcp_server_url": "http://127.0.0.1:52601/sse",
    "parameters": {
      "libraryName": "requests"
    }
  },
  "expected_tool": {
    "id": null,
    "name": "resolve-library-id",
    "mcp_server_url": "http://127.0.0.1:52601/sse",
    "parameters": {
      "libraryName": "requests"
    }
  },
  "extracted_parameters": {
    "libraryName": "requests"
  },
  "parameter_correctness": 10.0
}


