In [2]:
import pandas as pd
from datetime import datetime, timedelta
import httpx, json

In [5]:
from dotenv import load_dotenv
import os
# Pull credentials from the .env file one directory up so the key never appears in the notebook.
load_dotenv("../.env")
phoenix_api_key = os.getenv("PHOENIX_API_KEY")

# API Ref

## datasets

- GET `/v1/datasets`: List datasets
- DELETE `/v1/datasets/{id}`: Delete dataset by ID
- GET `/v1/datasets/{id}`: Get dataset by ID
- GET `/v1/datasets/{id}/versions`: List dataset versions
- POST `/v1/datasets/upload`: Upload dataset from JSON, CSV, or PyArrow
- GET `/v1/datasets/{id}/examples`: Get examples from a dataset
- GET `/v1/datasets/{id}/csv`: Download dataset examples as CSV file
- GET `/v1/datasets/{id}/jsonl/openai_ft`: Download dataset examples as OpenAI fine-tuning JSONL file
- GET `/v1/datasets/{id}/jsonl/openai_evals`: Download dataset examples as OpenAI evals JSONL file

## experiments

- POST `/v1/datasets/{dataset_id}/experiments`: Create experiment on a dataset
- GET `/v1/datasets/{dataset_id}/experiments`: List experiments by dataset
- GET `/v1/experiments/{experiment_id}`: Get experiment by ID
- GET `/v1/experiments/{experiment_id}/json`: Download experiment runs as a JSON file
- GET `/v1/experiments/{experiment_id}/csv`: Download experiment runs as a CSV file

## spans

- POST `/v1/span_annotations`: Create or update span annotations

## traces

- POST `/v1/evaluations`: Add span, trace, or document evaluations
- GET `/v1/evaluations`: Get span, trace, or document evaluations from a project

## prompts

- GET `/v1/prompts`: Get all prompts
- POST `/v1/prompts`: Create a prompt version
- GET `/v1/prompts/{prompt_identifier}/versions`: List all prompt versions for a given prompt
- GET `/v1/prompt_versions/{prompt_version_id}`: Get prompt by prompt version ID
- GET `/v1/prompts/{prompt_identifier}/tags/{tag_name}`: Get prompt by tag name
- GET `/v1/prompts/{prompt_identifier}/latest`: Get the latest prompt version

# Code

In [22]:
def phoenix_query(endpoint, params=None, method="GET", data=None):
    "Query the Phoenix API with automatic authentication."
    headers = {'Authorization': f'Bearer {phoenix_api_key}'}
    url = f"https://phx.ankihub.net{endpoint}"    
    match method.upper():
        case "GET":  response = httpx.get(url,  params=params, headers=headers)
        case "POST": response = httpx.post(url, params=params, json=data, headers=headers)
        case _: raise ValueError(f"Unsupported HTTP method: {method}")
    
    response.raise_for_status()
    return response.json()

In [26]:
# List all datasets through the shared helper so the base URL, auth header and
# HTTP status check are not duplicated in this cell.
phoenix_query('/v1/datasets')

{'data': [{'id': 'RGF0YXNldDoxOQ==',
   'name': 'Cystic Fibrosis Prompt Rewriting',
   'description': '41 rewrites of Cystic Fibrosis query 4',
   'metadata': {},
   'created_at': '2025-03-31T17:42:12.671241+00:00',
   'updated_at': '2025-03-31T17:42:12.671241+00:00'},
  {'id': 'RGF0YXNldDoxOA==',
   'name': 'Smart search golden dataset',
   'description': 'Golden dataset for smart search results based on human feedback',
   'metadata': {},
   'created_at': '2025-03-29T08:29:37.670726+00:00',
   'updated_at': '2025-03-29T17:06:49.423323+00:00'}],
 'next_cursor': None}

In [23]:
# Fetch up to 10 datasets and show only the columns needed to pick a dataset id.
response = phoenix_query('/v1/datasets', params={'limit': 10})
pd.DataFrame(response['data']).loc[:, ['name', 'description', 'id']]

Unnamed: 0,name,description,id
0,Cystic Fibrosis Prompt Rewriting,41 rewrites of Cystic Fibrosis query 4,RGF0YXNldDoxOQ==
1,Smart search golden dataset,Golden dataset for smart search results based ...,RGF0YXNldDoxOA==


In [13]:
# Display the full listing payload, including the fields the table above leaves out.
response

{'data': [{'id': 'RGF0YXNldDoxOQ==',
   'name': 'Cystic Fibrosis Prompt Rewriting',
   'description': '41 rewrites of Cystic Fibrosis query 4',
   'metadata': {},
   'created_at': '2025-03-31T17:42:12.671241+00:00',
   'updated_at': '2025-03-31T17:42:12.671241+00:00'},
  {'id': 'RGF0YXNldDoxOA==',
   'name': 'Smart search golden dataset',
   'description': 'Golden dataset for smart search results based on human feedback',
   'metadata': {},
   'created_at': '2025-03-29T08:29:37.670726+00:00',
   'updated_at': '2025-03-29T17:06:49.423323+00:00'}],
 'next_cursor': None}

In [14]:
# Id of the "Smart search golden dataset" entry from the dataset listing.
dataset_id = "RGF0YXNldDoxOA=="
examples = phoenix_query(
    f'/v1/datasets/{dataset_id}/examples',
    params={'limit': 5},
)

In [15]:
# Keep the first example around to explore the payload structure in the next cells.
_exp = examples['data']['examples'][0]

In [16]:
# Show the input payload of the sample example.
_exp['input']

{'query': 'propofol'}

In [17]:
# Preview the first rows of the documents stored under the sample example's output.
pd.DataFrame(_exp['output']['documents']).head()

Unnamed: 0,rating,anki_id,document,ankihub_id
0,1.0,1523479052793,"Because propofol is highly lipophilic, it quic...",7f69689b-bbbd-40f4-b806-e374a07734f9
1,1.0,1484862424297,"Compared to thiopental, propofol has less post...",397eb3f4-f8e0-4ed8-8308-f4170d19f6e5
2,1.0,1484862200866,Is propofol used for the induction or maintena...,83451a82-de2d-4bd8-b76a-3e0083fb0235
3,1.0,1484862393206,Propofol and etomidate are IV anesthetics that...,e2dee87f-3b0c-45c7-9855-6a42ffebe8ec
4,1.0,1484862398045,Propofol causes profound vasodilation of arter...,4cfbc90a-0c9e-46b5-adf1-4157de56191f


In [27]:
def store_search_results_to_csv(data, output_file="search_results.csv"):
    """Flatten search examples into a CSV with one row per (query, document).

    Args:
        data: Iterable of examples; each is indexed as
            ``example['input']['query']`` and ``example['output']['documents']``,
            and every document is indexed by ``rating``, ``anki_id``,
            ``document`` and ``ankihub_id``.
        output_file: Destination path; an existing file is overwritten.

    Returns:
        A status message containing the absolute output path and the number
        of rows written.
    """
    import csv
    import os

    doc_fields = ("rating", "anki_id", "document", "ankihub_id")
    fieldnames = ["query", *doc_fields]

    # The single-element tuple binds each example's query once, before its
    # documents are read, so every document row can repeat it.
    records = [
        {"query": query, **{field: doc[field] for field in doc_fields}}
        for example in data
        for query in (example['input']['query'],)
        for doc in example['output']['documents']
    ]

    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(records)

    saved_to = os.path.abspath(output_file)
    return f"Data saved to {saved_to} with {len(records)} rows"

In [28]:
# Flatten every fetched example into the default output file, search_results.csv.
store_search_results_to_csv(examples['data']['examples'])

'Data saved to /Users/iflath/git/AnkiHub/fasthtml-demo/scratch/search_results.csv with 865 rows'