In [6]:
import os
from datetime import datetime
from langfuse import Langfuse
from tqdm import tqdm

# Langfuse credentials: set env vars or paste below
# export LANGFUSE_PUBLIC_KEY="pk-lf-..."
# export LANGFUSE_SECRET_KEY="sk-lf-..."
# export LANGFUSE_HOST="https://langfuse.kavida.ai"  # or https://cloud.langfuse.com

# Credentials and host are read from the environment. The string defaults for the two
# keys are placeholders, not working credentials.
PUBLIC_KEY = os.environ.get("LANGFUSE_PUBLIC_KEY", "your-public-key")
SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY", "your-secret-key")
HOST = os.environ.get("LANGFUSE_HOST", "https://langfuse.kavida.ai")

# High timeout to avoid "read operation timed out" (parallel fetch still needs each request to complete)
langfuse = Langfuse(
    public_key=PUBLIC_KEY,
    secret_key=SECRET_KEY,
    host=HOST,
    timeout=1800,  # 1800 s = 30 min per request; raise further if timeouts persist
)

In [7]:
# Time frame for cost calculation (inclusive) — all times in IST (India Standard Time)
from datetime import timezone
from zoneinfo import ZoneInfo

IST = ZoneInfo("Asia/Kolkata")

# Reporting window, expressed in IST; edit these two values to change the range.
# tzinfo=IST is required: .astimezone() on a *naive* datetime makes Python assume the
# machine's local timezone, which silently shifts the window on any host not set to IST.
start_date_ist = datetime(2025, 11, 1, 0, 0, 0, tzinfo=IST)      # 2025-11-01 00:00:00 IST
end_date_ist = datetime(2025, 11, 30, 23, 59, 59, tzinfo=IST)    # 2025-11-30 23:59:59 IST

# Langfuse API expects UTC — convert IST → UTC (IST is UTC+05:30, so the UTC window
# starts at 18:30 on the previous calendar day).
start_date = start_date_ist.astimezone(timezone.utc)
end_date = end_date_ist.astimezone(timezone.utc)

# Or a single timestamp in IST, e.g. 2026-02-04 19:38:05.557 IST (that day only):
# start_date_ist = datetime(2026, 2, 4, 19, 38, 5, 557000, tzinfo=IST)
# end_date_ist = datetime(2026, 2, 4, 23, 59, 59, tzinfo=IST)
# start_date = start_date_ist.astimezone(timezone.utc)
# end_date = end_date_ist.astimezone(timezone.utc)

In [8]:
import time
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- Speed + anti-timeout: minimal fields, parallel fetch, retries ---
PAGE_LIMIT = 100          # Larger pages OK when using fields=core,metrics (smaller payload)
# Total attempts per page (the first try counts as one), not the number of extra retries.
MAX_RETRIES = 5
# Seconds. The wait before the next attempt is BASE_DELAY * 2**attempt plus up to 2 s of random jitter.
BASE_DELAY = 5
MAX_WORKERS = 12          # Parallel page fetches (don't set too high or server may timeout more)

# Only request fields needed for cost + name (smaller response = faster, fewer timeouts)
FIELDS = "core,metrics"  # core=id,name,timestamp,...; metrics=totalCost,latency

# Fetch all traces; we exclude ActionItemsSummarizer and get_authorization_token below.
# When set to a non-empty value it is sent as the `name` argument of the trace list request.
TRACE_NAME_FILTER = None

def fetch_page(page_num):
    """Fetch one page of traces for the configured time window, retrying on failure.

    The request covers start_date..end_date, newest first, limited to PAGE_LIMIT
    items and to the field groups in FIELDS. When TRACE_NAME_FILTER is set it is
    sent as the `name` filter.

    Args:
        page_num: Page number to request. For page 1 the `page` argument is
            omitted from the request; every other page passes it explicitly.

    Returns:
        Whatever `langfuse.api.trace.list` returns for that page (the callers
        read `.data` and `.meta` from it).

    Raises:
        Exception: The error from the final attempt, re-raised unchanged once
            all attempts have failed.
    """
    kwargs = dict(
        from_timestamp=start_date,
        to_timestamp=end_date,
        limit=PAGE_LIMIT,
        order_by="timestamp.desc",
        fields=FIELDS,
    )
    if TRACE_NAME_FILTER:
        kwargs["name"] = TRACE_NAME_FILTER
    if page_num != 1:
        kwargs["page"] = page_num

    # Always make at least one attempt: with MAX_RETRIES < 1 the loop would otherwise
    # never run and the function would silently return None.
    attempts = max(1, MAX_RETRIES)
    for attempt in range(attempts):
        try:
            return langfuse.api.trace.list(**kwargs)
        except Exception:
            # Every exception type is retried, not only timeouts; the last failure propagates.
            if attempt == attempts - 1:
                raise
            # Exponential backoff with jitter so parallel workers do not retry in lockstep.
            time.sleep(BASE_DELAY * (2 ** attempt) + random.uniform(0, 2))

# First page: fetched on its own because its metadata tells us how many pages exist.
first = fetch_page(1)
total_pages = max(1, first.meta.total_pages)
# Fall back to an upper-bound estimate when the response carries no total_items.
total_items = getattr(first.meta, "total_items", None) or (total_pages * PAGE_LIMIT)
fetched_traces = list(first.data)   # every trace returned, before any exclusion
failed_pages = []                   # (page_num, error message) for pages whose fetch raised

# Remaining pages: fetch in parallel (faster) with limited workers (avoids timeouts)
if total_pages > 1:
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_page = {executor.submit(fetch_page, p): p for p in range(2, total_pages + 1)}
        for future in tqdm(as_completed(future_to_page), total=len(future_to_page), desc="Fetching traces"):
            page_num = future_to_page[future]
            try:
                page = future.result()
                fetched_traces.extend(page.data)
            except Exception as e:
                # Best effort: record the failure and keep going so one bad page
                # does not discard everything fetched so far.
                failed_pages.append((page_num, str(e)))

# Exclude specific trace types from all calculations
EXCLUDED_TRACE_NAMES = {"ActionItemsSummarizer", "get_authorization_token"}
# Sorted once so every message lists the names in the same, deterministic order.
excluded_label = ", ".join(sorted(EXCLUDED_TRACE_NAMES))
count_before = len(fetched_traces)
# Traces whose name is missing or None are treated as "" and therefore kept.
all_traces = [t for t in fetched_traces if (getattr(t, "name", None) or "") not in EXCLUDED_TRACE_NAMES]
excluded_count = count_before - len(all_traces)

print(f"Fetched {count_before} traces (expected ~{total_items}, {total_pages} pages).")
print(f"Included: all traces except {excluded_label}.")
if excluded_count:
    print(f"Excluded {excluded_count} traces ({excluded_label}). Included: {len(all_traces)}.")
if failed_pages:
    failed_pages.sort()
    print(f"Warning: {len(failed_pages)} page(s) failed and were skipped. Re-run to retry or increase timeout.")
    # Name the pages so the gap can be investigated; totals computed from all_traces are
    # undercounted while any page is missing.
    shown = ", ".join(str(p) for p, _ in failed_pages[:20])
    more = f" (+{len(failed_pages) - 20} more)" if len(failed_pages) > 20 else ""
    print(f"Failed pages: {shown}{more}. First error: {failed_pages[0][1]}")
if not all_traces:
    print("Tip: Check start_date/end_date (IST→UTC) and that the project has traces in that range.")

Fetching traces: 100%|██████████| 1758/1758 [42:23<00:00,  1.45s/it] 


Fetched 175820 traces (expected ~175821, 1759 pages).
Included: all traces except ActionItemsSummarizer, get_authorization_token.
Excluded 123434 traces (get_authorization_token, ActionItemsSummarizer). Included: 52386.


In [9]:
# Total cost (input + output), in USD, over the included traces (all_traces already has the
# EXCLUDED_TRACE_NAMES entries removed). A missing/None total_cost counts as 0.
total_cost = sum((t.total_cost or 0) for t in all_traces)
traces_with_cost = sum(1 for t in all_traces if (t.total_cost or 0) > 0)

# start_date/end_date are UTC, so printing their .date() directly shows the UTC calendar
# day (the previous day for an IST midnight start). Convert back to IST so the report
# matches the window the user configured.
window_start_ist = start_date.astimezone(IST)
window_end_ist = end_date.astimezone(IST)

print(f"Time frame (IST): {window_start_ist.date()} to {window_end_ist.date()}")
print(f"Included: all traces. Excluded: {', '.join(sorted(EXCLUDED_TRACE_NAMES))}.")
print(f"Traces with cost: {traces_with_cost} / {len(all_traces)}")
print(f"Total cost (input + output): ${total_cost:.4f} USD")

Time frame: 2025-10-31 to 2025-11-30
Included: all traces. Excluded: ActionItemsSummarizer, get_authorization_token.
Traces with cost: 27750 / 52386
Total cost (input + output): $2867.7923 USD


In [10]:
# Average cost per cost-bearing trace: total_cost divided by traces_with_cost, i.e. the number
# of included traces whose total_cost is > 0 — not by len(all_traces). Excluded trace types
# (ActionItemsSummarizer, get_authorization_token) are in neither number.
# NOTE(review): if the intended metric is the average over *all* included traces, the
# denominator should be len(all_traces) — confirm which one is wanted.
# Evaluates to 0 when no included trace has a cost, which avoids a ZeroDivisionError.
total_cost / traces_with_cost if traces_with_cost else 0

0.10334386817435921