# Langfuse Data Analysis

In [None]:
import json
import os
from typing import Optional

from dotenv import load_dotenv
from langfuse import Langfuse

In [None]:
def init_langfuse_client(public_key: str, secret_key: str, host: str):
    """Create a Langfuse client.

    Args:
        public_key: Passed to ``Langfuse`` as ``public_key``.
        secret_key: Passed to ``Langfuse`` as ``secret_key``.
        host: Passed to ``Langfuse`` as ``host``.

    Returns:
        The constructed ``Langfuse`` instance.
    """
    connection_settings = {
        "public_key": public_key,
        "secret_key": secret_key,
        "host": host,
    }
    return Langfuse(**connection_settings)

In [None]:
def spanning_seconds(end_time, start_time):
    """Return ``end_time - start_time`` expressed in seconds.

    The difference must expose ``total_seconds()`` (presumably both
    arguments are ``datetime`` objects — verify against callers). The
    result is negative when ``end_time`` precedes ``start_time``.
    """
    elapsed = end_time - start_time
    return elapsed.total_seconds()

In [None]:
def get_all_traces(client, name: Optional[str]=None):
    """Collect traces from every page returned by ``client.fetch_traces``.

    Pages are requested one at a time, starting at page 1, until a page
    comes back with no data.

    Args:
        client: Object exposing ``fetch_traces(name=..., page=...)`` whose
            result has a ``data`` sequence.
        name: Forwarded unchanged to ``fetch_traces`` on every request.

    Returns:
        A list with the items of all non-empty pages, in page order.
    """
    collected = []
    page_number = 1

    batch = client.fetch_traces(name=name, page=page_number).data
    while len(batch) != 0:
        collected.extend(batch)
        page_number += 1
        batch = client.fetch_traces(name=name, page=page_number).data

    return collected

In [None]:
def get_traces_with_project_id(traces, project_id):
    """Return the traces whose metadata carries the given ``project_id``.

    A trace is kept only when its ``metadata`` is truthy and
    ``metadata.get('project_id', '')`` equals ``project_id``. The input
    order is preserved.
    """
    return [
        candidate
        for candidate in traces
        if candidate.metadata
        and candidate.metadata.get('project_id', '') == project_id
    ]

In [None]:
def pprint_json(data):
    """Print a JSON document re-serialized with two-space indentation.

    ``data`` is parsed with ``json.loads`` and dumped again with
    ``indent=2`` and ``ensure_ascii=False``, so non-ASCII characters are
    printed as-is rather than escaped.
    """
    parsed = json.loads(data)
    formatted = json.dumps(parsed, indent=2, ensure_ascii=False)
    print(formatted)

In [None]:
# Load settings from the local .env file; override=True lets its values
# replace variables that are already set in the process environment.
load_dotenv(".env", override=True)

# Build the Langfuse client from the three LANGFUSE_* environment variables.
client = init_langfuse_client(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
    host=os.getenv("LANGFUSE_HOST"),
)

In [None]:
# Fetch every trace (no name filter) and display how many were returned.
traces = get_all_traces(client, name=None)
len(traces)

In [None]:
# Keep only the traces whose metadata project_id is "161" and show the count.
traces_with_same_project = get_traces_with_project_id(traces, project_id="161")
len(traces_with_same_project)

In [None]:
# Collect the project's traces whose output reports a non-empty error type
# under output['metadata']['error_type'].
results = []
for trace in traces_with_same_project:
    output = trace.output
    # A trace output is arbitrary JSON; skip anything that is not a mapping
    # (None, string, list, ...) instead of failing on `.get`.
    if not isinstance(output, dict):
        continue
    metadata = output.get('metadata')
    # 'metadata' may be missing or null; only a mapping can hold error_type.
    if isinstance(metadata, dict) and metadata.get('error_type', ''):
        results.append(trace)
len(results)

Ratio of failed traces (traces reporting an `error_type`) among the project's traces

In [None]:
# Share of the project's traces that failed. With no traces for the project
# the ratio is undefined, so show NaN instead of raising ZeroDivisionError.
len(results) / len(traces_with_same_project) if traces_with_same_project else float("nan")

In [None]:
# Group the failed traces by error type: error_type -> list of traces.
error_results = {}
for result in results:
    error_type = result.output['metadata']['error_type']
    # setdefault creates the bucket the first time an error type is seen.
    error_results.setdefault(error_type, []).append(result)

In [None]:
# Print each error type on one line and its number of traces on the next.
for key, value in error_results.items():
    print(key, len(value), sep="\n")

### Error Type: OTHERS

In [None]:
# Pretty-print every trace grouped under the OTHERS error type.
# .get(..., []) keeps the cell runnable when no trace has this error type.
for error_result in error_results.get('OTHERS', []):
    pprint_json(error_result.json())

### Error Type: NO_RELEVANT_DATA

In [None]:
# Pretty-print every trace grouped under the NO_RELEVANT_DATA error type.
# .get(..., []) keeps the cell runnable when no trace has this error type.
for error_result in error_results.get('NO_RELEVANT_DATA', []):
    pprint_json(error_result.json())

### Error Type: NO_RELEVANT_SQL

In [None]:
# Pretty-print every trace grouped under the NO_RELEVANT_SQL error type.
# .get(..., []) keeps the cell runnable when no trace has this error type.
for error_result in error_results.get('NO_RELEVANT_SQL', []):
    pprint_json(error_result.json())

In [None]:
# Narrow the NO_RELEVANT_SQL failures down to one specific user question.
# .get(..., []) avoids a KeyError when no trace has this error type.
# NOTE(review): assumes each trace's input looks like
# {'args': [{'query': ...}, ...]} — confirm against the traced function.
_results = [
    error_result
    for error_result in error_results.get('NO_RELEVANT_SQL', [])
    if error_result.dict()['input']['args'][0]['query'] == 'What is the close rate of deals in the funnel?'
]
len(_results)

In [None]:
# Pretty-print each trace that matched the query filter.
for _result in _results:
    _result_json = _result.json()
    pprint_json(_result_json)

### Error Type: SQL_BREAKDOWN_FAILED

In [None]:
# Pretty-print every trace grouped under the SQL_BREAKDOWN_FAILED error type.
# .get(..., []) keeps the cell runnable when no trace has this error type.
for error_result in error_results.get('SQL_BREAKDOWN_FAILED', []):
    pprint_json(error_result.json())