# Langfuse Data Analysis

In [None]:
import json
import os
from typing import Optional

from dotenv import load_dotenv
from langfuse import Langfuse

### Utility Functions

In [None]:
def init_langfuse_client(public_key: str, secret_key: str, host: str):
    """Build a ``Langfuse`` client from explicit credentials.

    Args:
        public_key: Forwarded to ``Langfuse`` as ``public_key``.
        secret_key: Forwarded to ``Langfuse`` as ``secret_key``.
        host: Forwarded to ``Langfuse`` as ``host``.

    Returns:
        The newly constructed ``Langfuse`` instance.
    """
    credentials = {
        "public_key": public_key,
        "secret_key": secret_key,
        "host": host,
    }
    return Langfuse(**credentials)

In [None]:
def spanning_seconds(end_time, start_time):
    """Return the seconds elapsed from ``start_time`` to ``end_time``.

    The two arguments are subtracted (``end_time - start_time``) and the
    result's ``total_seconds()`` is returned, so the value is negative when
    ``end_time`` precedes ``start_time``.
    """
    elapsed = end_time - start_time
    return elapsed.total_seconds()

In [None]:
def get_all_traces(client, name: Optional[str]=None):
    """Collect the traces from every page returned by ``client.fetch_traces``.

    Pages are requested one at a time starting from page 1; collection stops
    at the first page whose ``data`` is empty.

    Args:
        client: Object exposing ``fetch_traces(name=..., page=...)`` whose
            result has a ``data`` sequence.
        name: Passed through unchanged as the ``name`` argument of every
            ``fetch_traces`` call.

    Returns:
        A list with the items of all non-empty pages, in page order.
    """
    collected = []
    page_number = 1
    batch = client.fetch_traces(name=name, page=page_number).data

    while len(batch) != 0:
        collected += batch
        page_number += 1
        batch = client.fetch_traces(name=name, page=page_number).data

    return collected

In [None]:
def get_traces_with_project_id(traces, project_id):
    """Return the traces whose metadata names the given project.

    A trace is kept when its ``metadata`` is truthy and
    ``metadata.get('project_id', '')`` equals ``project_id``. Input order is
    preserved.
    """
    return [
        candidate
        for candidate in traces
        if candidate.metadata
        and candidate.metadata.get('project_id', '') == project_id
    ]

In [None]:
def pprint_json(data):
    """Print a JSON document to stdout, re-indented for reading.

    ``data`` is parsed with ``json.loads`` and re-serialized with a
    two-space indent; ``ensure_ascii=False`` keeps non-ASCII characters
    as-is instead of emitting ``\\uXXXX`` escapes.
    """
    parsed = json.loads(data)
    formatted = json.dumps(parsed, ensure_ascii=False, indent=2)
    print(formatted)

In [None]:
def get_error_results_by_query(error_results, query):
    """Find the grouped error results that were produced for ``query``.

    Args:
        error_results: Mapping from error type to a list of results. Each
            result must provide ``dict()`` returning a structure where
            ``['input']['args'][0]['query']`` holds the query text.
        query: Query text to match exactly.

    Returns:
        A flat list of the matching results, in the mapping's iteration
        order and then in list order within each error type.
    """
    matches = []
    for grouped in error_results.values():
        matches.extend(
            item
            for item in grouped
            if item.dict()['input']['args'][0]['query'] == query
        )
    return matches

## Analysis

In [None]:
# Load settings from the local .env file; override=True lets its values
# replace variables that are already set in the process environment.
load_dotenv(".env", override=True)

# A missing variable yields None here, exactly as with os.getenv.
client = init_langfuse_client(
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
    host=os.environ.get("LANGFUSE_HOST"),
)

In [None]:
# Fetch every page of traces (no name filter) and show how many came back.
traces = get_all_traces(client, name=None)
len(traces)

In [None]:
# Keep only the traces tagged with project id "113" (noted as "foxcomm"
# by the original author) and show how many remain.
traces = get_traces_with_project_id(traces=traces, project_id="113")
len(traces)

In [None]:
# Split the traces into two buckets: those whose output carries a
# non-empty metadata.error_type, and everything else.
error_results, no_error_results = [], []
for trace in traces:
    failed = trace.output and trace.output.get('metadata', {}).get('error_type', '')
    bucket = error_results if failed else no_error_results
    bucket.append(trace)

# Sanity check: every trace landed in exactly one bucket.
assert len(traces) == len(error_results) + len(no_error_results)
len(error_results), len(no_error_results)

### Ratio of Failed Traces

In [None]:
# Fraction of the selected traces that carry an error type. The project
# filter can leave `traces` empty, so fall back to 0.0 instead of raising
# ZeroDivisionError.
len(error_results) / len(traces) if traces else 0.0

In [None]:
# Group the failed traces by their error type:
# {error_type: [trace, ...]}, keeping the original trace order per group.
error_results_details = {}
for result in error_results:
    error_type = result.output['metadata']['error_type']
    error_results_details.setdefault(error_type, []).append(result)

In [None]:
# Print each error type followed, on the next line, by its trace count.
for error_type, grouped in error_results_details.items():
    print(error_type, len(grouped), sep="\n")

In [None]:
# Look up the failed traces for one specific user query and count them.
_error_results = get_error_results_by_query(
    error_results=error_results_details,
    query='我在台中公園，有哪些路線我可以搭乘？',
)
len(_error_results)

In [None]:
# Dump every failed trace that matched the query above as indented JSON.
for matched_trace in _error_results:
    serialized = matched_trace.json()
    pprint_json(serialized)

### Error Type: OTHERS

In [None]:
# Dump every trace with error type OTHERS as indented JSON. .get() with an
# empty default keeps the cell from raising KeyError when the fetched data
# contains no trace of this error type; it then prints nothing.
for error_result in error_results_details.get('OTHERS', []):
    pprint_json(error_result.json())

### Error Type: NO_RELEVANT_DATA

In [None]:
# Dump every trace with error type NO_RELEVANT_DATA as indented JSON.
# .get() with an empty default keeps the cell from raising KeyError when
# the fetched data contains no trace of this error type; it then prints
# nothing.
for error_result in error_results_details.get('NO_RELEVANT_DATA', []):
    pprint_json(error_result.json())

### Error Type: NO_RELEVANT_SQL

In [None]:
# Dump every trace with error type NO_RELEVANT_SQL as indented JSON.
# .get() with an empty default keeps the cell from raising KeyError when
# the fetched data contains no trace of this error type; it then prints
# nothing.
for error_result in error_results_details.get('NO_RELEVANT_SQL', []):
    pprint_json(error_result.json())

### Error Type: SQL_BREAKDOWN_FAILED

In [None]:
# Dump every trace with error type SQL_BREAKDOWN_FAILED as indented JSON.
# .get() with an empty default keeps the cell from raising KeyError when
# the fetched data contains no trace of this error type; it then prints
# nothing.
for error_result in error_results_details.get('SQL_BREAKDOWN_FAILED', []):
    pprint_json(error_result.json())

### No Error Results

In [None]:
# Dump every trace that had no error type recorded as indented JSON.
for trace_without_error in no_error_results:
    serialized = trace_without_error.json()
    pprint_json(serialized)