In [1]:
import phoenix as px
pxc = px.Client()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from phoenix.session.evaluation import get_retrieved_documents

retrieved_documents = get_retrieved_documents(pxc, project_name="research_assistant")
retrieved_documents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,context.trace_id,input,reference,document_score
context.span_id,document_position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
35efaab1dd55534b,0,255d389c1d1edd773fff4748adaab82d,2024 US presidential election candidates and t...,2024 Presidential Election\nAdvertisement\nWho...,0.680594
35efaab1dd55534b,1,255d389c1d1edd773fff4748adaab82d,2024 US presidential election candidates and t...,Email *\nFirst Name *\nLast Name\nPlease compl...,0.658504
35efaab1dd55534b,2,255d389c1d1edd773fff4748adaab82d,2024 US presidential election candidates and t...,Campaign positions\nEstablish “an Economic Bil...,0.64026
35efaab1dd55534b,3,255d389c1d1edd773fff4748adaab82d,2024 US presidential election candidates and t...,Profile\nSections\ntv\nFeatured\nMore From NBC...,0.589031
35efaab1dd55534b,4,255d389c1d1edd773fff4748adaab82d,2024 US presidential election candidates and t...,"Suarez, who is Cuban American, comes from a po...",0.586914


In [3]:
from phoenix.evals import (
    OpenAIModel,
    llm_classify,
)
import nest_asyncio
nest_asyncio.apply()

model = OpenAIModel(
    model="gpt-4o",
    temperature=0.0,
)

bias_detection_prompt = """
You are an AI assistant designed to detect bias in documents. Given an original question 
and a document, your task is to analyze the document and determine if there is any bias present. 

Please follow these steps:
1. Read the original question carefully.
2. Read the document provided.
3. Identify any statements, language, or information in the document that may indicate bias.
4. Provide a classification of the document's bias level: 
Unbiased, Somewhat Unbiased, Somewhat Biased, Biased.
5. Provide a detailed explanation for your classification, citing specific parts of the 
document that influenced your decision.

Original Question: {input}
Document: {reference}

Your analysis should be thorough and objective. Please ensure that your explanation 
is clear and concise.

Example response:
************
EXPLANATION: An explanation of your reasoning for the label you chose
LABEL: "bias", "unbiased", "somewhat biased", "somewhat unbiased"
************
"""

In [4]:
bias_classifications = llm_classify(
    dataframe=retrieved_documents, 
    template=bias_detection_prompt, 
    model=model, 
    rails=['Unbiased', 'Biased', 'Somewhat Biased', 'Somewhat Unbiased'],
    provide_explanation=True,
)
bias_classifications["score"] = bias_classifications["label"].map(
    {"unbiased": 1, "somewhat unbiased": 0.75, "somewhat biased": 0.5, "biased": 0}
    )

llm_classify |██████████| 119/119 (100.0%) | ⏳ 00:37<00:00 |  3.20it/s


In [5]:
bias_classifications.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,label,explanation,exceptions,execution_status,execution_seconds,score
context.span_id,document_position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
35efaab1dd55534b,0,somewhat unbiased,The document provides a detailed overview of t...,[],COMPLETED,7.456938,0.75
35efaab1dd55534b,1,unbiased,The document provides a factual and comprehens...,[],COMPLETED,7.188562,1.0
35efaab1dd55534b,2,unbiased,The document provides a factual summary of Jil...,[],COMPLETED,3.416684,1.0
35efaab1dd55534b,3,somewhat biased,"The document exhibits a degree of bias, partic...",[],COMPLETED,10.110849,0.5
35efaab1dd55534b,4,biased,The document exhibits a degree of bias in its ...,[],COMPLETED,4.191071,0.0


In [6]:
span_bias_classifications = bias_classifications.copy()

span_bias_classifications["average_score"] = span_bias_classifications.groupby("context.span_id")["score"].transform("mean")

span_bias_classifications["label"] = span_bias_classifications["average_score"].apply(
    lambda x: min({1: "unbiased", 0.75: "somewhat unbiased", 0.5: "somewhat biased", 0: "biased"}.keys(), key=lambda k: abs(k - x))
).map(
    {1: "unbiased", 0.75: "somewhat unbiased", 0.5: "somewhat biased", 0: "biased"}
)

# Combine all rows with the same context.span_id into one row, with explanations being a concatenation of all the explanations
span_bias_classifications = span_bias_classifications.groupby("context.span_id").agg({
    "label": "first",
    "explanation": lambda x: "\n----\n".join(x),
    "exceptions": "first",
    "execution_status": "first",
    "execution_seconds": "mean",
    "score": "mean",
    "average_score": "first"
}).reset_index()
span_bias_classifications.set_index("context.span_id", inplace=True)
span_bias_classifications.head()


TypeError: sequence item 10: expected str instance, NoneType found

In [None]:
from phoenix.trace import SpanEvaluations, DocumentEvaluations

px.Client().log_evaluations(
    SpanEvaluations(
        dataframe=span_bias_classifications,
        eval_name="Bias Detection",
    ),
    DocumentEvaluations(
        dataframe=bias_classifications,
        eval_name="Relevance",
    ),
)