In [4]:
OPENAI_API_KEY = ""

In [42]:
import math
import pandas as pd
from langchain import hub
from langchain_openai import ChatOpenAI
from datasets import load_dataset

rag_dataset = load_dataset("neural-bridge/rag-dataset-1200")

prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY).bind(logprobs=True)

data = []
num_repeats = 10
num_qa = 5

for i in range(num_qa):
    context = rag_dataset['train'][i]['context']
    question = rag_dataset['train'][i]['question']
    example_messages = prompt.invoke({"context": context, "question": question}).to_messages()

    for _ in range(num_repeats):
        full = None
        log_probs = []

        for chunk in llm.stream(example_messages):
            full = chunk if full is None else full + chunk
            if "logprobs" in full.response_metadata:
                log_probs.extend([token["logprob"] for token in full.response_metadata["logprobs"]["content"]])

        ppl = math.exp(-sum(log_probs) / len(log_probs)) if log_probs else None
        data.append({"Context": context, "Question": question, "Answer": full.content if full else "No response", "Perplexity": ppl, "Example": i + 1})

df = pd.DataFrame(data)




In [43]:
df

Unnamed: 0,Context,Question,Answer,Perplexity,Example
0,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.064721,1
1,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.189514,1
2,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to a search ...,1.103298,1
3,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.067272,1
4,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to a search ...,1.102759,1
5,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.066394,1
6,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to a search ...,1.109957,1
7,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to a search ...,1.272327,1
8,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.067057,1
9,Francisco Rogers found the answer to a search ...,Who found the answer to a search query collar ...,Francisco Rogers found the answer to the searc...,1.21543,1


In [44]:
grouped = df.groupby('Example')['Perplexity'].agg(['mean', 'std', 'min', 'max', 'median'])

print(grouped)

                  mean            std       min            max    median
Example                                                                 
1         1.125873e+00   7.377680e-02  1.064721   1.272327e+00  1.103029
2        6.323466e+108  1.999656e+109  1.424206  6.323466e+109  1.701900
3         1.081154e+00   8.945537e-02  1.029769   1.322408e+00  1.043884
4         1.350128e+00   1.092820e-01  1.208428   1.568132e+00  1.367820
5         1.190609e+00   9.477321e-02  1.079223   1.373032e+00  1.175791


In [50]:
filtered_df = df[df['Perplexity'] > 10]

answers_above_10 = filtered_df[['Answer', 'Perplexity']]
print(answers_above_10['Answer'].iloc[1])

Some potential negative impacts of charity include perpetuating dependency on foreign aid, which can undermine local economies and small businesses, as seen in Haiti. Additionally, charities may allow companies to neglect ethical considerations in their supply chains, such as worker exploitation and environmental harm, while masking these issues with charitable branding. Finally, charity can serve as a temporary solution that fails to address systemic injustices, leading to a cycle of reliance rather than empowerment.
