Reference: Ragas "Get started: evaluation" guide — https://docs.ragas.io/en/stable/getstarted/evaluation.html

In [1]:
import json
import os
# The OpenAI-backed evaluation below presumably needs OPENAI_API_KEY in the
# process environment (the run log shows a gpt-3.5-turbo model in use).
# Writing os.getenv(...) back into os.environ changed nothing when the key was
# present and raised an opaque "TypeError: str expected, not NoneType" when it
# was absent, so check explicitly and fail fast with an actionable message.
if os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the notebook kernel."
    )

from datasets import Dataset

In [2]:
import pandas as pd
# Read verify_result.xlsx into a DataFrame and preview its first three rows.
df = pd.read_excel(io="verify_result.xlsx")
df.head(n=3)

Unnamed: 0,name,category,title,question,stance,reference,answer,reasoning,evidence,source_content,source_category,source_sub_category,cost
0,Joe Biden,Social Issues,Abortion,Do you support women's right to abortion?,Yes,,yes,Joe Biden has expressed clear support for a wo...,['I would work to enshrine into federal law th...,I would work to enshrine into federal law the ...,"Abortion, Pro-Life & Genetic Engineering",Roe v. Wade,0
1,Joe Biden,Social Issues,Gender transition,Should people under the age of 18 years old be...,Yes,,yes,Joe Biden has expressed support for transgende...,"[""The fact of the matter is that we're in a po...",would be more expensive to treat if they had t...,Health & Medical,Medically Uninsured or Underinsured,0
2,Joe Biden,Social Issues,Hate Speech,Should hate speech be protected by the first a...,No,,unknown,The provided context does not contain a direct...,[],[4:51] Former Vice President Joe Biden discuss...,"Crime, Police & Imprisonment",Hate Crimes,0


In [3]:
from genie_master import GenieMaster

# Path of the on-disk database handed to GenieMaster
# (a Chroma store, judging by the directory name — TODO confirm).
genie_db_path = "./chroma_qadata_db/"
# Build the GenieMaster instance that the evaluation cells below call into.
gm = GenieMaster(db_path=genie_db_path)
# Sanity checks: report whether the model is ready and how many documents are loaded.
print("Genie is ready...", gm.model_is_ready())
# NOTE(review): _document_count() is underscore-prefixed, i.e. private by
# convention; prefer a public accessor if GenieMaster offers one.
print("Document collection count...", gm._document_count())

Genie Master initialized at: 2024-01-20 15:26:45.477758-05:00
Genie is ready... True
Document collection count... 45990


# Genie Master evaluation

In [4]:
# Draw ten (name, question) rows to evaluate.
# random_state pins the draw so that Restart & Run All scores the same
# questions every time; without it each run evaluated a different subset and
# the metric averages computed later were not comparable between runs.
df_data = df[['name', 'question']].sample(n=10, random_state=42)
df_data

Unnamed: 0,name,question
35,Joe Biden,Should the electoral college be abolished?
14,Joe Biden,Should the federal government pay for tuition ...
41,Joe Biden,Should the government give tax credits and sub...
37,Joe Biden,Should the government require children to be v...
40,Joe Biden,Should the U.S. expand offshore oil drilling?
44,Joe Biden,Do you support qualified immunity or police of...
38,Joe Biden,Do you support the use of nuclear energy?
46,Joe Biden,Should the government increase military spending?
2,Joe Biden,Should hate speech be protected by the first a...
42,Joe Biden,Should police officers be required to wear bod...


In [5]:
# Run GenieMaster's evaluation over the sampled (name, question) rows; the
# logged output shows it scoring faithfulness, answer_relevancy and
# context_relevancy in turn.
df_eval = gm.evaluate(df_data)
# Preview the first five rows of the scored results.
df_eval.head(5)

[1mEvaluating <Genie name=Joe Biden model=gpt-3.5-turbo-1106>
Getting LLM response...
evaluating with [faithfulness]


100%|██████████| 1/1 [00:28<00:00, 28.60s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:07<00:00,  7.51s/it]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:02<00:00,  2.22s/it]


Unnamed: 0,question,answer,contexts,faithfulness,answer_relevancy,context_relevancy
0,Should the electoral college be abolished?,"{""answer"": ""unknown"", ""reasoning"": ""There is n...",[supports giving voting representation to the ...,0.0,0.0,0.636364
1,Should the federal government pay for tuition ...,"{""answer"": ""unknown"", ""reasoning"": ""There are ...",[to transfer these community college credits t...,0.0,0.0,0.555556
2,Should the government give tax credits and sub...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden has ...",[emissions.•\tIncluded in the 2009 American Re...,1.0,0.8325,0.6
3,Should the government require children to be v...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden has ...",[children to buy in to the Medicaid program.ww...,1.0,0.858189,0.0
4,Should the U.S. expand offshore oil drilling?,"{""answer"": ""no"", ""reasoning"": ""Joe Biden has e...",[End offshore drillingwww.politico.com/2020-el...,1.0,0.0,0.571429
5,Do you support qualified immunity or police of...,"{""answer"": ""unknown"", ""reasoning"": ""There is n...",[When a police officer pins on that shield and...,0.666667,0.0,0.0
6,Do you support the use of nuclear energy?,"{""answer"": ""unknown"", ""reasoning"": ""The quotes...",[Identify the future of nuclear energy. To add...,0.0,0.0,0.333333
7,Should the government increase military spending?,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden beli...",[Our military is one tool in our toolbox—along...,1.0,0.877783,0.2
8,Should hate speech be protected by the first a...,"{""answer"": ""no"", ""reasoning"": ""Joe Biden has c...",[[4:51] Former Vice President Joe Biden discus...,1.0,0.859531,0.0
9,Should police officers be required to wear bod...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden has ...",[When a police officer pins on that shield and...,0.75,0.922346,0.0


In [6]:
# Average each metric column of df_eval.
# .mean() replaces the manual sum() / row-count: the two agree when every row
# has a score, but sum() treats a missing (NaN) score as 0 while the row is
# still counted in the denominator, which silently drags the average down.
# .mean() averages over the rows that actually have a score.
# NOTE(review): `faithfulness` and `answer_relevancy` are rebound to metric
# objects by the later `from ragas.metrics import ...` cell, so re-run this
# cell before reading these values after that import has executed.
faithfulness = df_eval['faithfulness'].mean()
answer_relevancy = df_eval['answer_relevancy'].mean()
context_relevancy = df_eval['context_relevancy'].mean()

print("Faithfulness:", faithfulness)
print("Answer relevancy:", answer_relevancy)
print("Context relevancy:", context_relevancy)

Faithfulness: 0.6416666666666666
Answer relevancy: 0.43503488205347535
Context relevancy: 0.2896681096681097


# old

In [None]:
# Fetch the genie registered under this person's name from the GenieMaster
# and print it (the later cells call evaluate / base_batch_ask on it).
name = "Joe Biden"
genie = gm.get_genie(name)
print(genie)

In [None]:
# Sample five rows and collect their questions as a plain Python list.
# random_state makes the draw repeatable across kernel restarts, so the cells
# below always evaluate the same questions.
df_sample = df.sample(n=5, random_state=42)
# Bracket access is used instead of attribute access so the lookup cannot be
# confused with a DataFrame attribute or method of the same name.
questions = df_sample["question"].to_list()
questions

# New: evaluate with `genie.evaluate`

In [None]:
eval = genie.evaluate(questions)
eval

In [None]:
df_eval = eval.to_pandas()
df_eval.head()

# Old: manual Ragas evaluation pipeline

In [None]:
# Ask the genie all sampled questions in one batch call and display the raw output.
# The next cell reads the "question", "result" and "context" fields of this output.
base_batch_result = genie.base_batch_ask(questions)
base_batch_result

In [None]:
# Shape the raw batch output into the three columns used for evaluation:
#   question - taken as-is from the batch output
#   answer   - the "result" payload serialised to a JSON string
#   contexts - the page_content text of every document in "context"
df_result = (
    pd.DataFrame(base_batch_result)
    .assign(
        answer=lambda frame: frame["result"].apply(json.dumps),
        contexts=lambda frame: frame["context"].apply(
            lambda retrieved: [item.page_content for item in retrieved]
        ),
    )[["question", "answer", "contexts"]]
)
df_result

In [None]:
# Wrap the prepared frame in a `datasets.Dataset`; the ragas evaluate() call
# further down takes a slice of this object as its input.
dataset = Dataset.from_pandas(df_result)
dataset

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)

In [None]:
from ragas import evaluate

# Score only the first two dataset rows with three ragas metrics.
# context_recall is imported above but intentionally left out of the metric list.
result = evaluate(
    dataset.select(range(2)),
    metrics=[context_precision, faithfulness, answer_relevancy],
)
result

# Part 2