In [1]:
import pandas as pd
import os
# The OpenAI key is read from the process environment; it is never hardcoded here.
# Re-assigning os.environ from os.getenv() is a no-op when the variable exists and
# raises an opaque "str expected, not NoneType" TypeError when it does not, so check
# explicitly and fail fast with a message that says what to do.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before starting "
        "the notebook kernel."
    )

In [2]:
# Spreadsheet holding the verified question / ground-truth pairs for this run.
# Alternate dataset: "../verified_data_01_15_2024.xlsx"
evaluation_file = "../verified_data_sample_01_20_2024.xlsx"
df = pd.read_excel(evaluation_file)

# Wrap each ground-truth value in a one-element list.
# NOTE(review): presumably the evaluator expects a list of reference answers
# per question -- confirm against GenieMaster.evaluate.
df['ground_truths'] = df['ground_truths'].map(lambda truth: [truth])

# Preview the first two rows.
df.head(2)

Unnamed: 0,name,category,title,question,ground_truths,reference
0,Joe Biden,Social Issues,Abortion,Do you support women's right to abortion?,"[Yes, support women's right to abortion]",
1,Joe Biden,Domestic Policy Issues,Drug Policy,Are you in favor of decriminalizing drug use?,"[Yes, in favor of decriminalizing drug use]",https://www.whitehouse.gov/briefing-room/state...


In [3]:
from genie_master import GenieMaster

# Directory handed to GenieMaster as its database path.
genie_db_path = "./chroma_qadata_db/"
gm = GenieMaster(db_path=genie_db_path)

# Quick health check: model readiness and how many documents are loaded.
model_ready = gm.model_is_ready()
print("Genie is ready...", model_ready)
document_count = gm._document_count()
print("Document collection count...", document_count)

# Render the prompt template with ANSI-underlined placeholders (\033[4m ... \033[0m)
# so the substituted slots stand out when the prompt is printed.
genie = gm.get_genie("\033[4mname\033[0m")
prompt_template = genie.get_prompt_template()
prompt = prompt_template.format_messages(
    context="\033[4mcontext\033[0m",
    question="\033[4mquestion?\033[0m",
)

# The first two formatted messages are printed as the system and human messages.
print("\nGenie's standard prompt template:")
system_message = prompt[0]
print(f'\033[1mSystem Message:\033[0m {system_message.content}')
human_message = prompt[1]
print(f'\033[1mHuman Message:\033[0m {human_message.content}')

Genie Master initialized at: 2024-01-22 02:33:05.896532-05:00
Genie is ready... True
Document collection count... 45990

Genie's standard prompt template:
[1mSystem Message:[0m You will be provided with different pieces of context delimited by triple quotes and a question. The context are either statements made by the person of interest, or statements describing the person of interest. Your task is to answer the question using only the provided context, then support the answer with evidence and reasoning. If the document does not contain the information needed to answer this question, simply write “unknown”.

[1mHuman Message:[0m 
"""
[4mcontext[0m
"""

Question: For [4mname[0m, [4mquestion?[0m

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": 

In [4]:
# How many questions to evaluate in this run, and the seed that fixes which ones.
sample_number = 10
sample_seed = 42

# Draw a reproducible subset of the evaluation set:
# - random_state pins the selection so repeated runs score the same questions
#   and their metric averages are comparable;
# - n is clamped to the number of available rows, because DataFrame.sample
#   raises ValueError when asked for more rows than exist (replace=False).
df_data = df[['name', 'question', 'ground_truths']].sample(
    n=min(sample_number, len(df)),
    random_state=sample_seed,
)
df_data.head(3)

Unnamed: 0,name,question,ground_truths
10,Joe Biden,Do you support qualified immunity for police o...,"[Yes, support qualified immunity for police of..."
1,Joe Biden,Are you in favor of decriminalizing drug use?,"[Yes, in favor of decriminalizing drug use]"
9,Joe Biden,Should the government increase environmental r...,"[Yes, the government should increase environme..."


In [5]:
# Run the evaluation over the sampled questions.
df_eval = gm.evaluate(df_data)

# Everything that is not one of these input/output columns is treated as a metric score.
non_metric_columns = ["name", "question", "answer", "contexts", "ground_truths"]
df_scores = df_eval.drop(columns=non_metric_columns)

# Average each metric as (column sum) / (number of rows).
# Note: Series.sum() skips NaN by default while the divisor counts every row,
# so a NaN score contributes zero to the average rather than being excluded.
row_count = df_scores.shape[0]
eval_result = {
    metric: df_scores[metric].sum() / row_count
    for metric in df_scores.columns
}

# Blank separator line, then one "metric: score" line per metric.
print('')
for metric in eval_result:
    print(f"{metric}: {eval_result[metric]}")

[1mEvaluating <Genie name=Joe Biden model=gpt-3.5-turbo-1106> [0m
Getting LLM response...
evaluating with [faithfulness]


100%|██████████| 1/1 [00:17<00:00, 17.10s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:06<00:00,  6.02s/it]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:05<00:00,  5.92s/it]


evaluating with [context_recall]


100%|██████████| 1/1 [00:21<00:00, 21.58s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:13<00:00, 13.62s/it]



faithfulness: 0.6466666666666667
answer_relevancy: 0.7001581060361219
context_relevancy: 0.14924730125968516
context_recall: 0.6177777777777778
answer_correctness: 0.5985129633356373


In [6]:
# Show the per-question evaluation table returned by gm.evaluate:
# question, answer, contexts, ground truths, and one column per metric score.
df_eval

Unnamed: 0,name,question,answer,contexts,ground_truths,faithfulness,answer_relevancy,context_relevancy,context_recall,answer_correctness
0,,Do you support qualified immunity for police o...,"{""answer"": ""unknown"", ""reasoning"": ""The provid...",[When a police officer pins on that shield and...,"[Yes, support qualified immunity for police of...",0.0,0.0,0.0,0.111111,0.491712
1,,Are you in favor of decriminalizing drug use?,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden is i...",[negative impacts.End all incarceration for dr...,"[Yes, in favor of decriminalizing drug use]",0.8,0.90743,0.277778,0.5,0.679766
2,,Should the government increase environmental r...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden supp...",[[4:52] From coastal towns to rural farms to u...,"[Yes, the government should increase environme...",0.75,0.915586,0.125,1.0,0.639284
3,,Should a photo ID be required to vote?,"{""answer"": ""no"", ""reasoning"": ""Joe Biden belie...","[On the civil rights front, we see renewed att...","[No, a photo ID should not be required to vote]",1.0,0.835758,0.296296,0.75,0.559848
4,,Should the government regulate the prices of l...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden supp...",[Senator Biden fought for years to expand the ...,"[Yes, the government should regulate the price...",0.666667,0.928465,0.117647,0.666667,0.617643
5,,Should the government require children to be v...,"{""answer"": ""Yes"", ""reasoning"": ""Joe Biden has ...",[The path to universal health care starts with...,"[Yes, the government should require children t...",0.25,0.839564,0.157895,1.0,0.654859
6,,Should the U.S. continue to support Israel?,"{""answer"": ""unknown"", ""reasoning"": ""The provid...","[nuclear energy, and also lay the foundation f...","[Yes, U.S. should continue to support Israel]",0.0,0.0,0.0,0.0,0.468195
7,,Should the federal government pay for tuition ...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden has ...",[The cost of making community college free cou...,"[Yes, federal government should pay for tuitio...",1.0,0.844007,0.375,0.4,0.617051
8,,Do you support affirmative action programs?,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden has ...",[While workplace discrimination is not as obvi...,"[Yes, support affirmative action programs]",1.0,0.894272,0.142857,1.0,0.669503
9,,Should the government increase spending on pub...,"{""answer"": ""yes"", ""reasoning"": ""Joe Biden beli...",[housing near public transit would cut commute...,"[Yes, government should increase spending on p...",1.0,0.836501,0.0,0.75,0.587268


In [7]:
from datetime import datetime
# Store the evaluation table as a timestamped Excel file.
eval_records_folder_path = "../evaluation_records/"

# to_excel does not create missing parent directories, so make sure the
# records folder exists before writing (no-op when it is already there).
os.makedirs(eval_records_folder_path, exist_ok=True)

# File name is the current local time formatted as MMDDYYYYHHMM.
# Resolution is one minute: two saves within the same minute share a name
# and the later one overwrites the earlier.
now = datetime.now()
path = os.path.join(eval_records_folder_path, now.strftime("%m%d%Y%H%M") + ".xlsx")
df_eval.to_excel(path, index=False)

# Display where the file was written.
path

'../evaluation_records/012220240234.xlsx'