In [1]:
import os

# Run the rest of the notebook from the parent directory when it was started
# inside the 'evaluation' folder, so relative paths such as
# 'evaluation/questions.csv' resolve.
# os.path.basename is used instead of splitting on '/' so the check does not
# depend on the platform's path separator.
if os.path.basename(os.getcwd()) == 'evaluation':
    os.chdir('../')

## Which Hyperparameters Should I Iterate On?
https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2

These are the hyperparameters you should typically iterate on:

- model: the LLM to use for generation.
- prompt template: the variation of prompt templates to use for generation.
- temperature: the temperature value to use for generation.
- max tokens: the max token limit to set for your LLM generation.
- top-K: the number of retrieved nodes in your retrieval_context in a RAG pipeline.
- chunk size: the size of the retrieved nodes in your retrieval_context in a RAG pipeline.
- reranking model: the model used to rerank the retrieved nodes in your retrieval_context in a RAG pipeline.


In [15]:
import pandas as pd
from deepeval.test_case import LLMTestCase
from deepeval.dataset import EvaluationDataset

from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric

from chat_caller import query_gpt_chat, reload_vector_store
from deepeval.metrics import ContextualPrecisionMetric, FaithfulnessMetric, ContextualRecallMetric, ContextualRelevancyMetric, HallucinationMetric


In [3]:
# Load the evaluation questions (path is relative to the project root).
df_questions = pd.read_csv(filepath_or_buffer='evaluation/questions.csv')

In [4]:
# Display the loaded evaluation questions as the cell output.
df_questions

Unnamed: 0,ques_id,context,question,answer,source_doc
0,Q1,- **AI Chatbot**: An AI chatbot is available f...,What is the purpose of the AI chatbot in the b...,The AI chatbot is designed to answer frequentl...,course_material/general/syllabus.md
1,Q2,Primer on Business Analytics with Python\nModu...,What are the basic techniques in text analytic...,The basic techniques in text analytics covered...,course_material/module 5/lecture slides.pptx
2,Q3,Primer on Business Analytics \nwith Python\nMo...,What is the business example of using linear r...,Predicting sales based on advertising spend.,course_material/module 2/lecture slides.pdf
3,Q4,Primer on Business Analytics with Python\nModu...,What are the three tasks in the online exam fo...,The three tasks in the online exam are Code In...,course_material/module 6/lecture slides.pptx
4,Q5,Primer on Business Analytics with Python\nModu...,What are some common algorithms used in superv...,Some common algorithms used in supervised mach...,course_material/module 4/lecture slides.pptx
5,Q6,"[Slide 1: Title Slide]\nHello again, everyone!...",What is the purpose of a t-test in business an...,A t-test allows us to compare the means of two...,course_material/module 2/transcript.txt
6,Q7,# Primer on Business Analytics with Python\n##...,What are the main concepts covered in the Busi...,The main concepts covered in the course are ba...,course_material/module 6/transcript.txt
7,Q8,"Thank you for your participation, and I'm exci...",What is the speaker excited to continue in the...,The speaker is excited to continue the learnin...,course_material/module 2/transcript.txt
8,Q9,"[Slide 1: Title Slide]\nHello, everyone! Welco...",What is the go-to tool for data manipulation a...,The go-to tool for data manipulation and analy...,course_material/module 1/transcript.txt
9,Q10,- Split data into training and test sets for u...,What is a confusion matrix in model evaluation?\n,A confusion matrix is a table that describes t...,course_material/module 4/lecture slides.pptx


In [5]:
# Independent (deep) copy, so changes to df_evaluation do not touch df_questions.
# NOTE(review): df_evaluation is not referenced by the cells visible here -
# confirm it is still needed.
df_evaluation = df_questions.copy(deep=True)

In [6]:
import os
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using a token read from the
# environment, so that no credential is written into the notebook.
token = os.environ.get("HUGGINGFACE_TOKEN")

if not token:
    print("Environment token not found.")
else:
    login(token=token)
    print("Logged in successfully.")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/crystalfrancis/.cache/huggingface/token
Login successful
Logged in successfully.


https://huggingface.co/spaces/mteb/leaderboard
- Model Types: Open, Sentence Transformers
- Model Sizes: <100M

In [7]:
# Embedding models to compare; each entry is a Hugging Face model id that is
# handed to evaluate_rag() in turn.
embedding_models = [
    'sentence-transformers/all-MiniLM-L6-v2',  # efficient and fast sentence-embedding model
    'sentence-transformers/all-MiniLM-L12-v2',
    'TaylorAI/gte-tiny',
]

In [9]:
# Settings shared by every metric that is judged with GPT-4.
_gpt4_judge_settings = dict(threshold=0.7, model="gpt-4", include_reason=True)

# Metric label -> deepeval metric instance.
# add_metrics() uses the labels as column names and relies on this insertion
# order when it reads the per-metric results back by position.
eval_metrics = {
    # No `model` argument here, so this metric runs with deepeval's default
    # evaluation model rather than the GPT-4 settings above.
    'Answer Relevance': AnswerRelevancyMetric(threshold=0.5),
    'Faithfulness': FaithfulnessMetric(**_gpt4_judge_settings),
    'Contextual Relevancy': ContextualRelevancyMetric(**_gpt4_judge_settings),
    # The following metrics throw errors regarding parsing JSON in the LLM
    # response, so they are left disabled:
    #     'Contextual Precision': ContextualPrecisionMetric(**_gpt4_judge_settings),
    #     'Hallucination': HallucinationMetric(threshold=0.5),
    #     'Contextual Recall': ContextualRecallMetric(**_gpt4_judge_settings),
}

In [10]:
def add_metrics(dataset, model, df_model):
    """Score ``dataset`` with every metric in ``eval_metrics`` and store the results in ``df_model``.

    Args:
        dataset: Test cases passed straight to ``evaluate``.
        model: Value written to the ``'Model'`` column of every row.
        df_model: DataFrame that receives the new columns; it is modified in place.

    Returns:
        The same ``df_model`` object, with a ``'Model'`` column and, for each
        metric label, a score column named after the label plus a
        ``"<label> - Reason"`` column.
    """
    df_model['Model'] = model
    results = evaluate(dataset, list(eval_metrics.values()))

    # NOTE(review): this relies on `results` being ordered like the rows of
    # df_model, and on each result's `metrics_metadata` being ordered like
    # `eval_metrics` - confirm against the installed deepeval version.
    for position, label in enumerate(eval_metrics):
        scores = []
        reasons = []
        for result in results:
            metadata = result.metrics_metadata[position]
            scores.append(metadata.score)
            reasons.append(metadata.reason)
        df_model[label] = scores
        df_model[f"{label} - Reason"] = reasons

    return df_model


In [11]:
def evaluate_rag(model, df_questions):
    """Answer every question with the chat pipeline and score the answers for one embedding model.

    Args:
        model: Embedding model name; passed to ``reload_vector_store`` and
            recorded in the results by ``add_metrics``.
        df_questions: DataFrame with ``'question'``, ``'answer'`` and
            ``'context'`` columns. It is copied, not modified.

    Returns:
        A copy of ``df_questions`` extended with the columns that
        ``add_metrics`` adds (model label, metric scores and reasons).
    """
    separator = "--------------------------------------------------------------------------------"

    print("Creating Test Cases...")
    print(separator)
    reload_vector_store(model)
    df_model = df_questions.copy()
    test_cases = []

    for _, row in df_model.iterrows():
        question = row['question']
        # The second argument is an empty list and element [1] of the return
        # value is used as the answer (presumably chat history and reply text -
        # verify against chat_caller).
        answer = query_gpt_chat(question, [])[1]

        # A missing CSV cell is read as NaN, which is truthy, so a plain
        # `row['context'] or ''` would pass NaN on as the retrieval context.
        context = row['context']
        if pd.isna(context) or not context:
            context = ''

        # NOTE(review): retrieval_context is the reference context stored in
        # the questions file, not the chunks retrieved for this query, so the
        # contextual metrics may not reflect the embedding model under test -
        # confirm this is intended.
        test_cases.append(
            LLMTestCase(
                input=question,
                actual_output=answer,
                expected_output=row['answer'],
                retrieval_context=[context],
            )
        )

    print("Evaluating Test Cases...")
    print(separator)
    dataset = EvaluationDataset(test_cases=test_cases)
    df_model = add_metrics(dataset, model, df_model)

    print(separator)
    print("Completed Evaluation")
    return df_model


In [12]:
# Evaluate each embedding model in turn and combine the per-model frames with
# a single concat, rather than re-concatenating the growing result every
# iteration.
model_frames = []
for model in embedding_models:
    df_model = evaluate_rag(model, df_questions)
    model_frames.append(df_model)

# Row labels are kept as returned by evaluate_rag (they repeat once per model).
# df_results stays None when there were no models to evaluate.
df_results = pd.concat(model_frames) if model_frames else None

Creating Test Cases...
--------------------------------------------------------------------------------
load INSTRUCTOR_Transformer
max_seq_length  512


I0000 00:00:1723202705.841102 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202708.012568 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Prompt tokens: 599
Completion tokens: 84
Total tokens: 683
Prompt tokens: 577
Completion tokens: 176
Total tokens: 753
Prompt tokens: 597
Completion tokens: 130
Total tokens: 727
Prompt tokens: 605
Completion tokens: 100
Total tokens: 705
Prompt tokens: 528
Completion tokens: 146
Total tokens: 674
Prompt tokens: 568
Completion tokens: 118
Total tokens: 686
Prompt tokens: 588
Completion tokens: 187
Total tokens: 775
Prompt tokens: 492
Completion tokens: 80
Total tokens: 572
Prompt tokens: 634
Completion tokens: 78
Total tokens: 712


Output()

Prompt tokens: 582
Completion tokens: 180
Total tokens: 762
Evaluating Test Cases...
--------------------------------------------------------------------------------
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...


I0000 00:00:1723202776.593463 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202779.750268 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202781.590675 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202783.203107 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202787.696590 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202789.791861 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202789.813729 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202789.830071 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202789.846616 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202790.857400 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202793.584639 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202796.091605 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202804.105571 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202816.836616 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202820.428048 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202820.446795 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202820.463028 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202820.477663 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202821.334519 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202825.654330 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202828.113943 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202835.297871 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202839.685921 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202841.684272 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202841.708344 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202841.725480 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202841.742365 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202843.468527 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202845.842231 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202851.058341 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202854.599084 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202860.235667 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202861.944616 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202861.965092 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202861.988937 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202862.003899 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202863.268277 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202865.490971 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202873.679900 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202879.125252 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202879.751883 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202881.293498 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202881.313608 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202881.330495 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202881.348216 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202882.281924 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202885.966701 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202888.071873 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202894.965470 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202899.022942 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202900.697162 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202900.717056 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202900.735197 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202900.754422 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202901.827329 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202905.223545 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202914.305784 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202916.281567 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202930.634329 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202934.331587 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202934.352078 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202934.368497 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202934.385346 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202935.659759 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202936.147939 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202938.607031 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202940.400010 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202943.952182 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202947.238022 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202947.271753 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202947.288563 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202947.304175 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202948.342735 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202949.030480 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202950.774300 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202957.013528 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202960.328514 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723202962.374967 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202962.397396 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202962.418014 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723202962.441084 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202963.543145 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202965.299487 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202967.674568 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202971.954465 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723202977.705662 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork




Metrics Summary

  - ✅ Answer Relevancy (score: 0.75, threshold: 0.5, strict: False, evaluation model: gpt-4o, reason: The score is 0.75 because the answer mostly addresses the purpose of the AI chatbot in the business analytics course, but includes an irrelevant statement about additional resources that does not directly answer the question., error: None)
  - ✅ Faithfulness (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because there are no contradictions between the actual output and retrieval context. Good job!, error: None)
  - ✅ Contextual Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because there were no reasons found indicating the retrieval context was irrelevant to the input. Great job!, error: None)

For test case:

  - input: What is the purpose of the AI chatbot in the business analytics course?

  - actual output: The AI chatbot in this business analytics course is

--------------------------------------------------------------------------------
Completed Evaluation
Creating Test Cases...
--------------------------------------------------------------------------------
load INSTRUCTOR_Transformer
max_seq_length  512
Prompt tokens: 611
Completion tokens: 77
Total tokens: 688
Prompt tokens: 592
Completion tokens: 165
Total tokens: 757
Prompt tokens: 634
Completion tokens: 96
Total tokens: 730
Prompt tokens: 630
Completion tokens: 106
Total tokens: 736
Prompt tokens: 540
Completion tokens: 129
Total tokens: 669
Prompt tokens: 621
Completion tokens: 113
Total tokens: 734
Prompt tokens: 606
Completion tokens: 179
Total tokens: 785
Prompt tokens: 564
Completion tokens: 79
Total tokens: 643
Prompt tokens: 643
Completion tokens: 83
Total tokens: 726


Output()

Prompt tokens: 613
Completion tokens: 170
Total tokens: 783
Evaluating Test Cases...
--------------------------------------------------------------------------------
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...


I0000 00:00:1723203037.014560 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203037.041336 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203037.061135 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203037.078906 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203038.114973 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203039.059373 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203041.299038 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203044.480014 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203050.032053 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203054.044654 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203054.067143 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203054.086016 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203054.103607 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203055.059959 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203056.788656 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203058.918116 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203063.194996 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203070.474178 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203073.018933 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203073.038140 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203073.055118 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203073.072487 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203074.003089 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203075.261439 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203076.926117 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203085.515005 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203089.395927 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203091.499499 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203091.519795 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203091.534214 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203091.547988 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203093.340483 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203093.500182 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203095.295980 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203101.479276 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203109.512421 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203112.725989 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203112.750545 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203112.770085 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203112.789455 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203113.609128 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203115.649884 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203119.700665 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203124.859395 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203130.686164 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203132.842718 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203132.861507 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203132.876806 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203132.892720 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203133.935419 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203134.885333 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203136.406322 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203144.275247 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203147.730479 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203150.485947 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203150.511608 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203150.530910 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203150.550243 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203151.540439 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203153.331167 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203155.999713 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203160.613773 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203176.956251 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203180.659236 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203180.682396 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203180.702347 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203180.721857 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203181.918744 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203183.369167 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203184.873076 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203185.469783 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203188.057311 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203189.963393 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203189.984222 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203190.000183 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203190.016079 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203191.434935 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203191.823817 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203194.481491 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203198.862317 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203203.419818 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203205.461758 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203205.482894 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203205.503522 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203205.522286 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203207.495469 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203208.197841 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203210.391824 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203214.541066 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203221.016380 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork




Metrics Summary

  - ✅ Answer Relevancy (score: 0.75, threshold: 0.5, strict: False, evaluation model: gpt-4o, reason: The score is 0.75 because the provided answer mostly explains the purpose of the AI chatbot in the business analytics course, but includes a reference to 'Module 5: Lecture Slides,' which is not directly relevant to the question., error: None)
  - ❌ Faithfulness (score: 0.3333333333333333, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 0.33 due to the fact that the actual output wrongly stated that the purpose of the AI chatbot is to demonstrate an application of text analytics in a business context, whereas its actual purpose is to answer frequently asked questions and provide clarifications on course materials. Additionally, it inaccurately claimed that the course covers how to train intelligent chatbots to handle customer queries, which was not mentioned in the retrieval context., error: None)
  - ✅ Contextual Relevancy (score: 1.0, t

--------------------------------------------------------------------------------
Completed Evaluation
Creating Test Cases...
--------------------------------------------------------------------------------
load INSTRUCTOR_Transformer
max_seq_length  512
Prompt tokens: 427
Completion tokens: 63
Total tokens: 490
Prompt tokens: 956
Completion tokens: 85
Total tokens: 1041
Prompt tokens: 591
Completion tokens: 119
Total tokens: 710
Prompt tokens: 969
Completion tokens: 50
Total tokens: 1019
Prompt tokens: 964
Completion tokens: 98
Total tokens: 1062
Prompt tokens: 589
Completion tokens: 50
Total tokens: 639
Prompt tokens: 955
Completion tokens: 230
Total tokens: 1185
Prompt tokens: 420
Completion tokens: 38
Total tokens: 458
Prompt tokens: 545
Completion tokens: 54
Total tokens: 599


Output()

Prompt tokens: 562
Completion tokens: 183
Total tokens: 745
Evaluating Test Cases...
--------------------------------------------------------------------------------
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...


I0000 00:00:1723203262.522064 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203262.547471 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203262.569133 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203262.587763 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203263.753883 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203264.349003 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203266.349957 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203268.557039 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203272.155511 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203273.765365 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203273.786765 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203273.803950 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203273.824523 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203275.562056 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203275.738706 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203278.756750 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203288.415747 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203293.812558 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203296.331334 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203296.357498 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203296.385293 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203296.397929 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203297.486596 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203298.459463 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203300.968565 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203314.470633 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203319.436167 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203321.102991 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203321.129738 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203321.144402 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203321.156675 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203322.039386 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203322.721693 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203324.130685 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203328.938577 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203332.987731 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203336.593690 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203336.613938 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203336.631283 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203336.649273 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203337.682294 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203338.577472 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203341.339982 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203346.972524 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203354.141881 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203358.226251 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203358.254593 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203358.274971 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203358.291631 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203359.375231 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203359.835426 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203369.516084 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203373.765756 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203377.050435 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203377.076979 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203377.097580 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203377.115807 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203380.204363 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203380.757393 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203383.356015 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203388.724784 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203400.007204 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203401.930644 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203401.953045 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203401.972634 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203401.990932 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203402.793035 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203402.990789 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203404.680299 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203405.586711 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203406.830272 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203409.116497 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203409.137713 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203409.156074 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203409.172536 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203410.155675 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203410.435424 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203412.739205 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203416.802068 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203420.797524 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Output()

I0000 00:00:1723203423.990457 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203424.014693 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203424.032431 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1723203424.050237 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203425.524666 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203428.550612 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203431.172875 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203432.394619 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


I0000 00:00:1723203440.346830 28273155 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork




Metrics Summary

  - ✅ Answer Relevancy (score: 0.6666666666666666, threshold: 0.5, strict: False, evaluation model: gpt-4o, reason: The score is 0.67 because the output provides relevant information about the AI chatbot's purpose in the business analytics course, but includes an irrelevant statement about the syllabus., error: None)
  - ✅ Faithfulness (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because there are no contradictions between the retrieval context and the actual output., error: None)
  - ✅ Contextual Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because the retrieval context perfectly matches the input, indicating high relevancy and accuracy., error: None)

For test case:

  - input: What is the purpose of the AI chatbot in the business analytics course?

  - actual output: The AI chatbot in the business analytics course is designed to answer frequently asked que

--------------------------------------------------------------------------------
Completed Evaluation


In [13]:
# Show the collected evaluation results table as this cell's output
# (per-question rows with the metric scores and their reasons).
df_results

Unnamed: 0,ques_id,context,question,answer,source_doc,Model,Answer Relevance,Answer Relevance - Reason,Faithfulness,Faithfulness - Reason,Contextual Relevancy,Contextual Relevancy - Reason
0,Q1,- **AI Chatbot**: An AI chatbot is available f...,What is the purpose of the AI chatbot in the b...,The AI chatbot is designed to answer frequentl...,course_material/general/syllabus.md,sentence-transformers/all-MiniLM-L6-v2,0.75,The score is 0.75 because the answer mostly ad...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because there were no reason...
1,Q2,Primer on Business Analytics with Python\nModu...,What are the basic techniques in text analytic...,The basic techniques in text analytics covered...,course_material/module 5/lecture slides.pptx,sentence-transformers/all-MiniLM-L6-v2,1.0,The score is 1.00 because the output is fully ...,0.866667,The score is 0.87 because the actual output in...,1.0,The score is 1.00 because the retrieval contex...
2,Q3,Primer on Business Analytics \nwith Python\nMo...,What is the business example of using linear r...,Predicting sales based on advertising spend.,course_material/module 2/lecture slides.pdf,sentence-transformers/all-MiniLM-L6-v2,0.833333,The score is 0.83 because the majority of the ...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...
3,Q4,Primer on Business Analytics with Python\nModu...,What are the three tasks in the online exam fo...,The three tasks in the online exam are Code In...,course_material/module 6/lecture slides.pptx,sentence-transformers/all-MiniLM-L6-v2,0.875,The score is 0.88 because the answer effective...,1.0,The score is 1.00 because there were no contra...,1.0,The score is 1.00 because the retrieval contex...
4,Q5,Primer on Business Analytics with Python\nModu...,What are some common algorithms used in superv...,Some common algorithms used in supervised mach...,course_material/module 4/lecture slides.pptx,sentence-transformers/all-MiniLM-L6-v2,0.916667,The score is 0.92 because the answer is highly...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...
5,Q6,"[Slide 1: Title Slide]\nHello again, everyone!...",What is the purpose of a t-test in business an...,A t-test allows us to compare the means of two...,course_material/module 2/transcript.txt,sentence-transformers/all-MiniLM-L6-v2,0.8,The score is 0.80 because the explanation abou...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...
6,Q7,# Primer on Business Analytics with Python\n##...,What are the main concepts covered in the Busi...,The main concepts covered in the course are ba...,course_material/module 6/transcript.txt,sentence-transformers/all-MiniLM-L6-v2,0.947368,The score is 0.95 because the answer is highly...,0.823529,The score is 0.82 because the actual output in...,1.0,The score is 1.00 because there are no reasons...
7,Q8,"Thank you for your participation, and I'm exci...",What is the speaker excited to continue in the...,The speaker is excited to continue the learnin...,course_material/module 2/transcript.txt,sentence-transformers/all-MiniLM-L6-v2,0.25,The score is 0.25 because the response include...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...
8,Q9,"[Slide 1: Title Slide]\nHello, everyone! Welco...",What is the go-to tool for data manipulation a...,The go-to tool for data manipulation and analy...,course_material/module 1/transcript.txt,sentence-transformers/all-MiniLM-L6-v2,0.75,The score is 0.75 because the main response co...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...
9,Q10,- Split data into training and test sets for u...,What is a confusion matrix in model evaluation?\n,A confusion matrix is a table that describes t...,course_material/module 4/lecture slides.pptx,sentence-transformers/all-MiniLM-L6-v2,0.9,The score is 0.90 because the explanation is m...,1.0,The score is 1.00 because there are no contrad...,1.0,The score is 1.00 because the retrieval contex...


In [14]:
# Persist the evaluation results table to CSV; index=False keeps the
# DataFrame's row index out of the written file.
df_results.to_csv(
    path_or_buf='evaluation/evaluations_v4.csv',
    index=False,
)