# https://github.com/microsoft/promptflow/blob/user/singankit/pf-evals-bug-bash/src/promptflow-evals/samples/bug-bash/instructions.md

In [1]:
import os
import json

from promptflow.core import AzureOpenAIModelConfiguration

# Azure OpenAI model configuration handed to every evaluator below.
# The key, API version and endpoint are read from environment variables so no
# secret is stored in the notebook; a missing variable raises KeyError here.
model_config = AzureOpenAIModelConfiguration(
    azure_deployment="gpt-4",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
)

In [2]:
import pandas as pd

# Relative path to the evaluation dataset, stored as JSON Lines.
data_path = "../data/data.jsonl"

# Parse one JSON record per line, then preview the first five rows.
df = pd.read_json(path_or_buf=data_path, lines=True)
df.head(n=5)

Unnamed: 0,customerId,question,chat_history,intent
0,4,tell me about your hiking jackets,[],chat
1,1,Do you have any climbing gear?,[],chat
2,3,Can you tell me about your selection of tents?,[],chat
3,6,Do you have any hiking boots?,[],chat
4,2,What gear do you recommend for hiking?,[],chat


In [3]:
import sys

# Make the 'contoso_chat' folder importable so that `chat_request` resolves.
# The path is relative to the kernel's working directory; adjust it if the
# folder lives somewhere else.
CONTOSO_CHAT_DIR = '../contoso_chat'
if CONTOSO_CHAT_DIR not in sys.path:  # keep the cell idempotent across re-runs
    sys.path.append(CONTOSO_CHAT_DIR)

from chat_request import get_response
from promptflow.evals.evaluators import (
    CoherenceEvaluator,
    FluencyEvaluator,
    GroundednessEvaluator,
    RelevanceEvaluator,
)

# One evaluator per quality metric, all built from the same model configuration.
relevance_evaluator = RelevanceEvaluator(model_config)
groundedness_evaluator = GroundednessEvaluator(model_config)
fluency_evaluator = FluencyEvaluator(model_config)
coherence_evaluator = CoherenceEvaluator(model_config)

In [4]:
from promptflow.evals.evaluate import evaluate

# Send every row of the dataset through the chat target (`get_response`) and
# score each response with the four evaluators configured earlier.
result_eval = evaluate(
    data="../data/data.jsonl",
    target=get_response,
    evaluators={
        "relevance": relevance_evaluator,
        "fluency": fluency_evaluator,
        "coherence": coherence_evaluator,
        "groundedness": groundedness_evaluator,
    },
    # Column mapping: the target is expected to return
    # {"question": ..., "answer": ..., "context": ...}; "question" is taken from
    # the input data, "answer" and "context" from the target's output.
    evaluator_config={
        # The key must be spelled exactly "default" for the mapping to be used
        # by all evaluators; a misspelled key is not matched to any evaluator
        # (see the promptflow-evals `evaluate` documentation).
        "default": {
            "question": "${data.question}",
            "answer": "${target.answer}",
            "context": "${target.context}",
        },
    },
)



Prompt flow service has started...


[2024-05-05 21:42:43,469][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run evaluations_variant_0_20240505_214237_746711, log path: C:\Users\cassieb\.promptflow\.runs\evaluations_variant_0_20240505_214237_746711\logs.txt


You can view the traces in local from http://localhost:23333/v1.0/ui/traces/?#run=evaluations_variant_0_20240505_214237_746711
2024-05-05 21:42:43 -0500    2880 execution.bulk     INFO     Current system's available memory is 8911.703125MB, memory consumption of current process is 321.4609375MB, estimated available worker count is 8911.703125/321.4609375 = 27
2024-05-05 21:42:43 -0500    2880 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 12, 'estimated_worker_count_based_on_memory_usage': 27}.
2024-05-05 21:42:51 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-2)-Process id(44996)-Line number(0) start execution.
2024-05-05 21:42:51 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-4)-Process id(47628)-Line number(1) start execution.
2024-05-05 21:42:51 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-3)-Process id(12948)-Line number(2) 

[2024-05-05 21:44:13,149][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run evaluations_variant_0_20240505_214408_841366, log path: C:\Users\cassieb\.promptflow\.runs\evaluations_variant_0_20240505_214408_841366\logs.txt


You can view the traces in local from http://localhost:23333/v1.0/ui/traces/?#run=evaluations_variant_0_20240505_214408_841366
2024-05-05 21:44:13 -0500    2880 execution.bulk     INFO     Current system's available memory is 8747.609375MB, memory consumption of current process is 328.3359375MB, estimated available worker count is 8747.609375/328.3359375 = 26
2024-05-05 21:44:13 -0500    2880 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 12, 'estimated_worker_count_based_on_memory_usage': 26}.
2024-05-05 21:44:21 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-9)-Process id(35392)-Line number(0) start execution.
2024-05-05 21:44:21 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-12)-Process id(40040)-Line number(1) start execution.
2024-05-05 21:44:21 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-11)-Process id(20024)-Line number(2

[2024-05-05 21:45:02,322][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run evaluations_variant_0_20240505_214458_139127, log path: C:\Users\cassieb\.promptflow\.runs\evaluations_variant_0_20240505_214458_139127\logs.txt


You can view the traces in local from http://localhost:23333/v1.0/ui/traces/?#run=evaluations_variant_0_20240505_214458_139127
2024-05-05 21:45:02 -0500    2880 execution.bulk     INFO     Current system's available memory is 8828.6171875MB, memory consumption of current process is 330.30859375MB, estimated available worker count is 8828.6171875/330.30859375 = 26
2024-05-05 21:45:02 -0500    2880 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 12, 'estimated_worker_count_based_on_memory_usage': 26}.
2024-05-05 21:45:10 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-16)-Process id(14304)-Line number(0) start execution.
2024-05-05 21:45:10 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-18)-Process id(3032)-Line number(1) start execution.
2024-05-05 21:45:10 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-17)-Process id(43600)-Line numb

[2024-05-05 21:45:51,043][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run evaluations_variant_0_20240505_214546_838322, log path: C:\Users\cassieb\.promptflow\.runs\evaluations_variant_0_20240505_214546_838322\logs.txt


You can view the traces in local from http://localhost:23333/v1.0/ui/traces/?#run=evaluations_variant_0_20240505_214546_838322
2024-05-05 21:45:51 -0500    2880 execution.bulk     INFO     Current system's available memory is 8931.93359375MB, memory consumption of current process is 330.46875MB, estimated available worker count is 8931.93359375/330.46875 = 27
2024-05-05 21:45:51 -0500    2880 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 12, 'estimated_worker_count_based_on_memory_usage': 27}.
2024-05-05 21:45:58 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-23)-Process id(25620)-Line number(0) start execution.
2024-05-05 21:45:58 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-24)-Process id(25956)-Line number(1) start execution.
2024-05-05 21:45:58 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-26)-Process id(18640)-Line number(

[2024-05-05 21:46:43,835][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run evaluations_variant_0_20240505_214638_538242, log path: C:\Users\cassieb\.promptflow\.runs\evaluations_variant_0_20240505_214638_538242\logs.txt


You can view the traces in local from http://localhost:23333/v1.0/ui/traces/?#run=evaluations_variant_0_20240505_214638_538242
2024-05-05 21:46:44 -0500    2880 execution.bulk     INFO     Current system's available memory is 9422.14453125MB, memory consumption of current process is 330.47265625MB, estimated available worker count is 9422.14453125/330.47265625 = 28
2024-05-05 21:46:44 -0500    2880 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 12, 'estimated_worker_count_based_on_memory_usage': 28}.
2024-05-05 21:46:52 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-30)-Process id(49092)-Line number(0) start execution.
2024-05-05 21:46:52 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-31)-Process id(3732)-Line number(1) start execution.
2024-05-05 21:46:52 -0500    2880 execution.bulk     INFO     Process name(SpawnProcess-33)-Process id(43732)-Line nu

In [5]:
# Put the per-row results returned under the "rows" key into a DataFrame for inspection.
eval_result = pd.DataFrame(data=result_eval["rows"])

In [6]:
# Preview the first five evaluated rows (inputs, target outputs and metric scores).
eval_result.head(n=5)

Unnamed: 0,outputs.answer,outputs.context,inputs.customerId,inputs.question,inputs.chat_history,inputs.intent,outputs.relevance.gpt_relevance,outputs.fluency.gpt_fluency,outputs.coherence.gpt_coherence,outputs.groundedness.gpt_groundedness
0,"Sure, Sarah Lee! 🏔️🧥\n\nWe have two hiking jac...","[{'id': '17', 'title': 'RainGuard Hiking Jacke...",4,tell me about your hiking jackets,[],chat,5,5,5,5
1,"Yes, we have climbing gear! 🧗‍♂️ For a thrilli...","[{'id': '9', 'title': 'SummitClimber Backpack'...",1,Do you have any climbing gear?,[],chat,5,5,5,5
2,"Of course, Michael! 🏕️ We have a great selecti...","[{'id': '15', 'title': 'SkyView 2-Person Tent'...",3,Can you tell me about your selection of tents?,[],chat,5,5,5,5
3,"Yes, Emily! We have the TrekReady Hiking Boots...","[{'id': '4', 'title': 'TrekReady Hiking Boots'...",6,Do you have any hiking boots?,[],chat,4,5,5,5
4,"For hiking, I recommend the following gear:\n\...","[{'id': '10', 'title': 'TrailBlaze Hiking Pant...",2,What gear do you recommend for hiking?,[],chat,5,5,5,5


In [7]:
# Persist the evaluation results as JSON Lines: one record (row) per line.
eval_result.to_json(
    path_or_buf='eval_result.jsonl',
    orient='records',
    lines=True,
)