# Azure AI Studio Batch Run Evaluation
### Chat Prompt Flow - All Data Run Base Run

To test the chat prompt flow more thoroughly, we can use Azure AI Studio to run it, together with the evaluation prompt flow, against batches of test data from a larger dataset.

In [1]:
import json
# Import required libraries
from promptflow.azure import PFClient
# Azure credential classes and the run helpers from the `evaluate` module
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from evaluate import run_azure_flow, run_azure_eval_flow

In [2]:
try:
    credential = DefaultAzureCredential()
    # Check up front that this credential can actually obtain a token.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Deliberately broad: any failure to obtain a token triggers the fallback.
    # Report the reason so the switch to interactive login is not silent.
    print(
        f"DefaultAzureCredential failed ({type(ex).__name__}: {ex}); "
        "falling back to InteractiveBrowserCredential."
    )
    credential = InteractiveBrowserCredential()

Populate the `config.json` file with the subscription_id, resource_group, and workspace_name.

In [3]:
# Relative path to the workspace config file
# (subscription_id, resource_group, workspace_name).
config_path = "../config.json"

# Build the Azure prompt flow client from that file, authenticating with `credential`.
pf_azure_client = PFClient.from_config(
    credential=credential,
    path=config_path,
)

Found the config file in: ../config.json


Set the properties needed to run in Azure

In [4]:
# Update `runtime` to the name of the runtime you created previously.
runtime = "automatic"

# Flow to run and the dataset to feed it.
flow = "../contoso-chat"
data = "../data/alltestdata.jsonl"

# Name given to the base run.
run_name = "chat_all_data_base_run"

# Each flow input is filled from the dataset column of the same name.
column_mapping = {
    "customerId": "${data.customerId}",
    "question": "${data.question}",
}


Create a base run to use as the variant for the evaluation runs. 

In [5]:
# Start the base run through the `run_azure_flow` helper (imported from `evaluate`),
# using the properties set in the previous cell.
base_run = run_azure_flow(
    runtime,
    flow,
    run_name,
    data,
    column_mapping,
    pf_azure_client,
)

[32mUploading alltestdata.jsonl[32m (< 1 MB): 100%|██████████| 1.51k/1.51k [00:00<00:00, 5.06kB/s]
[39m

[32mUploading contoso-chat (0.69 MBs): 100%|██████████| 685203/685203 [00:02<00:00, 330157.23it/s]
[39m



Portal url: https://ai.azure.com/projectflows/bulkrun/run/chat_all_data_base_run_03_13_1600/details?wsid=/subscriptions/ca5b1615-f8ca-462a-9085-7f6f02196f50/resourcegroups/contoso-chat-rg/providers/Microsoft.MachineLearningServices/workspaces/contoso-chat-sf-aiproj


In [6]:
# Stream the base run's status and logs into the cell output.
# NOTE(review): the log output can be long and may include storage URLs —
# consider clearing it before sharing the notebook.
pf_azure_client.stream(base_run)

(Run status is 'NotStarted', continue streaming...)
(Run status is 'NotStarted', continue streaming...)
(Run status is 'NotStarted', continue streaming...)
(Run status is 'NotStarted', continue streaming...)
(Run status is 'NotStarted', continue streaming...)
2024-03-13 16:04:22 +0000      53 promptflow-runtime INFO     [chat_all_data_base_run_03_13_1600] Receiving v2 bulk run request a36f3857-50da-40c6-8bff-062a5c70ab25: {"flow_id": "chat_all_data_base_run_03_13_1600", "flow_run_id": "chat_all_data_base_run_03_13_1600", "flow_source": {"flow_source_type": 1, "flow_source_info": {"snapshot_id": "0a1a4c0c-ba5e-46f9-9efd-4aaf15bbc1b2"}, "flow_dag_file": "flow.dag.yaml"}, "connections": "**data_scrubbed**", "log_path": "https://stcontoso38774357.blob.core.windows.net/bcd1610a-17bd-4a49-a592-96bbfcce576a-azureml/ExperimentRun/dcid.chat_all_data_base_run_03_13_1600/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=**data_scrubbed**&skoid=0f6675c4-71fb-4fbf-aa94-aa7aaca0ba76&sktid=d2f64b

In [None]:
# Fetch the details of the base run and preview the first 10 entries
# (presumably a pandas DataFrame, one row per input line — confirm).
details = pf_azure_client.get_details(base_run)
details.head(10)

## Chat Prompt Flow Evaluation - All Data Eval Run

In [None]:
# Evaluation flow, dataset and run name for the all-data evaluation run.
eval_flow = "multi_flow/"
data = "../data/alltestdata.jsonl"
run_name = "chat_all_data_eval_run"

column_mapping = {
    # inputs taken from the dataset
    "customerId": "${data.customerId}",
    "question": "${data.question}",
    # inputs taken from the outputs of the referenced run
    "context": "${run.outputs.context}",
    "answer": "${run.outputs.answer}",
}

In [None]:
# Start the evaluation run through the `run_azure_eval_flow` helper (imported
# from `evaluate`), passing `base_run` as the run it refers to.
eval_run = run_azure_eval_flow(
    runtime,
    eval_flow,
    run_name,
    data,
    column_mapping,
    base_run,
    pf_azure_client,
)

In [None]:
# Stream the evaluation run's status and logs into the cell output.
pf_azure_client.stream(eval_run)

In [None]:
# Fetch the details of the evaluation run and preview the first 10 entries.
# Note: this rebinds `details`, replacing the base run's details.
details = pf_azure_client.get_details(eval_run)
details.head(10)

In [None]:

# Fetch the evaluation run's metrics and pretty-print them as indented JSON.
metrics = pf_azure_client.get_metrics(eval_run)
print(json.dumps(metrics, indent=4))

In [None]:
# Visualize the base run together with its evaluation run.
pf_azure_client.visualize([base_run, eval_run])

### Chat Prompt Flow - Chat Only Data Run Base Run


In [None]:
# Recreate the Azure prompt flow client from the same credential and config file.
pf_azure_client = PFClient.from_config(
    credential=credential,
    path=config_path,
)

# Properties for this base run: same flow, different dataset and run name.
flow = "../contoso-chat"
data = "../data/salestestdata.jsonl"
run_name = "chat_only_data_base_run"

# Each flow input is filled from the dataset column of the same name.
column_mapping = {
    "customerId": "${data.customerId}",
    "question": "${data.question}",
}

In [None]:
# Start the second base run through the `run_azure_flow` helper, kept under its
# own name so the earlier `base_run` stays available.
base_run_chat_only = run_azure_flow(
    runtime,
    flow,
    run_name,
    data,
    column_mapping,
    pf_azure_client,
)

### Chat Prompt Flow - Chat Only Data Run Eval Run

In [None]:
# Evaluation flow and run name for this evaluation run.
# `data` is not reassigned here, so the value set for the base run is reused.
eval_flow = "multi_flow/"
run_name = "chat_only_data_eval_run"

column_mapping = {
    # inputs taken from the dataset
    "customerId": "${data.customerId}",
    "question": "${data.question}",
    # inputs taken from the outputs of the referenced run
    "context": "${run.outputs.context}",
    "answer": "${run.outputs.answer}",
}

In [None]:
# Start the evaluation run for `base_run_chat_only`.
# Note: this rebinds `eval_run`, replacing the earlier evaluation run.
eval_run = run_azure_eval_flow(
    runtime,
    eval_flow,
    run_name,
    data,
    column_mapping,
    base_run_chat_only,
    pf_azure_client,
)

In [None]:
# Stream the status and logs of the current `eval_run` into the cell output.
pf_azure_client.stream(eval_run)

In [None]:
# Fetch the details of the current `eval_run` and preview the first 10 entries.
details = pf_azure_client.get_details(eval_run)
details.head(10)

In [None]:
# Fetch the metrics of the current `eval_run` and pretty-print them as indented JSON.
metrics = pf_azure_client.get_metrics(eval_run)
print(json.dumps(metrics, indent=4))