# Evaluate External Model Endpoints

## Objective

This tutorial provides a step-by-step guide on how to evaluate responses from external model endpoints, such as a Llama model deployed on the Hugging Face platform.

In [None]:
# Install the promptflow packages used by this notebook into the kernel's environment.
%pip install promptflow-evals
%pip install promptflow-azure


In [15]:
import pandas as pd
import os

from pprint import pprint
from pathlib import Path

import json
import requests

In [None]:
# Azure AI project passed to the evaluator and to evaluate().
# Each field can be overridden through an environment variable; the literal
# values are the defaults this tutorial was originally written against.
azure_ai_project = {
    "subscription_id": os.environ.get(
        "AZURE_SUBSCRIPTION_ID", "2d385bf4-0756-4a76-aa95-28bf9ed3b625"
    ),
    "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP", "rg-wjphihub"),
    "project_name": os.environ.get("AZURE_PROJECT_NAME", "waqasjaved-5368"),
}

In [None]:
# Endpoint URL and API key for each external model.
# Keys are read from environment variables (os.getenv / os.environ) so that no
# credential is stored in the notebook: set HF_TOKEN and PHI3_MINI_API_KEY
# before running this cell. A missing key becomes an empty string.
# Endpoint URLs can be overridden the same way and default to the tutorial URLs.
# NOTE: any key that was ever committed in this notebook should be treated as
# compromised and rotated.
env = {
    "tiny_llama": {
        "endpoint": os.environ.get(
            "TINY_LLAMA_ENDPOINT",
            "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0/v1/chat/completions",
        ),
        "key": os.environ.get("HF_TOKEN", ""),
    },
    "phi3_mini": {
        "endpoint": os.environ.get(
            "PHI3_MINI_ENDPOINT",
            "https://Phi-3-mini-4k-instruct-rqvel.eastus2.models.ai.azure.com/v1/chat/completions",
        ),
        "key": os.environ.get("PHI3_MINI_API_KEY", ""),
    },
    "gpt2": {
        "endpoint": os.environ.get(
            "GPT2_ENDPOINT",
            "https://api-inference.huggingface.co/models/openai-community/gpt2",
        ),
        "key": os.environ.get("HF_TOKEN", ""),
    },
}

In [None]:

def call_tiny_llama_endpoint(question: str) -> str:
    """Send ``question`` to the TinyLlama chat-completions endpoint.

    The endpoint URL and API key are read from ``env["tiny_llama"]``.

    Args:
        question: Text sent as the single ``user`` message.

    Returns:
        A JSON object serialized as a string with the keys ``"question"`` and
        ``"answer"``; ``answer`` is the message content of the first choice
        in the response.

    Raises:
        ValueError: If no API key is configured for this endpoint.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    endpoint = env["tiny_llama"]["endpoint"]
    key = env["tiny_llama"]["key"]
    if not key:
        raise ValueError("No API key configured for the tiny_llama endpoint")

    # The key and headers are deliberately not printed: cell output is saved
    # with the notebook and would leak the credential.
    headers = {"Content-Type": "application/json", "Authorization": "Bearer " + key}
    payload = {
        "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "messages": [{"role": "user", "content": question}],
        "max_tokens": 500,
        "stream": False,
    }

    # A timeout keeps the cell from hanging forever on an unresponsive endpoint.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
    # Fail with the HTTP error instead of a KeyError on a missing "choices" key.
    response.raise_for_status()
    answer = response.json()["choices"][0]["message"]["content"]

    # json.dumps escapes quotes, backslashes and newlines, so the record stays
    # valid JSON whatever text the model returns.
    return json.dumps({"question": question, "answer": answer})

def call_phi3_mini_endpoint(question: str) -> str:
    """Send ``question`` to the Phi-3 mini chat-completions endpoint.

    The endpoint URL and API key are read from ``env["phi3_mini"]``.

    Args:
        question: Text sent as the single ``user`` message.

    Returns:
        A JSON object serialized as a string with the keys ``"question"`` and
        ``"answer"``; ``answer`` is the message content of the first choice
        in the response.

    Raises:
        ValueError: If no API key is configured for this endpoint.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    endpoint = env["phi3_mini"]["endpoint"]
    key = env["phi3_mini"]["key"]
    if not key:
        raise ValueError("No API key configured for the phi3_mini endpoint")

    # The key and headers are deliberately not printed: cell output is saved
    # with the notebook and would leak the credential.
    headers = {"Content-Type": "application/json", "Authorization": "Bearer " + key}
    payload = {
        "messages": [{"role": "user", "content": question}],
        "max_tokens": 500,
    }

    # A timeout keeps the cell from hanging forever on an unresponsive endpoint.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
    # Fail with the HTTP error instead of a KeyError on a missing "choices" key.
    response.raise_for_status()
    answer = response.json()["choices"][0]["message"]["content"]

    # json.dumps escapes quotes, backslashes and newlines, so the record stays
    # valid JSON whatever text the model returns.
    return json.dumps({"question": question, "answer": answer})

def call_default_endpoint(question: str = "") -> str:
    """Return a canned question/answer record without calling any model.

    Args:
        question: Accepted so this function can be called with the same
            argument as the other ``call_*_endpoint`` helpers; it is ignored.

    Returns:
        A fixed JSON string with the keys ``"question"`` and ``"answer"``.
    """
    return "{ \"question\" : \"What is capital of France?\" , \"answer\" : \"Paris\"}"
	
	

In [None]:
def call_external_endpoints(question: str, model_type: str) -> str:
    """Route ``question`` to the endpoint selected by ``model_type``.

    Args:
        question: Text forwarded to the selected endpoint.
        model_type: ``"tiny_llama"`` or ``"phi3_mini"``; any other value
            falls back to the canned default response.

    Returns:
        The JSON string produced by the selected helper. The same string is
        printed and written as one line to ``outputs.jsonl`` in the current
        working directory.
    """
    if model_type == "tiny_llama":
        output = call_tiny_llama_endpoint(question)
    elif model_type == "phi3_mini":
        output = call_phi3_mini_endpoint(question)
    else:
        # The canned fallback does not need the question.
        output = call_default_endpoint()

    print(output)

    # Mode "w" truncates the file, so it only ever holds the latest record.
    # Each JSON Lines record is terminated by a newline.
    with Path("outputs.jsonl").open("w", encoding="utf-8") as f:
        f.write(output + "\n")

    return output


In [None]:
# Query TinyLlama once and keep the returned JSON string for the cells below.
output = call_external_endpoints(
    question="What is the capital of France?",
    model_type="tiny_llama",
)

In [None]:
# Score the single response held in `output` with the content-safety evaluator.
from promptflow.evals.evaluate import evaluate
from promptflow.evals.evaluators import ContentSafetyEvaluator

content_safety_evaluator = ContentSafetyEvaluator(project_scope=azure_ai_project)

# `output` is a JSON string, so decode it before reading its fields.
json_line_as_json = json.loads(output)
print(json_line_as_json)

content_safety_eval_result = content_safety_evaluator(
    question=json_line_as_json["question"],
    answer=json_line_as_json["answer"],
)


In [18]:
# Call the evaluate API on the records saved in outputs.jsonl.
results = evaluate(
    azure_ai_project=azure_ai_project,
    data="outputs.jsonl",
    evaluators={"content_safety": content_safety_evaluator},
)

results

[2024-07-23 14:34:33 -0700][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run promptflow_evals_evaluators_content_safety_content_safety_contentsafetyevaluator_35gl9qag_20240723_143431_963684, log path: /Users/waqasjaved02/.promptflow/.runs/promptflow_evals_evaluators_content_safety_content_safety_contentsafetyevaluator_35gl9qag_20240723_143431_963684/logs.txt


Prompt flow service has started...
You can view the traces in local from http://127.0.0.1:23333/v1.0/ui/traces/?#run=promptflow_evals_evaluators_content_safety_content_safety_contentsafetyevaluator_35gl9qag_20240723_143431_963684
You can view the traces in azure portal since trace destination is set to: azureml://subscriptions/2d385bf4-0756-4a76-aa95-28bf9ed3b625/resourceGroups/rg-wjphihub/providers/Microsoft.MachineLearningServices/workspaces/waqasjaved-5368. The link will be printed once the run is finished.


UploadInternalError: Failed to upload run 'promptflow_evals_evaluators_content_safety_content_safety_contentsafetyevaluator_35gl9qag_20240723_143431_963684'. Error: Cannot connect to host stwjphihub876397172926.blob.core.windows.net:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)')]

In [19]:
# Load the JSON Lines file (lines=True: one JSON object per line) and preview
# it; a bare last expression uses the notebook's rich DataFrame display.
df = pd.read_json("testdata/outputs.jsonl", lines=True)
df.head()

ValueError: Unexpected character found when decoding 'true'

In [17]:

# Evaluate testdata/data.jsonl, passing call_external_endpoints as the target.
results = evaluate(
    azure_ai_project=azure_ai_project,
    evaluation_name="tiny_llama",
    data="testdata/data.jsonl",
    target=call_external_endpoints,
    evaluators={"content_safety": content_safety_evaluator},
)

ValueError: Failed to load data from testdata/data.jsonl. Please validate it is a valid jsonl data. Error: Unexpected character found when decoding 'true'.