# Evaluate model endpoints

## Objective

This tutorial provides a step-by-step guide on how to evaluate responses from MaaS endpoints deployed on the Azure AI platform, as well as from external model endpoints such as models deployed on the Hugging Face platform.

This guide uses a data file as the input for evaluation. It does not take a target function or class.

In [None]:
%pip install promptflow-evals
%pip install promptflow-azure


In [None]:
import pandas as pd
import os

from pprint import pprint
from pathlib import Path

import json
import requests

In [None]:
# Identifies the Azure AI project that the evaluators below are scoped to.
# All three values are left blank here and must be filled in before running.
azure_ai_project = dict(
    subscription_id="",
    resource_group_name="",
    project_name="",
)

In [None]:
# Endpoint URL and API key for each model, keyed by a short model name.
# Keys are intentionally blank: rather than hard-coding secrets here, we
# recommend storing endpoints and keys in environment variables and reading
# them with os.getenv().
_HF_INFERENCE_BASE = "https://api-inference.huggingface.co/models/"

env = {
    "tiny_llama": {
        "endpoint": _HF_INFERENCE_BASE + "TinyLlama/TinyLlama-1.1B-Chat-v1.0/v1/chat/completions",
        "key": "",
    },
    "phi3_mini": {
        "endpoint": "https://Phi-3-mini-4k-instruct-rqvel.eastus2.models.ai.azure.com/v1/chat/completions",
        "key": "",
    },
    "gpt2": {
        "endpoint": _HF_INFERENCE_BASE + "openai-community/gpt2",
        "key": "",
    },
}

In [None]:

def call_tiny_llama_endpoint(question: str) -> str:
    """Send a question to the TinyLlama chat-completions endpoint.

    The endpoint URL and bearer key are read from ``env["tiny_llama"]``.

    Args:
        question: Text sent as the single ``user`` message of the request.

    Returns:
        A JSON object serialized as a string, with the keys ``question``
        (the input, unchanged) and ``answer`` (the content of the first
        choice in the response).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not respond in time.
        KeyError: If the response body lacks the expected
            ``choices[0].message.content`` structure.
    """
    endpoint = env["tiny_llama"]["endpoint"]
    key = env["tiny_llama"]["key"]

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + key,
    }
    payload = {
        "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "messages": [{"role": "user", "content": question}],
        "max_tokens": 500,
        "stream": False,
    }

    print(payload)
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
    # Surface HTTP failures directly instead of as a KeyError further down.
    response.raise_for_status()

    answer = response.json()["choices"][0]["message"]["content"]

    # json.dumps escapes quotes, backslashes and newlines in the model output,
    # so the returned string is always parseable with json.loads.
    return json.dumps({"question": question, "answer": answer})

def call_phi3_mini_endpoint(question: str) -> str:
    """Send a question to the Phi-3-mini chat-completions endpoint.

    The endpoint URL and bearer key are read from ``env["phi3_mini"]``.

    Args:
        question: Text sent as the single ``user`` message of the request.

    Returns:
        A JSON object serialized as a string, with the keys ``question``
        (the input, unchanged) and ``answer`` (the content of the first
        choice in the response).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not respond in time.
        KeyError: If the response body lacks the expected
            ``choices[0].message.content`` structure.
    """
    endpoint = env["phi3_mini"]["endpoint"]
    key = env["phi3_mini"]["key"]

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + key,
    }
    payload = {
        "messages": [{"role": "user", "content": question}],
        "max_tokens": 500,
    }

    print(payload)
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
    # Surface HTTP failures directly instead of as a KeyError further down.
    response.raise_for_status()

    answer = response.json()["choices"][0]["message"]["content"]

    # json.dumps escapes quotes, backslashes and newlines in the model output,
    # so the returned string is always parseable with json.loads.
    return json.dumps({"question": question, "answer": answer})

def call_default_endpoint(question: str = "") -> str:
    """Return a canned question/answer record without calling any endpoint.

    Args:
        question: Accepted so this function can be called the same way as
            the other endpoint wrappers; it is ignored, and the returned
            record is always the same.

    Returns:
        A fixed JSON object string with ``question`` and ``answer`` keys.
    """
    return "{ \"question\" : \"What is capital of France?\" , \"answer\" : \"Paris\"}"
	
	

In [None]:
def call_external_endpoints(question: str, model_type: str) -> str:
    """Route a question to the endpoint wrapper selected by ``model_type``.

    Args:
        question: Text to send to the model.
        model_type: ``"tiny_llama"`` or ``"phi3_mini"`` to call the
            corresponding endpoint; any other value falls back to the
            canned default response.

    Returns:
        The JSON string produced by the selected wrapper.
    """
    if model_type == "tiny_llama":
        return call_tiny_llama_endpoint(question)
    if model_type == "phi3_mini":
        return call_phi3_mini_endpoint(question)

    # The default wrapper returns a fixed record; call it without arguments
    # so the fallback does not fail with a TypeError.
    return call_default_endpoint()


In [None]:
# Query the TinyLlama endpoint once; the returned JSON string is parsed and
# evaluated in the following cell.
output = call_external_endpoints(
    question="What is the capital of France?",
    model_type="tiny_llama",
)

In [None]:
from promptflow.evals.evaluators import (
    ContentSafetyEvaluator,
)
from promptflow.evals.evaluate import evaluate

# Build the content-safety evaluator, scoped to the Azure AI project
# configured earlier in this notebook.
content_safety_evaluator = ContentSafetyEvaluator(project_scope=azure_ai_project)

# Parse the JSON string returned by the endpoint wrapper into a dict.
json_line_as_json = json.loads(output)
# Bare expression: presumably here so the notebook displays the parsed record.
json_line_as_json


# Evaluate the single question/answer pair directly, without a data file.
content_safety_eval_result = content_safety_evaluator(
    question=json_line_as_json["question"], answer=json_line_as_json["answer"]
)


In [None]:
# Run the Eval API over a data file instead of a single record.
# NOTE: nothing in the code shown here writes "outputs.jsonl"; the file is
# expected to already exist in the working directory.
results = evaluate(
    azure_ai_project=azure_ai_project,
    data="outputs.jsonl",
    evaluators={"content_safety": content_safety_evaluator},
)

results