# Evaluate RAG answer quality

## Set up API clients

In [2]:
import os

import azure.identity
import dotenv
import openai
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

# Load settings from a .env file into the process environment before reading them.
dotenv.load_dotenv()

# AZURE_TENANT_ID is optional here: when unset, tenant_id=None is passed through.
azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"))

# Initialize Azure OpenAI client
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")

# Fail fast on missing configuration. Without this check an unset service name is
# formatted into the endpoint as the literal "None", and the problem only surfaces
# later as a hard-to-diagnose request failure.
missing_openai_settings = [
    setting_name
    for setting_name, setting_value in (
        ("AZURE_OPENAI_SERVICE", AZURE_OPENAI_SERVICE),
        ("AZURE_OPENAI_ADA_DEPLOYMENT", AZURE_OPENAI_ADA_DEPLOYMENT),
    )
    if not setting_value
]
if missing_openai_settings:
    raise ValueError(
        "Missing required environment variable(s): " + ", ".join(missing_openai_settings)
    )

# Authenticate with Azure AD bearer tokens (no API key is passed to the client).
token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    """Return the embedding for ``text``.

    Sends ``text`` to the embeddings endpoint of ``openai_client`` using the
    deployment named by ``AZURE_OPENAI_ADA_DEPLOYMENT`` and returns the
    ``embedding`` attribute of the first item in the response data.
    """
    embeddings_response = openai_client.embeddings.create(
        input=text,
        model=AZURE_OPENAI_ADA_DEPLOYMENT,
    )
    first_result = embeddings_response.data[0]
    return first_result.embedding

# Initialize Azure search client
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
if not AZURE_SEARCH_SERVICE:
    # Fail fast: otherwise the endpoint below becomes "https://None.search.windows.net"
    # and the error only appears when the first query is sent.
    raise ValueError("Missing required environment variable: AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

# Name of the search index that the client queries.
AZURE_SEARCH_FULL_INDEX = "gptkbindex"
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)


## Get answer for a question

In [3]:
# The question to answer, plus its embedding for the vector part of the search.
user_question = "What does a product manager do?"
user_question_vector = get_embedding(user_question)

# Search with both the question text and its vector (against the "embedding" field),
# using the semantic query type; only the top 5 results are returned.
question_vector_query = VectorizedQuery(
    vector=user_question_vector,
    k_nearest_neighbors=50,
    fields="embedding",
)
r = search_client.search(
    user_question,
    top=5,
    vector_queries=[question_vector_query],
    query_type="semantic",
    semantic_configuration_name="default",
)

# Format every result as "[sourcepage]: content" so the answer can cite sources by name.
source_entries = [f"[{doc['sourcepage']}]: {doc['content']}\n" for doc in r]
sources = "\n\n".join(source_entries)

SYSTEM_MESSAGE = """
Assistant helps company employees questions about the employee handbook. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below.
If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below.
Each source has a name followed by colon and the actual information, include the source name for each fact you use.
Use square brackets to reference the source, for example [info1.txt].
"""
USER_MESSAGE = f"{user_question}\nSources: {sources}"

# The system prompt carries the grounding rules; the user turn carries question + sources.
chat_messages = [
    {"role": "system", "content": SYSTEM_MESSAGE},
    {"role": "user", "content": USER_MESSAGE},
]
response = openai_client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    temperature=0.7,
    messages=chat_messages,
)

# Keep the text of the first choice; the evaluation cell below reads `answer`.
answer = response.choices[0].message.content
print(answer)

A product manager is responsible for overseeing the product management team, driving product development and marketing strategy, monitoring industry trends, developing product marketing plans, collaborating with internal teams and external partners, and analyzing product performance and customer feedback [role_library-28.png].


## Evaluate the answer quality

We can use the `promptflow-evals` package to run GPT-based evaluators on the RAG responses.

In [4]:
import os

from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import GroundednessEvaluator, RelevanceEvaluator

# Configure the evaluators' model: the Azure OpenAI endpoint built from
# AZURE_OPENAI_SERVICE and the deployment named by AZURE_OPENAI_DEPLOYMENT_NAME.
evaluator_endpoint = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
evaluator_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint=evaluator_endpoint,
    azure_deployment=evaluator_deployment,
)

relevance_eval = RelevanceEvaluator(model_config)
groundedness_eval = GroundednessEvaluator(model_config)

# Both evaluators receive the generated answer and the retrieved sources as context;
# the relevance evaluator additionally receives the original question.
answer_and_context = {"answer": answer, "context": sources}

relevance_score = relevance_eval(question=user_question, **answer_and_context)
print(relevance_score)

groundedness_score = groundedness_eval(**answer_and_context)
print(groundedness_score)

{'gpt_relevance': 4.0}
{'gpt_groundedness': 5.0}
