# Ragas Answer Relevance Evaluation

This notebook evaluates model outputs with the Ragas `answer_relevancy` metric.

In [None]:
!pip3 install ragas==0.1.13 langchain-openai==0.1.20 datasets==2.20.0

In [None]:
!pip3 show datasets ragas langchain-openai

In [None]:
import ipython_secrets
import os

# Fetch the key through ipython_secrets, keep it in a notebook-level variable,
# and export it as an environment variable for the current process.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] = ipython_secrets.get_secret('OPENAI_API_KEY')

In [None]:
from ragas.metrics import answer_relevancy
from ragas import evaluate
from datasets import Dataset 
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
import os

# Read the API key from the environment; the value is None when the variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Sample records to score. Each record carries the user question ("input"),
# the model's reply ("output"), and a list of context strings ("contexts").
evaluation_data = [
    # A travel question with an answer and three context passages.
    {
        "input": "What should I do in New York City in July?",
        "output": "Check out Times Square, go to an outdoor concert, and visit the Statue of Liberty.",
        "contexts": [
            "Times Square is known for its Broadway theaters, bright lights, and bustling atmosphere.",
            "Outdoor concerts in Central Park are popular summer events attracting many visitors.",
            "The Statue of Liberty is a symbol of freedom and a must-see landmark in NYC.",
        ],
    },
    # A non-travel request that the model declines; it has no contexts.
    {
        "input": "Can you help me with my math homework?",
        "output": "I'm designed to assist with travel queries. For math help, try using online resources like Khan Academy or Mathway.",
        "contexts": [],
    },
    # A short factual question with two context passages.
    {
        "input": "What’s the capital of France?",
        "output": "The capital of France is Paris.",
        "contexts": [
            "Paris, known as the City of Light, is the most populous city of France.",
            "European capitals: Paris, France; Berlin, Germany; Madrid, Spain",
        ],
    },
]

# Convert our list of records into the column-oriented structure passed to Ragas
def prepare_data_for_ragas(data_list):
    """Pivot a sequence of records into a dict of parallel column lists.

    Args:
        data_list: Iterable of mappings, each providing the keys
            ``"input"``, ``"output"`` and ``"contexts"``.

    Returns:
        A dict with the keys ``"question"``, ``"answer"`` and ``"contexts"``
        (in that order). Each value is a list holding the corresponding
        field of every record, in input order.

    Raises:
        KeyError: If a record is missing one of the expected keys.
    """
    # (output column, source key) pairs, in the order the columns are created
    # and the record fields are read.
    column_sources = (
        ("question", "input"),
        ("answer", "output"),
        ("contexts", "contexts"),
    )
    columns = {column: [] for column, _ in column_sources}

    # A single pass over the input, so one-shot iterables work as well.
    for record in data_list:
        for column, source_key in column_sources:
            columns[column].append(record[source_key])

    return columns

def create_report(data):
    """Evaluate records with the Ragas ``answer_relevancy`` metric.

    Args:
        data: Iterable of dicts with ``"input"``, ``"output"`` and
            ``"contexts"`` keys, in the shape ``prepare_data_for_ragas``
            consumes.

    Returns:
        Whatever ``ragas.evaluate`` returns for the single
        ``answer_relevancy`` metric.
    """
    # Convert exactly once and reuse the result, so a single-pass iterable
    # (e.g. a generator) is not exhausted before the dataset is built.
    ragas_dict = prepare_data_for_ragas(data)
    dataset = Dataset.from_dict(ragas_dict)

    # Chat model and embedding model handed to `evaluate`; both use the
    # module-level `openai_api_key`.
    langchain_llm = ChatOpenAI(
        model_name="gpt-4o-mini",
        api_key=openai_api_key,
    )
    langchain_embeddings = OpenAIEmbeddings(
        model="text-embedding-3-large",
        api_key=openai_api_key,
    )

    score = evaluate(
        dataset,
        metrics=[answer_relevancy],
        llm=langchain_llm,
        embeddings=langchain_embeddings,
    )
    return score

# Run the evaluation, then print the result as a pandas table followed by
# the result object itself, each on its own line.
results = create_report(evaluation_data)
print(results.to_pandas(), results, sep="\n")
