In [2]:
# data
from datasets import load_dataset

# Load the "english_v2" configuration of the Amnesty QA dataset.
# `datasets` warned that this dataset needs an explicit opt-in:
# trust_remote_code=True avoids that message and is announced as mandatory
# for this dataset from the next major `datasets` release.
amnesty_qa = load_dataset(
    "explodinggradients/amnesty_qa",
    "english_v2",
    trust_remote_code=True,
)




You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [3]:
# Show the schema and row count of the "eval" split.
amnesty_qa["eval"]

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 20
})

In [6]:
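# Prerequisite: ragas must be installed in the kernel's environment.
# Left commented out so "Run All" does not reinstall it; uncomment to install:
# !pip install --user ragas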

In [2]:
# Metric objects used for the evaluation; harmfulness lives in the separate
# `critique` submodule, the rest come straight from `ragas.metrics`.
from ragas.metrics import context_precision, answer_relevancy, faithfulness, context_recall
from ragas.metrics.critique import harmfulness

# Metrics passed to evaluate() further down (order kept as originally listed).
metrics = [faithfulness, answer_relevancy, context_recall, context_precision, harmfulness]

In [3]:
import os
from dotenv import find_dotenv, load_dotenv

# Pull settings from the nearest .env file into the process environment.
load_dotenv(find_dotenv())

# Azure OpenAI settings read from environment variables. os.getenv() yields
# None for any variable that is not set, so a missing value is not detected here.
azure_configs = dict(
    base_url=os.getenv("AZURE_OPENAI_ENDPOINT"),
    # The deployment name and the model name are read from the same variable.
    model_deployment=os.getenv("GEN_STEP_MODEL"),
    model_name=os.getenv("GEN_STEP_MODEL"),
    embedding_deployment=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    # NOTE(review): assumes the embedding model name equals the deployment
    # name ("most likely" per the original author) - confirm.
    embedding_name=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
)

In [4]:
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from ragas import evaluate

# One shared API version for both Azure clients, so the chat model and the
# embeddings client cannot drift apart when the version is bumped.
AZURE_OPENAI_API_VERSION = "2023-05-15"

# Chat model built from the endpoint/deployment values in `azure_configs`.
azure_model = AzureChatOpenAI(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=azure_configs["base_url"],
    azure_deployment=azure_configs["model_deployment"],
    model=azure_configs["model_name"],
    validate_base_url=False,
)

# Embeddings client for the embedding-based metrics; of the metrics selected
# in this notebook the original note names answer_relevancy.
azure_embeddings = AzureOpenAIEmbeddings(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=azure_configs["base_url"],
    azure_deployment=azure_configs["embedding_deployment"],
    model=azure_configs["embedding_name"],
)

In [5]:
# Run every selected metric over the "eval" split, passing the Azure chat
# model as `llm` and the Azure embeddings client as `embeddings`.
# NOTE(review): with raise_exceptions=False a failing metric call is not
# re-raised; presumably it shows up as a missing score instead - check the
# per-row results for NaN before trusting the aggregate numbers.
result = evaluate(
    amnesty_qa["eval"],
    metrics=metrics,
    llm=azure_model,
    embeddings=azure_embeddings,
    raise_exceptions=False,
)

# Display the aggregate score per metric.
result


Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Runner in Executor raised an exception
Traceback (most recent call last):
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/site-packages/ragas/executor.py", line 79, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/asyncio/tasks.py", line 615, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/site-packages/ragas/executor.py", line 38, in sema_coro
    return await coro
           ^^^^^^^^^^
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/site-packages/ragas/executor.py", line 112, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/site-packages/ragas/metrics/base.py", line 116, in ascore
    raise e
  File "/home/guy/miniconda3/envs/bert/lib/python3.11/site-packages/ragas/metrics/base.py", line

{'faithfulness': 0.8904, 'answer_relevancy': 0.0526, 'context_recall': 0.9218, 'context_precision': 0.9748, 'harmfulness': 0.8487}

In [20]:
# Per-question scores as a DataFrame: one row per sample, one column per metric.
df = result.to_pandas()

# Preview the first five rows (the default for head()).
df.head(n=5)

Unnamed: 0,question,ground_truth,answer,contexts,faithfulness,answer_relevancy,context_recall,context_precision,harmfulness
0,What are the global implications of the USA Su...,The global implications of the USA Supreme Cou...,The global implications of the USA Supreme Cou...,"[- In 2022, the USA Supreme Court handed down ...",,,1.0,0.988043,1.0
1,Which companies are the main contributors to G...,"According to the Carbon Majors database, the m...","According to the Carbon Majors database, the m...","[In recent years, there has been increasing pr...",1.0,0.0,1.0,0.956722,1.0
2,Which private companies in the Americas are th...,The largest private companies in the Americas ...,"According to the Carbon Majors database, the l...",[The issue of greenhouse gas emissions has bec...,1.0,0.0,0.3,0.988319,1.0
3,What action did Amnesty International urge its...,Amnesty International urged its supporters to ...,Amnesty International urged its supporters to ...,"[In the case of the Ogoni 9, Amnesty Internati...",0.833333,0.0,0.8,0.933254,1.0
4,What are the recommendations made by Amnesty I...,The recommendations made by Amnesty Internatio...,Amnesty International made several recommendat...,"[In recent years, Amnesty International has fo...",1.0,0.0,1.0,0.993686,1.0
