# Evaluating using RAGAS
The notebook shows how to perform evaluation of the performance of a RAG pipeline using the RAGAS library.

In [1]:
import pandas as pd
from datasets import Dataset
from ragas import evaluate
from ragas.run_config import RunConfig
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
import plotly.graph_objects as go
import plotly.io as pio
import nest_asyncio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
nest_asyncio.apply() # apply the event loop async fix

First load the evaluation set from a file. This should contain not only the synthetic (or human generated) test questions and `ground_truth`s, but also the answer generated by the RAG pipeline as well as the contexts used by the pipeline to generate that answer

In [3]:
df = pd.read_csv("../data/evaluation-sets/eidc-eval-sample.csv")
eval_dataset = Dataset.from_pandas(df)

In [7]:
df

Unnamed: 0,question,ground_truth,answer,contexts
0,What was the frequency of snowline observation...,The frequency of snowline observations made da...,The available information does not provide a c...,"['The dataset entitled ""Snow Survey of Great B..."
1,What was the primary focus of studying the Eur...,The primary focus of studying the European sha...,The available information does not clearly sta...,"['The dataset entitled ""Diet, timing of egg la..."
2,What are the UKCEH Land Cover Classes used to ...,The UKCEH Land Cover Classes used to describe ...,The UKCEH Land Cover Classes used to describe ...,"['The dataset entitled ""Land Cover Map 2020 (l..."
3,What method was used to classify the pixels in...,The Random Forest classification method was us...,"Based on the available information, it appears...","['The dataset entitled ""Land Cover Map 2017 (l..."
4,What were the specific locations where the exp...,The answer to given question is not present in...,"Based on the available information, it does no...","['The dataset entitled ""Ammonia measurements f..."


In [4]:
llm = ChatOllama(model='mistral-nemo', num_ctx=16384)
embeddings = OllamaEmbeddings(model='mistral-nemo', num_ctx=16384)

In [5]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_utilization,
    context_recall,
    context_entity_recall,
    answer_similarity,
    answer_correctness,
)

In [6]:
result = evaluate(
    eval_dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
        context_precision,
        context_utilization,
        context_recall,
        context_entity_recall,
        answer_similarity,
        answer_correctness,
    ],
    llm=llm,
    embeddings=embeddings,
    is_async=False,
    raise_exceptions=False,
    run_config=RunConfig(max_workers=1),
)
result

ValueError: Dataset feature "contexts" should be of type Sequence[string], got <class 'datasets.features.features.Value'>

In [None]:
result_df = result.to_pandas()
pio.templates.default = "gridon"
fig = go.Figure()
metrics = [metric for metric in result_df.columns.to_list() if metric not in ["question", "ground_truth", "answer", "contexts"]]
for metric in metrics:
    fig.add_trace(go.Violin(y=result_df[metric], name=metric, points="all", box_visible=True, meanline_visible=True))
fig.update_yaxes(range=[-0.02,1.02])
fig.show()