# RAGAS Approach


![](../assets/ragas.png)


## Setup


In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import time
import os
from typing import List, Dict, TypedDict
from pathlib import Path
from tqdm import tqdm
import pandas as pd
import nest_asyncio
from dotenv import load_dotenv

import openai
from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import VectorStore
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import StrOutputParser

In [3]:
# Read environment variables from a .env file; `loaded` keeps load_dotenv's return value.
# NOTE(review): override=True presumably lets .env values win over variables already
# set in the shell — confirm this is intended.
loaded = load_dotenv(override=True)

# Base directory for every artifact of this benchmark (documents, chunks, vector stores,
# testsets, results); note the trailing slash, later cells concatenate onto it directly.
data_dir = "data/ragas/"
# Also published as an environment variable.
# NOTE(review): presumably read by the project's utils / the chunking notebook — confirm.
os.environ['CHUNKING_BENCHMARK_DATADIR'] = data_dir

# 1. Load and Save Documents


Each file is loaded as a single LangChain document, which may be too large to fit into an LLM's context window. Therefore, we need to split these documents into smaller pieces of text for further processing.

In [None]:
from utils.data_loader import save_documents

# Load every raw file in <data_dir>/documents as one LangChain Document each,
# then persist the whole collection through the project helper.
documents_dir = os.path.join(data_dir, "documents")

documents: List[Document] = []
# os.listdir returns entries in arbitrary order; sorting keeps the document order
# (and everything derived from it) reproducible across runs and machines.
for file in sorted(os.listdir(documents_dir)):
    file_path = os.path.join(documents_dir, file)
    if not os.path.isfile(file_path):
        # Skip sub-directories (e.g. .ipynb_checkpoints) that TextLoader cannot read.
        continue
    loader = TextLoader(file_path)
    documents.extend(loader.load())

save_documents(documents, data_dir)

In [None]:
from utils.data_loader import load_documents
# Load the documents back from data_dir via the project helper, so the raw-file
# loading cell above does not have to be re-run.
# NOTE(review): presumably the counterpart of save_documents — confirm in utils.data_loader.
documents = load_documents(data_dir)

# 2. Apply chunking


In [None]:
# Execute the chunking notebook inside this kernel's namespace (-i), so it can use
# the variables defined above.
# NOTE(review): presumably this writes the chunk sets that load_chunks reads back in
# the next cell — confirm.
%run -i ../chunking_strategies.ipynb

In [4]:
from utils.data_loader import load_chunks
# Maps experiment (chunking strategy) name -> the chunks produced by that strategy.
# Each value is consumed as a list of documents by the indexing and test-set cells below.
split_chunks: Dict[str, List[Document]] = load_chunks(data_dir)

# 3. Ingest into vector store

Using FAISS


In [5]:
from langchain_huggingface import HuggingFaceEmbeddings

# One FAISS index per chunking experiment, keyed by experiment name.
vector_stores: Dict[str, VectorStore] = {}

# Alternative: local embedding model (kept for reference).
# embeddings = HuggingFaceEmbeddings(
#     model_name="Snowflake/snowflake-arctic-embed-l",
#     model_kwargs={"device": 0, 'trust_remote_code': True},  # Comment out to use CPU
# )

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Indexes are cached on disk in a folder named after the embedding model, so switching
# models never reuses vectors from another one. OpenAIEmbeddings exposes the name as
# `.model`; the HuggingFace alternative above exposes it as `.model_name`.
embedding_model_name = getattr(embeddings, "model", None) or getattr(embeddings, "model_name")
vector_store_dir = f"{data_dir}vector_stores/{embedding_model_name.replace('/', '-')}"
Path(vector_store_dir).mkdir(parents=True, exist_ok=True)

for experiment_name, chunks in split_chunks.items():
    store_path = f"{vector_store_dir}/{experiment_name}"
    if os.path.exists(store_path):
        print("Loading", experiment_name)
        # SECURITY: allow_dangerous_deserialization=True lets FAISS.load_local unpickle
        # the stored docstore. Only acceptable because these files were written by this
        # notebook itself; never point vector_store_dir at untrusted data.
        vector_stores[experiment_name] = FAISS.load_local(
            store_path, embeddings, allow_dangerous_deserialization=True
        )
    else:
        print("Indexing", experiment_name)
        # Embed all chunks of this experiment, then cache the index for later runs.
        vector_stores[experiment_name] = FAISS.from_documents(chunks, embeddings)
        vector_stores[experiment_name].save_local(store_path)

Indexing markdown-header-recursive-512-0
Indexing fixed-size-2048-0
Indexing markdown-header-recursive-1024-0
Indexing markdown-header-recursive-2048-200
Indexing recursive-1024-200
Indexing fixed-size-512-0
Indexing fixed-size-1024-0
Indexing markdown-header-recursive-1024-200
Indexing markdown-header-recursive-2048-0
Indexing fixed-size-1024-200
Indexing markdown-header
Indexing recursive-2048-0
Indexing fixed-size-2048-200
Indexing recursive-512-200
Indexing recursive-2048-200
Indexing recursive-1024-0
Indexing markdown-header-parent
Indexing fixed-size-512-200
Indexing semantic-chunks-95-recursive-2048-200
Indexing semantic-chunks-90
Indexing semantic-chunks-95
Indexing recursive-512-0
Indexing markdown-header-recursive-512-200


# 4. Evaluation


## Create Golden Datasets

Create the golden dataset from a subset of the documents only, so that the remaining documents act as irrelevant noise during retrieval.

In [None]:
# Source paths of the documents that make up the golden-dataset subset.
documents_subset_sources = [
    data_dir + relative_path
    for relative_path in (
        "documents/sleep.md",
        "documents/teeth.md",
        "documents/time_management.md",
        "documents/mentoring.md",
    )
]

### Question Generation with RAGAS


Generate synthetic Questions across Documents to challenge chunking strategies on multi-context queries


In [6]:
from os import environ

# Set the RAGAS_DO_NOT_TRACK flag in this process' environment; going by its name it
# opts out of RAGAS usage tracking. It is set before the cells that import ragas.
_RAGAS_TRACKING_FLAG = "RAGAS_DO_NOT_TRACK"
environ[_RAGAS_TRACKING_FLAG] = "true"

In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# Allow nested use of asyncio inside the notebook's running event loop.
nest_asyncio.apply()

generator_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
critic_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Create the output folder up front: DataFrame.to_csv does not create missing
# directories, and failing after an expensive generation run would waste the LLM calls.
testsets_dir = Path(data_dir) / "testsets"
testsets_dir.mkdir(parents=True, exist_ok=True)

for experiment_name, chunks in split_chunks.items():
    # Only the markdown-header based chunk sets are used as question sources.
    # NOTE(review): the evaluation cell reads one testset per experiment in
    # vector_stores — confirm that testsets for the other experiments already exist.
    if "markdown-header" not in experiment_name:
        continue
    print("Generating", experiment_name)
    ragas_testset = generator.generate_with_langchain_docs(
        # NOTE(review): the text above describes generating from a document subset, but
        # the filter is disabled and all chunks are passed — confirm which is intended.
        # [chunk for chunk in chunks if chunk.metadata["source"] in documents_subset_sources],
        chunks,
        test_size=10,
        # Share of each question type in the generated testset.
        distributions={simple: 0.2, reasoning: 0.3, multi_context: 0.5},
    )
    df = ragas_testset.to_pandas()
    df.to_csv(testsets_dir / f"{experiment_name}.csv", index=False)

## Evaluate Retrieval

Load Evaluation Dataset


In [12]:
from ragas.metrics import context_precision, context_recall
from ragas import RunConfig, evaluate
from datasets import Dataset


# Allow nested use of asyncio (used by Ragas)
nest_asyncio.apply()

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Number of chunks retrieved per question; None selects the per-question "Dyn" mode below.
K = None
results_dir = f"{data_dir}results/{K or 'Dyn'}-k"
Path(results_dir).mkdir(parents=True, exist_ok=True)

for experiment_name, vector_store in vector_stores.items():
    # Per-experiment results are cached on disk, so an interrupted run can be resumed.
    result_path = f"{results_dir}/{experiment_name}.csv"
    if os.path.exists(result_path):
        print("Skipping", experiment_name)
        continue

    # Testsets are only generated for some experiments; skip the others instead of
    # aborting the whole loop with a FileNotFoundError.
    testset_path = data_dir+f"testsets/{experiment_name}.csv"
    if not os.path.exists(testset_path):
        print("No testset for", experiment_name)
        continue

    ragas_testset = pd.read_csv(testset_path)
    questions = ragas_testset["question"].tolist()
    ground_truths = ragas_testset["ground_truth"].tolist()
    eval_data = {
        "question": questions,
        "ground_truth": ground_truths,
        "contexts": [],
    }
    for question, ground_truth in tqdm(zip(questions, ground_truths), total=len(questions)):
        # NOTE(review): ground_truth comes from a CSV text column, so len(ground_truth)
        # is the answer's character count, not a number of reference contexts — confirm
        # this is the intended "dynamic k". A missing (NaN) ground truth would raise here.
        retriever = vector_store.as_retriever(search_kwargs={"k": K or len(ground_truth)})
        retrieved_chunks = retriever.invoke(question)
        eval_data["contexts"].append([chunk.page_content for chunk in retrieved_chunks])

    # RAGAS expects a Dataset object
    dataset = Dataset.from_dict(eval_data)

    # Retry with exponential backoff (1, 2, 4, ... seconds); the last failure is re-raised.
    retries = 8
    for i in range(retries):
        try:
            print(f"Running evaluation for {experiment_name}")
            result = evaluate(
                dataset=dataset,
                metrics=[context_precision, context_recall],
                raise_exceptions=True,
                embeddings=embeddings,
                run_config=RunConfig(max_workers=4),
                llm=llm,
            )
            result.to_pandas().to_csv(result_path, index=False)
            break
        except Exception as e:
            if i == retries - 1:
                # Bare raise keeps the original traceback intact.
                raise
            wait_time = 2**i
            # Report what failed so repeated retries can be diagnosed.
            print(f"Error ({type(e).__name__}: {e}), waiting {wait_time} seconds")
            time.sleep(wait_time)


    

100%|██████████| 9/9 [00:03<00:00,  2.36it/s]

Running evaluation for markdown-header-recursive-512-0





Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

Error, waiting 1 seconds
Running evaluation for markdown-header-recursive-512-0


Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

Error, waiting 2 seconds
Running evaluation for markdown-header-recursive-512-0


Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [11]:
# Must match the K used by the evaluation cell whose results are being summarized.
K = None
results_dir = f"{data_dir}results/{K or 'Dyn'}-k"

# Collect one summary row per experiment that has a result file, then build the frame
# once instead of growing a DataFrame row by row.
summary_rows = []
for experiment_name in vector_stores.keys():
    result_path = f"{results_dir}/{experiment_name}.csv"
    if not os.path.exists(result_path):
        continue
    result = pd.read_csv(result_path)
    # "MAP" holds the mean of context_precision and "Recall" the mean of context_recall.
    summary_rows.append(
        [experiment_name, result["context_precision"].mean(), result["context_recall"].mean()]
    )

results = pd.DataFrame(summary_rows, columns=[f"Experiment@{K or 'Dyn'}", "MAP", "Recall"])

# The summary is written next to the per-experiment folder; make sure its parent exists
# even when the evaluation cell was not run in this session.
Path(results_dir).parent.mkdir(parents=True, exist_ok=True)
results.to_csv(results_dir+".csv", index=False)
results.sort_values("MAP", ascending=False)

Unnamed: 0,Experiment@10,MAP,Recall
7,markdown-header-recursive-1024-200,0.934064,1.0
0,markdown-header-recursive-512-0,0.852418,0.888889
8,markdown-header-recursive-2048-0,0.848237,1.0
11,recursive-2048-0,0.847042,1.0
16,markdown-header-parent,0.823115,1.0
3,markdown-header-recursive-2048-200,0.814451,1.0
2,markdown-header-recursive-1024-0,0.807684,1.0
10,markdown-header,0.772948,0.95
6,fixed-size-1024-0,0.733362,1.0
22,markdown-header-recursive-512-200,0.726434,0.935714
