### Setup RAG System

In [None]:
# pip install faiss-cpu
# !pip install ragas

Collecting ragas
  Downloading ragas-0.2.15-py3-none-any.whl.metadata (9.0 kB)
Collecting appdirs (from ragas)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting diskcache>=5.6.3 (from ragas)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading ragas-0.2.15-py3-none-any.whl (190 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: appdirs, diskcache, ragas

   -------------------------- ------------- 2/3 [ragas]
   -------------------------- ------------- 2/3 [ragas]
   -------------------------- ------------- 2/3 [ragas]
   ---------------------------------------- 3/3 [ragas]

Successfully installed appdirs-1.4.4 diskcache-5.6.3 ragas-0.2.15


In [36]:
import os

import faiss
import openai
import numpy as np
from dotenv import load_dotenv

load_dotenv()
groq_api_key = os.getenv("groq_api")

In [21]:
docs = [
    "Paris is the capital and most populous city of France. The city is famed for the Eiffel Tower.",
    "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'.",
    "The Great Wall of China is a series of fortifications built to protect the ancient Chinese states.",
    "Mount Everest, part of the Himalayas, is Earth’s highest mountain above sea level.",
    "Mike loves the color pink more than any other color."
]

In [None]:
# client = openai.OpenAI()
# def get_embedding(text):
#     response = client.embeddings.create(model="text-embedding-3-small", input=text)
#     return response.data[0].embedding

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [23]:
embeddings = np.array(hf_embeddings.embed_documents(docs)).astype('float32')
index = faiss.IndexFlatIP(embeddings.shape[1])
faiss.normalize_L2(embeddings)
index.add(embeddings)

In [68]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_groq import ChatGroq 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def retrieve(query, k):
    query_embedding = np.array(hf_embeddings.embed_documents(query)).astype('float32')
    
    faiss.normalize_L2(query_embedding)
    _, idx = index.search(query_embedding, k)
    
    return [docs[i] for i in idx[0]]

def generate_answer(question: str, contexts: list[str]) -> str:
    """
    Generates an answer to the user's question using the provided contexts
    and a Groq-hosted LLM via LangChain Expression Language (LCEL).

    Args:
        question (str): The user's question.
        contexts (list[str]): A list of relevant document contexts.

    Returns:
        str: The generated answer from the LLM.
    """
    # 1. Initialize the Groq Chat model
    llm = ChatGroq(
        model_name='gemma2-9b-it',
        temperature=0, # Keep temperature at 0 for more factual/less creative answers
        groq_api_key=groq_api_key
    )

    # 2. Define the RAG prompt template
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", 
         "Answer the user question **only** with facts found in the context. "
         "If the answer is not in the context, state that you cannot answer from the provided information.\n\n"
         "Context:\n{context}"), # 'context' is the variable where retrieved docs will be injected
        ("user", "{question}")
    ])

    # This creates a chain that takes 'context' and 'question' as input,
    # formats them into the prompt, and sends to the LLM.
    # Note: `create_stuff_documents_chain` is more for LangChain's Document objects.
    # We are directly formatting the context string in the LCEL chain below.
    # For a simple RAG chain:
    generation_chain = (
        {
            "context": lambda x: "\n".join(f"- {c}" for c in x["contexts"]), # Format contexts from list of strings
            "question": RunnablePassthrough() # Pass the question through
        }
        | prompt_template
        | llm
        | StrOutputParser()
    )

    try:
        # Invoke the chain with the question and contexts
        result = generation_chain.invoke({"question": question, "contexts": contexts})
        return result.strip()
    except Exception as e:
        print(f"Error generating answer with Groq/LangChain: {e}")
        return "Error: Could not generate answer."

    


# def generate_answer(question, contexts):
#     context_text = "\n".join(f"- {c}" for c in contexts)

#     prompt_template = PromptTemplate(
#         template=(
#             "Answer the user question **only** with facts found in the context.\n\n"
#             "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
#         ),
#         input_variables=["context", "question"]
#     )
#     llm = ChatGroq(model="meta-llama/llama-guard-4-12b", temperature=.5, api_key=groq_api_key)
    
#     chain = LLMChain(llm=llm, prompt=prompt_template)

#     result = chain.run({"context": context_text, "question": question})
#     return result.strip()


### Evaluate RAG System with Ragas

In [69]:
from datasets import Dataset

questions = [
    "What is the capital of France?",
    "Who wrote Pride and Prejudice?",
    "Where is Mount Everest located?",
    "What is Mike's favorite color?"
]

ground_truths = [
    "Paris",
    "Jane Austen",
    "the Himalayas",
    "Pink"
]

rows = []

for question, ground_truth in zip(questions, ground_truths):
    context = retrieve(question, k=2)
    answer = generate_answer(question, context)
    rows.append(
        {
            "question": question,
            "contexts": context,
            "answer": answer,
            "reference": ground_truth,
        }
    )

evaluation_dataset = Dataset.from_list(rows)

In [70]:
print(evaluation_dataset[0])

{'question': 'What is the capital of France?', 'contexts': ['Mike loves the color pink more than any other color.', "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'."], 'answer': 'I cannot answer from the provided information.', 'reference': 'Paris'}


In [71]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    answer_correctness,
    answer_relevancy,
    faithfulness,
    context_precision,
    context_recall,
)

ragas_eval_llm = ChatGroq(model_name='gemma2-9b-it', temperature=0.3,max_retries=5,timeout=60, groq_api_key=groq_api_key)

scores = evaluate(
    evaluation_dataset,
    metrics=[
        answer_correctness,
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
    llm=ragas_eval_llm, 
    embeddings=hf_embeddings
)

print(rows)
print(scores)

Evaluating: 100%|██████████| 20/20 [01:05<00:00,  3.28s/it]


[{'question': 'What is the capital of France?', 'contexts': ['Mike loves the color pink more than any other color.', "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'."], 'answer': 'I cannot answer from the provided information.', 'reference': 'Paris'}, {'question': 'Who wrote Pride and Prejudice?', 'contexts': ['Mike loves the color pink more than any other color.', "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'."], 'answer': 'Jane Austen wrote Pride and Prejudice.', 'reference': 'Jane Austen'}, {'question': 'Where is Mount Everest located?', 'contexts': ['Mike loves the color pink more than any other color.', "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'."], 'answer': 'I cannot answer from the provided information.', 'reference': 'the Himalayas'}, {'question': "What is Mike's favorite color?", 'contexts': ['Mike loves the c

### Metrics Definitions

https://docs.ragas.io/en/v0.1.21/concepts/metrics/answer_correctness.html

https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/faithfulness/#example

https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/context_precision/

https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/context_recall/

https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/answer_relevance/

https://docs.ragas.io/en/v0.1.21/concepts/metrics/index.html

### High Scores

In [73]:
rows = []

context = docs[-1:]
question = questions[-1]
answer = generate_answer(question, context)

rows.append(
    {
        "user_input": question,
        "retrieved_contexts": context,
        "response": answer,
        "reference": ground_truths[-1]
    }
)

evaluation_dataset = Dataset.from_list(rows)

scores = evaluate(
    evaluation_dataset,
    metrics=[
        answer_correctness,
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
    llm=ragas_eval_llm, 
    embeddings=hf_embeddings
)

print(rows)
print(scores)

Evaluating: 100%|██████████| 5/5 [00:01<00:00,  2.92it/s]


[{'user_input': "What is Mike's favorite color?", 'retrieved_contexts': ['Mike loves the color pink more than any other color.'], 'response': "Mike's favorite color is pink.", 'reference': 'Pink'}]
{'answer_correctness': 0.9023, 'answer_relevancy': 1.0000, 'faithfulness': 1.0000, 'context_precision': 1.0000, 'context_recall': 1.0000}


### Wrong Context

In [74]:
rows = []

context = ['Vienna is the capital of Austria']
question = questions[-1]
answer = generate_answer(question, context)

rows.append(
    {
        "user_input": question,
        "retrieved_contexts": context,
        "response": answer,
        "reference": ground_truths[-1]
    }
)

evaluation_dataset = Dataset.from_list(rows)

scores = evaluate(
    evaluation_dataset,
    metrics=[
        answer_correctness,
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
    llm=ragas_eval_llm, 
    embeddings=hf_embeddings
)

print(rows)
print(scores)

Evaluating: 100%|██████████| 5/5 [00:02<00:00,  2.19it/s]


[{'user_input': "What is Mike's favorite color?", 'retrieved_contexts': ['Vienna is the capital of Austria'], 'response': 'I cannot answer from the provided information.', 'reference': 'Pink'}]
{'answer_correctness': 0.0304, 'answer_relevancy': 0.0000, 'faithfulness': 0.0000, 'context_precision': 0.0000, 'context_recall': 0.0000}


### Correct Answer with Wrong Context

In [76]:
rows = []

context = ['Vienna is the capital of Austria']
question = questions[-1]
answer = generate_answer(question, context)

rows.append(
    {
        "user_input": question,
        "retrieved_contexts": context,
        "response": "Mike's favorite color is pink!",
        "reference": ground_truths[-1]
    }
)

evaluation_dataset = Dataset.from_list(rows)

scores = evaluate(
    evaluation_dataset,
    metrics=[
        answer_correctness,
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
    llm=ragas_eval_llm, 
    embeddings=hf_embeddings
)

print(rows)
print(scores)

Evaluating: 100%|██████████| 5/5 [00:01<00:00,  2.75it/s]


[{'user_input': "What is Mike's favorite color?", 'retrieved_contexts': ['Vienna is the capital of Austria'], 'response': "Mike's favorite color is pink!", 'reference': 'Pink'}]
{'answer_correctness': 0.8988, 'answer_relevancy': 1.0000, 'faithfulness': 0.0000, 'context_precision': 0.0000, 'context_recall': 0.0000}


## Ollama Integration

In [57]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

from langchain_ollama.chat_models import ChatOllama
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

ModuleNotFoundError: No module named 'langchain_ollama'

In [None]:
llm = ChatOllama(model="qwen3:4b", temperature=0)
ragas_llm = LangchainLLMWrapper(llm)

emb = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en-v1.5")
ragas_emb = LangchainEmbeddingsWrapper(emb)

scores = evaluate(
    evaluation_dataset,
    metrics=[answer_correctness, answer_relevancy, faithfulness,
             context_precision, context_recall],
    llm=ragas_llm,
    embeddings=ragas_emb,
)

In [None]:
print(scores)