# Retrieval Augmented Generation (RAG) Basics

In this notebook, we will cover the basics of Retrieval Augmented Generation (RAG) model. RAG is a model that combines the best of both worlds - retrieval and generation. It uses a retriever to retrieve relevant passages from a large corpus and then uses a generator to generate the answer.

References:

https://github.com/zenml-io/zenml-projects/blob/feature/evaluation-llm-complete-guide/llm-complete-guide/most_basic_rag_pipeline.py

https://docs.zenml.io/user-guide/llmops-guide/evaluation/evaluation-in-65-loc


In [1]:
import os
import re
import string

from openai import OpenAI
from typing import List, Tuple

# Helper Functions

In [2]:
def preprocess_text(text: str):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r"\s+", " ", text).strip()
    return text

def tokenize(text: str):
    return preprocess_text(text).split()

In [3]:
def retrieve_relevant_chunks(query, corpus, top_n=2):
    query_tokens = set(tokenize(query))
    similarities = []
    for chunk in corpus:
        chunk_tokens = set(tokenize(chunk))
        similarity = len(query_tokens.intersection(chunk_tokens)) / len(query_tokens.union(chunk_tokens))
        similarities.append(similarity)
    top_chunks = sorted(list(enumerate(similarities)), key=lambda x: x[1], reverse=True)[:top_n]
    return [corpus[i] for i, _ in top_chunks]


In [4]:
def modify_query(query:str, chunks: List[str]):
    context = "/n".join(chunks)
    new_query = [
            {
                "role": "system",
                "content": f"Based on the provided context, answer the following question: {query}\n\nContext:\n{context}",
                },
            {
                "role": "user",
                "content": query,
                },
        ]
    return new_query

In [5]:
def answer_question(query: str, corpus: str, top_n=2):
    relevant_chunks = retrieve_relevant_chunks(query, corpus, top_n)
    if not relevant_chunks:
        return "I'm sorry, I don't know the answer to that question."
    client = OpenAI(api_key = os.environ.get("OPENAI_API_KEY"))
    chat_completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=modify_query(query, chunks=relevant_chunks),
        max_tokens=100,
        temperature=0,
    )
    answer = chat_completion.choices[0].message.content.strip()
    return answer

# Example Usage

In [6]:
# Example usage
query = "Who is the president of the United States?"
corpus = [
    "The president of the United States is Joe Biden.",
    "Joe Biden is the current president of the United States.",
    "The current president of the United States is Joe Biden.",
]

relevant_chunks = retrieve_relevant_chunks(query=query, corpus=corpus, top_n=2)
query_modification = modify_query(query=query, chunks=relevant_chunks)

print(f"Relevant chunks: {relevant_chunks}")
print(f"Modified query: {query_modification}")

Relevant chunks: ['The president of the United States is Joe Biden.', 'Joe Biden is the current president of the United States.']
Modified query: [{'role': 'system', 'content': 'Based on the provided context, answer the following question: Who is the president of the United States?\n\nContext:\nThe president of the United States is Joe Biden./nJoe Biden is the current president of the United States.'}, {'role': 'user', 'content': 'Who is the president of the United States?'}]


In [7]:
run_query = True
if run_query:
    answer = answer_question(query, corpus)
    print(answer)

The president of the United States is Joe Biden.


# Sample Evaluation

In [8]:
def evaluate_retrieval(question, expected_answer, corpus, top_n=2):
    """Check if the retrieved chunks contain any words from expected answer"""
    relevant_chunks = retrieve_relevant_chunks(question, corpus, top_n)
    score = any(
        any(word in chunk for word in tokenize(expected_answer))
        for chunk in relevant_chunks
    )
    return score

In [9]:
def evaluate_generation(question, expected_answer, generated_answer):
    """Use ChatGPT to evaluate the relevance and accuracy of a generated answer."""
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are an evaluation judge. Given a question, an expected answer, and a generated answer, your task is to determine if the generated answer is relevant and accurate. Respond with 'YES' if the generated answer is satisfactory, or 'NO' if it is not.",
            },
            {
                "role": "user",
                "content": f"Question: {question}\nExpected Answer: {expected_answer}\nGenerated Answer: {generated_answer}\nIs the generated answer relevant and accurate?",
            },
        ],
        model="gpt-3.5-turbo",
    )

    judgment = chat_completion.choices[0].message.content.strip().lower()
    return judgment == "yes"

In [12]:
eval_data = [
    {
        "question": "What is the role of the Speaker of the House in the United States Congress?",
        "expected_answer": "The Speaker of the House in the United States Congress is responsible for presiding over the House of Representatives, setting the legislative agenda, and representing the House to the executive branch and the public.",
    },
    {
        "question": "How does the electoral college determine the outcome of the presidential election in the United States?",
        "expected_answer": "The electoral college determines the outcome of the presidential election in the United States by allocating electors to each state based on its representation in Congress. The candidate who wins the majority of electoral votes (270 out of 538) becomes the president-elect.",
    },
    {
        "question": "Why is the nickname of the President of the United States 'Dark Brandon'?",
        "expected_answer": "The purpose of a filibuster in the United States Senate is to prolong debate on a proposed legislation, with the aim of delaying or preventing a vote on the bill. It requires a supermajority of 60 votes to invoke cloture and end a filibuster.",
    },
]

In [13]:
retrieval_scores = []
generation_scores = []

for item in eval_data:
    retrieval_score = evaluate_retrieval(
        item["question"], item["expected_answer"], corpus
    )
    retrieval_scores.append(retrieval_score)

    generated_answer = answer_question(item["question"], corpus)
    generation_score = evaluate_generation(
        item["question"], item["expected_answer"], generated_answer
    )
    generation_scores.append(generation_score)

retrieval_accuracy = sum(retrieval_scores) / len(retrieval_scores)
generation_accuracy = sum(generation_scores) / len(generation_scores)

print(f"Retrieval Accuracy: {retrieval_accuracy:.2f}")
print(f"Generation Accuracy: {generation_accuracy:.2f}")

Retrieval Accuracy: 1.00
Generation Accuracy: 1.00
