# Evaluation with RAGAS and Advanced Retrieval Methods Using LangChain


In [1]:
import os
from dotenv import load_dotenv, find_dotenv

# Populate the process environment from the nearest .env file that
# find_dotenv() can locate; the return value is deliberately discarded.
_ = load_dotenv(dotenv_path=find_dotenv())

# Read the key back from the environment (None when the variable is not set).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

## CREATE RETRIEVER

In [7]:
from langchain.document_loaders import ArxivLoader

# Query arXiv for papers on the topic; load_max_docs=5 caps how many are loaded.
query_for_papers_from_arxiv = "Retrieval Augmented Generation"
base_docs = ArxivLoader(
    load_max_docs=5,
    query=query_for_papers_from_arxiv,
).load()

In [10]:
# Peek at the metadata of the first loaded paper only
# (nothing is printed when no papers were loaded).
for doc in base_docs[:1]:
    print(doc.metadata)

{'Published': '2022-02-13', 'Title': 'A Survey on Retrieval-Augmented Text Generation', 'Authors': 'Huayang Li, Yixuan Su, Deng Cai, Yan Wang, Lemao Liu', 'Summary': 'Recently, retrieval-augmented text generation attracted increasing attention\nof the computational linguistics community. Compared with conventional\ngeneration models, retrieval-augmented text generation has remarkable\nadvantages and particularly has achieved state-of-the-art performance in many\nNLP tasks. This paper aims to conduct a survey about retrieval-augmented text\ngeneration. It firstly highlights the generic paradigm of retrieval-augmented\ngeneration, and then it reviews notable approaches according to different tasks\nincluding dialogue response generation, machine translation, and other\ngeneration tasks. Finally, it points out some important directions on top of\nrecent methods to facilitate future research.'}


In [15]:
# create an index
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the papers into chunks of at most 250 characters, then embed the chunks
# with OpenAI embeddings and index them in a Chroma vector store.
# NOTE(review): chunk_overlap is left at the splitter's default -- confirm that
# default is sensible for chunks this small.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250)
docs = text_splitter.split_documents(documents=base_docs)
vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())

In [21]:
# Sanity check: length of the longest chunk produced by the splitter.
max(map(lambda chunk: len(chunk.page_content), docs))

249

In [23]:
# Expose the vector store as a retriever using the "mmr" search type,
# returning k=3 chunks per query.
base_retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=3),
    search_type="mmr",
)
base_retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x12fbce060>, search_type='mmr', search_kwargs={'k': 3})

In [28]:
# Smoke-test the retriever on a sample question.
question = "What is Retrieval Augmented Generation?"

# Retrievers are Runnables, so call `.invoke` (the same entry point used for
# the chains in this notebook) rather than the deprecated
# `get_relevant_documents`.
relevant_docs = base_retriever.invoke(question)
len(relevant_docs)

3

## CREATE PROMPT

In [32]:
from langchain.prompts import ChatPromptTemplate

# Prompt for the QA step. It has two input variables, {context} and
# {question}, and tells the model to answer 'I don't know' when the context
# is not sufficient.
template = (
    "Answer the question based only on the following context. "
    "If you cannot answer the question with the context, please respond with 'I don't know':\n"
    "\n"
    "### CONTEXT\n"
    "{context}\n"
    "\n"
    "### QUESTION\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':\n\n### CONTEXT\n{context}\n\n### QUESTION\nQuestion: {question}\n"))])

## CHAIN

In [37]:
# TODO(cleanup): scratch try/except demo, unrelated to the RAG pipeline; this cell can be removed.
# def calculate_sum(a, b):
#     try:
#         return int(a) + int(b)
#     except:
#         print("not a valid input(s)")


# # calculate_sum(2, "wassup")

not a valid inputs


In [43]:
from operator import itemgetter

from langchain_openai.chat_models import ChatOpenAI  # llm
from langchain.schema.output_parser import StrOutputParser  # to clean up the output
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# Chat model that answers the questions; temperature=0 is requested.
primary_qa_llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    api_key=OPENAI_API_KEY,
)

# RAG chain. Input: {"question": str}. Output: a dict holding the model
# "response" and the retrieved "context", so both can be inspected together.
rag_qa_chain = (
    # 1) Retrieve context for the question and carry the question along.
    dict(
        context=itemgetter("question") | base_retriever,
        question=itemgetter("question"),
    )
    # 2) Re-assigns "context" to itself, so the data passes through unchanged.
    #    It also keeps a Runnable between the two plain dicts: on Python 3.9+
    #    `dict | dict` would be an ordinary dict merge, not a chain.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # 3) Answer with the prompt + LLM and return the context next to it.
    | dict(
        response=prompt | primary_qa_llm,
        context=itemgetter("context"),
    )
)

# rag_qa_chain

{
  context: RunnableLambda(itemgetter('question'))
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x12fbce060>, search_type='mmr', search_kwargs={'k': 3}),
  question: RunnableLambda(itemgetter('question'))
}
| RunnableAssign(mapper={
    context: RunnableLambda(itemgetter('context'))
  })
| {
    response: ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':\n\n### CONTEXT\n{context}\n\n### QUESTION\nQuestion: {question}\n"))])
              | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x312639b20>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x31261ea50>, temperature=0.0,

In [44]:
# Run the chain end to end on a sample question; the result dict carries the
# model's "response" and the retrieved "context".
question = "What is a RAG?"
chain_input = dict(question=question)
result = rag_qa_chain.invoke(chain_input)
result

{'response': AIMessage(content='Answer: RAG stands for Retrieval Augmented Generation.', response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 1219, 'total_tokens': 1231}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}),
 'context': [Document(page_content='2020). RAG consists of three primary components:\nTool Retrieval, Plan Generation, and Execution.1\nIn this study, we focus on enhancing tool retrieval,\nwith the goal of achieving subsequent improve-\nments in plan generation.', metadata={'Authors': 'Raviteja Anantha, Tharun Bethi, Danil Vodianik, Srinivas Chappidi', 'Published': '2023-12-09', 'Summary': "Large language models (LLMs) have the remarkable ability to solve new tasks\nwith just a few examples, but they need access to the right tools. Retrieval\nAugmented Generation (RAG) addresses this problem by retrieving a list of\nrelevant tools for a given task. However, RAG's tool retrieval st

## GROUND TRUTH

In [45]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Schema for the single field expected back from question generation.
question_schema = ResponseSchema(
    description="a question about the context.",
    name="question",
)

# The parser is built from a list of schemas; only one field is needed here.
question_response_schemas = [question_schema]

In [50]:
# Build a parser from the response schemas, and fetch the format
# instructions text that the parser generates for prompts.
question_output_parser = StructuredOutputParser.from_response_schemas(
    response_schemas=question_response_schemas
)
format_instructions = question_output_parser.get_format_instructions()

In [58]:
# Chat model used to generate questions.
question_generation_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")

# Pass-through prompt: its only variable, {content}, is forwarded verbatim.
bare_prompt_template = "{content}"
bare_template = ChatPromptTemplate.from_template(bare_prompt_template)

In [59]:
from langchain.prompts import ChatPromptTemplate

# Prompt asking the model for one context-specific question.
# The output format is described by {format_instructions}, which comes from
# question_output_parser. Previously that value was passed to
# format_messages() but the template had no placeholder for it, so the
# parser's instructions never reached the model.
qa_template = """\
You are a University Professor creating a test for advanced students. For each context, create a question that is specific to the context. Avoid creating generic or general questions.

{format_instructions}

context: {context}
"""

prompt_template = ChatPromptTemplate.from_template(template=qa_template)

# NOTE(review): `context` receives the whole Document object, so its string
# form (which appears to include metadata such as the paper title and summary)
# is what gets rendered into the prompt -- confirm this is intended rather
# than `docs[0].page_content`.
messages = prompt_template.format_messages(
    context=docs[0], format_instructions=format_instructions
)

# The formatted messages are forwarded through the bare "{content}" template
# to the question-generation model.
question_generation_chain = bare_template | question_generation_llm

# Try the chain on the first chunk and parse the reply into a dict.
response = question_generation_chain.invoke({"content": messages})
output_dict = question_output_parser.parse(response.content)

In [61]:
# Raw text returned by the model, i.e. the string that was handed to the output parser.
response.content

'{\n  "question": "What is the main focus of the paper \'A Survey on Retrieval-Augmented Text Generation\'?"\n}'

In [67]:
# Show each parsed field name on one line and its value on the next.
for key, value in output_dict.items():
    print(key, value, sep="\n")

question
What is the main focus of the paper 'A Survey on Retrieval-Augmented Text Generation'?


In [69]:
from tqdm import tqdm

# Generate one question per chunk for the first 10 chunks. Each entry holds
# the parsed "question" plus the chunk it was generated from under "context".
qac_triples = []

for chunk_index, text in enumerate(tqdm(docs[:10])):
    messages = prompt_template.format_messages(
        context=text, format_instructions=format_instructions
    )
    response = question_generation_chain.invoke({"content": messages})
    try:
        output_dict = question_output_parser.parse(response.content)
    except Exception as e:
        # Best-effort: an unparseable reply only skips this chunk, but the
        # skip is reported instead of being silently swallowed.
        # tqdm.write keeps the progress bar intact.
        tqdm.write(f"Skipping chunk {chunk_index}: could not parse model output ({e!r})")
        continue
    output_dict["context"] = text
    qac_triples.append(output_dict)

100%|██████████| 10/10 [00:23<00:00,  2.40s/it]


In [71]:
# Spot-check one generated entry (index 3): its question and the source chunk stored under "context".
qac_triples[3]

{'question': "What is the main focus of the paper 'A Survey on Retrieval-Augmented Text Generation'?",
 'context': Document(page_content='♠University of Cambridge\n♦The Chinese University of Hong Kong\n♣Tencent AI Lab\nli.huayang.lh6@is.naist.jp, ys484@cam.ac.uk\nthisisjcykcd@gmail.com, brandenwang@tencent.com\nlemaoliu@gmail.com\nAbstract\nRecently, retrieval-augmented text generation', metadata={'Published': '2022-02-13', 'Title': 'A Survey on Retrieval-Augmented Text Generation', 'Authors': 'Huayang Li, Yixuan Su, Deng Cai, Yan Wang, Lemao Liu', 'Summary': 'Recently, retrieval-augmented text generation attracted increasing attention\nof the computational linguistics community. Compared with conventional\ngeneration models, retrieval-augmented text generation has remarkable\nadvantages and particularly has achieved state-of-the-art performance in many\nNLP tasks. This paper aims to conduct a survey about retrieval-augmented text\ngeneration. It firstly highlights the generic paradigm

In [None]:
# Separate chat model instance configured with "gpt-4-1106-preview".
# NOTE(review): presumably used to generate the reference answers in cells beyond this view -- confirm.
answer_generation_llm = ChatOpenAI(model="gpt-4-1106-preview")