In [1]:
import os

from google.colab import userdata

# Copy the OpenAI key from Colab's secret store into the process environment.
# The OpenAI clients created later are constructed without an explicit key,
# so they presumably pick it up from this variable.
os.environ.update(OPENAI_API_KEY=userdata.get("OPENAI_API_KEY"))


In [3]:
!pip install -U langchain langchain-openai


Collecting langchain
  Downloading langchain-0.3.24-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.14-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-core<1.0.0,>=0.3.55 (from langchain)
  Downloading langchain_core-0.3.55-py3-none-any.whl.metadata (5.9 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading langchain-0.3.24-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_openai-0.3.14-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.55-py3-none-any.whl (434 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.1/434.1 kB[0m [31m41.2 MB/s[0m et

In [4]:
from langchain_openai import OpenAIEmbeddings
# Embedding client created with library defaults: no model name or API key is
# passed here, so it presumably relies on the OPENAI_API_KEY environment
# variable set in the first cell.
embeddings = OpenAIEmbeddings()

In [11]:
!pip install -U langchain-community pypdf

Collecting pypdf
  Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)
Downloading pypdf-5.4.0-py3-none-any.whl (302 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.3/302.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.4.0


In [12]:
# Import PyPDFLoader from the langchain-community package that the notebook
# installs; the `langchain.document_loaders` path is only a deprecated
# re-export of the same class.
from langchain_community.document_loaders import PyPDFLoader

# NOTE: absolute Colab path -- the PDF has to be uploaded to /content before
# this cell is run.
loader = PyPDFLoader("/content/rag.pdf")
# Parse the PDF into LangChain documents (per the loader docs, one per page).
documents = loader.load()

In [13]:
# Import the splitter from its dedicated package rather than through the
# legacy `langchain.text_splitter` re-export path.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunks of at most 500 characters, with 50 characters of overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# NOTE: this rebinds `documents` from the loaded pages to the chunks; re-run
# the loading cell to get the unsplit pages back.
documents = text_splitter.split_documents(documents)

In [14]:
 !pip install --q faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m85.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
# Import FAISS from the langchain-community package that the notebook
# installs; the `langchain.vectorstores` path is only a deprecated re-export
# of the same class.
from langchain_community.vectorstores import FAISS

# Embed every chunk with `embeddings` and build the FAISS index over them.
vectorstore = FAISS.from_documents(documents, embeddings)

In [16]:
# Wrap the vector store as a retriever with default search settings (no
# search type or k is passed here).
retriever = vectorstore.as_retriever()

In [17]:
from langchain_openai import ChatOpenAI
# Chat model created with library defaults: no model name or temperature is
# set here, so answers may vary between runs -- TODO confirm the defaults are
# what the evaluation should use.
llm = ChatOpenAI()

In [18]:
# Import from langchain-core directly instead of the legacy `langchain.prompts`
# / `langchain.schema.*` re-export paths.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string, such as
            the documents returned by the retriever.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The string opens with exactly three quotes; the previous four-quote opener
# put a stray `"` character at the start of the prompt text.
template = """
You are a helpful assistant that answers questions based on the provided context.
Use the provided context to answer the question.
Question: {input}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

# Setup RAG pipeline: the incoming question is sent to the retriever (whose
# documents are flattened to plain text, so the prompt does not contain the
# repr of a list of Document objects) and is also passed through unchanged as
# `input`; the filled prompt goes to the chat model and the reply is reduced
# to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [19]:
# Smoke-test the chain with a single question; the bare name on the last line
# makes the notebook display the returned answer string.
response = rag_chain.invoke("what is rag?")
response

'RAG stands for Retrieval-Augmented Generation, which integrates the capabilities of large language models (LLMs) with external knowledge retrieval to advance text generation tasks in natural language processing (NLP).'

In [21]:
# Create dataset: for every question, record the chain's answer together with
# the text of the documents the retriever returns for that question.
# NOTE: `response` is rebound here from the answer string of the previous cell
# to a list of answers.
question = ["what is rag?"]
response = []
contexts = []

# Inference
for query in question:
    response.append(rag_chain.invoke(query))
    # `invoke` is the supported retriever entry point; the older
    # `get_relevant_documents` method is deprecated. This is a second
    # retrieval, separate from the one performed inside `rag_chain`.
    contexts.append([doc.page_content for doc in retriever.invoke(query)])

# To dict
# NOTE(review): these keys ("query", "response", "context") differ from the
# column names ("question", "answer", "contexts") of the hand-written dataset
# that is evaluated further down -- confirm which schema is intended.
data = {
    "query": question,
    "response": response,
    "context": contexts,
}

In [23]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [24]:
from datasets import Dataset
# Wrap the column-oriented `data` dict (one list per column) as a Dataset.
dataset = Dataset.from_dict(data)

In [25]:
import pandas as pd
# Tabular view of the dataset for inspection; the recorded output shows the
# columns query, response and context.
df = pd.DataFrame(dataset)

In [26]:
# Display the frame (a single row in the recorded run).
df

Unnamed: 0,query,response,context
0,what is rag?,"RAG stands for Retrieval-Augmented Generation,...",[7 RAG EVALUATION AND BENCHMARK\nRetrieval-Aug...


In [27]:
# One dict per DataFrame row.
df_dict = df.to_dict(orient='records')

# Convert context to list.
# The column holding the retrieved passages is named 'context' (singular).
# The previous lookup of 'contexts' never matched, so the real column was left
# untouched and every record gained a spurious empty 'contexts' entry.
for record in df_dict:
    value = record.get('context')
    if value is None:
        # Missing context: use an empty list so the field is always a list.
        record['context'] = []
    elif not isinstance(value, list):
        # A single non-list value: wrap it so the field is always a list.
        record['context'] = [value]

In [37]:
!pip install ragas




In [38]:
from datasets import Dataset

# Hand-written single-row evaluation sample with the columns question,
# contexts (a list of passages per row), answer and ground_truth.
# NOTE: this rebinds `data` and `dataset`, replacing the versions built earlier
# from the RAG chain's output, so the evaluation that follows scores this
# literal sample rather than the answers produced by `rag_chain`.
data = {
    "question": ["What is RAG?"],
    "contexts": [[
        "RAG (Retrieval-Augmented Generation) is a method that combines retrieval-based and generation-based models. It retrieves relevant documents and then uses them to generate more accurate and grounded responses."
    ]],
    "answer": ["RAG is a method that retrieves documents and then generates responses based on them."],
    "ground_truth": ["RAG is a method that combines document retrieval with text generation to improve accuracy and grounding in responses."]
}

dataset = Dataset.from_dict(data)


In [39]:
from ragas.evaluation import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision
)

# Score `dataset` with the four Ragas metrics listed below. No LLM or
# embedding model is passed, so Ragas presumably falls back to its default
# OpenAI-backed models using the OPENAI_API_KEY set earlier -- TODO confirm.
# NOTE: `dataset` is whatever the most recently run cell bound to that name;
# in notebook order that is the hand-written single-row sample.
result = evaluate(
    dataset=dataset,
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

# Prints the per-metric scores (see the recorded output).
print(result)


Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

{'context_precision': 1.0000, 'context_recall': 1.0000, 'faithfulness': 1.0000, 'answer_relevancy': 1.0000}
