In [7]:
import os
import getpass

# Prompt for the OpenAI key without echoing it and store it in this process's environment.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key:")

In [8]:
# Prompt for the Cohere key without echoing it (presumably read by `CohereRerank` further down).
os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [3]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyMuPDFLoader

# Load a single PDF from the data directory with the PyMuPDF-based loader.
path = "data/"
loader = DirectoryLoader(
    path,
    glob="pub5108.pdf",  # switch to "*.pdf" to load every PDF in the directory
    loader_cls=PyMuPDFLoader,
)
docs = loader.load()

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents with chunk_size=1000 and chunk_overlap=200, then show the chunk count.
# NOTE(review): `split_documents` is not referenced by any later cell visible in this
# notebook (the vector stores below are built from `docs`) — confirm this is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
split_documents = text_splitter.split_documents(docs)
len(split_documents)

88

In [None]:
# Display the first loaded document (metadata and page_content) to verify the PDF loaded as expected.
docs[0]

Document(metadata={'producer': 'Adobe PDF library 15.00', 'creator': 'Adobe Illustrator CC 23.0 (Windows)', 'creationdate': '2021-05-04T14:08:47-04:00', 'source': 'data/pub5108.pdf', 'file_path': 'data/pub5108.pdf', 'total_pages': 27, 'format': 'PDF 1.5', 'title': 'Pub.5108', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2021-05-04T14:08:47-04:00', 'trapped': '', 'modDate': "D:20210504140847-04'00'", 'creationDate': "D:20210504140847-04'00'", 'page': 0}, page_content='Know Your Permanency Options:\nThe Kinship Guardianship\nAssistance Program \n(KinGAP)\nFully understanding their options is \nthe best way for caregivers to \nhave a voice and a choice.')

In [9]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

# Embedding model reused by the vector stores and the semantic chunker that take `embeddings`.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed the loaded documents into an in-memory Qdrant collection.
# NOTE(review): this indexes `docs`, not the `split_documents` chunks computed earlier —
# confirm that is intended.
vectorstore = Qdrant.from_documents(
    docs,
    embeddings,
    collection_name="FosterCarePolicies",
    location=":memory:",
)

## Naive RAG Chain

In [10]:
# Plain retriever over the Qdrant collection, requesting k=10 results per query.
naive_retriever = vectorstore.as_retriever(
    search_kwargs={"k": 10},
)

In [11]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt shared by every chain in this notebook. It has two placeholders:
#   {question} - the user's query
#   {context}  - whatever the chain's retriever returned for that query
# The model is told to say it doesn't know when it is unsure.
RAG_TEMPLATE = """\
You are a helpful and kind assistant. Use the context provided below to answer the question.

If you do not know the answer, or are unsure, say you don't know.

Query:
{question}

Context:
{context}
"""

# Wrap the template so it can be piped into a chat model.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

In [12]:
from langchain_openai import ChatOpenAI

# Chat model used for answer generation in every chain (and for query rewriting in the multi-query retriever).
chat_model = ChatOpenAI(model="gpt-4.1-nano")

### LCEL RAG Chain

In [13]:
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

# Invoke with: {"question": "<user question>"}
# Returns a dict with "response" (the chat model's message) and "context" (the retrieved documents).
naive_retrieval_chain = (
    # Step 1 - build the working mapping:
    #   "context"  <- the question piped through naive_retriever
    #   "question" <- the question, unchanged
    {
        "context": itemgetter("question") | naive_retriever,
        "question": itemgetter("question"),
    }
    # Step 2 - re-assign "context" to its own value; the mapping is carried forward as-is.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3 - produce the final output:
    #   "response" <- prompt formatted from "context"/"question", sent to the chat model
    #   "context"  <- the retrieved documents, kept so callers can inspect them
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [14]:
# Ask the sample question through the naive chain and display only the answer text.
naive_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- The guardian gains the right and responsibility to make decisions affecting the child's life, including medical, educational, and personal needs.\n- The child is discharged from foster care and is no longer under the supervision of LDSS, ACS, or voluntary agencies, although services can sometimes still be requested.\n- Guardianship is a permanent commitment that lasts until the child reaches adulthood, with guardianship assistance potentially continuing until age 21 if certain conditions are met.\n- The child's parents retain parental rights unless these have been terminated or surrendered; they may still visit the child and file petitions to regain custody.\n- Guardians have duties to protect and support the child, make legal decisions, and manage the child's welfare, including in areas like education, healthcare, and residence.\n- Guardianship arrangements can influence inheritance rights, survivor benefits, and the child's rela

## Best-Matching 25 (BM25) Retriever

In [15]:
from langchain_community.retrievers import BM25Retriever

# BM25 retriever built directly from the loaded documents.
bm25_retriever = BM25Retriever.from_documents(docs)

We'll construct the same chain - only changing the retriever.

In [16]:
# Same three-step RAG chain as the naive one, with bm25_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
bm25_retrieval_chain = (
    {
        "context": itemgetter("question") | bm25_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

Let's look at the responses!

In [17]:
# Ask the sample question through the BM25 chain and display only the answer text.
bm25_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- The guardian takes on a permanent responsibility for the child's care and support until the child reaches adulthood, and in some cases, until age 21 with the youth's consent.\n- The guardian has the authority to make important decisions for the child, such as medical and educational decisions.\n- The child's biological parents usually retain some rights unless those rights have been terminated or surrendered. They may still have visitation rights and can request to have guardianship ended if circumstances change.\n- The court can specify or modify visitation and contact arrangements with birth parents and siblings, and guardians are responsible for maintaining these relationships.\n- The child's legal name can be changed through court process, and the youth must consent to their guardianship agreement if they are 18 or older.\n- Guardianship is less permanent than adoption because birth parents can petition to have guardianship va

## Contextual compression with Cohere Rerank

In [18]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

# Cohere reranker applied on top of the documents returned by naive_retriever.
compressor = CohereRerank(model="rerank-v3.5")
compression_retriever = ContextualCompressionRetriever(
    base_retriever=naive_retriever,
    base_compressor=compressor,
)

Let's create our chain again, and see how this does!

In [19]:
# Same three-step RAG chain, with compression_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
contextual_compression_retrieval_chain = (
    {
        "context": itemgetter("question") | compression_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [20]:
# Ask the sample question through the rerank chain and display only the answer text.
contextual_compression_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- The guardian has the right and responsibility to make decisions affecting the child's life, such as medical and educational decisions.\n- The child's parents still retain parental rights unless those rights have been terminated or surrendered.\n- The guardian assumes a permanent commitment to support and care for the child until they reach adulthood or, in some cases, until age 21.\n- The child will no longer be in foster care and the guardian will no longer have caseworker visits or court reviews related to foster care.\n- The guardian may receive kinship guardianship assistance payments, which can provide financial support, though these payments may be less than foster care payments.\n- The guardian has legal duties to protect the child and meet their personal needs, including food, shelter, and clothing.\n- The guardian generally has the authority to decide on care, control, and custody, but the birth parents may still have som

## Multi-Query Retriever



In [21]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Multi-query retriever: built from the chat model and naive_retriever.
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=chat_model,
    retriever=naive_retriever,
)

In [22]:
# Same three-step RAG chain, with multi_query_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
multi_query_retrieval_chain = (
    {
        "context": itemgetter("question") | multi_query_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [23]:
# Ask the sample question through the multi-query chain and display only the answer text.
multi_query_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- The guardian is responsible for making decisions affecting the child's life and development, such as medical and educational choices.\n- Guardianship is a permanent commitment until the child reaches adulthood or, in some cases, until age 21 with the child's consent.\n- The child's parents still retain parental rights unless these rights have been terminated or surrendered, and they may visit the child or request custody return.\n- Guardians may be eligible for financial assistance, including kinship guardianship assistance payments, Medicaid, food stamps, and other benefits.\n- The guardian's responsibilities include protecting the child, providing personal needs, and making decisions regarding care, control, and physical custody.\n- Establishing guardianship involves a court process, including signing a Kinship Guardianship Assistance Agreement and possibly filing a petition for guardianship.\n- Upon the death or incapacity of t

## Parent Document Retriever

In [24]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient, models

# The loaded documents are used unchanged as the parent documents.
parent_docs = docs

# Splitter handed to the parent-document retriever as its child splitter (chunk_size=750).
child_splitter = RecursiveCharacterTextSplitter(
    chunk_size=750,
)

We'll need to set up a new Qdrant vectorstore - and we'll use another useful pattern to do so!

> NOTE: We are manually defining our embedding dimension, you'll need to change this if you're using a different embedding model.

In [25]:
from langchain_qdrant import QdrantVectorStore

# Separate in-memory Qdrant instance for the parent-document retriever.
client = QdrantClient(location=":memory:")

# The collection is created by hand, so the vector size (1536) and distance metric
# (cosine) are spelled out here; the size must match the embedding model used below.
client.create_collection(
    collection_name="full_documents",
    vectors_config=models.VectorParams(
        size=1536,
        distance=models.Distance.COSINE,
    ),
)

# Vector store bound to the collection created above.
parent_document_vectorstore = QdrantVectorStore(
    client=client,
    collection_name="full_documents",
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
)

Now we can create our `InMemoryStore` that will hold our "parent documents" - and build our retriever!

In [26]:
# In-memory docstore for the parent documents.
store = InMemoryStore()

# Retriever wired to the vector store, the docstore and the child splitter defined above.
parent_document_retriever = ParentDocumentRetriever(
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=parent_document_vectorstore,
)

By default, this is empty as we haven't added any documents - let's add some now!

In [27]:
# Populate the retriever with the parent documents; no explicit ids are supplied (ids=None).
parent_document_retriever.add_documents(
    parent_docs,
    ids=None,
)

We'll create the same chain we did before - but substitute our new `parent_document_retriever`.

In [28]:
# Same three-step RAG chain, with parent_document_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
parent_document_retrieval_chain = (
    {
        "context": itemgetter("question") | parent_document_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [29]:
# Ask the sample question through the parent-document chain and display only the answer text.
parent_document_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- The guardian takes on a permanent responsibility to support and care for the child until they reach adulthood, or possibly until age 21 with the child's consent.\n- The guardian has the authority to make medical and educational decisions for the child.\n- The child's case is no longer managed by foster care agencies or caseworkers, and court reviews of the case are generally no longer required.\n- The guardian may be eligible for financial assistance such as kinship guardianship assistance payments, Medicaid, food stamps, SSI, or child support.\n- The child's parents may still retain parental rights unless these are terminated or surrendered, meaning they may still have visitation rights or the ability to petition for custody return.\n- The guardian is responsible for protecting the child's needs, including provision of food, clothing, shelter, and general care.\n\nAdditionally, guardianship is a legal arrangement that involves on

## Ensemble Retriever

In [30]:
from langchain.retrievers import EnsembleRetriever

# Every retriever built so far takes part in the ensemble.
retriever_list = [
    bm25_retriever,
    naive_retriever,
    parent_document_retriever,
    compression_retriever,
    multi_query_retriever,
]

# One identical weight per retriever: 1 / number of retrievers.
equal_weighting = [1 / len(retriever_list) for _ in retriever_list]

ensemble_retriever = EnsembleRetriever(
    weights=equal_weighting,
    retrievers=retriever_list,
)

We'll pack *all* of these retrievers together in an ensemble.

In [31]:
# Same three-step RAG chain, with ensemble_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
ensemble_retrieval_chain = (
    {
        "context": itemgetter("question") | ensemble_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [35]:
# Ask the sample question through the ensemble chain and display only the answer text.
ensemble_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n1. The guardian has the responsibility and authority to make decisions affecting the child's life, including medical, educational, and physical custody decisions.\n\n2. The child's biological parents still retain their parental rights unless these rights are terminated or surrendered, which means they may still visit the child or seek custody through court petitions.\n\n3. Upon becoming a guardian, the guardian no longer has involvement from foster care caseworkers, and the child is no longer in foster care.\n\n4. Guardians may be eligible for financial assistance, such as kinship guardianship assistance payments, Medicaid, and other benefits, but these may be up to a certain amount and may continue until the child reaches 18 or 21, depending on circumstances.\n\n5. The guardian's responsibilities include protecting the child, meeting personal needs, applying for benefits, and managing travel and medical decisions.\n\n6. Guardians c

In [32]:
# NOTE(review): this question is about loans, while the PDF loaded in this notebook is
# the KinGAP guardianship guide — it looks like a leftover from another dataset.
# Confirm whether it is kept on purpose as an out-of-domain probe.
ensemble_retrieval_chain.invoke({"question" : "What is the most common issue with loans?"})["response"].content

'The most common issue with loans is typically repayment difficulties, which can include missed payments, inability to repay the loan on time, or financial hardship that prevents consistent repayments. These issues can lead to loan defaults, penalties, and damage to credit scores.'

In [33]:
# NOTE(review): complaint-handling question; the loaded PDF is the KinGAP guardianship
# guide, so this appears to be out-of-domain — confirm it is intentional.
ensemble_retrieval_chain.invoke({"question" : "Did any complaints not get handled in a timely manner?"})["response"].content

'Based on the provided context, there is no specific mention of complaints not being handled in a timely manner. Therefore, I do not know if any complaints did not get handled promptly.'

In [34]:
# NOTE(review): loan-repayment question; the loaded PDF is the KinGAP guardianship
# guide, so this appears to be out-of-domain — confirm it is intentional.
ensemble_retrieval_chain.invoke({"question" : "Why did people fail to pay back their loans?"})["response"].content

'People failed to pay back their loans for various reasons, which may include financial difficulties, loss of income, unexpected expenses, or inability to meet repayment terms. However, based on the provided context, there is no specific information explaining why individuals failed to pay back their loans. Therefore, I do not know the exact reasons.'

## Semantic Chunking (not a retrieval method)

In [36]:
from langchain_experimental.text_splitter import SemanticChunker

# Semantic chunker driven by the shared embedding model, using the "percentile" breakpoint rule.
semantic_chunker = SemanticChunker(
    embeddings,
    breakpoint_threshold_type="percentile",
)

In [37]:
# Semantically chunk only the first 20 loaded documents (docs[:20]), not the whole list.
semantic_documents = semantic_chunker.split_documents(docs[:20])

In [38]:
# Index the semantically chunked documents in their own in-memory Qdrant collection.
# The collection name follows the "FosterCarePolicies" naming of the main vector store,
# since these chunks come from the same foster-care PDF.
semantic_vectorstore = Qdrant.from_documents(
    semantic_documents,
    embeddings,
    location=":memory:",
    collection_name="FosterCarePolicies_Semantic_Chunks"
)

In [39]:
# Retriever over the semantic-chunk collection, requesting k=10 results per query.
semantic_retriever = semantic_vectorstore.as_retriever(
    search_kwargs={"k": 10},
)

In [40]:
# Same three-step RAG chain, with semantic_retriever supplying the context.
# Invoke with: {"question": "<user question>"}
semantic_retrieval_chain = (
    {
        "context": itemgetter("question") | semantic_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [41]:
# Ask the sample question through the semantic-chunk chain and display only the answer text.
semantic_retrieval_chain.invoke(
    {"question": "What are some ramifications of legal guardianship?"}
)["response"].content

"Some ramifications of legal guardianship include:\n\n- It is a permanent commitment to be responsible for and support the child until they reach adulthood, and sometimes until age 21 with the child's consent.\n- Guardians have the authority to make major decisions for the child, such as medical and educational decisions.\n- Once guardianship is established, the child's parents still retain parental rights unless these rights are terminated or surrendered.\n- Guardians are responsible for meeting the personal needs of the child, including food, clothing, and shelter.\n- Guardianship may impact eligibility for certain financial assistance or services, and guardians must notify the appropriate agencies of any changes in circumstances affecting eligibility.\n- The relationship between the child and birth parents can change, especially if the birth parents' rights have not been terminated.\n- Guardianship can be temporary or permanent, but the specifics depend on the court's decisions and 

# Evaluation

In [82]:
# Based on Session 7's notebook (Synthetic_Data_Generation_RAGAS_&_LangSmith)
from uuid import uuid4

# Give this run its own project name (fixed prefix + 8 random hex characters).
projectUuid = f"AIM - Advanced Retrieval tournament - {uuid4().hex[0:8]}"

# Configure tracing through environment variables. Each setting is written under both
# its LANGCHAIN_* and LANGSMITH_* spelling. Variable names are case-sensitive on POSIX
# systems, so only the correctly cased LANGCHAIN_TRACING_V2 is set.
os.environ.update({
    "LANGCHAIN_PROJECT": projectUuid,
    "LANGSMITH_PROJECT": projectUuid,
    "LANGCHAIN_TRACING_V2": "true",
    "LANGSMITH_TRACING": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com/",
    "LANGSMITH_ENDPOINT": "https://api.smith.langchain.com/",
})

# NOTE(review): no API key is set here — presumably it is already present in the
# environment. Uncomment the next line to prompt for it instead.
# os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API Key:")

In [56]:
import nltk
# Fetch the two NLTK resources downloaded for the evaluation section; the value of the
# last call is what the cell displays.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/kyle/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/kyle/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

### Generate synthetic dataset using Ragas (from our original PDF)

In [42]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Ragas wrappers around the LangChain chat model and embeddings used for test-set generation.
# OpenAIEmbeddings() is created without a model argument here, so it uses the library default
# rather than the "text-embedding-3-small" model used for the vector stores.
generator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-nano")
)
generator_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings()
)

  for match in re.finditer('{0}\s*'.format(re.escape(sent)), self.original_text):
  txt = re.sub('(?<={0})\.'.format(am), '∯', txt)
  txt = re.sub('(?<={0})\.'.format(am), '∯', txt)


In [44]:
from ragas.testset import TestsetGenerator

# Build the synthetic test set from the first 20 loaded documents, asking for 10 samples.
generator = TestsetGenerator(
    embedding_model=generator_embeddings,
    llm=generator_llm,
)
dataset = generator.generate_with_langchain_docs(
    docs[:20],
    testset_size=10,
)

Applying HeadlinesExtractor:   0%|          | 0/13 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/20 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/25 [00:00<?, ?it/s]

Property 'summary' already exists in node 'b30bd5'. Skipping!
Property 'summary' already exists in node 'dea004'. Skipping!
Property 'summary' already exists in node 'db6624'. Skipping!
Property 'summary' already exists in node 'abf4b5'. Skipping!
Property 'summary' already exists in node 'aefe54'. Skipping!
Property 'summary' already exists in node '5de378'. Skipping!
Property 'summary' already exists in node '47da4f'. Skipping!
Property 'summary' already exists in node 'e87bbf'. Skipping!
Property 'summary' already exists in node 'ea1a17'. Skipping!
Property 'summary' already exists in node 'e1f259'. Skipping!
Property 'summary' already exists in node 'b9ee8f'. Skipping!
Property 'summary' already exists in node 'b69664'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/2 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/29 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node 'b30bd5'. Skipping!
Property 'summary_embedding' already exists in node 'db6624'. Skipping!
Property 'summary_embedding' already exists in node 'dea004'. Skipping!
Property 'summary_embedding' already exists in node 'abf4b5'. Skipping!
Property 'summary_embedding' already exists in node 'e1f259'. Skipping!
Property 'summary_embedding' already exists in node 'e87bbf'. Skipping!
Property 'summary_embedding' already exists in node 'aefe54'. Skipping!
Property 'summary_embedding' already exists in node '47da4f'. Skipping!
Property 'summary_embedding' already exists in node 'b9ee8f'. Skipping!
Property 'summary_embedding' already exists in node 'b69664'. Skipping!
Property 'summary_embedding' already exists in node 'ea1a17'. Skipping!
Property 'summary_embedding' already exists in node '5de378'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/12 [00:00<?, ?it/s]

In [45]:
# Show the generated test set as a DataFrame (questions, reference contexts, reference answers, synthesizer).
dataset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What is KinGAP and how does it help families l...,[Know Your Permanency Options: KinGAP 6 State ...,KinGAP is a permanency option where state poli...,single_hop_specifc_query_synthesizer
1,What does LDSS stand for?,[Know Your Permanency Options: KinGAP 6 State ...,The context does not specify what LDSS stands ...,single_hop_specifc_query_synthesizer
2,"As a Family Law Attorney, could you explain wh...",[STEP 1: Applying for KinGAP Discuss KinGAP wi...,"In the context of kinship guardianship, ACS re...",single_hop_specifc_query_synthesizer
3,What does LDSS stand for in the context of kin...,[STEP 1: Applying for KinGAP Discuss KinGAP wi...,LDSS refers to the local department of social ...,single_hop_specifc_query_synthesizer
4,How does state polcy and application review an...,[<1-hop>\n\nKnow Your Permanency Options: KinG...,State policy encourages siblings to be placed ...,multi_hop_abstract_query_synthesizer
5,What are the steps involved in applying for Ki...,[<1-hop>\n\nKnow Your Permanency Options: KinG...,"To apply for KinGAP, you should first discuss ...",multi_hop_abstract_query_synthesizer
6,Based on the assessment of kinship guardianshi...,[<1-hop>\n\nKnow Your Permanency Options: KinG...,The application process for KinGAP begins with...,multi_hop_abstract_query_synthesizer
7,How does the kinGAP application process relate...,[<1-hop>\n\nSTEP 1: Applying for KinGAP Discus...,The kinGAP application process involves workin...,multi_hop_abstract_query_synthesizer
8,"How does ACS determine eligibility for KinGAP,...",[<1-hop>\n\nKnow Your Permanency Options: KinG...,ACS encourages siblings to be placed together ...,multi_hop_specific_query_synthesizer
9,ACS kinGAP how do I apply for kinGAP with ACS ...,[<1-hop>\n\nKnow Your Permanency Options: KinG...,"To apply for kinGAP with ACS, you need to disc...",multi_hop_specific_query_synthesizer


In [46]:
# Use Ragas for eval (from Session 8 notebook Evaluating_RAG_with_RAGAS)
import time
import copy

In [84]:
# Tracing fix from Session 4 Discord
from langchain.callbacks import LangChainTracer
from langchain.schema.runnable import RunnableConfig
# Tracer bound to the project name stored in LANGSMITH_PROJECT. In this notebook it is
# only referenced from the commented-out "callbacks" entries in the generation loops.
tracer = LangChainTracer(project_name=os.environ["LANGSMITH_PROJECT"])

### Run SDG (synthetic data generation) examples through each retriever (and trace with tags in LangSmith so we can compare latency and cost)

In [48]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
naive_dataset = copy.deepcopy(dataset)

for test_row in naive_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["Naive retriever"], "callbacks": [tracer]}.
    response = naive_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(2)  # To try to avoid rate limiting.

In [49]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
bm25_dataset = copy.deepcopy(dataset)

for test_row in bm25_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["BM25 retriever"], "callbacks": [tracer]}.
    response = bm25_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(2)  # To try to avoid rate limiting.

In [58]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
compression_dataset = copy.deepcopy(dataset)

for test_row in compression_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["Compression retriever"], "callbacks": [tracer]}.
    response = contextual_compression_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(6)  # To try to avoid rate limiting (longer pause than the other loops).

In [59]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
multi_query_dataset = copy.deepcopy(dataset)

for test_row in multi_query_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["Multi query retriever"], "callbacks": [tracer]}.
    response = multi_query_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(2)  # To try to avoid rate limiting.

In [60]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
parent_document_dataset = copy.deepcopy(dataset)

for test_row in parent_document_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["Parent retriever"], "callbacks": [tracer]}.
    response = parent_document_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(2)  # To try to avoid rate limiting.

In [61]:
# Work on a private copy so the shared synthetic `dataset` stays untouched.
ensemble_dataset = copy.deepcopy(dataset)

for test_row in ensemble_dataset:
    sample = test_row.eval_sample
    # The empty config dict is where LangSmith options can be supplied, e.g.
    # {"tags": ["Ensemble retriever"], "callbacks": [tracer]}.
    response = ensemble_retrieval_chain.invoke({"question": sample.user_input}, {})
    # The chat model's message object is stored as-is; a later cell converts it to text.
    sample.response = response["response"]
    sample.retrieved_contexts = [doc.page_content for doc in response["context"]]
    time.sleep(2)  # To try to avoid rate limiting.

In [None]:
# The response must be a plain string (not an AIMessage object) for pandas, but the
# generation loops stored the chain's message object. Replace each stored response
# with its `.content` text.
#
# getattr(..., "content", <fallback>) returns the value unchanged when it has no
# `.content` attribute (i.e. it is already a string), so re-running this cell is safe.
for evaluated_dataset in (
    naive_dataset,
    bm25_dataset,
    compression_dataset,
    multi_query_dataset,
    parent_document_dataset,
    ensemble_dataset,
):
    for test_row in evaluated_dataset:
        raw_response = test_row.eval_sample.response
        test_row.eval_sample.response = getattr(raw_response, "content", raw_response)

In [56]:
# Spot-check: display the first naive-retriever response.
naive_dataset.samples[0].eval_sample.response

"KinGAP, or the Kinship Guardianship Assistance Program, is a permanency option in New York that supports children in foster care who are placed with adults related to them or with adults who had a positive relationship with the child or their family before foster care placement. The program provides financial assistance and often medical coverage to help families like siblings stay together by enabling relatives or caring adults to assume guardianship of the child, fostering a permanent relationship.\n\nHow KinGAP Helps Families:\n- It allows children to remain with familiar and connected family members or caregivers, promoting stability and ongoing relationships.\n- It provides financial support similar to foster care payments, helping guardians meet the child's needs.\n- It offers medical coverage for the child, which can continue even if the guardianship ends before the child turns 18, up to age 21 in some cases.\n- The program encourages sibling placement together when possible, f

In [53]:
# Spot-check: display the first BM25 response.
bm25_dataset.samples[0].eval_sample.response

"KinGAP, or Kinship Guardianship Assistance Program, is a permanency option in New York designed to help caregivers, often relatives like siblings or other kin, assume guardianship of a child when returning home is not possible. It encourages placements with family members and provides financial and support services to help these families keep the child within their kinship network for long-term stability.\n\nHere's how it helps families stay together in permanency options:\n- **Supports kinship guardianship:** KinGAP allows approved caregivers to become legal guardians of the child, reducing the likelihood of a child being placed outside their family or community.\n- **Financial assistance:** Guardians receive monthly Kinship Guardianship Assistance payments, which are comparable to foster care payments and can be adjusted based on income and family size.\n- **Expenses reimbursement:** There is also a one-time reimbursement of up to $2,000 for expenses related to assuming guardianship

### Run RAGAS evaluations to get retrieval-specific metrics

In [63]:
from ragas import EvaluationDataset

In [64]:
from ragas.llms import LangchainLLMWrapper
from ragas import evaluate, RunConfig

# Run configuration shared by every evaluate() call below (timeout=360).
custom_run_config = RunConfig(
    timeout=360,
)

# Judge model for the Ragas metrics, wrapped for use by Ragas.
evaluator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-mini")
)

In [65]:
from ragas.metrics import Faithfulness, ResponseRelevancy, LLMContextPrecisionWithReference, LLMContextRecall

# Metrics considered but currently left out:
#   NoiseSensitivity, ContextEntityRecall, FactualCorrectness

# Metrics computed for every retriever.
metrics = [
    Faithfulness(),
    ResponseRelevancy(),
    LLMContextPrecisionWithReference(),
    LLMContextRecall(),
]

In [66]:
# Round-trip the naive-retriever samples through pandas into an EvaluationDataset, then score them.
naive_evaluation_dataset = EvaluationDataset.from_pandas(
    naive_dataset.to_pandas()
)
result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=naive_evaluation_dataset,
)
result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.9296, 'answer_relevancy': 0.9641, 'llm_context_precision_with_reference': 0.6737, 'context_recall': 1.0000}

In [67]:
# Round-trip the BM25 samples through pandas into an EvaluationDataset, then score them.
bm25 = EvaluationDataset.from_pandas(
    bm25_dataset.to_pandas()
)
bm25_result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=bm25,
)
bm25_result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.9539, 'answer_relevancy': 0.9663, 'llm_context_precision_with_reference': 0.8727, 'context_recall': 0.9722}

In [68]:
# Round-trip the rerank samples through pandas into an EvaluationDataset, then score them.
compression_evaluation_dataset = EvaluationDataset.from_pandas(
    compression_dataset.to_pandas()
)
compression_result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=compression_evaluation_dataset,
)
compression_result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.9362, 'answer_relevancy': 0.9664, 'llm_context_precision_with_reference': 1.0000, 'context_recall': 0.9306}

In [69]:
# Round-trip the multi-query samples through pandas into an EvaluationDataset, then score them.
multi_query_evaluation_dataset = EvaluationDataset.from_pandas(
    multi_query_dataset.to_pandas()
)
multiquery_result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=multi_query_evaluation_dataset,
)
multiquery_result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.9419, 'answer_relevancy': 0.9586, 'llm_context_precision_with_reference': 0.6515, 'context_recall': 1.0000}

In [70]:
# Round-trip the parent-document samples through pandas into an EvaluationDataset, then score them.
parent_document_evaluation_dataset = EvaluationDataset.from_pandas(
    parent_document_dataset.to_pandas()
)
parent_document_result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=parent_document_evaluation_dataset,
)
parent_document_result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.8857, 'answer_relevancy': 0.9592, 'llm_context_precision_with_reference': 0.8819, 'context_recall': 0.9514}

In [71]:
# Round-trip the ensemble samples through pandas into an EvaluationDataset, then score them.
ensemble_evaluation_dataset = EvaluationDataset.from_pandas(
    ensemble_dataset.to_pandas()
)
ensemble_result = evaluate(
    run_config=custom_run_config,
    llm=evaluator_llm,
    metrics=metrics,
    dataset=ensemble_evaluation_dataset,
)
ensemble_result

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

{'faithfulness': 0.9822, 'answer_relevancy': 0.9598, 'llm_context_precision_with_reference': 0.8246, 'context_recall': 1.0000}

### Analysis

| Retrieval Method | Faithfulness | Response relevancy | Context precision (llm_context_precision_with_reference) | Context recall |
|---|---|---|---|---|
| Naive Retrieval | 0.9296 | 0.9641 | 0.6737 | 1.0000 |
| BM25 | 0.9539 | 0.9663 | 0.8727 | 0.9722 |
| Contextual Compression | 0.9362 | 0.9664 | 1.0000 | 0.9306 |
| Multi-Query | 0.9419 | 0.9586 | 0.6515 | 1.0000 |
| Parent Document | 0.8857 | 0.9592 | 0.8819 | 0.9514 |
| Ensemble | 0.9822 | 0.9598 | 0.8246 | 1.0000 |
