In [1]:
pip install --upgrade --quiet  flashrank

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install --upgrade cryptography

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install -q ragas

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Set up the OpenAI API key
import os
os.environ["OPENAI_API_KEY"] = "

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.schema import Document  
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup

# Specify the folder containing PDFs
pdf_folder = "docs"

# Dynamically list all PDF files in the folder
file_paths = [os.path.join(pdf_folder, file) for file in os.listdir(pdf_folder) if file.endswith('.pdf')]

documents = []

# Load all PDFs in the folder
for file_path in tqdm(file_paths, desc="Loading PDFs"):
    loader = PyPDFLoader(file_path)
    pdf_docs = loader.load()
    documents.extend(pdf_docs)

# List of URLs to scrape
urls = [
    "https://docs.sennheiser-connect.com/1.7/faq/index.html",
    "https://www.sennheiser-hearing.com/de-DE/faq/",
    "https://docs.sennheiser-connect.com/1.7/release-notes/index.html"
]

# Function to scrape text from a webpage
def scrape_webpage(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract text content from the webpage
    content = soup.get_text(separator="\n").strip()
    return content

# Add webpage content to documents
for url in tqdm(urls, desc="Scraping URLs"):
    try:
        content = scrape_webpage(url)
        # Wrap webpage content as a Document
        document = Document(page_content=content, metadata={"source": url})
        documents.append(document)
    except Exception as e:
        print(f"Failed to scrape {url}: {e}")


Loading PDFs:  44%|████▍     | 4/9 [00:10<00:14,  2.97s/it]Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 35 0 (offset 0)
Ignoring wrong pointing object 37 0 (offset 0)
Ignoring wrong pointing object 39 0 (offset 0)
Ignoring wrong pointing object 41 0 (offset 0)
Ignoring wrong pointing object 43 0 (offset 0)
Ignoring wrong pointing object 54 0 (offset 0)
Ignoring wrong pointing object 56 0 (offset 0)
Ignoring wrong pointing object 82 0 (offset 0)
Ignoring wrong pointing object 107 0 (offset 0)
Ignoring wrong pointing object 109 0 (offset 0)
Ignoring wrong pointing object 111 0 (offset 0)
Loading PDFs:  78%|███████▊  | 7/9 [00:18<00:06,  3.08s/it]Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 16 0

In [7]:
len(documents)

476

## Chunking, embedding creation and putting them in a vectorstore

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
doc_chunks = text_splitter.split_documents(documents)
for id, text in enumerate(doc_chunks):
    text.metadata["id"] = id

In [9]:
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = FAISS.from_documents(doc_chunks, embedding)

In [None]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

In [10]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 50})

In [11]:
len(doc_chunks)

1493

## Prompt Template

In [12]:
## Define prompt

template = """You are an expert assistant for answering questions based on the Sennheiser product knowledge base, including manuals, specifications, and operational details. 
Use the following retrieved context to answer the question accurately.
If the answer is not explicitly found in the retrieved context, reply with 'I don't know.' 
Ensure the answer is precise and formatted succinctly for clarity.
Question: {question}
Context: {context}
Answer: 
"""

In [13]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

prompt = ChatPromptTemplate.from_template(template)

## Baseline Chain

In [14]:
from langchain_openai import ChatOpenAI
# Generator LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [98]:
retriever_base = vectorstore.as_retriever(search_kwargs={"k": 3})

In [99]:
chain = (
    {"context": retriever_base, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

## Dataset Creation/ Question Ground Truth Pairs for RAGAS

The RAGAS (RAG Assessment) Framework is used to evaluate the RAG components in this study. 
In this section the evaluation dataset will be created. The RAGAs Framework expects four variables: questions, answers, contexts, and ground truths.

In [15]:
questions = [
    "What does it mean when the LED display flashes yellow In Accentum Wireless Around Ear Headphones? Select the correct answer from A, B, C, D. Only one answer is correct. A) Battery charge is 0 % to 20 %. B) Battery charge is 90 % to 100 %. C) Battery charge is 20 % to 90 %. D) Headphones are in Bluetooth pairing mode",
    "Is it possible to setup voice prompt? If yes, how to do that in Accentum Wireless Around Ear Headphones?",
    "When do I replace the ear pads and where can I find the replacement ear pads for Accentum Wireless Around Ear Headphones?",
    "What is the charging temperature range of Accentum wireless around ear headphones?",
    "What is the weight of the Momentum 4 Wireless Around Ear Headphones?",
    "What is the charging time of the rechargeable battery in Momentum 4 Wireless Around Ear Headphones?",
    "What does it mean when the LED display flashes red 3x repeatedly during phone calls in Momentum 4 Wireless Headphones? Select the correct answer from A, B, C, D. Only one answer is correct.  A) Incoming call B) switch off C) Rechargeable battery is almost empty (less than 2% charged) D) incoming call with an almost empty battery.",
    "What does it mean when the LED display presents 3 white during charging in Momentum 4 wireless around ear headphones? Select the correct answer from A, B, C, D. Only one answer is correct.  A) Incoming call B) switch off C) Rechargeable battery is almost empty (less than 2% charged) D) incoming call with an almost empty battery.",
    "How many audio sources can I connect at the same time to the BTT100 Audio transmitter?",
    "What happens to the LED light colour when I switch the transmitter on and how do I know whether a connection is established in BTT100 Bluetooth Audio Transmitter?",
    "What is the number of transmission channels in BTT100 Bluetooth Audio Transmitter?",
    "What is the weight of BTT100 Bluetooth Audio Transmitter?",
    "What does green light in momentum sport's charging case LED indicate with earbuds inserted in? Select the correct answer from A, B, C, D. Only one answer is correct.  A) Earbuds are fully charged, charging case can still charge at least one complete cycle. B) Charging case is fully charged. C) Firmware update is being performed. D) Rechargeable battery of the charging case is almost empty.",
    "What does it mean when the Momentum Sport's earbud LED flashes red 3x ? Select the correct answer from A, B, C, D. Only one answer is correct.  A) Rechargeable battery is almost empty. B) Bluetooth pairing or earbud pairing failed. C) System error is present. Perform a reset (see page > 63). D) Bluetooth pairing or earbud pairing was successful.",
    "What does 'Podcast' voice prompt mean in Momentum Sport?",
    "What is Momentum Sport's microphone frequency?",
    "What is the transducer size in Momentum True Wireless 4 earbuds?",
    "What should you do to reset MOMENTUM True Wireless 4 earbuds to factory settings?",
    "How can you switch to Transparency Mode on MOMENTUM True Wireless 4 earbuds?",
    "In the scenario 1 in Network whitepaper what is the only component needing active configuration?",
    "Why does the first ruleset disallow all access using IPv6?",
    "What happens when you activate 'Low Latency' mode on MOMENTUM True Wireless 4 earbuds?",
    "What are the two modes of operation for MobileConnect, and how do they differ?",
    "What is required for using MobileConnect in Standalone Mode?",
    "How does MobileConnect handle data load per client in Standalone Mode?",
    "What network recommendation is critical for using MobileConnect?",
    "What firmware version is required for using the Digital 6000 series with WSM?",
    "What operating systems are compatible with the WSM software?",
    "What hardware is required for setting up MobileConnect in Standalone Mode?",
    "What is the recommended Wi-Fi configuration for optimal performance of MobileConnect?",
    "What kind of delay or latency can I expect in the audio transmission for iOS devices?  Select the correct answer from A, B, C, D. Only one answer is correct. A) The latency is 50-65ms B) The latency is 50-55s C) The latency is 50-55ms D) The latency is 40-55ms",
    "What is the release date of MobileConnect Manager 1.4.2?",
    "What is the release date of MobileConnect Station 7.2.0?",
    "What is the battery life of the Bose QuietComfort Earbuds II during continuous playback with ANC on?",
    "Does the Sony WH-1000XM5 support multi-device Bluetooth pairing, and how many devices can it connect to simultaneously?",
    "What is the water resistance rating of the Apple AirPods Pro 2?"
]

ground_truths = [
    "C) Battery charge is 20 % to 90 %.",
    "Yes, it is possible. To set up the voice prompts you require the Smart Control App (>18). In the app settings, you can activate voice prompts and sound signals (also the default setting) or deactivate them.",
    "There is no specific timeframe for ear pad replacement. For hygiene reasons, you should replace your ear pads from time to time. Replacement ear pads are available at www.sennheiser-hearing.com/accentum-wireless.",
    "Between +10 °C to +40 °C",
    "Approximately 293g.",
    "Approximately 2 hours",
    "D) incoming call with an almost empty battery.",
    "A) Incoming call",
    "You can simultaneously connect 2 different sources to the transmitter.",
    "The LED lights up red when the transmitter is switched on and once the connection is established, the LED lights up blue.",
    "79",
    "85g.",
    "A) Earbuds are fully charged, charging case can still charge at least one complete cycle.",
    "B) Bluetooth pairing or earbud pairing failed.",
    "Equalizer preset for podcasts is activated (improved speech intelligibility).",
    "100 Hz to 10 kHz.",
    "7mm",
    "To reset, disconnect all Bluetooth connections, place one earbud in the charging case, and hold the touch panel of the other earbud for 15 seconds.",
    "To switch to Transparency Mode, tap the touch control panel of the left earbud once.",
    "The only component needing active configuration is the SRV-record in the DNS-Server",
    "Because MobileConnect Manager is currently only supporting IPv4",
    "Activating 'Low Latency' mode minimizes delay between audio and video but may reduce Bluetooth transmission range.",
    "The two modes are Standalone Mode and Manager Mode. Standalone Mode is easier to set up with minimal IT support, focusing on audio streaming via QR codes without internet connectivity. Manager Mode offers full functionality with IT infrastructure, DNS setup, and centralized administration.",
    "An enterprise-grade router, access point(s), and a local network with DHCP enabled are required. Streams are accessed using QR codes.",
    "The data load is 165 kbps per unique client connection.",
    "The use of the 5 GHz Wi-Fi band is recommended for better performance and reduced interference.",
    "The required firmware version for the EM 6000 is 3.2.1, and for the L 6000, it is 3.0.1.",
    "WSM is compatible with Windows 10/11 (32/64-bit) and macOS 13 Ventura and macOS 14 Sonoma, with a minimum of 8 GB RAM.",
    "The hardware requirements include an enterprise-grade router, enterprise-grade access points, Server for MobileConnect Manager installation, A device providing DNS / DHCP in the network.",
    "It is recommended to use the 5 GHz band with a Quality of Service configuration. Each access point should ideally support a maximum of 50 clients, with a minimum data rate of 12 Mbps.",
    "C) The latency is 50-55ms",
    "21.09.2022",
    "18.01.2024",
    "I don't know",
    "I don't know",
    "I don't know"
]


In [41]:
answer_baseline = []
content_baseline = []

# Inference

for q in questions:
    answer_baseline.append(chain.invoke(q))
    content_baseline.append([docs.page_content for docs in retriever_base.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api

In [42]:
data_baseline = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_baseline,
    "contexts": content_baseline
}

from datasets import Dataset

dataset_baseline = Dataset.from_dict(data_baseline)

dataset_baseline

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [43]:
# Save the dataset to a JSONL file
dataset_baseline.to_json("datasets/topK@25/dataset_baseline.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_baseline.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_baseline.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_baseline.to_json("datasets/topK@50/dataset_baseline.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_baseline.jsonl'")

In [22]:
from datasets import load_dataset

In [45]:
dataset_base = load_dataset('json', data_files="datasets/topK@25/dataset_baseline.jsonl")['train']


Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_base = load_dataset('json', data_files="datasets/topK@50/dataset_baseline.jsonl")['train']

In [25]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

In [47]:
result = evaluate(dataset=dataset_base,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision,
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

Initial retrieval topK@25

In [48]:
result

{'faithfulness': 0.6747, 'answer_relevancy': 0.8409, 'context_recall': 0.6343, 'context_precision': 0.7037}

In [49]:
dfs_base = result.to_pandas()

In [50]:
dfs_base.to_excel("eval_results/topK@25/evaluation_baseline_results.xlsx", index=False)

Retreival Latency

In [104]:
import time

In [100]:
latencies = []
content_base_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_base_lat.append([docs.page_content for docs in retriever_base.get_relevant_documents(question)])
    end_time = time.time()
    latencies.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embedding

In [102]:
avg_latency = sum(latencies) / len(latencies)
print(f"Average Latency: {avg_latency:.4f} seconds")

Average Latency: 0.1896 seconds


In [103]:
data_base_lat = [{"Question": question, "Latency (seconds)": latency} 
                    for question, latency in zip(questions, latencies)]

# Save to a .jsonl file
with open("Latencies/latencies_base.jsonl", "w") as f:
    for entry in data_base_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_base.jsonl'")

Latencies saved to 'latencies_base.jsonl'


## Flashrank Cross-Encoder Reranker (ms-marco-MultiBERT-L-12)  Integration

In [16]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_openai import ChatOpenAI

compressor = FlashrankRerank(model="ms-marco-MultiBERT-L-12", top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)


INFO:flashrank.Ranker:Downloading ms-marco-MultiBERT-L-12...
ms-marco-MultiBERT-L-12.zip: 100%|██████████| 98.7M/98.7M [00:09<00:00, 10.4MiB/s]


In [17]:
chain_flashrank = (
    {"context": compression_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Evaluation Dataset Creation

In [18]:
answer_flashrank = []
content_flashrank = []

# Inference

for q in questions:
    answer_flashrank.append(chain_flashrank.invoke(q))
    content_flashrank.append([docs.page_content for docs in compression_retriever.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  content_flashrank.append([docs.page_content for docs in compression_retriever.get_relevant_documents(q)])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request:

In [19]:
data_flashrank = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_flashrank,
    "contexts": content_flashrank
}

from datasets import Dataset

dataset_flashrank = Dataset.from_dict(data_flashrank)

dataset_flashrank

INFO:datasets:PyTorch version 2.5.1 available.


Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [26]:
# Save the dataset to a JSONL file
dataset_flashrank.to_json("datasets/topK@10/dataset_flashrank.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flashrank.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_flashrank.jsonl'


In [23]:
# Save the dataset to a JSONL file
dataset_flashrank.to_json("datasets/topK@25/dataset_flashrank.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flashrank.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_flashrank.jsonl'


In [20]:
# Save the dataset to a JSONL file
dataset_flashrank.to_json("datasets/topK@50/dataset_flashrank.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flashrank.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_flashrank.jsonl'


In [27]:
dataset_flash = load_dataset('json', data_files="datasets/topK@10/dataset_flashrank.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_flash = load_dataset('json', data_files="datasets/topK@25/dataset_flashrank.jsonl")['train']

In [23]:
dataset_flash = load_dataset('json', data_files="datasets/topK@50/dataset_flashrank.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [26]:
result_flashrank = evaluate(dataset=dataset_flash,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

Initial Retrieval topK@10

In [29]:
result_flashrank

{'faithfulness': 0.6354, 'answer_relevancy': 0.7550, 'context_recall': 0.5093, 'context_precision': 0.7060}

In [30]:
dfs_flash = result_flashrank.to_pandas()

In [31]:
dfs_flash.to_excel("eval_results/topK@10/evaluation_flash_results.xlsx", index=False)

Initial Retrieval topK@25

In [27]:
result_flashrank

{'faithfulness': 0.4271, 'answer_relevancy': 0.6985, 'context_recall': 0.4907, 'context_precision': 0.7106}

In [28]:
dfs_flash = result_flashrank.to_pandas()

In [29]:
dfs_flash.to_excel("eval_results/topK@25/evaluation_flash_results.xlsx", index=False)

Initial Retrieval topK@50

In [27]:
result_flashrank

{'faithfulness': 0.4646, 'answer_relevancy': 0.6409, 'context_recall': 0.4421, 'context_precision': 0.6852}

In [29]:
dfs_flash = result_flashrank.to_pandas()
dfs_flash.to_excel("eval_results/topK@50/evaluation_flash_results.xlsx", index=False)

### Retrieval Latency

In [105]:
latencies_flash = []
content_flash_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_flash_lat.append([docs.page_content for docs in compression_retriever.get_relevant_documents(question)])
    end_time = time.time()
    latencies_flash.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embedding

In [106]:
avg_latency_flash = sum(latencies_flash) / len(latencies_flash)

In [107]:
print(f"Average Latency: {avg_latency_flash:.4f} seconds")

Average Latency: 3.7620 seconds


In [108]:
data_flash_lat = [{"Question": question, "Latency (seconds)": latency} 
                    for question, latency in zip(questions, latencies_flash)]

# Save to a .jsonl file
with open("Latencies/latencies_flash.jsonl", "w") as f:
    for entry in data_flash_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_flash.jsonl'")

Latencies saved to 'latencies_flash.jsonl'


## Cross-Encoder msmarco-distilbert-base-v2 Reranker Integrated Pipeline

In [30]:
compressor_tiny = FlashrankRerank(model="ms-marco-TinyBERT-L-2-v2", top_n=3)
compression_retriever_tiny = ContextualCompressionRetriever(
    base_compressor=compressor_tiny, base_retriever=retriever
)

INFO:flashrank.Ranker:Downloading ms-marco-TinyBERT-L-2-v2...
ms-marco-TinyBERT-L-2-v2.zip: 100%|██████████| 3.26M/3.26M [00:00<00:00, 11.0MiB/s]


Chain

In [33]:
chain_tiny = (
    {"context": compression_retriever_tiny, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Evaluation Dataset Creation

In [32]:
answer_tiny = []
content_tiny = []

# Inference

for q in questions:
    answer_tiny.append(chain_tiny.invoke(q))
    content_tiny.append([docs.page_content for docs in compression_retriever_tiny.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api

In [34]:
data_tiny = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_tiny,
    "contexts": content_tiny
}

from datasets import Dataset

dataset_tiny = Dataset.from_dict(data_tiny)

dataset_tiny

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [36]:
# Save the dataset to a JSONL file
dataset_tiny.to_json("datasets/topK@10/dataset_tiny.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_tiny.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_tiny.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_tiny.to_json("datasets/topK@25/dataset_tiny.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_tiny.jsonl'")

In [35]:
# Save the dataset to a JSONL file
dataset_tiny.to_json("datasets/topK@50/dataset_tiny.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_tiny.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_tiny.jsonl'


In [37]:
dataset_tiny = load_dataset('json', data_files="datasets/topK@10/dataset_tiny.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_tiny = load_dataset('json', data_files="datasets/topK@25/dataset_tiny.jsonl")['train']

In [36]:
dataset_tiny = load_dataset('json', data_files="datasets/topK@50/dataset_tiny.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

Evaluation

In [37]:
result_tiny = evaluate(dataset=dataset_tiny,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

TopK@10

In [39]:
result_tiny

{'faithfulness': 0.5930, 'answer_relevancy': 0.8068, 'context_recall': 0.6343, 'context_precision': 0.8102}

In [40]:
dfs_tiny = result_tiny.to_pandas()

In [41]:
dfs_tiny.to_excel("eval_results/topK@10/evaluation_tiny_results.xlsx", index=False)

TopK@25

In [66]:
result_tiny

{'faithfulness': 0.6328, 'answer_relevancy': 0.8394, 'context_recall': 0.6690, 'context_precision': 0.8009}

In [67]:
dfs_tiny = result_tiny.to_pandas()

In [68]:
dfs_tiny.to_excel("eval_results/topK@25/evaluation_tiny_results.xlsx", index=False)

TOpK@50

In [38]:
result_tiny

{'faithfulness': 0.6035, 'answer_relevancy': 0.8893, 'context_recall': 0.6481, 'context_precision': 0.8102}

In [39]:
dfs_tiny = result_tiny.to_pandas()
dfs_tiny.to_excel("eval_results/topK@50/evaluation_tiny_results.xlsx", index=False)

### Retrieval Latency

In [109]:
latencies_tiny = []
content_tiny_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_tiny_lat.append([docs.page_content for docs in compression_retriever_tiny.get_relevant_documents(question)])
    end_time = time.time()
    latencies_tiny.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embedding

In [110]:
avg_latency_tiny = sum(latencies_tiny) / len(latencies_tiny)
print(f"Average Latency: {avg_latency_tiny:.4f} seconds")

Average Latency: 0.2901 seconds


In [111]:
data_tiny_lat = [{"Question": question, "Latency (seconds)": latency} 
                    for question, latency in zip(questions, latencies_tiny)]

# Save to a .jsonl file
with open("Latencies/latencies_tiny.jsonl", "w") as f:
    for entry in data_tiny_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_tiny.jsonl'")

Latencies saved to 'latencies_tiny.jsonl'


## Cross-Encoder ms-marco-MiniLM-L-12-v2 Reranker Integrated Pipeline

In [40]:
compressor_cross_v2 = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2", top_n=3)
compression_retriever_cross_v2 = ContextualCompressionRetriever(
    base_compressor=compressor_cross_v2, base_retriever=retriever
)

INFO:flashrank.Ranker:Downloading ms-marco-MiniLM-L-12-v2...
ms-marco-MiniLM-L-12-v2.zip: 100%|██████████| 21.6M/21.6M [00:02<00:00, 10.5MiB/s]


In [41]:
chain_cross_v2 = (
    {"context": compression_retriever_cross_v2, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [42]:
answer_cross_v2 = []
content_cross_v2 = []

# Inference

for q in questions:
    answer_cross_v2.append(chain_cross_v2.invoke(q))
    content_cross_v2.append([docs.page_content for docs in compression_retriever_cross_v2.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api

In [43]:
data_cross_v2 = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_cross_v2,
    "contexts": content_cross_v2
}

from datasets import Dataset

dataset_cross_v2 = Dataset.from_dict(data_cross_v2)

dataset_cross_v2

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [46]:
# Save the dataset to a JSONL file
dataset_cross_v2.to_json("datasets/topK@10/dataset_cross_v2.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross_v2.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_cross_v2.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_cross_v2.to_json("datasets/topK@25/dataset_cross_v2.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross_v2.jsonl'")

In [45]:
# Save the dataset to a JSONL file
dataset_cross_v2.to_json("datasets/topK@50/dataset_cross_v2.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross_v2.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_cross_v2.jsonl'


In [47]:
dataset_cross_v2 = load_dataset('json', data_files="datasets/topK@10/dataset_cross_v2.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_cross_v2 = load_dataset('json', data_files="datasets/topK@25/dataset_cross_v2.jsonl")['train']

In [46]:
dataset_cross_v2 = load_dataset('json', data_files="datasets/topK@50/dataset_cross_v2.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [47]:
result_cross_v2 = evaluate(dataset=dataset_cross_v2,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

TopK@10

In [49]:
result_cross_v2

{'faithfulness': 0.6042, 'answer_relevancy': 0.8608, 'context_recall': 0.6481, 'context_precision': 0.8056}

In [50]:
dfs_cross_v2 = result_cross_v2.to_pandas()

In [51]:
dfs_cross_v2.to_excel("eval_results/topK@10/evaluation_cross_v2_results.xlsx", index=False)

TopK@25

In [76]:
result_cross_v2

{'faithfulness': 0.6694, 'answer_relevancy': 0.8666, 'context_recall': 0.7106, 'context_precision': 0.7940}

In [77]:
dfs_cross_v2 = result_cross_v2.to_pandas()

In [78]:
dfs_cross_v2.to_excel("eval_results/topK@25/evaluation_cross_v2_results.xlsx", index=False)

TopK@50

In [48]:
result_cross_v2

{'faithfulness': 0.6708, 'answer_relevancy': 0.8636, 'context_recall': 0.6574, 'context_precision': 0.8056}

In [49]:
dfs_cross_v2 = result_cross_v2.to_pandas()

In [50]:
dfs_cross_v2.to_excel("eval_results/topK@50/evaluation_cross_v2_results.xlsx", index=False)

### Retreival Latency

In [112]:
latencies_cross_v2 = []
content_cross_v2_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_cross_v2_lat.append([docs.page_content for docs in compression_retriever_cross_v2.get_relevant_documents(question)])
    end_time = time.time()
    latencies_cross_v2.append(end_time - start_time)
    
avg_latency_cross_v2 = sum(latencies_cross_v2) / len(latencies_cross_v2)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embedding

Average Latency: 1.9993 seconds


In [115]:
print(f"Average Latency: {avg_latency_cross_v2:.4f} seconds")

Average Latency: 1.9993 seconds


In [116]:
data_cross_v2_lat = [{"Question": question, "Latency (seconds)": latency} 
                    for question, latency in zip(questions, latencies_cross_v2)]

# Save to a .jsonl file
with open("Latencies/latencies_cross_v2.jsonl", "w") as f:
    for entry in data_cross_v2_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_cross_v2.jsonl'")

Latencies saved to 'latencies_cross_v2.jsonl'


## LLM Based Reranker rank-T5-flan Integrated Pipeline

In [83]:
compressor_flan = FlashrankRerank(model="rank-T5-flan", top_n=3)
compression_retriever_flan = ContextualCompressionRetriever(
    base_compressor=compressor_flan, base_retriever=retriever
)

INFO:flashrank.Ranker:Downloading rank-T5-flan...
rank-T5-flan.zip: 100%|██████████| 73.7M/73.7M [00:01<00:00, 42.9MiB/s]


In [85]:
chain_flan = (
    {"context": compression_retriever_flan, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [86]:
answer_flan = []
content_flan = []

# Inference

for q in questions:
    answer_flan.append(chain_flan.invoke(q))
    content_flan.append([docs.page_content for docs in compression_retriever_flan.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api

In [87]:
data_flan = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_flan,
    "contexts": content_flan
}

from datasets import Dataset

dataset_flan = Dataset.from_dict(data_flan)

dataset_flan

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [56]:
# Save the dataset to a JSONL file
dataset_flan.to_json("datasets/topK@10/dataset_flan.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flan.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_flan.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_flan.to_json("datasets/topK@25/dataset_flan.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flan.jsonl'")

In [88]:
# Save the dataset to a JSONL file
dataset_flan.to_json("datasets/topK@50/dataset_flan.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_flan.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_flan.jsonl'


In [57]:
dataset_flan = load_dataset('json', data_files="datasets/topK@10/dataset_flan.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_flan = load_dataset('json', data_files="datasets/topK@25/dataset_flan.jsonl")['train']

In [89]:
dataset_flan = load_dataset('json', data_files="datasets/topK@50/dataset_flan.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [90]:
result_flan = evaluate(dataset=dataset_flan,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

topK@10

In [59]:
result_flan

{'faithfulness': 0.3750, 'answer_relevancy': 0.7699, 'context_recall': 0.5023, 'context_precision': 0.7222}

In [60]:
dfs_flan = result_flan.to_pandas()

In [61]:
dfs_flan.to_excel("eval_results/topK@10/evaluation_flan_results.xlsx", index=False)

topK@25

In [86]:
result_flan

{'faithfulness': 0.3521, 'answer_relevancy': 0.7641, 'context_recall': 0.5694, 'context_precision': 0.6806}

In [87]:
dfs_flan = result_flan.to_pandas()

In [88]:
dfs_flan.to_excel("eval_results/topK@25/evaluation_flan_results.xlsx", index=False)

topK@50

In [91]:
result_flan

{'faithfulness': 0.2396, 'answer_relevancy': 0.6743, 'context_recall': 0.4468, 'context_precision': 0.5856}

In [92]:
dfs_flan = result_flan.to_pandas()
dfs_flan.to_excel("eval_results/topK@50/evaluation_flan_results.xlsx", index=False)

### Retrieval Latency

In [117]:
latencies_flan = []
content_flan_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_flan_lat.append([docs.page_content for docs in compression_retriever_flan.get_relevant_documents(question)])
    end_time = time.time()
    latencies_flan.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embedding

In [119]:
avg_latency_flan = sum(latencies_flan) / len(latencies_flan)
print(f"Average Latency: {avg_latency_flan:.4f} seconds")

Average Latency: 2.6536 seconds


In [120]:
data_flan_lat = [{"Question": question, "Latency (seconds)": latency_flan} 
                    for question, latency_flan in zip(questions, latencies_flan)]

# Save to a .jsonl file
with open("Latencies/latencies_flan.jsonl", "w") as f:
    for entry in data_flan_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_flan.jsonl'")

Latencies saved to 'latencies_flan.jsonl'


## Cross-Encoder BAAI/bge-reranker-base Reranker Integrated Pipeline

In [61]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

model_cross = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor_cross = CrossEncoderReranker(model=model_cross, top_n=3)
compression_retriever_cross = ContextualCompressionRetriever(
    base_compressor=compressor_cross, base_retriever=retriever
)

INFO:sentence_transformers.cross_encoder.CrossEncoder:Use pytorch device: cuda


In [62]:
chain_cross = (
    {"context": compression_retriever_cross, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Evaluation Dataset Creation

In [63]:
answer_cross = []
content_cross = []

# Inference

for q in questions:
    answer_cross.append(chain_cross.invoke(q))
    content_cross.append([docs.page_content for docs in compression_retriever_cross.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [64]:
data_cross = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_cross,
    "contexts": content_cross
}

from datasets import Dataset

dataset_cross = Dataset.from_dict(data_cross)

dataset_cross

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [66]:
# Save the dataset to a JSONL file
dataset_cross.to_json("datasets/topK@10/dataset_cross.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_cross.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_cross.to_json("datasets/topK@25/dataset_cross.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross.jsonl'")

In [56]:
# Save the dataset to a JSONL file
dataset_cross.to_json("datasets/topK@50/dataset_cross_topN5.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross_topN5.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_cross.jsonl'


In [67]:
# Save the dataset to a JSONL file
dataset_cross.to_json("datasets/topK@50/dataset_cross_topN3.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_cross_topN3.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_cross_topN3.jsonl'


In [67]:
dataset_cross = load_dataset('json', data_files="datasets/topK@10/dataset_cross.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_cross = load_dataset('json', data_files="datasets/topK@25/dataset_cross.jsonl")['train']

In [57]:
dataset_cross = load_dataset('json', data_files="datasets/topK@50/dataset_cross_topN5.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [68]:
dataset_cross = load_dataset('json', data_files="datasets/topK@50/dataset_cross_topN3.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

Evaluation with RAGAS Framework

In [69]:
result_cross = evaluate(dataset=dataset_cross,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

TopK@10

In [69]:
result_cross

{'faithfulness': 0.6062, 'answer_relevancy': 0.8921, 'context_recall': 0.6875, 'context_precision': 0.8611}

In [70]:
dfs_cross = result_cross.to_pandas()

In [71]:
dfs_cross.to_excel("eval_results/topK@10/evaluation_cross_results.xlsx", index=False)

TopK@25

In [24]:
result_cross

{'faithfulness': 0.5581, 'answer_relevancy': 0.8941, 'context_recall': 0.7120, 'context_precision': 0.8889}

In [26]:
dfs_cross = result_cross.to_pandas()

In [27]:
dfs_cross.to_excel("eval_results/topK@25/evaluation_cross_results.xlsx", index=False)

TopK@50

top_n@3

In [70]:
result_cross

{'faithfulness': 0.7052, 'answer_relevancy': 0.8666, 'context_recall': 0.6690, 'context_precision': 0.8634}

In [84]:
dfs_cross = result_cross.to_pandas()
dfs_cross.to_excel("eval_results/topK@50/evaluation_cross_results_topN3.xlsx", index=False)

top_n@5

In [59]:
result_cross

{'faithfulness': 0.6611, 'answer_relevancy': 0.9102, 'context_recall': 0.6690, 'context_precision': 0.8343}

In [60]:
dfs_cross = result_cross.to_pandas()
dfs_cross.to_excel("eval_results/topK@50/evaluation_cross_results.xlsx", index=False)

### Retreival Latency

In [121]:
latencies_cross = []
content_cross_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_cross_lat.append([docs.page_content for docs in compression_retriever_cross.get_relevant_documents(question)])
    end_time = time.time()
    latencies_cross.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [122]:
avg_latency_cross = sum(latencies_cross) / len(latencies_cross)
print(f"Average Latency: {avg_latency_cross:.4f} seconds")

Average Latency: 0.5540 seconds


In [123]:
data_cross_lat = [{"Question": question, "Latency (seconds)": latency_cross} 
                    for question, latency_cross in zip(questions, latencies_cross)]

# Save to a .jsonl file
with open("Latencies/latencies_cross.jsonl", "w") as f:
    for entry in data_cross_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_cross.jsonl'")

Latencies saved to 'latencies_cross.jsonl'


##  ColBERT Reranker Integrated Pipeline

In [72]:
pip install -U ragatouille

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [73]:
from ragatouille import RAGPretrainedModel

RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

  self.scaler = torch.cuda.amp.GradScaler()


In [74]:
from langchain.retrievers import ContextualCompressionRetriever

compression_retriever_ColBERT = ContextualCompressionRetriever(
    base_compressor=RAG.as_langchain_document_compressor(), base_retriever=retriever, search_kwargs={"k": 3}
)

In [75]:
chain_ColBERT = (
    {"context": compression_retriever_ColBERT, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Evaluation Dataset Creation

In [76]:
answer_ColBERT = []
content_ColBERT = []

# Inference

for q in questions:
    answer_ColBERT.append(chain_ColBERT.invoke(q))
    content_ColBERT.append([docs.page_content for docs in compression_retriever_ColBERT.get_relevant_documents(q)])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.84it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.00it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.15it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.49it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.59it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 25.57it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.18it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.47it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"




  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 24.27it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 23.62it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.38it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 23.74it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 27.77it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 28.88it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.30it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.77it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.77it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.68it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.68it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.69it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.66it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.59it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.71it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.69it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.91it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.91it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 26.20it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 26.94it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 26.87it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 26.27it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.25it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.24it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.53it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.61it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.61it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.00it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.44it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.42it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  7.70it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  7.62it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  7.67it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else



100%|██████████| 2/2 [00:00<00:00, 30.64it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.24it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 10.09it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 10.08it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 24.49it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else



100%|██████████| 2/2 [00:00<00:00, 30.99it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 31.01it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 22.46it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 22.62it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 22.41it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else



100%|██████████| 2/2 [00:00<00:00, 24.28it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.28it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.69it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.54it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.12it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.39it/s]


In [77]:
data_ColBERT = {
    "question": questions,
    "ground_truth": ground_truths,
    "answer": answer_ColBERT,
    "contexts": content_ColBERT
}

from datasets import Dataset

dataset_ColBERT = Dataset.from_dict(data_ColBERT)

dataset_ColBERT

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 36
})

In [78]:
# Save the dataset to a JSONL file
dataset_ColBERT.to_json("datasets/topK@10/dataset_ColBERT.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_ColBERT.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_ColBERT.jsonl'


In [None]:
# Save the dataset to a JSONL file
dataset_ColBERT.to_json("datasets/topK@25/dataset_ColBERT.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_ColBERT.jsonl'")

In [78]:
# Save the dataset to a JSONL file
dataset_ColBERT.to_json("datasets/topK@50/dataset_ColBERT.jsonl", orient="records", lines=True)

# Check if it is saved successfully
print("Dataset saved to 'dataset_ColBERT.jsonl'")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to 'dataset_ColBERT.jsonl'


Evaluation

In [79]:
dataset_ColBERT = load_dataset('json', data_files="datasets/topK@50/dataset_ColBERT.jsonl")['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [80]:
result_ColBERT = evaluate(dataset=dataset_ColBERT,
                    metrics = [
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                        context_precision
                    ],
                    llm = llm,
                    embeddings = embedding)

Evaluating:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

TopK@10

In [81]:
result_ColBERT

{'faithfulness': 0.6559, 'answer_relevancy': 0.8107, 'context_recall': 0.7153, 'context_precision': 0.7853}

In [82]:
dfs_ColBERT = result_ColBERT.to_pandas()

In [83]:
dfs_ColBERT.to_excel("eval_results/topK@10/evaluation_ColBERT_results.xlsx", index=False)

TopK@25

In [39]:
result_ColBERT

{'faithfulness': 0.7204, 'answer_relevancy': 0.8675, 'context_recall': 0.7014, 'context_precision': 0.8751}

In [40]:
dfs_ColBERT = result_ColBERT.to_pandas()

In [41]:
dfs_ColBERT.to_excel("eval_results/topK@25/evaluation_ColBERT_results.xlsx", index=False)

TopK@50

In [81]:
result_ColBERT

{'faithfulness': 0.7118, 'answer_relevancy': 0.8685, 'context_recall': 0.7037, 'context_precision': 0.8572}

In [82]:
dfs_ColBERT = result_ColBERT.to_pandas()
dfs_ColBERT.to_excel("eval_results/topK@50/evaluation_ColBERT_results.xlsx", index=False)

### Retrieval Latency

In [94]:
import time

latencies_ColBERT = []
content_ColBERT_lat = []

# Inference

for question in questions:
    start_time = time.time()
    content_ColBERT_lat.append([docs.page_content for docs in compression_retriever_ColBERT.get_relevant_documents(question)])
    end_time = time.time()
    latencies_ColBERT.append(end_time - start_time)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.47it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.52it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.94it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.10it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.14it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.06it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 27.89it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.49it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.71it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.66it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.53it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00,  9.67it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.89it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 27.35it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 26.79it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 23.85it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.25it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 29.79it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.22it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  7.72it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  7.68it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 30.28it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00,  9.98it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 24.65it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 32.38it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 26.48it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autoc



100%|██████████| 2/2 [00:00<00:00, 30.91it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 22.35it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 2/2 [00:00<00:00, 22.09it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.40it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 23.80it/s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()




100%|██████████| 2/2 [00:00<00:00, 24.10it/s]


In [95]:
avg_latency_ColBERT = sum(latencies_ColBERT) / len(latencies_ColBERT)
print(f"Average Latency: {avg_latency_ColBERT:.4f} seconds")

Average Latency: 0.3250 seconds


In [96]:
import json

data_ColBERT_lat = [{"Question": question, "Latency (seconds)": latency} for question, latency in zip(questions, latencies_ColBERT)]

# Save to a .jsonl file
with open("Latencies/latencies_ColBERT.jsonl", "w") as f:
    for entry in data_ColBERT_lat:
        f.write(json.dumps(entry) + "\n")

print("Latencies saved to 'latencies_ColBERT.jsonl'")

Latencies saved to 'latencies_ColBERT.jsonl'
