In [None]:
!pip install pymupdf
!pip install pandas
!pip install pdfplumber
!pip install langchain
!pip install faiss-cpu
!pip install sentence_transformers
!pip install pypdf
!pip install -U langchain-community
!pip install -U langchain-huggingface
!pip install transformers torch
!pip install groq
!pip install rank_bm25

Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.5
Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20250327 (from pdfplumber)
  Downloading pdfminer_six-20250327-py3-none-any.whl.metadata (4.1 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m4.4 MB/

In [None]:
import os
import glob
import fitz  # PyMuPDF
import pdfplumber
import numpy as np
import pandas as pd
import faiss
from typing import Dict, List, Tuple
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from groq import Groq
from rank_bm25 import BM25Okapi
import nltk
from nltk.tokenize import word_tokenize
import time
from google.colab import userdata


# Download NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
# Text Extraction
def get_pdf_paths(folder_path):
    """
    Returns a list of all PDF file paths in the given folder.
    """
    return glob.glob(os.path.join(folder_path, "*.pdf"))

def extract_data(pdf_paths):
    all_data = []  # List to store extracted data for all PDFs

    for pdf_path in pdf_paths:
        pdf_data = {"filename": os.path.basename(pdf_path), "text": "", "tables": []}

        # ----------- Extract Full Text with PyMuPDF ------------
        try:
            doc = fitz.open(pdf_path)
            for page_num in range(len(doc)):
                page = doc.load_page(page_num)
                pdf_data["text"] += page.get_text()
        except Exception as e:
            print(f"Failed to read text from {pdf_path}: {e}")

        # ----------- Extract Tables with pdfplumber ------------
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages:
                    tables = page.extract_tables()
                    pdf_data["tables"].extend(tables)
        except Exception as e:
            print(f"Failed to read tables from {pdf_path}: {e}")

        all_data.append(pdf_data)  # Add data for this PDF to the main list

    return all_data

In [None]:
import nltk
from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter

# Ensure you have the necessary NLTK data
nltk.download('punkt')

# Function for token-based chunking
def chunk_token_based(extracted_data, chunk_size=256, chunk_overlap=32):
    """
    Token-based chunking using TokenTextSplitter.
    """
    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    all_chunks = []
    for data in extracted_data:
        text_chunks = text_splitter.split_text(data['text'])
        table_data = str(data.get('tables', []))
        all_chunks.extend(text_chunks)
        all_chunks.append(table_data)
    return all_chunks

# Function for paragraph-based chunking
def chunk_paragraph_based(extracted_data, chunk_size=3, chunk_overlap=1):
    """
    Paragraph-based chunking where chunks are created based on paragraphs.
    """
    def paragraph_chunker(text):
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
        chunks = []
        for i in range(0, len(paragraphs), chunk_size - chunk_overlap):
            chunk = "\n\n".join(paragraphs[i:i + chunk_size])
            chunks.append(chunk)
        return chunks

    all_chunks = []
    for data in extracted_data:
        text = data.get("text", "")
        table_data = str(data.get("tables", []))
        text_chunks = paragraph_chunker(text)
        all_chunks.extend(text_chunks)
        all_chunks.append(table_data)
    return all_chunks

# Function for recursive character-based chunking
def chunk_recursive_based(extracted_data, chunk_size=500, chunk_overlap=100):
    """
    Recursive character-based chunking using RecursiveCharacterTextSplitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    all_chunks = []
    for data in extracted_data:
        text_chunks = text_splitter.split_text(data['text'])
        table_data = str(data.get('tables', []))
        all_chunks.extend(text_chunks)
        all_chunks.append(table_data)
    return all_chunks

# Function for sentence-based chunking
def chunk_sentence_based(extracted_data, chunk_size=5):
    """
    Sentence-based chunking, where each chunk contains chunk_size sentences.
    """
    def sentence_chunker(text):
        sentences = nltk.sent_tokenize(text)
        chunks = []
        for i in range(0, len(sentences), chunk_size):
            chunk = " ".join(sentences[i:i + chunk_size])
            chunks.append(chunk)
        return chunks

    all_chunks = []
    for data in extracted_data:
        text = data.get("text", "")
        table_data = str(data.get("tables", []))
        text_chunks = sentence_chunker(text)
        all_chunks.extend(text_chunks)
        all_chunks.append(table_data)
    return all_chunks

# Main function to choose the chunking method based on user input
def chunk_texts(extracted_data, chunk_type='recursive', chunk_size=500, chunk_overlap=100):
    """
    Main function that selects the chunking method based on user input.
    """
    if chunk_type == 'token':
        return chunk_token_based(extracted_data, chunk_size, chunk_overlap)
    elif chunk_type == 'paragraph':
        return chunk_paragraph_based(extracted_data, chunk_size, chunk_overlap)
    elif chunk_type == 'recursive':
        return chunk_recursive_based(extracted_data, chunk_size, chunk_overlap)
    elif chunk_type == 'sentence':
        return chunk_sentence_based(extracted_data, chunk_size)
    else:
        raise ValueError("Invalid chunk_type. Choose from 'token', 'paragraph', 'recursive', or 'sentence'.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [44]:
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np

# Initialize the MiniLM embedding model
def init_embedding_model():
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        encode_kwargs={"normalize_embeddings": True}
    )

# Embed all chunks
def embed_all_chunks(embedding_model, chunks):
    return embedding_model.embed_documents(chunks)

# Build FAISS index
def build_faiss_index(embeddings):
    dimension = len(embeddings[0])
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings).astype("float32"))
    return index


In [None]:
# Query functions
def semantic_search(query, index, embedding_model, all_chunks, top_k=3):
    """Pure semantic search using FAISS"""
    query_embedding = np.array([embedding_model.embed_query(query)]).astype('float32')
    _, indices = index.search(query_embedding, top_k)
    return [all_chunks[i] for i in indices[0]]

def bm25_search(query, tokenized_corpus, bm25, all_chunks, top_k=3):
    """Pure BM25 search using pre-initialized index"""
    tokenized_query = word_tokenize(query.lower())  # Tokenize first
    scores = bm25.get_scores(tokenized_query)       # Then get scores
    indices = np.argsort(scores)[-top_k:][::-1]     # Get top-k indices
    return [all_chunks[i] for i in indices]         # Return original chunks

def mmr_search(query, embedding_model, index, all_chunks, top_k=3, diversity=0.7):
    """Maximal Marginal Relevance search"""
    # Calculate similarity scores
    query_embedding = np.array([embedding_model.embed_query(query)]).astype('float32')
    _, indices = index.search(query_embedding, top_k*2)  # Get extra candidates

    # Implement MMR diversification
    selected = []
    candidates = [all_chunks[i] for i in indices[0]]
    candidate_embeddings = [embedding_model.embed_query(doc) for doc in candidates]
    query_embedding = query_embedding[0]  # Unwrap from array

    while len(selected) < top_k and candidates:
        scores = []
        for i, (doc, doc_embedding) in enumerate(zip(candidates, candidate_embeddings)):
            # Calculate similarity to query
            sim_score = np.dot(query_embedding, doc_embedding)
            if selected:
                # Calculate max redundancy with already selected docs
                selected_embeddings = [embedding_model.embed_query(s) for s in selected]
                max_redun = max(np.dot(doc_embedding, sel_emb) for sel_emb in selected_embeddings)
                scores.append(diversity * sim_score - (1 - diversity) * max_redun)
            else:
                scores.append(sim_score)

        best_idx = np.argmax(scores)
        selected.append(candidates.pop(best_idx))
        candidate_embeddings.pop(best_idx)

    return selected

# Hybrid methods
def hybrid_semantic_bm25(query, index, embedding_model, bm25, tokenized_corpus, all_chunks, top_k=3):
    """Combine semantic and BM25 results"""
    semantic = semantic_search(query, index, embedding_model, all_chunks, top_k)
    bm25_results = bm25_search(query, tokenized_corpus, bm25, all_chunks, top_k)
    return list(dict.fromkeys(semantic + bm25_results))[:top_k*2]

def hybrid_mmr_semantic(query, index, embedding_model, all_chunks, top_k=3):
    """Combine MMR and semantic results"""
    mmr = mmr_search(query, embedding_model, index, all_chunks, top_k)
    semantic = semantic_search(query, index, embedding_model, all_chunks, top_k)
    return list(dict.fromkeys(mmr + semantic))[:top_k*2]


In [None]:
# ================== GROQ SUMMARIZATION ==================
SUMMARIZATION_PROMPT = """
Please summarize the following medical text while preserving all critical information.
Keep medical terminology accurate and maintain all important steps or recommendations.
Focus on preserving:
- Step-by-step procedures
- Dosage information
- Warning signs
- Key recommendations

Text to summarize:
{text}

Concise summary (250-300 words):
"""

def summarize_text(text, client, model):
    """Summarize text using Groq API"""
    try:
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": SUMMARIZATION_PROMPT.format(text=text)
            }],
            model=model,
            temperature=0.3,  # Lower temperature for more factual outputs
            max_tokens=400
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Summarization failed: {str(e)}")
        return text[:500]  # Fallback to truncation if summarization fails

def summarize_context(context, client, max_length=300):
    """Summarize retrieved context using Groq"""
    model='allam-2-7b'
    combined = "\n".join(context)

    # First try to summarize with Groq
    summary = summarize_text(combined, client, model)

    # Fallback to simple truncation if summary is too long
    if len(summary) > max_length * 1.5:  # Allow some overflow
        summary = ". ".join([s.strip() for s in combined.split(".")[:5]]) + "."
        summary = summary[:max_length]

    return [summary]

In [None]:
PROMPT_TEMPLATE = """
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

JUDGE_PROMPT_TEMPLATE = """
You are a judge evaluating a question-answering system.

<question>
{question}
</question>

<answer_generated>
{answer}
</answer_generated>

<golden_reference>
{golden}
</golden_reference>

Evaluate the generated answer on a scale of 1 to 5 (5 being highest) for the following:

Faithfulness: Does the generated answer stay factually consistent with the golden reference?
Relevance: Does the generated answer actually answer the question?
"""

In [None]:
# ================== UPDATED CORE FUNCTIONS ==================
def ask_groq(query, context, client, model="llama3-8b-8192"):
    """Model-aware generation"""
    prompt = PROMPT_TEMPLATE.format(
        context="\n".join(context),
        question=query
    )
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content

def evaluate_with_groq_judge(question, answer, golden, client, model):
    """Model-aware evaluation"""
    prompt = JUDGE_PROMPT_TEMPLATE.format(
        question=question,
        answer=answer,
        golden=golden
    )
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content

In [45]:
class RAGEvaluator:
    def __init__(self,
                 embedding_model,
                 all_chunks,
                 index,
                 groq_client,
                 index_model="sentence-transformers/all-MiniLM-L6-v2",
                 gen_model="llama3-8b-8192",
                 eval_model="deepseek-r1-distill-llama-70b",
                 summary_model="allam-2-7b"):
        self.embedding_model = embedding_model
        self.all_chunks = all_chunks
        self.index = index
        self.client = groq_client
        self.timing_data = []
        self.cache = {}
        self.models = {
            "index": index_model,
            "gen": gen_model,
            "eval": eval_model,
            "summary": summary_model
        }

        # Initialize retrieval components
        self._init_retrieval()

    def _init_retrieval(self):
        """Initialize retrieval systems once"""
        try:
            # Tokenize corpus for BM25
            self.tokenized_corpus = [word_tokenize(chunk.lower()) for chunk in self.all_chunks]
            self.bm25 = BM25Okapi(self.tokenized_corpus)
        except Exception as e:
            print(f"Error initializing retrieval components: {str(e)}")
            # Provide fallback initialization
            self.tokenized_corpus = []
            self.bm25 = None

    def get_filename(self):
        """Generate filename with all model info"""
        return (f"rag_results_"
                f"index_{self.models['index']}_"
                f"gen_{self.models['gen']}_"
                f"eval_{self.models['eval']}.csv")

    def timed_retrieve(self, query, strategy_name, k):
        """Cached retrieval with timing"""
        cache_key = f"{strategy_name}_{query}_{k}"

        if cache_key in self.cache:
            return self.cache[cache_key], 0.0

        start = time.time()
        result = []
        error_message = None

        try:
            # Select the appropriate strategy
            if strategy_name == "semantic":
                result = semantic_search(query, self.index, self.embedding_model, self.all_chunks, k)
            elif strategy_name == "bm25":
                result = bm25_search(query, self.tokenized_corpus, self.bm25, self.all_chunks, k)
            elif strategy_name == "mmr":
                result = mmr_search(query, self.embedding_model, self.index, self.all_chunks, k)
            elif strategy_name == "hybrid_semantic_bm25":
                result = hybrid_semantic_bm25(query, self.index, self.embedding_model, self.bm25,
                                             self.tokenized_corpus, self.all_chunks, k)
            elif strategy_name == "hybrid_mmr_semantic":
                result = hybrid_mmr_semantic(query, self.index, self.embedding_model, self.all_chunks, k)
            elif strategy_name == "semantic_summarized":
                semantic_results = semantic_search(query, self.index, self.embedding_model, self.all_chunks, k)
                result = summarize_context(semantic_results, self.client)
            elif strategy_name == "bm25_summarized":
                bm25_results = bm25_search(query, self.tokenized_corpus, self.bm25, self.all_chunks, k)
                result = summarize_context(bm25_results, self.client)
            elif strategy_name == "mmr_summarized":
                mmr_results = mmr_search(query, self.embedding_model, self.index, self.all_chunks, k)
                result = summarize_context(mmr_results, self.client)
            elif strategy_name == "hybrid_semantic_bm25_summarized":
                hybrid_results = hybrid_semantic_bm25(query, self.index, self.embedding_model, self.bm25,
                                                    self.tokenized_corpus, self.all_chunks, k)
                result = summarize_context(hybrid_results, self.client)
            elif strategy_name == "hybrid_mmr_semantic_summarized":
                hybrid_results = hybrid_mmr_semantic(query, self.index, self.embedding_model, self.all_chunks, k)
                result = summarize_context(hybrid_results, self.client)
            else:
                error_message = f"Unknown strategy: {strategy_name}"
                result = []
        except Exception as e:
            error_message = f"Error in retrieval ({strategy_name}): {str(e)}"
            result = []  # Return empty context if retrieval fails

        retrieval_time = time.time() - start
        self.cache[cache_key] = result
        return result, retrieval_time, error_message

    def evaluate_strategy(self, query, golden, strategy_name, k):
        """Complete evaluation pipeline for one strategy"""
        result_data = {
            "query": query,
            "golden_answer": golden,
            "strategy": strategy_name,
            "k": k,
            "index_model": self.models['index'],
            "gen_model": self.models['gen'],
            "eval_model": self.models['eval'],
            "summary_model": self.models['summary']
        }

        # Track errors
        errors = []

        # 1. Retrieval
        try:
            context, ret_time, error = self.timed_retrieve(query, strategy_name, k)
            result_data["retrieval_time"] = ret_time
            result_data["context_length"] = sum(len(c) for c in context) if context else 0

            if error:
                errors.append(error)
        except Exception as e:
            errors.append(f"Retrieval exception: {str(e)}")
            context = []
            result_data["retrieval_time"] = 0
            result_data["context_length"] = 0

        # 2. Generation
        try:
            start_gen = time.time()
            answer = ask_groq(query, context, self.client, model=self.models['gen'])
            gen_time = time.time() - start_gen
            result_data["generated_answer"] = answer
            result_data["gen_time"] = gen_time
        except Exception as e:
            errors.append(f"Generation error: {str(e)}")
            answer = "Error generating answer"
            result_data["generated_answer"] = answer
            result_data["gen_time"] = 0

        # 3. Evaluation
        try:
            start_eval = time.time()
            eval_text = evaluate_with_groq_judge(
                query, answer, golden, self.client, model=self.models['eval']
            )
            eval_time = time.time() - start_eval
            result_data["evaluation_response"] = eval_text
            result_data["eval_time"] = eval_time

            # Parse scores
            try:
                faithfulness = eval_text.split("Faithfulness:")[1].split()[0].strip()
                relevance = eval_text.split("Relevance:")[1].split()[0].strip()
            except (IndexError, AttributeError):
                faithfulness = "N/A"
                relevance = "N/A"
                errors.append("Failed to parse evaluation scores")

            result_data["faithfulness"] = faithfulness
            result_data["relevance"] = relevance
        except Exception as e:
            errors.append(f"Evaluation error: {str(e)}")
            result_data["evaluation_response"] = "Error during evaluation"
            result_data["eval_time"] = 0
            result_data["faithfulness"] = "N/A"
            result_data["relevance"] = "N/A"

        # Calculate total time
        total_time = result_data.get("retrieval_time", 0) + result_data.get("gen_time", 0) + result_data.get("eval_time", 0)
        result_data["total_time"] = total_time

        # Log errors if any
        if errors:
            result_data["errors"] = "; ".join(errors)

        return result_data

    def run_evaluations(self, queries, k_values):
        """Evaluate all strategies across k values"""
        strategies = [
            "semantic",
            "bm25",
            "mmr",
            "hybrid_semantic_bm25",
            # "hybrid_mmr_semantic",
            "semantic_summarized",
            "bm25_summarized",
            "mmr_summarized",
            "hybrid_semantic_bm25_summarized",
            "hybrid_mmr_semantic_summarized"
        ]

        for query, golden in queries.items():
            for k in k_values:
                for strategy in strategies:
                    try:
                        result = self.evaluate_strategy(query, golden, strategy, k)
                        self.timing_data.append(result)

                        # Display results
                        error_status = f"[ERRORS: {result.get('errors', '').split(';')[0]}...]" if "errors" in result else ""
                        print(f"{strategy}(k={k}): "
                              f"{result['total_time']:.1f}s, "
                              f"Faith={result['faithfulness']}, "
                              f"Rel={result['relevance']} {error_status}")
                    except Exception as e:
                        # Catch any unexpected errors at the highest level
                        error_data = {
                            "query": query,
                            "golden_answer": golden,
                            "strategy": strategy,
                            "k": k,
                            "index_model": self.models['index'],
                            "gen_model": self.models['gen'],
                            "eval_model": self.models['eval'],
                            "summary_model": self.models['summary'],
                            "errors": f"Critical error: {str(e)}",
                            "faithfulness": "ERROR",
                            "relevance": "ERROR",
                            "total_time": 0
                        }
                        self.timing_data.append(error_data)
                        print(f"{strategy}(k={k}): CRITICAL ERROR - {str(e)}")

    def save_results(self):
        """Save comprehensive results"""
        try:
            df = pd.DataFrame(self.timing_data)
            filename = self.get_filename()
            df.to_csv(filename, index=False)
            print(f"Saved results to {filename}")
            return df
        except Exception as e:
            # If saving fails, attempt to save to a backup file
            try:
                backup_filename = "rag_results_backup_" + str(int(time.time())) + ".csv"
                df = pd.DataFrame(self.timing_data)
                df.to_csv(backup_filename, index=False)
                print(f"Error saving to original file: {str(e)}")
                print(f"Saved backup to {backup_filename}")
                return df
            except Exception as e2:
                print(f"Critical error saving results: {str(e2)}")
                return None

In [None]:
# Define test queries and golden answers

# Define test queries and golden answers
FIRSTAID_QA = {
    "What specific adjustments should be made when performing CPR on a child compared to an adult, in terms of hand placement, compression depth, head positioning, and the use of rescue breaths?":
    """
    While the overall CPR process is similar for both adults and children—consisting of chest compressions and rescue breaths—there are a few important distinctions that take the size and physical development of the person into account. For adults, CPR is performed with two hands placed one over the other on the center of the chest, compressing at least 2 inches deep at a rate of 100–120 compressions per minute. The head is tilted to a past-neutral position to open the airway for rescue breaths, and each breath should last about one second, making the chest rise visibly.

    For children, the approach is slightly gentler. The compression depth is about 2 inches rather than at least 2 inches, and in the case of a smaller child, only one hand may be needed for chest compressions instead of two. The airway is opened by tilting the head to a slightly past-neutral position, which is less extreme than the tilt for adults. However, the rate of compressions remains the same—100 to 120 per minute—and rescue breaths are also given after every 30 compressions, just like with adults.

    These adjustments help avoid injury while still ensuring that CPR is effective for a smaller and more delicate body.
    """
}

FIRSTAID_QA1 = {
    "What are the steps for providing first aid to a choking child or baby, and what actions should you take if the child or baby becomes unresponsive?":
    """
    If a child is choking and able to cough or speak, encourage them to continue coughing as this means their airway is not completely blocked. However, if the airway becomes fully obstructed, first aid must be provided immediately. Start by delivering back blows, placing your arm across the child’s chest, bending them forward, and giving up to five firm blows between the shoulder blades. If the object is not dislodged, move on to abdominal thrusts, where you place your fist just above the belly button and give five quick inward and upward thrusts. If the blockage persists, use chest thrusts, positioning your fist in the center of the chest and giving five thrusts by pulling straight back. If the child becomes unresponsive, ensure EMS is called and start CPR, beginning with chest compressions.
    """
}

FIRSTAID_QA2 = {
    "In what circumstances should each type of sling—arm sling, elevated sling, and collar-and-cuff—be used, and what precautions must be taken to ensure proper limb support and circulation in both standard and improvised first aid situations?":
    """
   Slings are used to support an injured arm or to assist in the treatment of injuries such as fractured ribs, but should only be applied if they do not cause the casualty pain. There are three main types of slings: the arm sling, elevated sling, and collar-and-cuff (clove hitch), each used based on the location of the injury. An arm sling is suitable for forearm injuries and should hold the forearm parallel to the ground, with the wrist slightly higher than the elbow. The elevated sling is used for shoulder injuries and supports the hand across the chest towards the uninjured side. The collar-and-cuff is ideal for upper arm injuries or as added support for fractured ribs, allowing the elbow to hang naturally while the wrist is secured with a clove hitch.

While triangular bandages are ideal, in emergency situations, other materials like ties, belts, or even shirts can be improvised. After applying any sling, it’s crucial to check for proper circulation by feeling for a pulse at the wrist or using the fingernail color test. Slings must always be positioned comfortably, and the injured limb should never be forced into any sling. Additional support, if needed, can be provided by securing the arm against the chest using more triangular bandages.
    """

}

FIRSTAID_QA3 = {
    "What are the causes, symptoms, and appropriate first aid treatment for hypothermia caused by cold exposure?":
    """
   Hypothermia is a serious cold-related emergency that occurs when
    the body temperature drops below 35°C due to prolonged exposure to cold environments.
    It commonly affects elderly individuals, young children, and those with impaired mental or physical abilities, especially when they are unable to access warmth or seek help. Risk situations include being caught in bad weather, wearing wet clothing, or lacking proper heating. Early signs of hypothermia include pale, cold skin, uncontrollable shivering, fatigue, poor coordination, and confusion. As the condition worsens, symptoms may progress to slow breathing, blurred vision, muscle stiffness, and unconsciousness, with the person possibly appearing asleep or even dead. Immediate care involves calling emergency services, moving the person to a warmer place, removing wet clothing, and wrapping them in blankets or a space blanket.
    If conscious, warm sweet drinks can be given. However, the person must be warmed slowly—avoiding direct heat, hot baths, rubbing the skin, or giving alcohol—as these can worsen the condition. Prompt action is crucial to prevent serious complications or death.
    """
}

FIRSTAID_QA4 = {
    "What lifestyle habits can help manage high blood pressure and reduce related health risks?":
    """
      The treatment and prevention of high blood pressure largely depend on its severity and any other existing medical conditions. Doctors often recommend lifestyle modifications as a key part of managing and preventing high blood pressure. Maintaining a healthy body weight is crucial, as excess weight can put additional strain on the heart. A well-balanced diet rich in fresh fruits, vegetables, and low-fat dairy products is also essential, while high-fat and high-cholesterol foods should be avoided to reduce the risk of atherosclerosis. It is important to limit salt intake, as sodium causes fluid retention and increases the heart’s workload. Adequate consumption of nutrients like potassium, magnesium, and calcium is recommended, with fruits—particularly citrus—being good sources of potassium. Regular physical activity is another important preventive measure; aerobic exercises such as walking or swimming should be done for at least 30 to 45 minutes, five times a week. Additionally, quitting smoking is strongly advised, as smoking constricts blood vessels, raises blood pressure, and is one of the most serious risk factors for cardiovascular diseases.
      Together, these measures play a significant role in effectively managing high blood pressure and promoting heart health.
    """
}

FIRSTAID_QA5 = {
    "What are the steps to revive a person if they have had a heart attack ?":
    """
     To revive a heart attack victim, you must act quickly and follow these key steps:

Lay the person flat on their back on a firm surface, ensuring their neck is supported and their face is facing upward.

Check responsiveness by gently tapping their shoulders. If unresponsive, immediately call emergency services (e.g., 999) or ask someone nearby to do so.

Open their airway by tilting the head back and lifting the chin. If there’s something blocking the mouth or throat, remove it carefully.

Check for breathing by placing your ear near their mouth and watching their chest for movement—for no more than 10 seconds.

If the person is not breathing, begin mouth-to-mouth resuscitation:

Pinch the nose shut, give 2 deep breaths into their mouth (for adults/children) or 2 short puffs (for infants), watching the chest rise.

Check for a pulse by feeling the side of the neck with two fingers for up to 10 seconds.

If there is no pulse, begin chest compressions:

Place one hand over the other in the center of the chest, keep elbows straight, and press down firmly about 4–5 cm deep.

Perform 30 compressions followed by 2 breaths, continuing at a steady pace. Aim for 9 cycles in 2 minutes, checking for a pulse every few cycles.

As soon as a pulse returns, stop compressions and check if the person is breathing. If not, continue rescue breaths until normal breathing resumes.


    """
}

In [None]:
# Get GROQ API key from user data (or set directly)
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
groq_client = Groq(api_key=GROQ_API_KEY)

In [None]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.9.0


In [42]:
# Set up the data pipeline
folder_path = "/content/data"  # Update this to your folder
pdf_paths = get_pdf_paths(folder_path)
extracted_data = extract_data(pdf_paths)




MuPDF error: format error: cmsOpenProfileFromMem failed





MuPDF error: format error: cmsOpenProfileFromMem failed





recursive

In [47]:
all_chunks = chunk_texts(extracted_data, chunk_type='token', chunk_size=1056, chunk_overlap=200)

token

In [None]:
# all_chunks = chunk_texts(extracted_data, chunk_type='token', chunk_size=1056, chunk_overlap=200)

In [None]:
# all_chunks = chunk_texts(extracted_data, chunk_type='token', chunk_size=512, chunk_overlap=100)

sentence

In [None]:
# all_chunks = chunk_texts(extracted_data, chunk_type='sentence', chunk_size=20, chunk_overlap=2)

In [None]:
# all_chunks = chunk_texts(extracted_data, chunk_type='sentence', chunk_size=10, chunk_overlap=2)

In [36]:
!pip install --upgrade --force-reinstall torch sentence-transformers


Collecting torch
  Using cached torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting sentence-transformers
  Using cached sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting filelock (from torch)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting networkx (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2

In [48]:
# Initialize embedding model, embed all chunks, and build index
embedding_model = init_embedding_model()
embeddings = embed_all_chunks(embedding_model, all_chunks)
faiss_index = build_faiss_index(embeddings)

# Save index and chunks for future use if needed
faiss.write_index(faiss_index, "faiss_index.idx")

  return HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [51]:
# List of k values to test
k_values = [ 5, 7]

# List of generation models to test
gen_models = [
    #"llama3-8b-8192",

    "llama-guard-3-8b"
]

# Evaluation model to use (consistent across runs)
eval_model = "deepseek-r1-distill-llama-70b"

# Initialize a DataFrame to store all results
all_results = []

In [52]:
import traceback
import pandas as pd

# Define all your query sets here
query_sets = {
    "FirstAid1": FIRSTAID_QA1,
    "FirstAid2": FIRSTAID_QA2,
    "FirstAid3": FIRSTAID_QA3,
    "FirstAid4": FIRSTAID_QA4,
    "FirstAid5": FIRSTAID_QA5
}

# To store all results from all query sets
all_results = []

# Loop through all query sets
for query_name, query_data in query_sets.items():
    print(f"\n================ Evaluating Query Set: {query_name} ================\n")

    # Loop through all generation models
    for gen_model in gen_models:
        print(f"\n===== Testing Generation Model: {gen_model} =====\n")

        try:
            evaluator = RAGEvaluator(
                embedding_model=embedding_model,
                all_chunks=all_chunks,
                index=faiss_index,
                groq_client=groq_client,
                index_model="sentence-transformers/all-MiniLM-L6-v2",
                gen_model=gen_model,
                eval_model=eval_model
            )

            # Run evaluations with the current query set
            evaluator.run_evaluations(
                queries=query_data,
                k_values=k_values
            )

            results_df = evaluator.save_results()

            if results_df is not None and not results_df.empty:
                # Add metadata: query set name, query, and golden answer
                results_df["query_set"] = query_name
                results_df["query"] = [q["query"] for q in query_data]
                results_df["golden_answer"] = [q["answer"] for q in query_data]

                all_results.append(results_df)
            else:
                print(f"⚠️ No results returned for model {gen_model} on {query_name}, skipping.")

        except Exception as e:
            print(f"❌ Error with model {gen_model} on query set {query_name}:\n{e}")
            traceback.print_exc()
            continue

# Save all combined results into a single CSV
if all_results:
    final_df = pd.concat(all_results, ignore_index=True)
    final_df.to_csv("rag_comparison_all_queries.csv", index=False)
    print("✅ All results saved to rag_comparison_all_queries.csv")
else:
    print("⚠️ No valid results found across all query sets.")





===== Testing Generation Model: llama-guard-3-8b =====

semantic(k=5): 2.1s, Faith=1, Rel=1 
bm25(k=5): 2.2s, Faith=1/5**, Rel=1/5** 
mmr(k=5): 2.4s, Faith=1, Rel=1 
hybrid_semantic_bm25(k=5): 1.2s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}...]
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
semantic_summarized(k=5): 2.3s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
bm25_summarized(k=5): 50.7s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code

Traceback (most recent call last):
  File "<ipython-input-52-c1449bc271c0>", line 46, in <cell line: 0>
    results_df["query"] = [q["query"] for q in query_data]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-52-c1449bc271c0>", line 46, in <listcomp>
    results_df["query"] = [q["query"] for q in query_data]
                           ~^^^^^^^^^
TypeError: string indices must be integers, not 'str'


semantic(k=5): 1.8s, Faith=1, Rel=1 
bm25(k=5): 2.2s, Faith=1**, Rel=1** 
mmr(k=5): 3.5s, Faith=1/5**, Rel=1/5** 
hybrid_semantic_bm25(k=5): 1.3s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}...]
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
semantic_summarized(k=5): 48.0s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
bm25_summarized(k=5): 95.3s, Faith=1, Rel=1 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the

Traceback (most recent call last):
  File "<ipython-input-52-c1449bc271c0>", line 46, in <cell line: 0>
    results_df["query"] = [q["query"] for q in query_data]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-52-c1449bc271c0>", line 46, in <listcomp>
    results_df["query"] = [q["query"] for q in query_data]
                           ~^^^^^^^^^
TypeError: string indices must be integers, not 'str'


semantic(k=5): 1.7s, Faith=1, Rel=1 
bm25(k=5): 2.2s, Faith=1/5**, Rel=1/5** 
mmr(k=5): 2.6s, Faith=1, Rel=1 
hybrid_semantic_bm25(k=5): 29.4s, Faith=1, Rel=1 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
semantic_summarized(k=5): 2.2s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
bm25_summarized(k=5): 57.0s, Faith=1, Rel=1 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
mmr_summarized(k=5): 57.9s, Faith=1, Rel=1 
Summarization failed: Error code: 413 - {'error': {'message': 

Traceback (most recent call last):
  File "<ipython-input-52-c1449bc271c0>", line 46, in <cell line: 0>
    results_df["query"] = [q["query"] for q in query_data]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-52-c1449bc271c0>", line 46, in <listcomp>
    results_df["query"] = [q["query"] for q in query_data]
                           ~^^^^^^^^^
TypeError: string indices must be integers, not 'str'


semantic(k=5): 2.9s, Faith=1/5**, Rel=1/5** 
bm25(k=5): 1.8s, Faith=1, Rel=1 
mmr(k=5): 13.1s, Faith=1, Rel=1 
hybrid_semantic_bm25(k=5): 1.1s, Faith=1/5**, Rel=1/5** [ERRORS: Generation error: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}...]
Summarization failed: Error code: 413 - {'error': {'message': 'Request too large for model `allam-2-7b` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 8961, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
semantic_summarized(k=5): 1.8s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'inval

Traceback (most recent call last):
  File "<ipython-input-52-c1449bc271c0>", line 46, in <cell line: 0>
    results_df["query"] = [q["query"] for q in query_data]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-52-c1449bc271c0>", line 46, in <listcomp>
    results_df["query"] = [q["query"] for q in query_data]
                           ~^^^^^^^^^
TypeError: string indices must be integers, not 'str'


semantic(k=5): 2.0s, Faith=1, Rel=1 
bm25(k=5): 2.8s, Faith=1/5**, Rel=1/5** 
mmr(k=5): 3.3s, Faith=1, Rel=1 
hybrid_semantic_bm25(k=5): 1.2s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}...]
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
semantic_summarized(k=5): 2.8s, Faith=1/5**, Rel=1/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
bm25_summarized(k=5): 53.3s, Faith=1, Rel=1 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the mess

Traceback (most recent call last):
  File "<ipython-input-52-c1449bc271c0>", line 46, in <cell line: 0>
    results_df["query"] = [q["query"] for q in query_data]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-52-c1449bc271c0>", line 46, in <listcomp>
    results_df["query"] = [q["query"] for q in query_data]
                           ~^^^^^^^^^
TypeError: string indices must be integers, not 'str'


In [None]:
all_results

[]

In [None]:
import traceback
import pandas as pd

# Define all your query sets here
query_sets = {
    "FirstAid1": FIRSTAID_QA1,
    "FirstAid2": FIRSTAID_QA2,
    "FirstAid3": FIRSTAID_QA3,
    "FirstAid4": FIRSTAID_QA4,
    "FirstAid5": FIRSTAID_QA5
}

all_results = []

print(f"\n🧪 gen_models contains: {gen_models}\n")

# Loop through all query sets
for query_name, query_data in query_sets.items():
    print(f"\n================ Evaluating Query Set: {query_name} ================\n")

    # Loop through all generation models
    for gen_model in gen_models:
        print(f"\n===== Testing Generation Model: {gen_model} =====\n")

        try:
            evaluator = RAGEvaluator(
                embedding_model=embedding_model,
                all_chunks=all_chunks,
                index=faiss_index,
                groq_client=groq_client,
                index_model="Bio-Small-CSE-BERT-base",
                gen_model=gen_model,
                eval_model=eval_model
            )

            evaluator.run_evaluations(
                queries=query_data,
                k_values=k_values
            )

            results_df = evaluator.save_results()

            if results_df is not None and not results_df.empty:
                print(f"✅ Results found for {gen_model} on {query_name}")
                results_df["query_set"] = query_name
                results_df["query"] = list(query_data.keys())
                results_df["golden_answer"] = list(query_data.values())
                all_results.append(results_df)
            else:
                print(f"⚠️ No results returned for model {gen_model} on {query_name}")

        except Exception as e:
            print(f"❌ Error with model {gen_model} on query set {query_name}:\n{e}")
            traceback.print_exc()
            # Save the exception info too as part of the results
            error_info = {
                "query_set": query_name,
                "gen_model": gen_model,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            all_results.append(pd.DataFrame([error_info]))  # Append error as a DataFrame

print(f"\n📦 Total results collected (including errors): {len(all_results)}\n")

# Save to a CSV
try:
    if all_results:
        final_df = pd.concat(all_results, ignore_index=True)
        final_df.to_csv("rag_comparison_all_queries.csv", index=False)
        print("✅ All results (including errors) saved to rag_comparison_all_queries.csv")
    else:
        print("⚠️ No results were saved.")
except Exception as e:
    print(f"❌ Failed to save CSV: {e}")
    traceback.print_exc()



🧪 gen_models contains: ['llama3-8b-8192']




===== Testing Generation Model: llama3-8b-8192 =====

semantic(k=5): 2.7s, Faith=4/5**, Rel=5/5** 
bm25(k=5): 2.8s, Faith=3/5**, Rel=4/5** 
mmr(k=5): 11.9s, Faith=4/5**, Rel=The 
hybrid_semantic_bm25(k=5): 44.9s, Faith=The, Rel=The 
semantic_summarized(k=5): 3.8s, Faith=The, Rel=The 
bm25_summarized(k=5): 2.9s, Faith=3/5**, Rel=4/5** 
mmr_summarized(k=5): 17.7s, Faith=2/5**, Rel=3/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
hybrid_semantic_bm25_summarized(k=5): 49.7s, Faith=The, Rel=The 
Summarization failed: Error code: 429 - {'error': {'message': 'Rate limit reached for model `allam-2-7b` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Used 4975, Requested 3815. Please try again in 27.9s. Need more token

Traceback (most recent call last):
  File "<ipython-input-34-6ef00326d6cd>", line 46, in <cell line: 0>
    results_df["query"] = list(query_data.keys())
    ~~~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4311, in __setitem__
    self._set_item(key, value)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4524, in _set_item
    value, refs = self._sanitize_column(value)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 5266, in _sanitize_column
    com.require_length_match(value, self.index)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/common.py", line 573, in require_length_match
    raise ValueError(
ValueError: Length of values (1) does not match length of index (18)


semantic(k=5): 3.4s, Faith=The, Rel=The 
bm25(k=5): 10.6s, Faith=4/5**, Rel=4/5** 
mmr(k=5): 5.5s, Faith=4/5**, Rel=5/5** 
hybrid_semantic_bm25(k=5): 54.4s, Faith=4/5**, Rel=5/5** 
semantic_summarized(k=5): 3.0s, Faith=The, Rel=The 
bm25_summarized(k=5): 18.0s, Faith=4/5**, Rel=5/5** 
mmr_summarized(k=5): 35.4s, Faith=4/5**, Rel=5/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
hybrid_semantic_bm25_summarized(k=5): 35.2s, Faith=4/5**, Rel=5/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
hybrid_mmr_semantic_summarized(k=5): 42.5s, Faith=4/5**, Rel=4/5** 
semantic(k=7): 4.1s, Faith=4/5**, Rel=5/5** 
bm25(k=7): 37.9s, Faith=4/5**, Rel=5/5** 
mmr(k=7): 36.7s,

Traceback (most recent call last):
  File "<ipython-input-34-6ef00326d6cd>", line 46, in <cell line: 0>
    results_df["query"] = list(query_data.keys())
    ~~~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4311, in __setitem__
    self._set_item(key, value)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4524, in _set_item
    value, refs = self._sanitize_column(value)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 5266, in _sanitize_column
    com.require_length_match(value, self.index)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/common.py", line 573, in require_length_match
    raise ValueError(
ValueError: Length of values (1) does not match length of index (18)


semantic(k=5): 4.3s, Faith=Does, Rel=Does 
bm25(k=5): 11.1s, Faith=5/5**, Rel=5/5** 
mmr(k=5): 15.4s, Faith=4/5**, Rel=5/5** 
hybrid_semantic_bm25(k=5): 49.7s, Faith=4/5**, Rel=5/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
semantic_summarized(k=5): 2.2s, Faith=1, Rel=1 
bm25_summarized(k=5): 21.7s, Faith=4/5**, Rel=5/5** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
mmr_summarized(k=5): 25.7s, Faith=**, Rel=** 
Summarization failed: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
hybrid_semantic_bm25_summarized(k=5)

Traceback (most recent call last):
  File "<ipython-input-34-6ef00326d6cd>", line 46, in <cell line: 0>
    results_df["query"] = list(query_data.keys())
    ~~~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4311, in __setitem__
    self._set_item(key, value)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4524, in _set_item
    value, refs = self._sanitize_column(value)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 5266, in _sanitize_column
    com.require_length_match(value, self.index)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/common.py", line 573, in require_length_match
    raise ValueError(
ValueError: Length of values (1) does not match length of index (18)


semantic(k=5): 1.5s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 413 - {'error': {'message': 'Request too large for model `llama3-8b-8192` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6791, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}...]
bm25(k=5): 2.4s, Faith=The, Rel=It 
mmr(k=5): 4.0s, Faith=1/5**, Rel=1/5** [ERRORS: Generation error: Error code: 413 - {'error': {'message': 'Request too large for model `llama3-8b-8192` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6564, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}...]
hybrid_semantic_bm2

Traceback (most recent call last):
  File "<ipython-input-34-6ef00326d6cd>", line 46, in <cell line: 0>
    results_df["query"] = list(query_data.keys())
    ~~~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4311, in __setitem__
    self._set_item(key, value)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4524, in _set_item
    value, refs = self._sanitize_column(value)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 5266, in _sanitize_column
    com.require_length_match(value, self.index)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/common.py", line 573, in require_length_match
    raise ValueError(
ValueError: Length of values (1) does not match length of index (18)


semantic(k=5): 1.1s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 413 - {'error': {'message': 'Request too large for model `llama3-8b-8192` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6629, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}...]
bm25(k=5): 2.7s, Faith=3/5**, Rel=4/5** 
mmr(k=5): 5.4s, Faith=1, Rel=1 [ERRORS: Generation error: Error code: 413 - {'error': {'message': 'Request too large for model `llama3-8b-8192` in organization `org_01jhcb10swevyrz5qyt1a8703k` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6515, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}...]
hybrid_semantic_bm25(k

Traceback (most recent call last):
  File "<ipython-input-34-6ef00326d6cd>", line 46, in <cell line: 0>
    results_df["query"] = list(query_data.keys())
    ~~~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4311, in __setitem__
    self._set_item(key, value)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 4524, in _set_item
    value, refs = self._sanitize_column(value)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/frame.py", line 5266, in _sanitize_column
    com.require_length_match(value, self.index)
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/common.py", line 573, in require_length_match
    raise ValueError(
ValueError: Length of values (1) does not match length of index (18)
