In [None]:
!pip install pymupdf
!pip install pandas
!pip install pdfplumber
!pip install langchain
!pip install faiss-cpu
!pip install sentence_transformers
!pip install pypdf
!pip install -U langchain-community
!pip install -U langchain-huggingface
!pip install transformers torch
!pip install groq
!pip install rank_bm25
!pip install transformers accelerate bitsandbytes
!pip install tiktoken

In [None]:
import os
import glob
import fitz  # PyMuPDF
import pdfplumber
import numpy as np
import pandas as pd
import faiss
from typing import Dict, List, Tuple
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from groq import Groq
from rank_bm25 import BM25Okapi
import nltk
from nltk.tokenize import word_tokenize
import time
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
import torch
import traceback

# Download NLTK data
# These tokenizer data packages are presumably what word_tokenize (BM25
# tokenization) and nltk.sent_tokenize (sentence chunking) load at runtime.
# NOTE(review): which of 'punkt' / 'punkt_tab' is actually required depends on
# the installed NLTK version — confirm and drop the unused one.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
# Text Extraction
def get_pdf_paths(folder_path):
    """
    Return the paths of all PDF files directly inside ``folder_path``.

    The search is not recursive and matches the ``*.pdf`` pattern. Paths are
    returned sorted: ``glob.glob`` yields files in arbitrary order, and the
    position of every chunk in the downstream index depends on the order in
    which the PDFs are processed, so a stable order keeps runs reproducible.

    Args:
        folder_path: Directory to scan.

    Returns:
        Sorted list of path strings; empty if nothing matches.
    """
    return sorted(glob.glob(os.path.join(folder_path, "*.pdf")))

def extract_data(pdf_paths):
    """
    Extract plain text and tables from each PDF.

    Text is read page by page with PyMuPDF (``fitz``); tables are read with
    pdfplumber's ``extract_tables``. The two passes are independent: a failure
    in one is printed and does not prevent the other, and whatever was
    collected before the failure is kept.

    Args:
        pdf_paths: Iterable of PDF file paths.

    Returns:
        One dict per input path, in input order, with keys
        ``"filename"`` (base name of the path), ``"text"`` (concatenated page
        text, ``""`` on failure) and ``"tables"`` (list of extracted tables,
        ``[]`` on failure).
    """
    all_data = []  # One entry per PDF, in input order

    for pdf_path in pdf_paths:
        pdf_data = {"filename": os.path.basename(pdf_path), "text": "", "tables": []}

        # ----------- Extract Full Text with PyMuPDF ------------
        try:
            # The context manager closes the document (and its file handle)
            # even when a page fails to parse.
            with fitz.open(pdf_path) as doc:
                for page in doc:
                    pdf_data["text"] += page.get_text()
        except Exception as e:
            print(f"Failed to read text from {pdf_path}: {e}")

        # ----------- Extract Tables with pdfplumber ------------
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages:
                    pdf_data["tables"].extend(page.extract_tables())
        except Exception as e:
            print(f"Failed to read tables from {pdf_path}: {e}")

        all_data.append(pdf_data)

    return all_data

In [None]:
import nltk
from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter

# Ensure you have the necessary NLTK data
# ('punkt' is also downloaded in the imports cell earlier in the notebook, so
# this call is a duplicate of that one.)
nltk.download('punkt')

# Function for token-based chunking
def chunk_token_based(extracted_data, chunk_size=256, chunk_overlap=32):
    """
    Split every document's text with a ``TokenTextSplitter``.

    For each record the text chunks are emitted first, followed by one extra
    chunk holding ``str()`` of the record's ``'tables'`` value (``[]`` when the
    key is absent).

    Args:
        extracted_data: Iterable of dicts with a ``'text'`` key and an
            optional ``'tables'`` key.
        chunk_size: Passed to ``TokenTextSplitter`` as ``chunk_size``.
        chunk_overlap: Passed to ``TokenTextSplitter`` as ``chunk_overlap``.

    Returns:
        Flat list of chunk strings across all records.
    """
    splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = []
    for record in extracted_data:
        pieces = splitter.split_text(record['text'])
        # The stringified tables always trail the record's text chunks.
        chunks += [*pieces, str(record.get('tables', []))]
    return chunks

# Function for paragraph-based chunking
def chunk_paragraph_based(extracted_data, chunk_size=3, chunk_overlap=1):
    """
    Paragraph-based chunking with a sliding window.

    Text is split on blank lines (``'\\n\\n'``); empty paragraphs are dropped.
    Each chunk joins up to ``chunk_size`` consecutive paragraphs, and
    consecutive chunks share ``chunk_overlap`` paragraphs. After a record's
    text chunks, one extra chunk with ``str()`` of its ``'tables'`` value is
    appended.

    Args:
        extracted_data: Iterable of dicts with optional ``'text'`` and
            ``'tables'`` keys.
        chunk_size: Number of paragraphs per chunk (must be > 0).
        chunk_overlap: Paragraphs shared between neighbouring chunks
            (must satisfy ``0 <= chunk_overlap < chunk_size``).

    Returns:
        Flat list of chunk strings across all records.

    Raises:
        ValueError: If the size/overlap combination would give a window step
            of zero or less.
    """
    # A non-positive step would either crash range() or silently yield no
    # chunks at all, so reject it up front with a clear message.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of paragraphs.")
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError("chunk_overlap must be >= 0 and smaller than chunk_size.")
    step = chunk_size - chunk_overlap

    def paragraph_chunker(text):
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
        chunks = []
        for start in range(0, len(paragraphs), step):
            chunks.append("\n\n".join(paragraphs[start:start + chunk_size]))
            # Once a window reaches the last paragraph, any further window
            # would only repeat paragraphs already contained in this chunk.
            if start + chunk_size >= len(paragraphs):
                break
        return chunks

    all_chunks = []
    for data in extracted_data:
        text = data.get("text", "")
        table_data = str(data.get("tables", []))
        all_chunks.extend(paragraph_chunker(text))
        all_chunks.append(table_data)
    return all_chunks

# Function for recursive character-based chunking
def chunk_recursive_based(extracted_data, chunk_size=500, chunk_overlap=100):
    """
    Split every document's text with a ``RecursiveCharacterTextSplitter``.

    Each record contributes its text chunks followed by a single chunk that
    contains ``str()`` of the record's ``'tables'`` value (``[]`` if missing).

    Args:
        extracted_data: Iterable of dicts with a ``'text'`` key and an
            optional ``'tables'`` key.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Flat list of chunk strings across all records.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    collected = []
    for entry in extracted_data:
        collected.extend(splitter.split_text(entry['text']))
        collected.append(str(entry.get('tables', [])))
    return collected

# Function for sentence-based chunking
def chunk_sentence_based(extracted_data, chunk_size=5):
    """
    Group sentences into chunks of ``chunk_size`` sentences each.

    Sentences come from ``nltk.sent_tokenize`` and are joined with a single
    space; there is no overlap between chunks. After a record's sentence
    chunks, one chunk with ``str()`` of its ``'tables'`` value is appended.

    Args:
        extracted_data: Iterable of dicts with optional ``'text'`` and
            ``'tables'`` keys.
        chunk_size: Number of sentences per chunk.

    Returns:
        Flat list of chunk strings across all records.
    """
    def sentence_chunker(text):
        sentences = nltk.sent_tokenize(text)
        return [
            " ".join(sentences[start:start + chunk_size])
            for start in range(0, len(sentences), chunk_size)
        ]

    collected = []
    for record in extracted_data:
        body = record.get("text", "")
        tables_repr = str(record.get("tables", []))
        collected.extend(sentence_chunker(body))
        collected.append(tables_repr)
    return collected

# Main function to choose the chunking method based on user input
def chunk_texts(extracted_data, chunk_type='recursive', chunk_size=500, chunk_overlap=100):
    """
    Dispatch to one of the chunking functions by name.

    Args:
        extracted_data: Records to chunk, forwarded unchanged.
        chunk_type: One of ``'token'``, ``'paragraph'``, ``'recursive'`` or
            ``'sentence'``.
        chunk_size: Forwarded to the selected chunker.
        chunk_overlap: Forwarded to the selected chunker, except for
            ``'sentence'``, which takes no overlap.

    Returns:
        Whatever the selected chunker returns.

    Raises:
        ValueError: If ``chunk_type`` is not one of the supported names.
    """
    # Lambdas keep the lookup lazy: only the chosen chunker is resolved/called.
    dispatch = {
        'token': lambda: chunk_token_based(extracted_data, chunk_size, chunk_overlap),
        'paragraph': lambda: chunk_paragraph_based(extracted_data, chunk_size, chunk_overlap),
        'recursive': lambda: chunk_recursive_based(extracted_data, chunk_size, chunk_overlap),
        'sentence': lambda: chunk_sentence_based(extracted_data, chunk_size),
    }
    try:
        chunker = dispatch[chunk_type]
    except (KeyError, TypeError):
        raise ValueError("Invalid chunk_type. Choose from 'token', 'paragraph', 'recursive', or 'sentence'.") from None
    return chunker()

In [None]:
# Initialize the embedding model
def init_embedding_model():
    """
    Build the ``HuggingFaceEmbeddings`` wrapper used for indexing and queries.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``pritamdeka/S-PubMedBERT-MS-MARCO`` model, configured with
        ``normalize_embeddings=True`` in its encode kwargs.
    """
    settings = {
        "model_name": "pritamdeka/S-PubMedBERT-MS-MARCO",
        "encode_kwargs": {"normalize_embeddings": True},
    }
    return HuggingFaceEmbeddings(**settings)

# Embed all chunks
def embed_all_chunks(embedding_model, chunks):
    """
    Embed every chunk in one call.

    Args:
        embedding_model: Object exposing ``embed_documents(list_of_texts)``.
        chunks: List of chunk strings.

    Returns:
        The value returned by ``embedding_model.embed_documents(chunks)``.
    """
    vectors = embedding_model.embed_documents(chunks)
    return vectors

# Build FAISS index
def build_faiss_index(embeddings):
    """
    Create a FAISS ``IndexFlatL2`` and load all embeddings into it.

    Args:
        embeddings: Non-empty sequence of equal-length vectors; the length of
            the first vector sets the index dimension.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.
    """
    vector_dim = len(embeddings[0])
    flat_index = faiss.IndexFlatL2(vector_dim)
    # The vectors are handed to FAISS as a float32 matrix.
    matrix = np.array(embeddings).astype("float32")
    flat_index.add(matrix)
    return flat_index

In [None]:
# Query functions
def semantic_search(query, index, embedding_model, all_chunks, top_k=3):
    """
    Pure semantic search using FAISS.

    Args:
        query: Query text.
        index: FAISS index built over the embeddings of ``all_chunks``.
        embedding_model: Object exposing ``embed_query(text)``.
        all_chunks: Chunk strings, positionally aligned with the index rows.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks in the order returned by the index. Fewer are
        returned when the index holds fewer than ``top_k`` vectors; an empty
        list is returned for ``top_k <= 0`` or an empty corpus.
    """
    if top_k <= 0 or len(all_chunks) == 0:
        return []

    query_embedding = np.array([embedding_model.embed_query(query)]).astype('float32')
    _, indices = index.search(query_embedding, top_k)

    # FAISS pads missing neighbours with -1; indexing a list with -1 would
    # silently return the last chunk, so out-of-range ids are dropped.
    return [all_chunks[i] for i in indices[0] if 0 <= i < len(all_chunks)]

def bm25_search(query, tokenized_corpus, bm25, all_chunks, top_k=3):
    """
    Pure BM25 search using a pre-initialized index.

    Args:
        query: Query text; lower-cased and tokenized with ``word_tokenize``.
        tokenized_corpus: Unused here; kept so existing callers keep working.
        bm25: Object exposing ``get_scores(tokens)`` with one score per chunk.
        all_chunks: Chunk strings, positionally aligned with the BM25 corpus.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks ordered from highest to lowest BM25 score;
        an empty list for ``top_k <= 0``.

    Raises:
        ValueError: If ``bm25`` is ``None`` (index was never built).
    """
    if bm25 is None:
        raise ValueError("BM25 index is not initialized.")
    # Guard needed because scores[-0:] selects the whole array, which would
    # return every chunk when top_k == 0.
    if top_k <= 0:
        return []

    tokenized_query = word_tokenize(query.lower())
    scores = bm25.get_scores(tokenized_query)
    indices = np.argsort(scores)[-top_k:][::-1]
    return [all_chunks[i] for i in indices]

def mmr_search(query, embedding_model, index, all_chunks, top_k=3, diversity=0.7):
    """
    Maximal Marginal Relevance search.

    Fetches ``top_k * 2`` nearest candidates from the index and then greedily
    picks chunks by the score
    ``diversity * sim(query, doc) - (1 - diversity) * max sim(doc, selected)``
    where similarity is a dot product of embeddings. Note that with this
    formula a *higher* ``diversity`` value weights query relevance more.

    Args:
        query: Query text.
        embedding_model: Object exposing ``embed_query(text)``.
        index: FAISS index built over the embeddings of ``all_chunks``.
        all_chunks: Chunk strings, positionally aligned with the index rows.
        top_k: Maximum number of chunks to return.
        diversity: Weight of the relevance term in the score (see above).

    Returns:
        Up to ``top_k`` chunks in selection order; an empty list for
        ``top_k <= 0`` or an empty corpus.
    """
    if top_k <= 0 or len(all_chunks) == 0:
        return []

    query_embedding = np.array([embedding_model.embed_query(query)]).astype('float32')
    _, indices = index.search(query_embedding, top_k * 2)  # Get extra candidates

    # FAISS pads missing neighbours with -1, which would alias the last chunk.
    candidates = [all_chunks[i] for i in indices[0] if 0 <= i < len(all_chunks)]
    candidate_embeddings = [embedding_model.embed_query(doc) for doc in candidates]
    query_embedding = query_embedding[0]  # Unwrap from array

    selected = []
    # Embeddings of already selected chunks are carried along instead of being
    # recomputed for every candidate on every round.
    selected_embeddings = []

    while len(selected) < top_k and candidates:
        scores = []
        for doc_embedding in candidate_embeddings:
            sim_score = np.dot(query_embedding, doc_embedding)
            if selected_embeddings:
                # Penalize by the closest already-selected chunk.
                max_redun = max(np.dot(doc_embedding, sel_emb) for sel_emb in selected_embeddings)
                scores.append(diversity * sim_score - (1 - diversity) * max_redun)
            else:
                scores.append(sim_score)

        best_idx = int(np.argmax(scores))
        selected.append(candidates.pop(best_idx))
        selected_embeddings.append(candidate_embeddings.pop(best_idx))

    return selected

# Hybrid methods
def hybrid_semantic_bm25(query, index, embedding_model, bm25, tokenized_corpus, all_chunks, top_k=3):
    """
    Merge semantic and BM25 hits into one de-duplicated list.

    Semantic results come first, then BM25 results; a chunk that appears in
    both keeps its first position. At most ``top_k * 2`` chunks are returned.
    """
    dense_hits = semantic_search(query, index, embedding_model, all_chunks, top_k)
    sparse_hits = bm25_search(query, tokenized_corpus, bm25, all_chunks, top_k)

    merged = []
    seen = set()
    for chunk in dense_hits + sparse_hits:
        if chunk not in seen:
            seen.add(chunk)
            merged.append(chunk)
    return merged[:top_k * 2]

def hybrid_mmr_semantic(query, index, embedding_model, all_chunks, top_k=3):
    """
    Merge MMR and semantic hits into one de-duplicated list.

    MMR results come first, then plain semantic results; a chunk that appears
    in both keeps its first position. At most ``top_k * 2`` chunks are returned.
    """
    diverse_hits = mmr_search(query, embedding_model, index, all_chunks, top_k)
    dense_hits = semantic_search(query, index, embedding_model, all_chunks, top_k)

    merged = []
    seen = set()
    for chunk in diverse_hits + dense_hits:
        if chunk not in seen:
            seen.add(chunk)
            merged.append(chunk)
    return merged[:top_k * 2]


In [None]:
# ================== GROQ SUMMARIZATION ==================
# Prompt template used by summarize_text. The single placeholder {text} is
# filled via str.format with the text to summarize.
# Note: the requested length is expressed in words (250-300), while
# summarize_text caps the reply at max_tokens=400.
SUMMARIZATION_PROMPT = """
Please summarize the following medical text while preserving all critical information.
Keep medical terminology accurate and maintain all important steps or recommendations.
Focus on preserving:
- Step-by-step procedures
- Dosage information
- Warning signs
- Key recommendations

Text to summarize:
{text}

Concise summary (250-300 words):
"""

def summarize_text(text, client, model):
    """
    Ask a Groq chat model to summarize ``text``.

    The text is inserted into ``SUMMARIZATION_PROMPT`` and sent as a single
    user message with ``temperature=0.3`` and ``max_tokens=400``.

    Args:
        text: Text to summarize.
        client: Client exposing ``chat.completions.create(...)``.
        model: Model identifier passed to the API.

    Returns:
        The stripped content of the first choice. If anything fails, the
        error is printed and the first 500 characters of ``text`` are
        returned instead (best-effort fallback).
    """
    try:
        create_completion = client.chat.completions.create
        user_message = {
            "role": "user",
            "content": SUMMARIZATION_PROMPT.format(text=text),
        }
        completion = create_completion(
            messages=[user_message],
            model=model,
            temperature=0.3,
            max_tokens=400,
        )
        summary = completion.choices[0].message.content
        return summary.strip()
    except Exception as e:
        print(f"Summarization failed: {str(e)}")
        # Fallback to truncation if summarization fails
        return text[:500]

def summarize_context(context, client, max_length=300, model='allam-2-7b'):
    """
    Summarize retrieved context chunks into a single summary chunk.

    The chunks are joined with newlines and passed to ``summarize_text``.
    The summarization prompt asks for 250-300 *words*, so the length limit is
    checked in words as well: measuring characters against a limit of 300
    would reject practically every summary that follows the prompt.

    Args:
        context: List of retrieved chunk strings.
        client: Groq client forwarded to ``summarize_text``.
        max_length: Target maximum summary length in words. Summaries longer
            than ``1.5 * max_length`` words are cut to ``max_length`` words.
        model: Groq model used for summarization.

    Returns:
        A one-element list holding the summary, or an empty list when there
        is no text to summarize (no API call is made in that case).
    """
    combined = "\n".join(context)

    # Nothing retrieved: asking the model to summarize an empty text would
    # only invite invented content.
    if not combined.strip():
        return []

    summary = summarize_text(combined, client, model)

    # Allow some overflow, then trim the summary itself rather than throwing
    # it away.
    words = summary.split()
    if len(words) > max_length * 1.5:
        summary = " ".join(words[:max_length])

    return [summary]

In [None]:
# Answer-generation prompt. Placeholders filled via str.format:
#   {context}  - retrieved chunks joined with newlines
#   {question} - the user query
PROMPT_TEMPLATE = """
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

# LLM-as-judge prompt. Placeholders filled via str.format:
#   {question} - the user query
#   {answer}   - the generated answer being scored
#   {golden}   - the reference answer
# NOTE(review): the prompt does not prescribe a reply format, while
# RAGEvaluator.evaluate_strategy extracts the scores by looking for the
# "Faithfulness:" / "Relevance:" labels in the reply — consider asking the
# judge for an explicit "Label: <score>" format.
JUDGE_PROMPT_TEMPLATE = """
You are a judge evaluating a question-answering system.

<question>
{question}
</question>

<answer_generated>
{answer}
</answer_generated>

<golden_reference>
{golden}
</golden_reference>

Evaluate the generated answer on a scale of 1 to 5 (5 being highest) for the following:

Faithfulness: Does the generated answer stay factually consistent with the golden reference?
Relevance: Does the generated answer actually answer the question?
"""

In [None]:
# ================== UPDATED CORE FUNCTIONS ==================
def ask_groq(query, context, client, model):
    """
    Generate an answer for ``query`` from the given context via Groq.

    Args:
        query: The question to answer.
        context: List of context strings; joined with newlines into the prompt.
        client: Client exposing ``chat.completions.create(...)``.
        model: Model identifier passed to the API.

    Returns:
        The message content of the first choice in the response.
    """
    fill_template = PROMPT_TEMPLATE.format
    user_prompt = fill_template(question=query, context="\n".join(context))
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": user_prompt}],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def evaluate_with_groq_judge(question, answer, golden, client, model):
    """
    Ask a Groq judge model to rate a generated answer.

    Args:
        question: The original question.
        answer: The generated answer to evaluate.
        golden: The reference answer to compare against.
        client: Client exposing ``chat.completions.create(...)``.
        model: Judge model identifier passed to the API.

    Returns:
        The raw message content of the judge's first choice (free text).
    """
    fill_template = JUDGE_PROMPT_TEMPLATE.format
    judge_prompt = fill_template(question=question, answer=answer, golden=golden)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": judge_prompt}],
    )
    verdict = completion.choices[0].message.content
    return verdict

In [None]:
class LocalModelWrapper:
    """
    Thin wrapper around a locally loaded Hugging Face causal language model.

    The model is loaded with 4-bit (NF4) quantization at construction time and
    exposed through a single ``generate(prompt)`` method that returns only the
    newly generated text.
    """

    def __init__(self, model_name, device="cuda" if torch.cuda.is_available() else "cpu"):
        """
        Args:
            model_name: Hugging Face model id or path.
            device: Device the tokenized inputs are moved to. The default is
                evaluated once, when the class is defined.
        """
        self.model_name = model_name
        self.device = device
        self.tokenizer = None
        self.model = None
        self.load_model()

    def load_model(self):
        """Load the model and tokenizer with 4-bit quantization"""
        print(f"Loading {self.model_name}...")

        # Initialize tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            padding_side="left",  # Important for generation
            trust_remote_code=True
        )

        # Set pad token if not defined
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Configure quantization for memory efficiency
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True  # Additional memory savings
        )

        # Load model with quantization
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            quantization_config=quant_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16
        )
        print(f"{self.model_name} loaded successfully!")

    def generate(self, prompt, max_length=500, temperature=0.7, max_input_length=1024):
        """
        Generate a completion for ``prompt``.

        The prompt is wrapped in a model-specific chat format (chosen by
        substring match on the model name), tokenized with truncation to
        ``max_input_length`` tokens, and sampled with ``top_p=0.9``.

        Args:
            prompt: Raw prompt text.
            max_length: Maximum number of *new* tokens to generate.
            temperature: Sampling temperature.
            max_input_length: Maximum number of input tokens kept.

        Returns:
            The generated continuation only, stripped of surrounding
            whitespace.
        """
        # Apply model-specific prompt formatting
        if "falcon" in self.model_name.lower():
            prompt = f"User: {prompt}\nAssistant:"
        elif "qwen" in self.model_name.lower():
            prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
        else:  # Default format
            prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

        # Tokenize with truncation
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_input_length
        ).to(self.device)

        # Configure generation
        generation_config = GenerationConfig(
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
            pad_token_id=self.tokenizer.eos_token_id,
            eos_token_id=self.tokenizer.eos_token_id
        )

        # Generate text
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                generation_config=generation_config
            )

        # The output sequence is the prompt tokens followed by the new tokens.
        # Cut the prompt off at token level: slicing the decoded string by
        # len(prompt) is wrong whenever the decoded prompt differs from the
        # original string (special tokens are skipped on decode, and long
        # prompts are truncated before generation).
        prompt_token_count = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_token_count:]

        generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        return generated_text

In [None]:
class RAGEvaluator:
    """
    Benchmark harness for the retrieval-augmented QA pipeline.

    For every (query, k, strategy) combination it retrieves context, generates
    an answer (local Hugging Face model or Groq), asks a Groq judge model to
    score the answer against a golden reference, and appends one result row
    (timings, scores, errors) to ``self.timing_data`` for export to CSV.
    """

    def __init__(self,
                 embedding_model,
                 all_chunks,
                 index,
                 groq_client=None,
                 index_model="S-PubMedBERT-MS-MARCO",
                 gen_model="llama3-8b-8192",
                 eval_model="deepseek-r1-distill-llama-70b",
                 summary_model="allam-2-7b"):
        """
        Args:
            embedding_model: Object exposing ``embed_query``; used by the
                semantic and MMR retrieval functions.
            all_chunks: Chunk strings, aligned with the rows of ``index``.
            index: FAISS index built over ``all_chunks``.
            groq_client: Groq client used for generation (when no local model
                is selected), summarization and judging.
            index_model: Name of the embedding model; recorded in results and
                in the output filename only.
            gen_model: Generation model. If it contains one of the known local
                model ids, a ``LocalModelWrapper`` is loaded; otherwise the
                name is sent to Groq.
            eval_model: Groq model used as judge.
            summary_model: Recorded in the results. ``timed_retrieve`` calls
                ``summarize_context`` without a model argument, so this value
                does not select the model used for summarization.
        """
        # Assigned first so __del__ never sees a half-built instance.
        self.local_model = None

        self.embedding_model = embedding_model
        self.all_chunks = all_chunks
        self.index = index
        self.client = groq_client
        self.timing_data = []  # one dict per evaluated (query, k, strategy)
        self.cache = {}        # successful retrieval results by cache key
        self.models = {
            "index": index_model,
            "gen": gen_model,
            "eval": eval_model,
            "summary": summary_model
        }

        # Initialize local models if specified
        if any(m in gen_model for m in ["tiiuae/Falcon3-3B-Base", "Qwen/Qwen2.5-3B"]):
            self.local_model = LocalModelWrapper(gen_model)

        # Initialize retrieval components
        self._init_retrieval()

    def __del__(self):
        """Clean up models when evaluator is destroyed"""
        # getattr: __init__ may have failed before the attribute existed.
        if getattr(self, "local_model", None):
            self.local_model = None
            torch.cuda.empty_cache()

    def _init_retrieval(self):
        """
        Build the BM25 index once.

        On failure the error is printed and ``self.bm25`` is set to ``None``;
        BM25-based strategies will then report a retrieval error.
        """
        try:
            # Tokenize corpus for BM25
            self.tokenized_corpus = [word_tokenize(chunk.lower()) for chunk in self.all_chunks]
            self.bm25 = BM25Okapi(self.tokenized_corpus)
        except Exception as e:
            print(f"Error initializing retrieval components: {str(e)}")
            self.tokenized_corpus = []
            self.bm25 = None

    def get_filename(self):
        """
        Generate a CSV filename containing a timestamp and the index/eval
        model names.

        Path separators in model names (e.g. ``org/model``) are replaced with
        ``-`` so the result is always a plain file name.
        """
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        index_name = str(self.models['index']).replace('/', '-').replace('\\', '-')
        eval_name = str(self.models['eval']).replace('/', '-').replace('\\', '-')
        return (f"rag_results_{timestamp}_"
                f"index_{index_name}_"
                f"eval_{eval_name}.csv")

    def ask_model(self, query, context):
        """
        Generate an answer with the local model if one is loaded, else Groq.

        Args:
            query: The question.
            context: List of context strings (joined with newlines).

        Returns:
            ``(answer, generation_seconds)``. Handled failures are reported
            as an answer starting with ``"ERROR"`` and a time of 0.
        """
        prompt = PROMPT_TEMPLATE.format(
            context="\n".join(context),
            question=query
        )

        # Use local model if available
        if self.local_model:
            try:
                start_gen = time.time()
                answer = self.local_model.generate(prompt)
                gen_time = time.time() - start_gen
                return answer, gen_time
            except torch.cuda.OutOfMemoryError:
                return "ERROR: GPU Out of Memory", 0
            except RuntimeError as e:
                if "out of memory" in str(e).lower():
                    return "ERROR: GPU Out of Memory", 0
                return f"ERROR: {str(e)}", 0

        # Use Groq if no local model
        elif self.client:
            try:
                start_gen = time.time()
                response = self.client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.models['gen'],
                )
                gen_time = time.time() - start_gen
                return response.choices[0].message.content, gen_time
            except Exception as e:
                return f"ERROR: {str(e)}", 0

        return "ERROR: No generation backend available", 0

    def timed_retrieve(self, query, strategy_name, k):
        """
        Run one retrieval strategy, with caching and timing.

        Strategies ending in ``_summarized`` first run the base strategy and
        then condense its results with ``summarize_context``.

        Args:
            query: Query text.
            strategy_name: Strategy identifier (see ``run_evaluations``).
            k: ``top_k`` passed to the retrieval function.

        Returns:
            ``(chunks, seconds, error_message)``. Cache hits report 0.0
            seconds. On failure ``chunks`` is empty and ``error_message``
            describes the problem; failures are never cached, so the next
            call retries instead of silently returning an empty result.
        """
        cache_key = f"{strategy_name}_{query}_{k}"

        if cache_key in self.cache:
            return self.cache[cache_key], 0.0, None

        start = time.time()
        result = []
        error_message = None

        try:
            if strategy_name == "semantic":
                result = semantic_search(query, self.index, self.embedding_model, self.all_chunks, k)
            elif strategy_name == "bm25":
                result = bm25_search(query, self.tokenized_corpus, self.bm25, self.all_chunks, k)
            elif strategy_name == "mmr":
                result = mmr_search(query, self.embedding_model, self.index, self.all_chunks, k)
            elif strategy_name == "hybrid_semantic_bm25":
                result = hybrid_semantic_bm25(query, self.index, self.embedding_model, self.bm25,
                                            self.tokenized_corpus, self.all_chunks, k)
            elif strategy_name == "hybrid_mmr_semantic":
                result = hybrid_mmr_semantic(query, self.index, self.embedding_model, self.all_chunks, k)
            elif strategy_name.endswith("_summarized"):
                base_strategy = strategy_name[:-len("_summarized")]
                base_results, _, base_error = self.timed_retrieve(query, base_strategy, k)
                if base_error:
                    # Do not summarize the output of a failed retrieval.
                    error_message = base_error
                else:
                    result = summarize_context(base_results, self.client)
            else:
                error_message = f"Unknown strategy: {strategy_name}"
        except Exception as e:
            result = []
            error_message = f"Retrieval error ({strategy_name}): {str(e)}"

        retrieval_time = time.time() - start
        if error_message is None:
            self.cache[cache_key] = result
        return result, retrieval_time, error_message

    @staticmethod
    def _parse_score(eval_text, label):
        """
        Extract the 1-5 score that follows ``label`` in the judge's reply.

        Any ``<think>...</think>`` section is removed first, then occurrences
        such as ``Label: 4``, ``**Label:** 4/5`` or ``Label - 4.5`` are
        matched; the last occurrence wins.

        Returns:
            The score as a string (e.g. ``"4"``), or ``None`` if no score
            follows the label.
        """
        import re  # only needed for score parsing

        visible = re.sub(r"<think>.*?</think>", "", eval_text, flags=re.DOTALL)
        pattern = rf"{re.escape(label)}\s*\**\s*[:=\-]\s*\**\s*([1-5](?:\.\d+)?)(?!\d)"
        matches = re.findall(pattern, visible, flags=re.IGNORECASE)
        return matches[-1] if matches else None

    def evaluate_strategy(self, query, golden, strategy_name, k):
        """
        Complete evaluation pipeline (retrieve, generate, judge) for one
        strategy.

        Every stage is isolated: a failure is recorded in the ``errors``
        field and the remaining stages still run where that makes sense
        (judging is skipped when generation failed).

        Returns:
            A result dict with one entry per CSV column.
        """
        result_data = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "query": query,
            "golden_answer": golden,
            "strategy": strategy_name,
            "k": k,
            "index_model": self.models['index'],
            "gen_model": self.models['gen'],
            "eval_model": self.models['eval'],
            "summary_model": self.models['summary'],
            "context": "",
            "context_length": 0,
            "retrieval_time": 0,
            "generated_answer": "",
            "gen_time": 0,
            "evaluation_response": "",
            "eval_time": 0,
            "faithfulness": "N/A",
            "relevance": "N/A",
            "total_time": 0,
            "errors": ""
        }

        errors = []

        # 1. Retrieval
        try:
            context, ret_time, error = self.timed_retrieve(query, strategy_name, k)
            result_data["retrieval_time"] = ret_time
            result_data["context_length"] = sum(len(c) for c in context) if context else 0
            result_data["context"] = " ||| ".join(context) if context else ""

            if error:
                errors.append(error)
                context = []  # Ensure empty context if retrieval failed
        except Exception as e:
            errors.append(f"Retrieval exception: {str(e)}")
            context = []

        # 2. Generation
        generation_failed = False
        try:
            answer, gen_time = self.ask_model(query, context)
            result_data["generated_answer"] = answer
            result_data["gen_time"] = gen_time
            # ask_model reports handled failures as "ERROR: ..." answers.
            if answer.startswith("ERROR"):
                generation_failed = True
                errors.append(answer)
        except Exception as e:
            generation_failed = True
            errors.append(f"Generation error: {str(e)}")
            result_data["generated_answer"] = f"ERROR: {str(e)}"
            result_data["gen_time"] = 0

        # 3. Evaluation (skip if generation failed)
        if not generation_failed:
            try:
                start_eval = time.time()
                eval_text = evaluate_with_groq_judge(
                    query,
                    result_data["generated_answer"],
                    golden,
                    self.client,
                    model=self.models['eval']
                )
                eval_time = time.time() - start_eval
                result_data["evaluation_response"] = eval_text
                result_data["eval_time"] = eval_time

                # Parse scores; a score that cannot be found stays "N/A".
                faithfulness = self._parse_score(eval_text, "Faithfulness")
                relevance = self._parse_score(eval_text, "Relevance")
                if faithfulness is not None:
                    result_data["faithfulness"] = faithfulness
                if relevance is not None:
                    result_data["relevance"] = relevance
                if faithfulness is None or relevance is None:
                    errors.append("Failed to parse evaluation scores")
            except Exception as e:
                errors.append(f"Evaluation error: {str(e)}")

        # Calculate total time
        result_data["total_time"] = (
            result_data["retrieval_time"] +
            result_data["gen_time"] +
            result_data.get("eval_time", 0)
        )

        # Log errors
        if errors:
            result_data["errors"] = " | ".join(errors)

        return result_data

    def run_evaluations(self, queries, k_values, strategies=None):
        """
        Evaluate all strategies across k values for multiple queries.

        Args:
            queries: Mapping of query text to golden answer.
            k_values: Iterable of ``top_k`` values to try.
            strategies: Strategy names to run; defaults to the five base
                strategies plus their ``_summarized`` variants.

        Results (or an error row if a strategy crashes) are appended to
        ``self.timing_data`` and a status line is printed per run.
        """
        if strategies is None:
            strategies = [
                "semantic",
                "bm25",
                "mmr",
                "hybrid_semantic_bm25",
                "hybrid_mmr_semantic",
                "semantic_summarized",
                "bm25_summarized",
                "mmr_summarized",
                "hybrid_semantic_bm25_summarized",
                "hybrid_mmr_semantic_summarized"
            ]

        for query, golden in queries.items():
            print(f"\nEvaluating query: {query[:60]}...")
            for k in k_values:
                for strategy in strategies:
                    try:
                        result = self.evaluate_strategy(query, golden, strategy, k)
                        self.timing_data.append(result)

                        # Print status
                        status = (
                            f"{strategy}(k={k}): {result['total_time']:.1f}s, "
                            f"Faith={result['faithfulness']}, Rel={result['relevance']}"
                        )
                        if result['errors']:
                            status += f" [ERRORS: {result['errors'][:50]}...]"
                        print(status)

                    except Exception as e:
                        error_data = {
                            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                            "query": query,
                            "golden_answer": golden,
                            "strategy": strategy,
                            "k": k,
                            "errors": f"Critical error: {str(e)}",
                            "faithfulness": "ERROR",
                            "relevance": "ERROR"
                        }
                        self.timing_data.append(error_data)
                        print(f"{strategy}(k={k}): CRITICAL ERROR - {str(e)}")

    def save_results(self, filename=None):
        """
        Write all collected results to CSV.

        Args:
            filename: Target path; defaults to ``get_filename()``.

        Returns:
            The saved DataFrame, or ``None`` when there is nothing to save or
            saving failed (in which case an unordered backup CSV is
            attempted).
        """
        if not self.timing_data:
            print("Warning: No results to save")
            return None

        try:
            df = pd.DataFrame(self.timing_data)

            # Ensure consistent column order
            columns = [
                'timestamp', 'query', 'golden_answer', 'strategy', 'k',
                'index_model', 'gen_model', 'eval_model', 'summary_model',
                'context_length', 'retrieval_time', 'gen_time', 'eval_time',
                'total_time', 'faithfulness', 'relevance', 'errors',
                'context', 'generated_answer', 'evaluation_response'
            ]
            df = df.reindex(columns=[c for c in columns if c in df.columns])

            filename = filename or self.get_filename()
            df.to_csv(filename, index=False)
            print(f"Saved {len(df)} results to {filename}")
            return df

        except Exception as e:
            print(f"Failed to save results: {str(e)}")
            try:
                # Emergency save
                backup_file = f"rag_results_backup_{int(time.time())}.csv"
                pd.DataFrame(self.timing_data).to_csv(backup_file, index=False)
                print(f"Saved backup to {backup_file}")
            except Exception as backup_error:
                print(f"Critical: Could not save backup! ({backup_error})")
            return None

In [None]:
FIRSTAID_QA = {
    "What specific adjustments should be made when performing CPR on a child compared to an adult, in terms of hand placement, compression depth, head positioning, and the use of rescue breaths?":
    """
    While the overall CPR process is similar for both adults and children—consisting of chest compressions and rescue breaths—there are a few important distinctions that take the size and physical development of the person into account. For adults, CPR is performed with two hands placed one over the other on the center of the chest, compressing at least 2 inches deep at a rate of 100–120 compressions per minute. The head is tilted to a past-neutral position to open the airway for rescue breaths, and each breath should last about one second, making the chest rise visibly.

    For children, the approach is slightly gentler. The compression depth is about 2 inches rather than at least 2 inches, and in the case of a smaller child, only one hand may be needed for chest compressions instead of two. The airway is opened by tilting the head to a slightly past-neutral position, which is less extreme than the tilt for adults. However, the rate of compressions remains the same—100 to 120 per minute—and rescue breaths are also given after every 30 compressions, just like with adults.

    These adjustments help avoid injury while still ensuring that CPR is effective for a smaller and more delicate body.
    """,

    "What are the steps for providing first aid to a choking child or baby, and what actions should you take if the child or baby becomes unresponsive?":
    """
    If a child is choking and able to cough or speak, encourage them to continue coughing as this means their airway is not completely blocked. However, if the airway becomes fully obstructed, first aid must be provided immediately. Start by delivering back blows, placing your arm across the child’s chest, bending them forward, and giving up to five firm blows between the shoulder blades. If the object is not dislodged, move on to abdominal thrusts, where you place your fist just above the belly button and give five quick inward and upward thrusts. If the blockage persists, use chest thrusts, positioning your fist in the center of the chest and giving five thrusts by pulling straight back. If the child becomes unresponsive, ensure EMS is called and start CPR, beginning with chest compressions.
    """,

    "In what circumstances should each type of sling—arm sling, elevated sling, and collar-and-cuff—be used, and what precautions must be taken to ensure proper limb support and circulation in both standard and improvised first aid situations?":
    """
    Slings are used to support an injured arm or to assist in the treatment of injuries such as fractured ribs, but should only be applied if they do not cause the casualty pain. There are three main types of slings: the arm sling, elevated sling, and collar-and-cuff (clove hitch), each used based on the location of the injury. An arm sling is suitable for forearm injuries and should hold the forearm parallel to the ground, with the wrist slightly higher than the elbow. The elevated sling is used for shoulder injuries and supports the hand across the chest towards the uninjured side. The collar-and-cuff is ideal for upper arm injuries or as added support for fractured ribs, allowing the elbow to hang naturally while the wrist is secured with a clove hitch.

    While triangular bandages are ideal, in emergency situations, other materials like ties, belts, or even shirts can be improvised. After applying any sling, it’s crucial to check for proper circulation by feeling for a pulse at the wrist or using the fingernail color test. Slings must always be positioned comfortably, and the injured limb should never be forced into any sling. Additional support, if needed, can be provided by securing the arm against the chest using more triangular bandages.
    """,

    "What are the causes, symptoms, and appropriate first aid treatment for hypothermia caused by cold exposure?":
    """
    Hypothermia is a serious cold-related emergency that occurs when the body temperature drops below 35°C due to prolonged exposure to cold environments. It commonly affects elderly individuals, young children, and those with impaired mental or physical abilities, especially when they are unable to access warmth or seek help. Risk situations include being caught in bad weather, wearing wet clothing, or lacking proper heating. Early signs of hypothermia include pale, cold skin, uncontrollable shivering, fatigue, poor coordination, and confusion. As the condition worsens, symptoms may progress to slow breathing, blurred vision, muscle stiffness, and unconsciousness, with the person possibly appearing asleep or even dead. Immediate care involves calling emergency services, moving the person to a warmer place, removing wet clothing, and wrapping them in blankets or a space blanket. If conscious, warm sweet drinks can be given. However, the person must be warmed slowly—avoiding direct heat, hot baths, rubbing the skin, or giving alcohol—as these can worsen the condition. Prompt action is crucial to prevent serious complications or death.
    """,

    "What lifestyle habits can help manage high blood pressure and reduce related health risks?":
    """
    The treatment and prevention of high blood pressure largely depend on its severity and any other existing medical conditions. Doctors often recommend lifestyle modifications as a key part of managing and preventing high blood pressure. Maintaining a healthy body weight is crucial, as excess weight can put additional strain on the heart. A well-balanced diet rich in fresh fruits, vegetables, and low-fat dairy products is also essential, while high-fat and high-cholesterol foods should be avoided to reduce the risk of atherosclerosis. It is important to limit salt intake, as sodium causes fluid retention and increases the heart’s workload. Adequate consumption of nutrients like potassium, magnesium, and calcium is recommended, with fruits—particularly citrus—being good sources of potassium. Regular physical activity is another important preventive measure; aerobic exercises such as walking or swimming should be done for at least 30 to 45 minutes, five times a week. Additionally, quitting smoking is strongly advised, as smoking constricts blood vessels, raises blood pressure, and is one of the most serious risk factors for cardiovascular diseases. Together, these measures play a significant role in effectively managing high blood pressure and promoting heart health.
    """,

    "What are the steps to revive a person if they have had a heart attack ?":
    """
    To revive a heart attack victim, you must act quickly and follow these key steps:

    Lay the person flat on their back on a firm surface, ensuring their neck is supported and their face is facing upward.

    Check responsiveness by gently tapping their shoulders. If unresponsive, immediately call emergency services (e.g., 999) or ask someone nearby to do so.

    Open their airway by tilting the head back and lifting the chin. If there’s something blocking the mouth or throat, remove it carefully.

    Check for breathing by placing your ear near their mouth and watching their chest for movement—for no more than 10 seconds.

    If the person is not breathing, begin mouth-to-mouth resuscitation:

    Pinch the nose shut, give 2 deep breaths into their mouth (for adults/children) or 2 short puffs (for infants), watching the chest rise.

    Check for a pulse by feeling the side of the neck with two fingers for up to 10 seconds.

    If there is no pulse, begin chest compressions:

    Place one hand over the other in the center of the chest, keep elbows straight, and press down firmly about 4–5 cm deep.

    Perform 30 compressions followed by 2 breaths, continuing at a steady pace. Aim for 9 cycles in 2 minutes, checking for a pulse every few cycles.

    As soon as a pulse returns, stop compressions and check if the person is breathing. If not, continue rescue breaths until normal breathing resumes.
    """
}


In [None]:
# Read the Groq API key from the Colab secret store (never paste the key into
# the notebook itself) and create the client used by the evaluation cells below.
GROQ_API_KEY = userdata.get("GROQ_API_KEY")
groq_client = Groq(
    api_key=GROQ_API_KEY,
)

In [None]:
# Data pipeline: locate every PDF in the source folder and extract its content.
folder_path = "/content/data"  # Update this to your folder
pdf_paths = get_pdf_paths(folder_path)

# Fail fast with an actionable message: get_pdf_paths returns an empty list
# when the folder is missing or holds no *.pdf files, and every later cell
# (chunking, embedding, indexing) would otherwise run on an empty corpus.
if not pdf_paths:
    raise FileNotFoundError(
        f"No PDF files found in {folder_path!r}. "
        "Upload the documents or update folder_path."
    )

extracted_data = extract_data(pdf_paths)


In [None]:
# Split the extracted documents into overlapping chunks using the
# 'recursive' chunking strategy.
all_chunks = chunk_texts(
    extracted_data,
    chunk_type='recursive',
    chunk_size=512,
    chunk_overlap=100,
)

In [None]:
# Create the embedding model, embed every chunk, then build the FAISS index
# from the resulting embeddings.
embedding_model = init_embedding_model()
embeddings = embed_all_chunks(
    embedding_model,
    all_chunks,
)
faiss_index = build_faiss_index(embeddings)

# Persist the index to disk for later reuse. Only the index is written here;
# the chunks themselves are not saved by this cell.
faiss.write_index(
    faiss_index,
    "faiss_index.idx",
)

In [None]:
# --- Experiment configuration -------------------------------------------

# k values swept by the evaluation run
k_values: List[int] = [3, 5, 7]

# Generation models compared against each other
# NOTE(review): hosted model IDs get retired over time; confirm these are
# still available before a long run.
gen_models: List[str] = ["llama3-70b-8192", "tiiuae/Falcon3-3B-Base", "Qwen/Qwen2.5-3B"]

# Evaluation model, kept the same for every run so results stay comparable
eval_model: str = "deepseek-r1-distill-llama-70b"

# Accumulator (a plain list) for the per-model result DataFrames
all_results: List = []

In [None]:
# Evaluate every generation model at every k value, then merge the per-model
# results into a single CSV.
import gc  # local import: only this cell needs it, to reclaim memory between models

# Reset the accumulator here so that re-running this cell does not append
# duplicate frames to results left over from an earlier run.
all_results = []
evaluator = None

for gen_model in gen_models:
    print(f"\n===== Testing Generation Model: {gen_model} =====\n")

    # Release the previous evaluator BEFORE building the next one, so two models
    # are never resident at the same time.
    # NOTE(review): assumes RAGEvaluator keeps any locally loaded model alive
    # through its own attributes; confirm against the class definition.
    evaluator = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    try:
        # Set up evaluator with the current generation model
        evaluator = RAGEvaluator(
            embedding_model=embedding_model,
            all_chunks=all_chunks,
            index=faiss_index,
            groq_client=groq_client,
            index_model="S-PubMedBERT-MS-MARCO",
            gen_model=gen_model,
            eval_model=eval_model
        )

        # Run evaluations with all k values
        evaluator.run_evaluations(
            queries=FIRSTAID_QA,
            k_values=k_values
        )

        # Save individual results for this model
        results_df = evaluator.save_results()

        # Only keep non-empty results so the final concat stays meaningful
        if results_df is not None and not results_df.empty:
            all_results.append(results_df)
        else:
            print(f"⚠️ No results returned for model {gen_model}, skipping.")

    except Exception as e:
        # Deliberate best-effort: report the failure with its traceback and
        # carry on with the next model instead of aborting the whole sweep.
        print(f"Error encountered while processing model {gen_model}:\n{e}")
        traceback.print_exc()

# Combine all results into one DataFrame
if all_results:
    combined_results = pd.concat(all_results, ignore_index=True)
    combined_results.to_csv("combined_rag_comparison.csv", index=False)
    print("All results saved to combined_rag_comparison.csv")
else:
    print("No valid results to save. CSV not created.")
