In [1]:
import numpy as np
import os
import pandas as pd
import re
import time
import os
import PyPDF2
import re
from datetime import datetime
from tqdm import tqdm
from pathlib import Path
import os
import PyPDF2
import requests
import logging
from datetime import datetime
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, PointStruct


  from .autonotebook import tqdm as notebook_tqdm





In [2]:
model = SentenceTransformer("all-MiniLM-L6-v2")

### **Processing Pdfs Text**

In [3]:
def extract_and_split_pdfs(pdf_folder_path, chunk_size=400, chunk_overlap=50):
    """
    Extract text from PDFs in a given folder, then split into chunks using
    RecursiveCharacterTextSplitter from langchain.

    Args:
        pdf_folder_path (str): Path to the folder containing PDF files.
        chunk_size (int): Maximum number of characters in each chunk.
        chunk_overlap (int): Number of overlapping characters between chunks.

    Returns:
        all_chunks (List[str]): List of all text chunks from all PDFs.
        filenames (List[str]): List of PDF filenames processed.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " "],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    all_chunks = []
    filenames = []

    pdf_files = [f for f in os.listdir(pdf_folder_path) if f.endswith(".pdf")]
    for filename in pdf_files:
        pdf_path = os.path.join(pdf_folder_path, filename)
        filenames.append(filename)

        # Extract text from the PDF
        text = ""
        with open(pdf_path, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                page_text = page.extract_text() or ""
                if page_text:
                    text += page_text + "\n"

        # Split text into chunks
        chunks = text_splitter.split_text(text)
        all_chunks.extend(chunks)

    return all_chunks, filenames


In [4]:
pdf_folder_path = "pdfs"
chunks, filenames = extract_and_split_pdfs(pdf_folder_path)
print(f"Extracted {len(chunks)} text chunks total from {len(filenames)} PDF(s).")

Extracted 509 text chunks total from 5 PDF(s).


## **Creating and Storing Embeddings of Documents in Qdrant DB**

In [5]:
document_embeddings = model.encode(chunks).astype("float32")
print("Embeddings shape:", document_embeddings.shape)

Embeddings shape: (509, 384)


In [6]:
# Connect to Qdrant (local mode, storing data in 'qdrant_local.db')
client = QdrantClient(path="local_qdrant.db")

collection_name = "docus_chunks"
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(
        size=document_embeddings.shape[1],  # Dimension from the embedding model
        distance="Cosine"                   # 'Cosine' is typical for sentence embeddings
    )
)

print(f"Collection '{collection_name}' has been created or recreated successfully!")


Collection 'docus_chunks' has been created or recreated successfully!


  client.recreate_collection(


In [7]:
points = []
for i, embedding_vector in enumerate(document_embeddings):
    points.append(
        PointStruct(
            id=i,  # unique ID for each chunk
            vector=embedding_vector.tolist(),
            payload={
                # Store the actual chunk text, or any other metadata you need
                "chunk_text": chunks[i]
            }
        )
    )

# Upsert (insert/update) the chunk embeddings into Qdrant
client.upsert(collection_name=collection_name, points=points)

print(f"Upserted {len(points)} chunk embeddings into Qdrant!")

Upserted 509 chunk embeddings into Qdrant!


## **Creating and Storing Embeddings of Q&As in Qdrant DB**

### **Uploading already Generated Q&As**

In [8]:
def extract_qna_from_folder(folder_path):
    all_questions = []
    all_answers = []

    # Iterate through all .txt files in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.txt'):  # Process only .txt files
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Regular expression to extract Q&A pairs
            pattern = r'Q:\s*(.*?)\s*\nA:\s*(.*?)(?=\s*(?:Q:|$))'
            qna_pairs = re.findall(pattern, content, re.DOTALL)

            # Extract questions and answers, preserving their order
            all_questions.extend([q.strip() for q, _ in qna_pairs])
            all_answers.extend([a.strip() for _, a in qna_pairs])

    return all_questions, all_answers

In [9]:
# Processing
folder_path = "refined_q&as"
questions, answers = extract_qna_from_folder(folder_path)

# Verify results
print(f"Extracted {len(questions)} questions and {len(answers)} answers.")
for i in range(min(5, len(questions))):  # Show the first 5 pairs
    print(f"Q{i+1}: {questions[i]}")
    print(f"A{i+1}: {answers[i]}")

Extracted 5767 questions and 5767 answers.
Q1: Where did the Olympic torch arrive for the first time ever during the 2008 Summer Olympics?
A1: Kazakhstan.
Q2: Who was the first torchbearer in Almaty, Kazakhstan during the 2008 Summer Olympics?
A2: The President of Kazakhstan, Nursultan Nazarbaev.
Q3: How many kilometers did the route of the Olympic torch relay run in Almaty?
A3: 20 km.
Q4: Were there any reports of Uighur activists being arrested during the Olympic torch relay in Kazakhstan?
A4: Yes, some were arrested and deported back to China.
Q5: When did the Olympic torch arrive in Almaty for the first time ever during the 2008 Summer Olympics?
A5: On April 2.


**Creating Collection in same Local_Qdrant DB**

In [10]:
# Function to create a collection in Qdrant
def create_collection(client, collection_name="faq_embeddings"):
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=384, distance="Cosine")  # For all-MiniLM-L6-v2
    )

print(f"Collection '{collection_name}' has been created or recreated successfully!")

Collection 'docus_chunks' has been created or recreated successfully!


### **Generating Embeddings and Storing**

In [11]:
# Function to generate embeddings for a list of questions
def generate_embeddings(questions):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(questions)
    return embeddings

In [12]:
# Function to store embeddings and metadata (answers) in Qdrant
def store_embeddings_in_qdrant(client, collection_name, questions, answers, embeddings):
    points = [
        PointStruct(
            id=i,
            vector=embeddings[i].tolist(),  # Convert numpy array to list
            payload={"answer": answers[i]}  # Store metadata (only answers)
        ) for i in range(len(questions))
    ]
    client.upsert(collection_name=collection_name, points=points)


In [13]:
# # Connect to Qdrant in local mode
# client = QdrantClient(path="local_qdrant.db")

# Create a new collection
collection_name = "faq_embeddings"
create_collection(client, collection_name)

# Generate embeddings for questions
embeddings = generate_embeddings(questions)

# Store embeddings and answers in Qdrant
store_embeddings_in_qdrant(client, collection_name, questions, answers, embeddings)

  client.recreate_collection(


# **Finding Semantic Similarity of Query with Pdf Text**

In [14]:
def query_similarity_docus_chunks(client, collection_name, query, top_k=3):
    """
    Search for the top_k most similar chunks in a Qdrant collection
    that stores 'chunk_text' as payload.
    
    Args:
        client: QdrantClient instance connected to the relevant DB file.
        collection_name (str): The name of the collection containing chunk vectors.
        query (str): The user query string.
        top_k (int): Number of top similar chunks to retrieve.

    Returns:
        chunks (List[str]): The retrieved chunk texts.
        scores (List[float]): The similarity scores.
    """
    # Use the same embedding model you used to store the chunks
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode([query])[0].tolist()

    search_results = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k
    )

    # Assuming you stored your PDF text in payload["chunk_text"]
    chunks = [result.payload["chunk_text"] for result in search_results]
    scores = [result.score for result in search_results]

    return chunks , scores


In [15]:
doc_collection_name = "docus_chunks" 
query = "What is the Olympic Torch made from?"

chunks, chunk_scores = query_similarity_docus_chunks(client, doc_collection_name, query, top_k=3)

print("Top 3 Retrieved Chunks:\n")
for i, (chunk, score) in enumerate(zip(chunks, chunk_scores), 1):
    print(f"Chunk {i} (score={score:.4f}):\n{chunk}\n{'-'*50}")


Top 3 Retrieved Chunks:

Chunk 1 (score=0.6124):
Cloud". It is made from aluminum. It is 72 centimetres high and weighs 985 grams. The torch is
designed to remain lit in 65 kilometre per hour (37 mile per hour) winds, and in rain of up to 50
millimetres (2 inches) per hour. An ignition key is used to ignite and extinguish the flame. The torch
is fueled by cans of propane. Each can will light the torch for 15 minutes. It is designed by a team
--------------------------------------------------
Chunk 2 (score=0.5139):
torch of the first torchbearer, a silver medalist of the 2004 Summer Olympics in taekwondo
Alexandros Nikolaidis from Greece, who handed the flame over to the second torchbearer, Olympic
champion in women\'s breaststroke Luo Xuejuan from China. Following the recent unrest in Tibet,
three members of Reporters Without Borders, including Robert Ménard, breached security and
--------------------------------------------------
Chunk 3 (score=0.4932):
issue, and had its staff in it

# **Finding Semantic Similarity of Query with Q&As**

In [16]:
# Function to query Qdrant for top-k similar embeddings
def query_similar_embeddings(client, collection_name, query, top_k=5):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode([query])[0].tolist()  # Generate query embedding

    # Perform the search to get the most similar vectors
    search_results = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k  # Number of nearest neighbors to retrieve
    )

    # Retrieve answers from the search results
    answers = [result.payload['answer'] for result in search_results]
    scores = [result.score for result in search_results]

    return answers , scores

In [17]:
# Query similar questions to a given query
query = "What is the Olympic Torch made from?"

retrieved_answers , scores = query_similar_embeddings(client, collection_name, query)


# Display retrieved answers
print("Retrieved Answers")
for i, answer in enumerate(retrieved_answers, 1):
    print(f"{i}: {answer}")

Retrieved Answers
1: Traditional scrolls.
2: Aluminum.
3: No, it is used during various events and ceremonies.
4: It is 72 centimetres high.
5: Macau.


In [18]:
print("Retrieved Scores")
for i, answer in enumerate(scores, 1):
    print(f"{i}: {answer}")

Retrieved Scores
1: 0.8671069641571898
2: 0.7382860309141743
3: 0.7291142660523945
4: 0.7124895793365728
5: 0.7088016914154129


### **Generating response from Retrived_Answers using Ollama Llama3 ChatQA 8b**

In [19]:
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def generate_answer_using_ollama(
    retrieved_context: str,
    query: str,
    base_url: str = "http://localhost:11434/v1",
    api_key: str = "ollama",
    model: str = "llama3-chatqa:8b"
) -> str:
    prompt = (
        f"Query: {query}\n"
        f"Context (answers from the most relevant context/ sentences related to this query): {retrieved_context}\n\n"
        f"Answer the query using only the provided context. Provide a concise, direct answer. If no relevant information is available, please indicate so."
    )
    payload = {
        "model": model,
        "prompt": prompt,
        "max_tokens": 2000,
        "temperature": 0.3,
    }

    try:
        logger.info("Sending request to Ollama...")
        response = requests.post(
            f"{base_url}/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=60
        )
        logger.info("Received response from Ollama")
        response.raise_for_status()
        data = response.json()
        if data.get("choices"):
            return data["choices"][0]["text"].strip()
        return "No response generated."
    except Exception as e:
        logger.error(f"Error generating answer: {e}")
        return "An error occurred while generating the answer."

In [20]:
query = "What is the Olympic Torch made from?"
if __name__ == "__main__":

    # Generate an answer using the local Ollama server
    final_answer = generate_answer_using_ollama(retrieved_answers, query)
    print("Generated Answer:", final_answer)

2025-04-12 15:03:12,843 - INFO - Sending request to Ollama...
2025-04-12 15:04:16,327 - INFO - Received response from Ollama


Generated Answer: Aluminum


# **RAG Pipelines**

In [21]:
def traditional_rag_pipeline(client, trad_collection_name, query):
    """
    Executes the traditional RAG approach.
    Returns results, retrieval time, and generation time.
    """
    start_retrieval = time.time()
    retrieved_context , similarity_scores = query_similarity_docus_chunks(client, trad_collection_name, query, top_k=3)  # Retrieve relevant sentences
    end_retrieval = time.time()
    retrieval_time = end_retrieval - start_retrieval

    start_answer = time.time()
    generated_answer = generate_answer_using_ollama(retrieved_context, query)  # Generate answer
    end_answer = time.time()
    generation_time = end_answer - start_answer

    total_time = retrieval_time + generation_time

    result = {
        "retrieved_context": retrieved_context,
        "retrieval_time": retrieval_time,
        "answer": generated_answer,  # Make sure this key is correctly returned
        "generation_time": generation_time,
        "total_time": total_time,
        "similarity score": similarity_scores
    }

    # Debug: Print the result to inspect the return value
    print("Traditional RAG Result Generated")

    return result


In [22]:
def mod_rag_pipeline(query, client, mod_collection_name, top_k=3):
    """
    Executes the second RAG approach using query embeddings.
    Returns results, retrieval time, and other metadata.
    """
    start_retrieval = time.time()
    answers, scores = query_similar_embeddings(client, mod_collection_name, query, top_k)  # Now returns two lists
    end_retrieval = time.time()
    retrieval_time = end_retrieval - start_retrieval

    if answers:
        # Combine answers with their corresponding similarity scores for context
        retrieved_documents = [f"Answer: {answers[i]}\nScore: {scores[i]:.2f}" for i in range(len(answers))]

        # Generate answer based on retrieved answers
        start_answer = time.time()
        generated_answer = generate_answer_using_ollama(answers, query)
        end_answer = time.time()
        answer_time = end_answer - start_answer

        total_time = retrieval_time + answer_time

        return {
            "retrieved_documents": retrieved_documents,
            "similarity_scores": scores,
            "retrieval_time": retrieval_time,
            "generated_answer": generated_answer,
            "generation_time": answer_time,
            "total_time": total_time
        }
    else:
        return {
            "retrieved_documents": [],
            "similarity_scores": [],
            "retrieval_time": retrieval_time,
            "generated_answer": None,
            "generation_time": 0,
            "total_time": retrieval_time
        }

In [23]:
def rag_pipeline(query, client, trad_collection_name, mod_collection_name, similarity_threshold=0.8, top_k=3):
    """
    Main pipeline that first tries query embeddings (approach 2).
    Falls back to traditional RAG if similarity scores are below the threshold.
    """
    # Step 1: Try query embeddings (Approach 2)
    query_similar_results = mod_rag_pipeline(query, client, mod_collection_name, top_k)

    # Check similarity scores
    if query_similar_results["similarity_scores"]:
        max_similarity = max(query_similar_results["similarity_scores"])

        if max_similarity >= similarity_threshold:
            # Return results from the query embeddings approach
            return {
                "case": "Q-A Index",
                "answer": query_similar_results["generated_answer"],
                "retrieved_context": "\n".join(query_similar_results["retrieved_documents"]),
                "retrieval_time": query_similar_results["retrieval_time"],
                "generated_context": query_similar_results["generated_answer"],
                "generation_time": query_similar_results["generation_time"],
                "total_time": query_similar_results["total_time"],
            }

    # Step 2: Fallback to traditional RAG if similarity is low or no results
    traditional_results = traditional_rag_pipeline(client, trad_collection_name, query)

    return {
        "case": "Traditional RAG",
        "answer": traditional_results["answer"],
        "retrieved_context": "\n".join(traditional_results["retrieved_context"]),
        "retrieval_time": traditional_results["retrieval_time"],
        "generation_time": traditional_results["generation_time"],
        "total_time": traditional_results["total_time"],
    }


In [24]:
# Execute the RAG pipeline
results = rag_pipeline(query="Where was ceremony of Olympics 2008 held?", client=client, trad_collection_name="docus_chunks", mod_collection_name="faq_embeddings", similarity_threshold=0.8, top_k=3)

# Print Results
print(f"Case: {results['case']}\n")
print(f"Answer: {results['answer']}\n")
print(f"Retrieved Context: {results['retrieved_context']}\n")
print(f"Retrieval Time: {results['retrieval_time']:.2f}s\n")
print(f"Generation Time: {results['generation_time']:.2f}s\n")
print(f"Total Time: {results['total_time']:.2f}s")


2025-04-12 15:04:16,375 - INFO - Use pytorch device_name: cuda
2025-04-12 15:04:16,375 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 60.22it/s]
2025-04-12 15:04:19,042 - INFO - Sending request to Ollama...
2025-04-12 15:05:22,581 - INFO - Received response from Ollama
2025-04-12 15:05:22,583 - INFO - Use pytorch device_name: cuda
2025-04-12 15:05:22,583 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 213.97it/s]
2025-04-12 15:05:25,298 - INFO - Sending request to Ollama...
2025-04-12 15:06:29,432 - INFO - Received response from Ollama


Traditional RAG Result Generated
Case: Traditional RAG

Answer: The ceremony was held in Beijing, China.

Retrieved Context: 2008_Summer_Olympics_torch_relay
done peacefully.". Also, Australia\'s ACT Chief Minister, Jon Stanhope confirmed that the Chinese
embassy was closely involve to ensure that "pro-China demonstrators vastly outnumbered Tibetan
activists." Australian freestyle swimmer and five-time Olympic gold medalist Ian Thorpe ended the
Olympic flame passed next to were the Parliament House, National Mosque, KL Tower and
Merdeka Stadium. A team of 1000 personnel from the Malaysian police Special Action Squad
guarded the event and escorted the torchbearers. The last time an Olympic torch relay was held in
2008_Summer_Olympics_torch_relay
2008_Summer_Olympics_torch_relay
San Francisco\'s Marina district and was then moved by bus to San Francisco International Airport
for a makeshift closing ceremony at the terminal, from which the free media was excluded. San Jose
Mercury News de

### Uploading Ground Truth CSVs and merging results into 1 Dataframe

In [25]:
def get_matching_csv_files(filenames, csv_folder_path):
    # Remove .pdf extension from each filename in the list
    filenames = [os.path.splitext(f)[0] for f in filenames]

    # Filter for CSV files that match the filenames list
    matching_csv_files = [
        os.path.join(csv_folder_path, f) for f in os.listdir(csv_folder_path)
        if f.endswith('.csv') and os.path.splitext(f)[0] in filenames
    ]

    return matching_csv_files

def merge_csv_files(file_paths):
    # Read and concatenate all CSV files into a single DataFrame
    dataframes = [pd.read_csv(file) for file in file_paths]
    merged_df = pd.concat(dataframes, ignore_index=True)

    return merged_df

import os
import pandas as pd

csv_folder_path = 'Ground Truths'

# Get the list of matching CSV files
matching_csv_files = get_matching_csv_files(filenames, csv_folder_path)

# Merge all matching CSV files into one DataFrame
merged_df = merge_csv_files(matching_csv_files)

# Initialize filtered_df to merged_df to ensure it's defined
filtered_df = merged_df

# Remove rows where the 'is_possible' column has a value of True
if 'is_possible' in merged_df.columns:
    filtered_df = merged_df[~merged_df['is_possible']]

# Display the filtered DataFrame or save it to a new CSV file
print("Filtered DataFrame (is_possible != True):")
print(filtered_df)


Filtered DataFrame (is_possible != True):
                               title  \
0   2008_Summer_Olympics_torch_relay   
1   2008_Summer_Olympics_torch_relay   
2                                NaN   
3                                NaN   
4                                NaN   
..                               ...   
89                               NaN   
90                               NaN   
91                               NaN   
92                               NaN   
93                               NaN   

                                              context  \
0   ['The 2008 Summer Olympics torch relay was run...   
1   ['The 2008 Summer Olympics torch relay was run...   
2                                                 NaN   
3                                                 NaN   
4                                                 NaN   
..                                                ...   
89                                                NaN   
90                   

In [26]:
def get_question_answer_columns(filtered_df):
    # Select only 'question' and 'answers' columns from the merged DataFrame
    if 'question' in merged_df.columns and 'answers' in merged_df.columns:
        return merged_df[['question', 'answers']]
    else:
        raise ValueError("The columns 'question' and/or 'answers' do not exist in the DataFrame.")

In [27]:
merged_df.to_csv('output_test.csv', index=False)

In [29]:
import pandas as pd

# Read the CSV file into a DataFrame
csv_path = "output_test.csv"
merged_df = pd.read_csv(csv_path)

In [30]:
new_df=get_question_answer_columns(merged_df)

In [31]:
new_df

Unnamed: 0,question,answers
0,When did the tradition of people carrying the ...,1936 Summer Olympics.
1,How many days did people carry the Olympic tor...,129 days
2,,
3,,
4,,
...,...,...
89,,
90,,
91,,
92,,


In [32]:
# Drop rows with NaN in either 'question' or 'answers' column
new_df = new_df.dropna(subset=['question', 'answers']).reset_index(drop=True)

# View the cleaned DataFrame
print(new_df)


                                            question                answers
0  When did the tradition of people carrying the ...  1936 Summer Olympics.
1  How many days did people carry the Olympic tor...               129 days
2  Along with pop, soul, rhythm and blues, quiet ...         easy listening
3  What was the prevailing style of adult contemp...              soft rock
4  Outside of employment, what is the other main ...              education
5  Affirmative action attempts to ask institution...      racial minorities
6               What type of carrier is the largest?          fleet carrier
7        What capability does a fleet carrier offer?              offensive
8  What is an aerodome with facilities for flight...                airport


## **Generating results for all Ground Truth Q&As**

In [38]:
import pandas as pd
import time

In [39]:
import torch
print("PyTorch CUDA available?", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))


PyTorch CUDA available? True
GPU name: NVIDIA GeForce GTX 1080 Ti


### **Final Answers Generate** ###

In [40]:
# Initialize empty columns in new_df. 
# If they don't exist, you can create them to ensure they are present before the loop.
new_df['modified_rag_retrieved_context'] = ""
new_df['modified_rag_refined_answer'] = ""
new_df['modified_rag_case'] = ""
new_df['modified_rag_retrieval_time'] = ""
new_df['modified_rag_generation_time'] = ""
new_df['modified_rag_total_time'] = ""

new_df['traditional_rag_retrieved_context'] = ""
new_df['traditional_rag_refined_answer'] = ""
new_df['traditional_rag_retrieval_time'] = ""
new_df['traditional_rag_generation_time'] = ""
new_df['traditional_rag_total_time'] = ""

output_file = 'Final_Answers_Generated.csv'

it = 0
for index, row in new_df.iterrows():
    query = row['question']  # Extract the question from the CSV

    # Pass the query through the RAG pipeline
    rag_result = rag_pipeline(
        query, 
        client=client, 
        trad_collection_name="docus_chunks", 
        mod_collection_name="faq_embeddings", 
        similarity_threshold=0.8, 
        top_k=3
    )

    # Extract RAG results
    retrival_time = rag_result["retrieval_time"]
    generation_time = rag_result["generation_time"]
    total_time = rag_result["total_time"]
    retrieved_context = rag_result["retrieved_context"]
    refined_answer = rag_result["answer"]
    case = rag_result["case"]

    # Update the DataFrame directly for the current row
    new_df.at[index, 'modified_rag_retrieved_context'] = retrieved_context
    new_df.at[index, 'modified_rag_refined_answer'] = refined_answer
    new_df.at[index, 'modified_rag_case'] = case
    new_df.at[index, 'modified_rag_retrieval_time'] = f"{retrival_time:.2f}"
    new_df.at[index, 'modified_rag_generation_time'] = f"{generation_time:.2f}"
    new_df.at[index, 'modified_rag_total_time'] = f"{total_time:.2f}"

    # Traditional RAG approach
    traditional_results = traditional_rag_pipeline(client, "docus_chunks", query)
    trad_rag_retrieved_context_str = "\n\n".join(traditional_results["retrieved_context"])
    trad_refined_answer = traditional_results["answer"]
    trad_retrieval_time = traditional_results["retrieval_time"]
    trad_generation_time = traditional_results["generation_time"]
    trad_total_time = traditional_results["total_time"]

    # Update DataFrame columns for traditional RAG
    new_df.at[index, 'traditional_rag_retrieved_context'] = trad_rag_retrieved_context_str
    new_df.at[index, 'traditional_rag_refined_answer'] = trad_refined_answer
    new_df.at[index, 'traditional_rag_retrieval_time'] = f"{trad_retrieval_time:.2f}"
    new_df.at[index, 'traditional_rag_generation_time'] = f"{trad_generation_time:.2f}"
    new_df.at[index, 'traditional_rag_total_time'] = f"{trad_total_time:.2f}"

    # After each iteration, save the partially updated DataFrame to CSV
    # You could also use a different file name for partial results, if you prefer.
    new_df.to_csv(output_file, index=False)

    it += 1
    print(f"Processed {it} queries. Saved partial results to {output_file}")

print("All queries processed. Final results saved to:", output_file)

2025-04-12 15:14:14,398 - INFO - Use pytorch device_name: cuda
2025-04-12 15:14:14,399 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 60.07it/s]
2025-04-12 15:14:21,468 - INFO - Sending request to Ollama...
2025-04-12 15:15:26,194 - INFO - Received response from Ollama
2025-04-12 15:15:26,196 - INFO - Use pytorch device_name: cuda
2025-04-12 15:15:26,198 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:15:29,454 - INFO - Sending request to Ollama...
2025-04-12 15:16:33,886 - INFO - Received response from Ollama
2025-04-12 15:16:33,888 - INFO - Use pytorch device_name: cuda
2025-04-12 15:16:33,888 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated


Batches: 100%|██████████| 1/1 [00:00<00:00, 60.93it/s]
2025-04-12 15:16:36,535 - INFO - Sending request to Ollama...
2025-04-12 15:17:41,011 - INFO - Received response from Ollama
2025-04-12 15:17:41,012 - INFO - Use pytorch device_name: cuda
2025-04-12 15:17:41,012 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 1 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:17:43,724 - INFO - Sending request to Ollama...
2025-04-12 15:17:46,290 - INFO - Received response from Ollama
2025-04-12 15:17:46,292 - INFO - Use pytorch device_name: cuda
2025-04-12 15:17:46,292 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:17:49,305 - INFO - Sending request to Ollama...
2025-04-12 15:18:53,795 - INFO - Received response from Ollama
2025-04-12 15:18:53,800 - INFO - Use pytorch device_name: cuda
2025-04-12 15:18:53,801 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 2 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 713.20it/s]
2025-04-12 15:18:56,692 - INFO - Sending request to Ollama...
2025-04-12 15:20:00,710 - INFO - Received response from Ollama
2025-04-12 15:20:00,714 - INFO - Use pytorch device_name: cuda
2025-04-12 15:20:00,714 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:20:03,627 - INFO - Sending request to Ollama...
2025-04-12 15:21:07,850 - INFO - Received response from Ollama
2025-04-12 15:21:07,851 - INFO - Use pytorch device_name: cuda
2025-04-12 15:21:07,851 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated


Batches: 100%|██████████| 1/1 [00:00<00:00, 126.22it/s]
2025-04-12 15:21:10,464 - INFO - Sending request to Ollama...
2025-04-12 15:21:17,417 - INFO - Received response from Ollama
2025-04-12 15:21:17,418 - INFO - Use pytorch device_name: cuda
2025-04-12 15:21:17,418 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 3 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.05it/s]
2025-04-12 15:21:20,798 - INFO - Sending request to Ollama...
2025-04-12 15:22:26,802 - INFO - Received response from Ollama
2025-04-12 15:22:26,805 - INFO - Use pytorch device_name: cuda
2025-04-12 15:22:26,806 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
2025-04-12 15:22:30,228 - INFO - Sending request to Ollama...
2025-04-12 15:23:37,118 - INFO - Received response from Ollama
2025-04-12 15:23:37,124 - INFO - Use pytorch device_name: cuda
2025-04-12 15:23:37,125 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 4 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 142.69it/s]
2025-04-12 15:23:40,106 - INFO - Sending request to Ollama...
2025-04-12 15:24:44,211 - INFO - Received response from Ollama
2025-04-12 15:24:44,213 - INFO - Use pytorch device_name: cuda
2025-04-12 15:24:44,214 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 143.28it/s]
2025-04-12 15:24:46,937 - INFO - Sending request to Ollama...
2025-04-12 15:25:51,235 - INFO - Received response from Ollama
2025-04-12 15:25:51,241 - INFO - Use pytorch device_name: cuda
2025-04-12 15:25:51,242 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 5 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 125.01it/s]
2025-04-12 15:25:54,049 - INFO - Sending request to Ollama...
2025-04-12 15:26:00,349 - INFO - Received response from Ollama
2025-04-12 15:26:00,351 - INFO - Use pytorch device_name: cuda
2025-04-12 15:26:00,352 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 129.94it/s]
2025-04-12 15:26:03,081 - INFO - Sending request to Ollama...
2025-04-12 15:27:10,971 - INFO - Received response from Ollama
2025-04-12 15:27:10,975 - INFO - Use pytorch device_name: cuda
2025-04-12 15:27:10,975 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated


Batches: 100%|██████████| 1/1 [00:00<00:00, 167.84it/s]
2025-04-12 15:27:13,622 - INFO - Sending request to Ollama...
2025-04-12 15:28:18,032 - INFO - Received response from Ollama
2025-04-12 15:28:18,033 - INFO - Use pytorch device_name: cuda
2025-04-12 15:28:18,033 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 6 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.11it/s]
2025-04-12 15:28:21,173 - INFO - Sending request to Ollama...
2025-04-12 15:29:25,355 - INFO - Received response from Ollama
2025-04-12 15:29:25,357 - INFO - Use pytorch device_name: cuda
2025-04-12 15:29:25,357 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:29:28,303 - INFO - Sending request to Ollama...
2025-04-12 15:30:33,647 - INFO - Received response from Ollama
2025-04-12 15:30:33,650 - INFO - Use pytorch device_name: cuda
2025-04-12 15:30:33,650 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 7 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.01it/s]
2025-04-12 15:30:36,807 - INFO - Sending request to Ollama...
2025-04-12 15:30:41,653 - INFO - Received response from Ollama
2025-04-12 15:30:41,654 - INFO - Use pytorch device_name: cuda
2025-04-12 15:30:41,654 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-04-12 15:30:44,273 - INFO - Sending request to Ollama...
2025-04-12 15:31:49,902 - INFO - Received response from Ollama
2025-04-12 15:31:49,908 - INFO - Use pytorch device_name: cuda
2025-04-12 15:31:49,908 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated
Processed 8 queries. Saved partial results to Final_Answers_Generated.csv


Batches: 100%|██████████| 1/1 [00:00<00:00, 125.34it/s]
2025-04-12 15:31:52,581 - INFO - Sending request to Ollama...
2025-04-12 15:31:56,565 - INFO - Received response from Ollama
2025-04-12 15:31:56,566 - INFO - Use pytorch device_name: cuda
2025-04-12 15:31:56,566 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 128.17it/s]
2025-04-12 15:31:59,791 - INFO - Sending request to Ollama...
2025-04-12 15:33:05,541 - INFO - Received response from Ollama
2025-04-12 15:33:05,542 - INFO - Use pytorch device_name: cuda
2025-04-12 15:33:05,542 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Traditional RAG Result Generated


Batches: 100%|██████████| 1/1 [00:00<00:00, 1089.43it/s]
2025-04-12 15:33:08,223 - INFO - Sending request to Ollama...
2025-04-12 15:34:14,134 - INFO - Received response from Ollama


Traditional RAG Result Generated
Processed 9 queries. Saved partial results to Final_Answers_Generated.csv
All queries processed. Final results saved to: Final_Answers_Generated.csv
