In [None]:
# Bismillah

In [None]:
import time
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from sklearn.metrics.pairwise import cosine_similarity
!pip install sentence-transformers
from sentence_transformers import SentenceTransformer

!pip install faiss-cpu
!pip install rank_bm25


from sklearn.feature_extraction.text import TfidfVectorizer
!pip install -U langchain-community
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.retrievers import BM25Retriever
from langchain.schema import Document

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [None]:
# Load the source text and collapse it into ONE Document, so that the text
# splitter used later (not the loader) decides where chunk boundaries fall.
# NOTE(review): "/bible.txt" is an absolute path at the filesystem root --
# confirm that is where the file lives in the target runtime.
loader = TextLoader("/bible.txt")
# Author's note: the loader output used to be used directly
# (raw_documents = loader.load()), which reportedly left every line as its own
# document -- too small to chunk. Merging everything into a single document
# avoids that; grouping several verses per document would be an alternative.
# NOTE(review): the chunk printout further down still shows newlines between
# verses, which suggests loader.load() returns far fewer documents than one per
# line -- confirm what it actually returns.

raw_lines = loader.load()
# Strip every loaded piece and join the pieces with single spaces.
merged_text = " ".join([doc.page_content.strip() for doc in raw_lines])
# Downstream cells expect a list of Documents, hence the one-element list.
raw_documents = [Document(page_content=merged_text)]


In [None]:
# Experiment sweep configuration: every combination of the lists below is run.

# Chunk sizes passed to the text splitter as `chunk_size`.
# Other candidate values noted by the author: [256, 512, 1024, 2048]
chunk_sizes = [256]

# Number of documents requested from each retrieval method (the `k` argument).
doc_retrieval_counts = [3, 5, 10]

# Embedding models to compare; each name must be a key of the model map inside
# build_vectorstore().
embedding_methods = ["bge-small-en", "paraphrase-MiniLM-L6-v2", "all-MiniLM-L6-v2"]

In [None]:
# LLMs setup
def load_llm(model_name, max_new_tokens=5000):
    """Build a greedy-decoding text-generation pipeline for a causal LM.

    Args:
        model_name: Model identifier handed to ``from_pretrained`` for both the
            tokenizer and the model.
        max_new_tokens: Upper bound on the number of generated tokens per call.
            Defaults to 5000, the value that used to be hard-coded, so existing
            callers behave exactly as before; pass a smaller value to shorten
            runs.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``; sampling is
        disabled (``do_sample=False``).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )

In [None]:
# Generation models to evaluate. The key is the display name stored in the
# "model" field of every result row; the value is the pipeline returned by
# load_llm(), so each listed model is loaded as soon as this cell runs.
llm_models = {
    # "Falcon3-1B-Instruct" : load_llm("tiiuae/Falcon3-1B-Instruct"),
    "Qwen2.5-1.5B-Instruct": load_llm("Qwen/Qwen2.5-1.5B-Instruct"),
}

tokenizer_config.json:   0%|          | 0.00/365k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.78M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.34G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
def build_vectorstore(docs, method):
    """Index ``docs`` in FAISS and return the store plus a matching encoder.

    Args:
        docs: Documents to index.
        method: Short embedding name; must be one of the keys of the lookup
            table below, otherwise a ``KeyError`` is raised.

    Returns:
        A ``(vectorstore, sentence_transformer)`` tuple. Both are built from
        the same checkpoint id, which is therefore instantiated twice: once
        wrapped in ``HuggingFaceEmbeddings`` for FAISS and once as a plain
        ``SentenceTransformer``.
    """
    checkpoint_ids = {
        "bge-small-en": "BAAI/bge-small-en",
        "paraphrase-MiniLM-L6-v2": "sentence-transformers/paraphrase-MiniLM-L6-v2",
        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
    }
    checkpoint = checkpoint_ids[method]

    store = FAISS.from_documents(docs, HuggingFaceEmbeddings(model_name=checkpoint))
    sentence_encoder = SentenceTransformer(checkpoint)
    return store, sentence_encoder

In [None]:
# Function: Apply Reciprocal Rank Fusion (RRF) - used by hybrid_rrf_search() below
def rrf_fusion(retrieval_results, k=60, top_k=5):
    """Fuse several ranked document lists with Reciprocal Rank Fusion.

    Every document scores ``sum(1 / (k + rank))`` over the lists it appears
    in, with ``rank`` starting at 1 for the best hit (the standard RRF
    formulation). Documents are identified by their ``page_content``.

    Args:
        retrieval_results: Iterable of ranked lists, best hit first. Each item
            must expose a ``page_content`` attribute.
        k: Smoothing constant of the RRF formula.
        top_k: Maximum number of fused documents to return.

    Returns:
        Up to ``top_k`` documents ordered by descending fused score. The
        objects returned are the first-seen originals, so any metadata they
        carry is preserved instead of being dropped by re-wrapping the text.
    """
    fused_scores = {}
    first_seen = {}
    for ranked_list in retrieval_results:
        # 1-based ranks: matches the published formula and keeps k=0 from
        # dividing by zero on the top hit.
        for rank, doc in enumerate(ranked_list, start=1):
            key = doc.page_content
            fused_scores[key] = fused_scores.get(key, 0.0) + 1.0 / (k + rank)
            first_seen.setdefault(key, doc)

    # sorted() is stable, so ties keep first-seen order.
    ordered_keys = sorted(fused_scores, key=fused_scores.get, reverse=True)
    return [first_seen[key] for key in ordered_keys[:top_k]]

In [None]:
# def pad_documents(docs, all_docs, k):
#     """
#     Ensure k documents are returned. Pad with unused docs if needed.
#     Returns: (padded_docs, was_padded)
#     """
#     if len(docs) >= k:
#         return docs[:k], False

#     seen = set(doc.page_content for doc in docs)
#     padding = [doc for doc in all_docs if doc.page_content not in seen]
#     padded_docs = docs + padding[:k - len(docs)]
#     return padded_docs, True


# Retrieval Method Functions
def bm25_search(question, documents, k):
    """Return the top-``k`` documents for ``question`` using BM25.

    The retriever is built with ``k=k``. Previously it was built with its
    default ``k`` and the result was only sliced afterwards, so asking for
    more documents than that default silently returned fewer.

    Args:
        question: Query string.
        documents: Corpus to rank; an empty corpus yields an empty result
            instead of attempting to build an index.
        k: Number of documents wanted; non-positive values yield ``[]``.

    Returns:
        A list of at most ``k`` documents, best match first.
    """
    if k <= 0 or not documents:
        return []
    # NOTE: the BM25 index is rebuilt on every call.
    retriever = BM25Retriever.from_documents(documents, k=k)
    return retriever.get_relevant_documents(question)[:k]
# def bm25_search(question, documents, k):
#     retriever = BM25Retriever.from_documents(documents)
#     results = retriever.get_relevant_documents(question)
#     return pad_documents(results, documents, k)


def mmr_search(question, vectorstore, k):
    """Return the top-``k`` documents chosen by maximal marginal relevance.

    ``k`` is forwarded to the retriever through ``search_kwargs``. Previously
    the retriever ran with its default ``k`` and the result was only sliced
    afterwards, so larger requests silently came back short.

    Args:
        question: Query string.
        vectorstore: Store exposing ``as_retriever(search_type=..., search_kwargs=...)``.
        k: Number of documents wanted; non-positive values yield ``[]``.

    Returns:
        A list of at most ``k`` documents.
    """
    if k <= 0:
        return []
    # Keep the candidate pool at 20 for small k and grow it with k, so the
    # pool can never be smaller than the number of documents requested.
    search_kwargs = {"k": k, "fetch_k": max(20, 2 * k)}
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs=search_kwargs)
    return retriever.get_relevant_documents(question)[:k]
# def mmr_search(question, vectorstore, k, all_docs):
#     retriever = vectorstore.as_retriever(search_type="mmr")
#     results = retriever.get_relevant_documents(question)
#     return pad_documents(results, all_docs, k)


def semantic_search(question, vectorstore, k):
    """Return the top-``k`` documents by vector similarity.

    ``k`` is forwarded to the retriever through ``search_kwargs``. Previously
    the retriever ran with its default ``k`` and the result was only sliced
    afterwards, so larger requests silently came back short.

    Args:
        question: Query string.
        vectorstore: Store exposing ``as_retriever(search_kwargs=...)``.
        k: Number of documents wanted; non-positive values yield ``[]``.

    Returns:
        A list of at most ``k`` documents, best match first.
    """
    if k <= 0:
        return []
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    return retriever.get_relevant_documents(question)[:k]
# def semantic_search(question, vectorstore, k, all_docs):
#     retriever = vectorstore.as_retriever()
#     results = retriever.get_relevant_documents(question)
#     return pad_documents(results, all_docs, k)


# def hybrid_rrf_search(question, documents, vectorstore, k):
#     bm25 = bm25_search(question, documents, k)
#     sem = semantic_search(question, vectorstore, k)
#     mmr = mmr_search(question, vectorstore, k)
#     return rrf_fusion([bm25, sem], top_k = k)# mmr], top_k=k)

def hybrid_rrf_search(precomputed_results, k):
    """Fuse already-computed BM25, Semantic and MMR rankings with RRF.

    Args:
        precomputed_results: Mapping holding ranked lists under the keys
            "BM25", "Semantic" and "MMR" (other keys are ignored).
        k: Number of fused documents to return (passed to rrf_fusion as top_k).

    Returns:
        Whatever rrf_fusion returns for the three lists, in that order.
    """
    ranked_lists = [precomputed_results[name] for name in ("BM25", "Semantic", "MMR")]
    return rrf_fusion(ranked_lists, top_k=k)
# def hybrid_rrf_search(precomputed_results, all_docs, k):
#     fused = rrf_fusion([precomputed_results["BM25"], precomputed_results["Semantic"], precomputed_results["MMR"]], top_k=k)
#     return pad_documents(fused, all_docs, k)


In [None]:
# Used by ragas - Needs to be generated on a separate run.
# def generate_ground_truth(question, docs):
#     context = "\n\n".join(doc.page_content for doc in docs)
#     pipeline_gt = load_llm("Qwen/Qwen2.5-1.5B-Instruct")  # Different model from the tested ones
#     response = pipeline_gt(f"""You are a Bible expert. Using strictly and only the context provided below, answer the question factually in a detailed paragraph. Do not add personal thoughts, opinions, or explanations.
#                 Context:
#                 {context}

#                 Question: {question}
#                 Answer: """)[0]['generated_text']
#     return response

In [None]:
# Function: Evaluate Response
# def evaluate_response(question, response, retrieved_docs, ground_truth):
#     context_strs = [doc.page_content for doc in retrieved_docs]
#     data = {
#         "question": [question],
#         "contexts": [context_strs],
#         "answer": [response],
#         "ground_truth": [ground_truth]
#     }
#     ragas_dataset = Dataset.from_dict(data)
#     scores = evaluate(ragas_dataset, metrics=[faithfulness, answer_relevancy])
#     return float(scores["faithfulness"][0]), float(scores["answer_relevancy"][0])

def evaluate_response(question, response, retrieved_docs, ground_truth, embedder):
    """Score a generated answer with embedding cosine similarities.

    Args:
        question: The question that was asked.
        response: The generated answer being scored.
        retrieved_docs: Documents (objects with ``page_content``) given to the
            model; their texts are joined with single spaces into one context.
        ground_truth: Reference answer text.
        embedder: Object whose ``encode(list_of_str)`` returns one vector per
            input string.

    Returns:
        Dict with three cosine similarities, each rounded to 4 decimals:
        "faithfulness" (answer vs. context), "relevance" (answer vs. question)
        and "similarity" (answer vs. reference answer).
    """
    joined_context = " ".join(doc.page_content for doc in retrieved_docs)

    # One encode() call per text, in the same order as before.
    answer_emb = embedder.encode([response])
    context_emb = embedder.encode([joined_context])
    question_emb = embedder.encode([question])
    reference_emb = embedder.encode([ground_truth])

    def similarity_to_answer(other_emb):
        # cosine_similarity returns a 1x1 matrix for two single-row inputs.
        return round(cosine_similarity(answer_emb, other_emb)[0][0], 4)

    return {
        "faithfulness": similarity_to_answer(context_emb),
        "relevance": similarity_to_answer(question_emb),
        "similarity": similarity_to_answer(reference_emb),
    }

In [None]:
def format_retrieved_docs(docs):
    """Render documents as a numbered list separated by blank lines.

    Each entry looks like ``[n] <page_content>`` with ``n`` starting at 1; an
    empty input produces an empty string.
    """
    numbered_entries = []
    for position, doc in enumerate(docs, start=1):
        numbered_entries.append(f"[{position}] {doc.page_content}")
    return "\n\n".join(numbered_entries)

In [None]:
# results = []
# question = "What does the Bible say about forgiveness?"

# Questions and answers
questions_and_answers = [
    {"question": "What does the Bible say about forgiveness?",
     "ground_truth": f"""The Bible presents forgiveness as a central theme in the relationship between God and humanity, as well as among individuals. From Genesis to Revelation, forgiveness reveals God’s mercy, His justice, and the call for believers to live in harmony with others through grace and compassion.
      God’s forgiveness is abundant and freely offered to those who repent. Throughout Scripture, God is portrayed as merciful, slow to anger, and ready to pardon. For example, 1 John 1:9 affirms, “If we confess our sins, he is faithful and just to forgive us our sins, and to cleanse us from all unrighteousness.” This promise shows that forgiveness is not earned by works, but received through confession and faith in God’s righteousness. Similarly, Psalm 103:12 expresses the completeness of God's forgiveness: “As far as the east is from the west, so far hath he removed our transgressions from us.” God’s forgiveness restores fellowship with Him and gives peace to the repentant heart.
      Christians are also called to forgive others, as a reflection of the forgiveness they have received. Ephesians 4:32 commands, “And be ye kind one to another, tenderhearted, forgiving one another, even as God for Christ's sake hath forgiven you.” This reveals that forgiveness is not merely an act of courtesy but a divine obligation rooted in the work of Christ. Jesus emphasized this in the Lord’s Prayer, stating in Matthew 6:14-15 that if we do not forgive others, our own forgiveness from God is hindered. Forgiveness is thus not optional—it is evidence of a transformed heart.
      Jesus also taught that forgiveness should be unlimited and sincere. When Peter asked how many times he should forgive someone, Jesus replied, “Until seventy times seven” (Matthew 18:22), indicating that there should be no limit to our willingness to forgive. This teaching is reinforced in the parable of the unforgiving servant (Matthew 18:23-35), where a man forgiven a great debt by his master refuses to forgive a small debt owed to him. The master’s judgment illustrates God’s displeasure when forgiven people harbor unforgiveness.
      Finally, forgiveness is made possible through the atoning sacrifice of Jesus Christ. The cross stands as the ultimate expression of God’s love and forgiveness. Acts 13:38-39 proclaims that through Jesus, forgiveness of sins is preached and all who believe are justified. The believer’s assurance of pardon is rooted in Christ’s finished work, which not only cleanses the soul but also empowers the believer to forgive others.
      In sum, forgiveness in the Bible is a divine gift that believers are to receive and extend. It is foundational to Christian faith and essential to walking in the love and grace of God."""},
    {"question": "What is the greatest commandment?",
     "ground_truth": f"""The greatest commandment, according to Jesus Christ, is to love God with all one’s being. When asked by a Pharisee which commandment in the law was the greatest, Jesus responded in Matthew 22:37-38 (KJV), “Thou shalt love the Lord thy God with all thy heart, and with all thy soul, and with all thy mind. This is the first and great commandment.” This commandment is a quotation from Deuteronomy 6:5, a foundational verse in the Old Testament known as part of the Shema, which was recited daily by faithful Jews. It emphasizes total devotion to God, involving every part of a person—their emotions, soul, and intellect. Loving God fully is the highest priority and the foundation of all obedience in the believer’s life.
      Jesus went on to say that the second commandment is closely related: “And the second is like unto it, Thou shalt love thy neighbour as thyself” (Matthew 22:39). This commandment, drawn from Leviticus 19:18, teaches that genuine love for God will naturally result in love for others. It reflects the outworking of divine love in human relationships. Jesus concluded by saying, “On these two commandments hang all the law and the prophets” (Matthew 22:40), meaning that all the moral teachings of Scripture are rooted in these two principles. They encapsulate the entire moral will of God as revealed in the Law and the Prophets.
      Together, these commandments reveal that true obedience to God is not just about external rituals or legalistic rule-keeping, but about a heart transformed by love. Loving God with all one's heart and loving others as oneself sums up the Christian ethic and forms the basis for all other commandments. These two principles guide the believer’s conduct, showing that love is both the motive and the measure of spiritual maturity."""},
    {"question": "What is the Bible's view on wealth and poverty?",
     "ground_truth": f"""The Bible addresses the themes of wealth and poverty with deep spiritual insight, offering guidance on how both should be viewed in light of God’s kingdom. Wealth itself is not condemned, but the misuse of it, the love of it, and trust in riches are strongly warned against. Conversely, poverty is not praised for its own sake, but the poor are often depicted as those whom God defends and blesses when they remain faithful.
      Scripture teaches that all wealth ultimately belongs to God, and humans are stewards of what He provides. Deuteronomy 8:18 (KJV) states, “But thou shalt remember the LORD thy God: for it is he that giveth thee power to get wealth.” Wealth, therefore, should be handled with gratitude and responsibility. The danger lies not in having riches, but in letting them become a source of pride or idolatry. 1 Timothy 6:10 warns, “For the love of money is the root of all evil,” showing that an improper attachment to riches can lead to spiritual ruin. Jesus also taught, “Ye cannot serve God and mammon” (Matthew 6:24), emphasizing the incompatibility of serving both God and wealth.
      On the other hand, the Bible consistently upholds a concern for the poor and needy. God is described as their defender and provider. Proverbs 19:17 (KJV) says, “He that hath pity upon the poor lendeth unto the LORD; and that which he hath given will he pay him again.” Acts of generosity are not merely social duties but are considered acts of righteousness before God. The Law, the Prophets, and the teachings of Jesus call for justice, compassion, and active care for the poor. In the ministry of Jesus, the poor often received His special attention. In Luke 4:18, Jesus said He was sent to “preach the gospel to the poor,” signaling their significance in God’s redemptive plan.
      Importantly, Jesus taught that true riches are spiritual and not material. In Luke 12:15, He warned, “Take heed, and beware of covetousness: for a man's life consisteth not in the abundance of the things which he possesseth.” Christians are encouraged to seek treasures in heaven, not on earth (Matthew 6:19-21), reflecting an eternal perspective. Those who are rich are exhorted to be generous and not high-minded, recognizing their accountability to God (1 Timothy 6:17-19).
      In conclusion, the Bible presents a balanced view of wealth and poverty. Wealth is not inherently sinful, but it must be managed with humility, generosity, and a heart submitted to God. Poverty is not a curse when coupled with godliness, and the poor are often shown to be spiritually rich in faith. Both conditions are opportunities to glorify God, either by using wealth for His purposes or by trusting Him in need."""
    },
    {"question": "Explain the concept of grace in the Bible.",
     "ground_truth": f"""The concept of grace in the Bible is foundational to the message of salvation and Christian life. Grace refers to the unmerited favor of God toward sinners. It is God’s loving kindness extended to humanity, not because of any worth or works of their own, but because of His own mercy and purpose. The clearest expression of grace is found in the person and work of Jesus Christ, through whom salvation is freely offered to all who believe.
     Scripture declares that salvation is by grace, not by works. Ephesians 2:8-9 (KJV) states, “For by grace are ye saved through faith; and that not of yourselves: it is the gift of God: Not of works, lest any man should boast.” This passage highlights that grace is a gift—it cannot be earned, deserved, or bought. It is entirely the result of God’s initiative. Romans 11:6 further affirms, “And if by grace, then is it no more of works: otherwise grace is no more grace.” Grace thus magnifies God's sovereignty and love, while humbling the sinner before Him.
     Grace is also transformative, not merely a covering for sin. Titus 2:11-12 (KJV) teaches, “For the grace of God that bringeth salvation hath appeared to all men, Teaching us that, denying ungodliness and worldly lusts, we should live soberly, righteously, and godly, in this present world.” True grace changes the heart and empowers the believer to live a holy life. It instructs, sanctifies, and sustains the Christian throughout their journey. Grace does not excuse sin; rather, it enables victory over sin through the power of the Holy Spirit.
     Furthermore, grace is abundant and available to all, regardless of background or past sin. The Apostle Paul, once a persecutor of the church, described himself as a chief example of God's grace. In 1 Corinthians 15:10, he said, “But by the grace of God I am what I am: and his grace which was bestowed upon me was not in vain.” His life was radically changed by God’s grace, demonstrating that no one is beyond its reach.
     In summary, grace in the Bible is the generous, unearned favor of God extended to undeserving sinners. It is the basis of salvation, the power for holy living, and the expression of God’s infinite love. Grace exalts God as the giver and sustainer of life and calls the believer to live in gratitude and obedience."""
    }
]

# Results
results = []

In [None]:
# Generating ground truth to be used for evaluation by ragas
# splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
# docs_for_eval = splitter.split_documents(raw_documents)
# for i, doc in enumerate(docs_for_eval[:5]):
#     print(f"Chunk {i+1} ({len(doc.page_content)} chars):\n{doc.page_content}\n")



# Ground Truth for "Forgiveness" question by Falcon1B
# The Bible presents forgiveness as a central theme, particularly in the context of personal relationships and spiritual growth. It emphasizes the importance of extending forgiveness to others, as seen in Mark 11:25-27, where Jesus teaches the necessity of forgiving one's enemies to receive divine forgiveness. This is further reinforced in Ephesians 1:7-9, where the concept of redemption through Christ's blood is highlighted, and in Matthew 6:11-14, where the practice of forgiving others is encouraged, with the understanding that forgiveness is a gift from God and is reciprocated by divine forgiveness.

# The Bible also underscores the role of forgiveness in maintaining spiritual health and harmony. In 2 Corinthians 2:9-11, Paul emphasizes the need for believers to forgive others, as it is a demonstration of their faith and commitment to Christ. This act of forgiveness is not only a response to the forgiveness extended by God but also a means to strengthen one's own spiritual resolve and resist the temptations of sin.

# Furthermore, the Bible teaches that forgiveness is not merely a personal act but a communal one, as seen in Ephesians 4:32, where believers are called to forgive one another, reflecting the unity and love within the Christian community. This communal aspect of forgiveness is crucial for building a supportive and loving environment, as it mirrors the love and reconciliation God desires among His followers.

# In summary, the Bible teaches that forgiveness is a profound act of love and grace, essential for both personal spiritual growth and the health of the Christian community. It is a practice that requires self-awareness, humility, and a deep understanding of God's boundless mercy and forgiveness. Through forgiveness, believers are encouraged to emulate Christ's example, fostering a spirit of reconciliation and unity in their relationships with others and with God.
###############################################
################################################
#################################################

# Ground Truth for "Forgiveness" question by Qwen2.5-1.5B
# The Bible teaches that forgiveness is essential for spiritual growth and unity with God.
# It emphasizes the importance of forgiving others, both within oneself and towards those who wronged them.
# Forgiveness allows individuals to move past hurtful experiences and fosters reconciliation between people.
# This concept is further emphasized by Jesus' teachings in Matthew 6:14-15, where He states that if one forgives others, God will forgive them as well.
# Additionally, Paul's letter to the Ephesians highlights the divine nature of forgiveness, stating that God has given believers redemption through His blood and has forgiven them according to His grace.
# These verses underscore the significance of forgiveness in Christian theology and its role in maintaining harmony among believers.
# Furthermore, 2 Corinthians 2:9-11 underscores the need for forgiveness in relationships, suggesting that forgiveness can protect individuals from being used by Satan.
# Overall, the Bible portrays forgiveness as a fundamental aspect of faith and spirituality, promoting healing and unity within communities.
###################################################
####################################################
########################################################






In [None]:
# Sanity check of the chunking: for every configured chunk size, split the
# merged text and print a sample of the resulting chunks with their lengths.
for chunk_size in chunk_sizes:
    print("Chunk Size: ", chunk_size)
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=50)
    docs = splitter.split_documents(raw_documents)
    # Only chunks 21..44 are shown; the printed numbering restarts at 1.
    for i, doc in enumerate(docs[21:45]):
        print(f"Chunk {i+1} ({len(doc.page_content)} chars):\n{doc.page_content}\n")
    print("--------------------------------------------------------------------")

Chunk Size:  256
Chunk 1 (217 chars):
Genesis 1:29	And God said, Behold, I have given you every herb bearing seed, which [is] upon the face of all the earth, and every tree, in the which [is] the fruit of a tree yielding seed; to you it shall be for meat.

Chunk 2 (206 chars):
Genesis 1:30	And to every beast of the earth, and to every fowl of the air, and to every thing that creepeth upon the earth, wherein [there is] life, [I have given] every green herb for meat: and it was so.

Chunk 3 (223 chars):
Genesis 1:31	And God saw every thing that he had made, and, behold, [it was] very good. And the evening and the morning were the sixth day.
Genesis 2:1	Thus the heavens and the earth were finished, and all the host of them.

Chunk 4 (142 chars):
Genesis 2:2	And on the seventh day God ended his work which he had made; and he rested on the seventh day from all his work which he had made.

Chunk 5 (142 chars):
Genesis 2:3	And God blessed the seventh day, and sanctified it: because that in it

In [None]:
# ground_truth = generate_ground_truth(question, semantic_search(question, build_vectorstore(docs_for_eval, "bge-small-en"), 5))
# ground_truth = f"""The Bible teaches that forgiveness is essential for spiritual growth and unity with God.
# It emphasizes the importance of forgiving others, both within oneself and towards those who wronged them.
# Forgiveness allows individuals to move past hurtful experiences and fosters reconciliation between people.
# This concept is further emphasized by Jesus' teachings in Matthew 6:14-15, where He states that if one forgives others, God will forgive them as well.
# Additionally, Paul's letter to the Ephesians highlights the divine nature of forgiveness, stating that God has given believers redemption through His blood and has forgiven them according to His grace.
# These verses underscore the significance of forgiveness in Christian theology and its role in maintaining harmony among believers.
# Furthermore, 2 Corinthians 2:9-11 underscores the need for forgiveness in relationships, suggesting that forgiveness can protect individuals from being used by Satan.
# Overall, the Bible portrays forgiveness as a fundamental aspect of faith and spirituality, promoting healing and unity within communities.
# """
# print("Generated ground truth: \n", ground_truth)

In [None]:
def _build_prompt(context, question):
    """Return the prompt used for EVERY retrieval method.

    A single template keeps the conditions comparable: the two inline copies
    it replaces carried different amounts of embedded indentation, so the
    Hybrid runs were prompted differently from the other methods.
    """
    return (
        "You are a Bible expert. Using strictly and only the context provided below, "
        "answer the question factually in a couple of paragraphs. "
        "Do not add personal thoughts, opinions, or explanations.\n"
        "Context:\n"
        f"{context}\n\n"
        f"Question: {question}\n"
        "Answer: "
    )


def _extract_answer(raw_response, prompt):
    """Return only the newly generated text from a full pipeline output.

    When the output starts with the prompt, the prompt is removed as a prefix.
    Splitting on the last "Answer:" (the previous approach) threw away part of
    the answer whenever the model itself produced another "Answer:" marker.
    The "Answer:" split is kept only as a fallback for outputs that do not
    start with the prompt.
    """
    prompt_echoed = raw_response.startswith(prompt)
    generated = raw_response[len(prompt):] if prompt_echoed else raw_response

    if "<|assistant|>" in generated:  # Falcon has this instead of "Answer"
        generated = generated.split("<|assistant|>")[-1]
    elif not prompt_echoed and "Answer:" in generated:
        generated = generated.split("Answer:")[-1]
    return generated.strip()


def _generate_and_score(question, ground_truth, retrieved_docs, method_name, *,
                        pipeline_model, embedder, k_docs, chunk_size,
                        model_name, embedding_type, extra_time=0.0):
    """Generate an answer from ``retrieved_docs``, score it, return one result row.

    ``time_taken`` covers prompt construction and generation plus
    ``extra_time`` (used for the fusion step of the hybrid method). As before,
    the retrieval calls made while filling ``precomputed`` are not included.
    """
    print("retrieval method:", method_name)
    start_time = time.perf_counter()
    llm_docs = "\n\n".join(doc.page_content for doc in retrieved_docs)
    input_text = _build_prompt(llm_docs, question)
    raw_response = pipeline_model(input_text)[0]['generated_text']
    response = _extract_answer(raw_response, input_text)
    elapsed_time = extra_time + (time.perf_counter() - start_time)

    eval_scores = evaluate_response(question, response, retrieved_docs, ground_truth, embedder)
    return {
        "query": question,
        "number_of_retrieved_documents": k_docs,
        "number_of_retrieved_documents_actual": len(retrieved_docs),
        "retrieved_lengths": [len(doc.page_content) for doc in retrieved_docs],
        "ground_truth": ground_truth,
        "raw_response": raw_response,
        "generated_response": response,
        "chunk_size": chunk_size,
        "retrieval_method": method_name,
        "model": model_name,
        "embedding_type": embedding_type,
        "time_taken": elapsed_time,
        **eval_scores
    }


# Full sweep: chunk size x embedding model x LLM x k x question x retrieval method.
# One row per combination is appended to the global `results` list.
for chunk_size in chunk_sizes:
    print("Chunk Size: ", chunk_size)
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=50)
    docs = splitter.split_documents(raw_documents)

    for embedding_type in embedding_methods:
        print("Embedding type: ", embedding_type)
        vectorstore, embedder = build_vectorstore(docs, embedding_type)

        for model_name, pipeline_model in llm_models.items():
            print("LLM model used: ", model_name)
            print("beginning retrivals! \n")

            for k_docs in doc_retrieval_counts:
                print("Number of documents being retrived: ", k_docs)

                for q_idx, qa in enumerate(questions_and_answers, 1):
                    print(f"Processing Question {q_idx}: {qa['question']}")
                    question = qa["question"]
                    ground_truth = qa["ground_truth"]

                    # Retrieve once per method; the hybrid method reuses these lists.
                    precomputed = {
                        "BM25": bm25_search(question, docs, k_docs),
                        "Semantic": semantic_search(question, vectorstore, k_docs),
                        "MMR": mmr_search(question, vectorstore, k_docs)
                    }
                    run_fields = {
                        "pipeline_model": pipeline_model,
                        "embedder": embedder,
                        "k_docs": k_docs,
                        "chunk_size": chunk_size,
                        "model_name": model_name,
                        "embedding_type": embedding_type,
                    }

                    # Dict order is BM25, Semantic, MMR.
                    for method_name, retrieved_docs in precomputed.items():
                        results.append(_generate_and_score(
                            question, ground_truth, retrieved_docs, method_name, **run_fields
                        ))

                    # Hybrid after precomputation; the fusion step is timed and
                    # counted in time_taken.
                    fusion_start = time.perf_counter()
                    fused_docs = hybrid_rrf_search(precomputed, k_docs)
                    fusion_time = time.perf_counter() - fusion_start
                    results.append(_generate_and_score(
                        question, ground_truth, fused_docs, "Hybrid (RRF)",
                        extra_time=fusion_time, **run_fields
                    ))

    print("-----------------")
                # for method_name, method_func in {
                #     "BM25": lambda q, d, k: bm25_search(q, d, k),
                #     "Semantic": lambda q, d, k: semantic_search(q, vectorstore, k),
                #     # "MMR": lambda q, d, k: mmr_search(q, vectorstore, k), # takes a long time
                #     # "Hybrid (RRF)": lambda q, d, k: hybrid_rrf_search(q, d, vectorstore, k) # taking long
                # }.items():

                #     print("retrieval method: ", method_name)
                #     start_time = time.time()
                #     retrieved_docs = method_func(question, docs, k_docs)
                #     formatted_docs = format_retrieved_docs(retrieved_docs)

                #     input_text = f"Context: {formatted_docs}\n\nQuestion: {question}\nAnswer:"
                #     response = pipeline_model(input_text)[0]['generated_text']

                #     elapsed_time = time.time() - start_time
                #     # faithfulness, relevancy = evaluate_response(question, response, retrieved_docs, ground_truth)

                #     results.append({
                #         "query": question,
                #         "retrieved_documents": formatted_docs,
                #         "generated_response": response,
                #         "chunk_size": chunk_size,
                #         "retrieval_method": method_name,
                #         "model": model_name,
                #         "embedding_type": embedding_type,
                #         # "faithfulness": faithfulness,
                #         # "relevancy": relevancy,
                #         "time_taken": elapsed_time,
                #         "num_documents": k_docs
                #     })


In [None]:
# results_df = pd.DataFrame(results)
# results_df.to_csv("falcon1B_Results.csv", index=False)
# print(results_df)
results_df = pd.DataFrame(results)

# Name the output after the models and chunk sizes that were actually run. The
# previous hard-coded "falcon1B_chunk256" label did not follow the configured
# `llm_models` / `chunk_sizes`, which mislabels results from any other model.
model_tag = "-".join(name.replace("/", "_") for name in llm_models)
chunk_tag = "-".join(str(size) for size in chunk_sizes)
results_csv_path = f"{model_tag}_chunk{chunk_tag}_Results.csv"

# Only the columns listed here are exported; "ground_truth" and
# "number_of_retrieved_documents_actual" stay in `results` but are left out.
results_df[[
    "query",
    "number_of_retrieved_documents",
    "retrieved_lengths",
    "chunk_size",
    "retrieval_method",
    "model",
    "embedding_type",
    "time_taken",
    "faithfulness",
    "relevance",
    "similarity",
    "raw_response",
    "generated_response"
]].to_csv(results_csv_path, index=False)


In [None]:
# Dump every collected result row, one "key: value" line per field.
# Rows are numbered from 1 in the order they were appended to `results`.
for i, result in enumerate(results, 1):
    print(f"\nResult {i}:")
    for key, value in result.items():
        print(f"  {key}: {value}")
