In [4]:
from langchain_community.document_loaders import UnstructuredURLLoader

In [5]:
with open('unique_links_list.txt', 'r') as file:
    lines = file.readlines()
    items = [line.strip() for line in lines]

In [6]:
urls = items

In [7]:
loader = UnstructuredURLLoader(urls=urls)

data = loader.load()

In [8]:
data[0]

Document(metadata={'source': 'https://datascience.uchicago.edu/education/masters-programs/ms-in-applied-data-science/'}, page_content="Elevate Your Expertise in Data Science\n\nThe University of Chicago’s MS in Applied Data Science program equips you with in-demand expertise and an unparalleled network of global alumni. Take the next step and start your application today.\n\nHow to Apply\n\nPrograms\n\nChoose from full- and part-time options in our In-Person and Online programs. Apply today!\n\nIn-Person Program\n\nIf you are an early career professional, need to complete a master's in one year, or require OPT STEM eligibility, the In-Person program is for you.\n\nLearn More\n\nOnline Program\n\nIf you want 360° flexibility and the same rigorous curriculum and outcomes as an in-person degree, the Online Program is for you.\n\nLearn More\n\nMBA/MS Program\n\nThe joint degree with UChicago’s Booth School of Business is ideal for ambitious students looking to supplement their MBA studies 

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(data)

len(all_splits)

129

In [55]:
len(all_splits[0].page_content)

1899

In [None]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = ''
os.environ['OPENAI_API_KEY'] = ''

In [11]:
import chromadb
from re import search
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

In [30]:
vectorstore = Chroma.from_documents(documents=all_splits,
                                    embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

retrieved_docs = retriever.invoke("What are the Core Courses of the Applied Data Science Program?")

len(retrieved_docs)

3

In [17]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """Given the context below, please provide an accurate and detailed response to the user's inquiry about the MS in Applied Data Science program at the University of Chicago.
Respond with a JSON object (not in a code block) in the following format:
{{
    "answer": "detailed answer using only information from the context",
    "confidence": "high/medium/low, based on how thoroughly the context supports your answer",
    "reasoning": "concise explanation of how the context informs your answer"
}}

If the context does not contain enough information, respond with:
{{
    "answer": "I'm sorry, but I cannot answer this question based on the provided context.",
    "confidence": "low",
    "reasoning": "The provided context lacks sufficient details to answer this inquiry"
}}

Context: {context}

Question: {question}

Guidelines:
1. Use only information from the context provided.
2. Craft a detailed response that addresses the inquiry directly.
3. Use bullet points for distinct points if applicable.
4. Include a relevant URL if it directly supports the answer.
5. Assign confidence based on the relevance and completeness of the context.
6. Briefly explain your reasoning, focusing on how the context justifies your answer.
7. Return ONLY the JSON object without any code block markers or extraneous text.
"""


prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Given the context below, please provide an accurate and detailed response to the user\'s inquiry about the MS in Applied Data Science program at the University of Chicago.\nRespond with a JSON object (not in a code block) in the following format:\n{{\n    "answer": "detailed answer using only information from the context",\n    "confidence": "high/medium/low, based on how thoroughly the context supports your answer",\n    "reasoning": "concise explanation of how the context informs your answer"\n}}\n\nIf the context does not contain enough information, respond with:\n{{\n    "answer": "I\'m sorry, but I cannot answer this question based on the provided context.",\n    "confidence": "low",\n    "reasoning": "The provided context lacks sufficient details to answer this inquiry"\n}}\n\nContext: {context}\n\nQuestion: {que

In [18]:
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

In [19]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# 1. Multi-Query Generation
template_multiquery = """You are an AI language model assistant. Your task is to generate 
five different versions of the given user question to retrieve relevant documents
from a vector database. Generate diverse perspectives on the user question to help
overcome some limitations of distance-based similarity search.
Provide these alternative questions separated by newlines.

Original question: {question}"""

prompt_multiquery = ChatPromptTemplate.from_template(template_multiquery)

In [20]:
# 2. RAG-Fusion Generation
template_ragfusion = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""

prompt_ragfusion = ChatPromptTemplate.from_template(template_ragfusion)

# Generate queries chains
generate_queries_multiquery = (
    prompt_multiquery 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

generate_queries_ragfusion = (
    prompt_ragfusion 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

# Function for unique union and rank fusion
def get_unique_union(documents):
    """Get unique union of retrieved documents"""
    seen = set()
    unique_docs = []
    
    # Flatten list of lists
    flattened_docs = [doc for sublist in documents for doc in sublist]
    
    for doc in flattened_docs:
        doc_str = f"{doc.page_content}{doc.metadata}"
        if doc_str not in seen:
            seen.add(doc_str)
            unique_docs.append(doc)
            
    return unique_docs

def reciprocal_rank_fusion(results, k=60):
    """Reciprocal Rank Fusion for combining multiple ranked lists"""
    flattened_docs = [doc for sublist in results for doc in sublist]
    fused_scores = {}
    
    for rank, doc in enumerate(flattened_docs):
        doc_str = f"{doc.page_content}{doc.metadata}"
        if doc_str not in fused_scores:
            fused_scores[doc_str] = 0
        fused_scores[doc_str] += 1 / (rank + k)
    
    reranked_docs = []
    for doc_str, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True):
        for doc in flattened_docs:
            if f"{doc.page_content}{doc.metadata}" == doc_str:
                reranked_docs.append((doc, score))
                break
                
    return reranked_docs

In [21]:
# Create retrieval chains
retrieval_chain_multiquery = (
    generate_queries_multiquery 
    | retriever.map() 
    | get_unique_union
)

retrieval_chain_ragfusion = (
    generate_queries_ragfusion 
    | retriever.map() 
    | reciprocal_rank_fusion
)

In [35]:
def test_retrieval_methods_detailed(questions, retrieval_chain_multiquery, retrieval_chain_ragfusion, prompt, llm):
    """Compare multiquery and RAG-fusion approaches with detailed output"""
    results = []
    
    for question in questions:
        # Generate queries for both methods
        multiquery_queries = generate_queries_multiquery.invoke({"question": question})
        ragfusion_queries = generate_queries_ragfusion.invoke({"question": question})
        
        # Get retrieved documents using both methods
        multiquery_docs = retrieval_chain_multiquery.invoke({"question": question})
        fusion_docs_with_scores = retrieval_chain_ragfusion.invoke({"question": question})
        
        # Extract just the documents from fusion results (without scores)
        fusion_docs = [doc for doc, score in fusion_docs_with_scores]
        
        # Create RAG chain
        rag_chain = prompt | llm | StrOutputParser()
        
        # Generate responses
        multiquery_answer = rag_chain.invoke({
            "context": "\n\n".join(doc.page_content for doc in multiquery_docs),
            "question": question
        })
        
        fusion_answer = rag_chain.invoke({
            "context": "\n\n".join(doc.page_content for doc in fusion_docs[:4]),  # Using top 4 docs
            "question": question
        })
        
        result = {
            "question": question,
            "multiquery": {
                "generated_queries": multiquery_queries,
                "top_chunks": [doc.page_content[:200] + "..." for doc in multiquery_docs[:3]],  # First 200 chars
                "answer": multiquery_answer
            },
            "ragfusion": {
                "generated_queries": ragfusion_queries,
                "top_chunks_with_scores": [
                    (doc.page_content[:200] + "...", score) 
                    for doc, score in fusion_docs_with_scores[:3]
                ],
                "answer": fusion_answer
            }
        }
        results.append(result)
    
    return results

# Test questions
Questions = [
    "What is tuition cost for the program?",
    "What scholarships are available for the program?",
    "What are the minimum scores for the TOEFL and IELTS English Language Requirement?",
    "Is there an application fee waiver?"
]

# Run detailed comparison
detailed_results = test_retrieval_methods_detailed(
    Questions,
    retrieval_chain_multiquery,
    retrieval_chain_ragfusion,
    prompt,
    llm
)

# Print detailed results
for result in detailed_results:
    print(f"\nQuestion: {result['question']}")
    
    print("\nMulti-Query Approach:")
    print("Generated queries:")
    for q in result['multiquery']['generated_queries']:
        print(f"- {q}")
    print("\nTop retrieved chunks:")
    for chunk in result['multiquery']['top_chunks']:
        print(f"- {chunk}")
    print("\nAnswer:")
    print(result['multiquery']['answer'])
    
    print("\nRAG-Fusion Approach:")
    print("Generated queries:")
    for q in result['ragfusion']['generated_queries']:
        print(f"- {q}")
    print("\nTop retrieved chunks (with scores):")
    for chunk, score in result['ragfusion']['top_chunks_with_scores']:
        print(f"- Score {score:.4f}: {chunk}")
    print("\nAnswer:")
    print(result['ragfusion']['answer'])
    
    print("\n" + "="*80)


Question: What is tuition cost for the program?

Multi-Query Approach:
Generated queries:
- 1. How much does the program charge for tuition fees?
- 2. Can you provide information on the program's tuition expenses?
- 3. What are the costs associated with enrolling in the program?
- 4. How expensive is it to attend the program in terms of tuition?
- 5. What is the price of tuition for the program?

Top retrieved chunks:
- MS in Applied Data Science

Overview

In-Person Program

Online Program

Capstone Projects

Course Progressions

How to Apply

Events & Deadlines

Tuition, Fees, & Aid

Our Students

Faculty, Instruct...
- Other Scholarships

Students are encouraged to investigate scholarships offered through various civic and professional organizations, foundations and state agencies. One place to search for scholarshi...
- Elective Courses (4)

You will complete 4 required electives toward your 12-course degree program. We continually add electives to evolve with the data science lan

In [36]:
from typing import List, Dict
from dataclasses import dataclass
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

@dataclass
class RetrievalMetrics:
    precision_at_k: float
    recall_at_k: float
    mean_reciprocal_rank: float
    relevance_score: float
    
def calculate_retrieval_metrics(retrieved_docs: List, 
                              question: str,
                              embeddings,
                              k: int = 4) -> RetrievalMetrics:
    """Calculate retrieval metrics for a single question"""
    
    # 1. Calculate semantic similarity between question and retrieved docs
    question_embedding = embeddings.embed_query(question)
    
    # Handle both regular docs and (doc, score) tuples
    if isinstance(retrieved_docs[0], tuple):
        doc_contents = [doc[0].page_content for doc in retrieved_docs[:k]]
    else:
        doc_contents = [doc.page_content for doc in retrieved_docs[:k]]
    
    doc_embeddings = [embeddings.embed_query(doc) for doc in doc_contents]
    
    similarities = [
        cosine_similarity(
            np.array(question_embedding).reshape(1, -1),
            np.array(doc_embedding).reshape(1, -1)
        )[0][0]
        for doc_embedding in doc_embeddings
    ]
    
    # 2. Calculate metrics
    # Assume documents with similarity > 0.5 are relevant
    relevant_docs = [sim > 0.5 for sim in similarities]
    
    # Precision@k
    precision = sum(relevant_docs) / k if k > 0 else 0
    
    # Recall@k (assuming we know total relevant docs = 10 for example)
    total_relevant = 10  # This should be adjusted based on your knowledge
    recall = sum(relevant_docs) / total_relevant if total_relevant > 0 else 0
    
    # Mean Reciprocal Rank (MRR)
    for i, is_relevant in enumerate(relevant_docs):
        if is_relevant:
            mrr = 1 / (i + 1)
            break
    else:
        mrr = 0
        
    # Average similarity score
    avg_similarity = np.mean(similarities)
    
    return RetrievalMetrics(
        precision_at_k=precision,
        recall_at_k=recall,
        mean_reciprocal_rank=mrr,
        relevance_score=avg_similarity
    )

In [38]:
# 1. Regular RAG Pipeline
regular_retrieval_chain = retriever

embeddings = OpenAIEmbeddings()

# 2. Multi-Query (already implemented)
retrieval_chain_multiquery = (
    generate_queries_multiquery 
    | retriever.map() 
    | get_unique_union
)

# 3. RAG-Fusion (already implemented)
retrieval_chain_ragfusion = (
    generate_queries_ragfusion 
    | retriever.map() 
    | reciprocal_rank_fusion
)

def compare_three_retrieval_methods(questions: List[str], 
                                  regular_chain,
                                  multiquery_chain, 
                                  ragfusion_chain,
                                  embeddings):
    """Compare all three retrieval methods using multiple metrics"""
    
    results = []
    for question in questions:
        # Get retrievals from all three methods
        regular_docs = regular_chain.invoke(question)
        multiquery_docs = multiquery_chain.invoke({"question": question})
        ragfusion_docs = ragfusion_chain.invoke({"question": question})
        
        # Calculate metrics
        regular_metrics = calculate_retrieval_metrics(
            regular_docs, question, embeddings
        )
        multiquery_metrics = calculate_retrieval_metrics(
            multiquery_docs, question, embeddings
        )
        ragfusion_metrics = calculate_retrieval_metrics(
            ragfusion_docs, question, embeddings
        )
        
        # Get answers from all three methods
        rag_chain = prompt | llm | StrOutputParser()
        
        regular_answer = rag_chain.invoke({
            "context": "\n\n".join(doc.page_content for doc in regular_docs),
            "question": question
        })
        
        multiquery_answer = rag_chain.invoke({
            "context": "\n\n".join(doc.page_content for doc in multiquery_docs),
            "question": question
        })
        
        fusion_answer = rag_chain.invoke({
            "context": "\n\n".join(doc[0].page_content for doc in ragfusion_docs[:4]),
            "question": question
        })
        
        results.append({
            "question": question,
            "regular": {
                "metrics": regular_metrics,
                "answer": regular_answer,
                "top_chunks": [doc.page_content[:200] + "..." for doc in regular_docs[:3]]
            },
            "multiquery": {
                "metrics": multiquery_metrics,
                "answer": multiquery_answer,
                "top_chunks": [doc.page_content[:200] + "..." for doc in multiquery_docs[:3]]
            },
            "ragfusion": {
                "metrics": ragfusion_metrics,
                "answer": fusion_answer,
                "top_chunks": [doc[0].page_content[:200] + "..." for doc in ragfusion_docs[:3]]
            }
        })
        
    return results

# Test questions
Questions = [
    "What is tuition cost for the program?",
    "What scholarships are available for the program?",
    "What are the minimum scores for the TOEFL and IELTS English Language Requirement?",
    "Is there an application fee waiver?"
]

# Run evaluation
eval_results = compare_three_retrieval_methods(
    Questions,
    regular_retrieval_chain,
    retrieval_chain_multiquery,
    retrieval_chain_ragfusion,
    embeddings
)

# Print detailed results
for result in eval_results:
    print(f"\nQuestion: {result['question']}")
    
    print("\nRegular RAG Metrics:")
    print(f"Precision@4: {result['regular']['metrics'].precision_at_k:.3f}")
    print(f"Recall@4: {result['regular']['metrics'].recall_at_k:.3f}")
    print(f"MRR: {result['regular']['metrics'].mean_reciprocal_rank:.3f}")
    print(f"Avg Relevance: {result['regular']['metrics'].relevance_score:.3f}")
    print("\nAnswer:", result['regular']['answer'])
    
    print("\nMulti-Query Metrics:")
    print(f"Precision@4: {result['multiquery']['metrics'].precision_at_k:.3f}")
    print(f"Recall@4: {result['multiquery']['metrics'].recall_at_k:.3f}")
    print(f"MRR: {result['multiquery']['metrics'].mean_reciprocal_rank:.3f}")
    print(f"Avg Relevance: {result['multiquery']['metrics'].relevance_score:.3f}")
    print("\nAnswer:", result['multiquery']['answer'])
    
    print("\nRAG-Fusion Metrics:")
    print(f"Precision@4: {result['ragfusion']['metrics'].precision_at_k:.3f}")
    print(f"Recall@4: {result['ragfusion']['metrics'].recall_at_k:.3f}")
    print(f"MRR: {result['ragfusion']['metrics'].mean_reciprocal_rank:.3f}")
    print(f"Avg Relevance: {result['ragfusion']['metrics'].relevance_score:.3f}")
    print("\nAnswer:", result['ragfusion']['answer'])
    
    print("\n" + "="*80)

# Calculate average metrics across all questions
avg_metrics = {
    "regular": {
        "precision": np.mean([r['regular']['metrics'].precision_at_k for r in eval_results]),
        "recall": np.mean([r['regular']['metrics'].recall_at_k for r in eval_results]),
        "mrr": np.mean([r['regular']['metrics'].mean_reciprocal_rank for r in eval_results]),
        "relevance": np.mean([r['regular']['metrics'].relevance_score for r in eval_results])
    },
    "multiquery": {
        "precision": np.mean([r['multiquery']['metrics'].precision_at_k for r in eval_results]),
        "recall": np.mean([r['multiquery']['metrics'].recall_at_k for r in eval_results]),
        "mrr": np.mean([r['multiquery']['metrics'].mean_reciprocal_rank for r in eval_results]),
        "relevance": np.mean([r['multiquery']['metrics'].relevance_score for r in eval_results])
    },
    "ragfusion": {
        "precision": np.mean([r['ragfusion']['metrics'].precision_at_k for r in eval_results]),
        "recall": np.mean([r['ragfusion']['metrics'].recall_at_k for r in eval_results]),
        "mrr": np.mean([r['ragfusion']['metrics'].mean_reciprocal_rank for r in eval_results]),
        "relevance": np.mean([r['ragfusion']['metrics'].relevance_score for r in eval_results])
    }
}

print("\nAverage Metrics Across All Questions:")
for method in ["regular", "multiquery", "ragfusion"]:
    print(f"\n{method.title()} RAG:")
    print(f"Avg Precision@4: {avg_metrics[method]['precision']:.3f}")
    print(f"Avg Recall@4: {avg_metrics[method]['recall']:.3f}")
    print(f"Avg MRR: {avg_metrics[method]['mrr']:.3f}")
    print(f"Avg Relevance: {avg_metrics[method]['relevance']:.3f}")


Question: What is tuition cost for the program?

Regular RAG Metrics:
Precision@4: 0.750
Recall@4: 0.300
MRR: 1.000
Avg Relevance: 0.834

Answer: {
    "answer": "The tuition for the MS in Applied Data Science program at the University of Chicago is $5,967 per course, with a total tuition cost of $71,604. Additionally, there is a non-refundable program enrollment deposit of $1,500, which is credited toward the first quarter’s tuition balance. It is important to note that tuition is expected to increase by 3-7% per year. The program also offers merit-based scholarships, and partial tuition scholarships are available to top applicants without requiring a separate application. Candidates are encouraged to apply early to maximize their chances of securing a scholarship.",
    "confidence": "high",
    "reasoning": "The context provides specific details about the tuition cost per course and the total tuition for the program, as well as information about the enrollment deposit and potential

In [40]:
# Ground truth answers for comparison
ground_truth = {
    "What is tuition cost for the program?": "Tuition for the MS in Applied Data Science program: $5,967 per course/$71,604 total tuition",
    "What scholarships are available for the program?": "The Data Science Institute Scholarship, MS in Applied Data Science Alumni Scholarship",
    "What are the minimum scores for the TOEFL and IELTS English Language Requirement?": "Minimum scores for the Master’s in Applied Data Science program: TOEFL, 102 (no subscore requirement); IELTS, 7 (no subscore requirement).",
    "Is there an application fee waiver?": "For questions regarding an application fee waiver, please refer to the Physical Sciences Division fee waiver policy.",
    "What are the deadlines for the in-person program?": "November 7, 2024 – Priority Application Deadline\nDecember 4, 2024 – Scholarship Priority Deadline\nJanuary 21, 2025 – International Application Deadline (requiring visa sponsorship from UChicago)\nMarch 4, 2025 – Second Priority Application Deadline\nMay 6, 2025 – Third Priority Application Deadline\nJune 23, 2025 – Final Application Deadline",
    "How long will it take for me to receive a decision on my application?": "In-Person application decisions are released approximately 1 to 2 months after each respected deadline. Online application decisions are released on a rolling basis",
    "Can I set up an advising appointment with the enrollment management team?": "Yes, meet your admissions counselor by scheduling an appointment https://apply-psd.uchicago.edu/portal/applied-data-science",
    "Where can I mail my official transcripts?": "The University of Chicago\nAttention: MS in Applied Data Science Admissions\n455 N Cityfront Plaza Dr., Suite 950\nChicago, Illinois 60611",
    "Does the Master’s in Applied Data Science Online program provide visa sponsorship?": "Only our In-Person, Full-Time program is Visa eligible",
    "How do I apply to the MBA/MS program?": "Applicants interested in the Joint MBA/MS degree will apply through Booth’s centralized, joint-application process. Applicants should complete the Chicago Booth Full-Time MBA application and select the MBA/MS in Applied Data Science as their program of interest",
    "Is the MS in Applied Data Science program STEM/OPT eligible?": "The MS in Applied Data Science program is STEM/OPT eligible",
    "How many courses must you complete to earn UChicago’s Master’s in Applied Data Science?": "To earn the MS-ADS degree students must successfully complete 12 courses (6 core, 4 elective, 2 Capstone) and our tailored Career Seminar."
}

# Adjusted evaluation function to compare answers to the ground truth
def compare_retrieval_methods_with_ground_truth(questions, regular_chain, multiquery_chain, ragfusion_chain, embeddings, ground_truth):
    """Compare all three retrieval methods against ground truth using multiple metrics"""
    
    results = []
    for question in questions:
        # Get retrievals from all three methods
        regular_docs = regular_chain.invoke(question)
        multiquery_docs = multiquery_chain.invoke({"question": question})
        ragfusion_docs = ragfusion_chain.invoke({"question": question})
        
        # Calculate metrics
        regular_metrics = calculate_retrieval_metrics(regular_docs, question, embeddings)
        multiquery_metrics = calculate_retrieval_metrics(multiquery_docs, question, embeddings)
        ragfusion_metrics = calculate_retrieval_metrics(ragfusion_docs, question, embeddings)
        
        # Get answers and compare with ground truth
        rag_chain = prompt | llm | StrOutputParser()
        
        regular_answer = rag_chain.invoke({"context": "\n\n".join(doc.page_content for doc in regular_docs), "question": question})
        multiquery_answer = rag_chain.invoke({"context": "\n\n".join(doc.page_content for doc in multiquery_docs), "question": question})
        fusion_answer = rag_chain.invoke({"context": "\n\n".join(doc[0].page_content for doc in ragfusion_docs[:4]), "question": question})
        
        # Add comparison results
        results.append({
            "regular": {"metrics": regular_metrics, "is_correct": regular_answer.strip() == ground_truth[question]},
            "multiquery": {"metrics": multiquery_metrics, "is_correct": multiquery_answer.strip() == ground_truth[question]},
            "ragfusion": {"metrics": ragfusion_metrics, "is_correct": fusion_answer.strip() == ground_truth[question]}
        })
        
    return results

# Compute average metrics only
def calculate_average_metrics(eval_results):
    avg_metrics = {"regular": {}, "multiquery": {}, "ragfusion": {}}
    for method in avg_metrics:
        avg_metrics[method] = {
            "precision": np.mean([r[method]["metrics"].precision_at_k for r in eval_results]),
            "recall": np.mean([r[method]["metrics"].recall_at_k for r in eval_results]),
            "mrr": np.mean([r[method]["metrics"].mean_reciprocal_rank for r in eval_results]),
            "relevance": np.mean([r[method]["metrics"].relevance_score for r in eval_results]),
            "accuracy": np.mean([r[method]["is_correct"] for r in eval_results]) * 100
        }
    return avg_metrics

# Test with the updated questions and answers
Questions = list(ground_truth.keys())

eval_results = compare_retrieval_methods_with_ground_truth(
    Questions,
    regular_retrieval_chain,
    retrieval_chain_multiquery,
    retrieval_chain_ragfusion,
    embeddings,
    ground_truth
)

# Calculate average metrics and print
avg_metrics = calculate_average_metrics(eval_results)

print("\nAverage Metrics Across All Questions:")
for method, metrics in avg_metrics.items():
    print(f"\n{method.title()} RAG:")
    print(f"Avg Precision@4: {metrics['precision']:.3f}")
    print(f"Avg Recall@4: {metrics['recall']:.3f}")
    print(f"Avg MRR: {metrics['mrr']:.3f}")
    print(f"Avg Relevance: {metrics['relevance']:.3f}")
    print(f"Accuracy: {metrics['accuracy']:.2f}%")



Average Metrics Across All Questions:

Regular RAG:
Avg Precision@4: 0.750
Avg Recall@4: 0.300
Avg MRR: 1.000
Avg Relevance: 0.850
Accuracy: 0.00%

Multiquery RAG:
Avg Precision@4: 0.542
Avg Recall@4: 0.217
Avg MRR: 1.000
Avg Relevance: 0.841
Accuracy: 0.00%

Ragfusion RAG:
Avg Precision@4: 0.438
Avg Recall@4: 0.175
Avg MRR: 1.000
Avg Relevance: 0.842
Accuracy: 0.00%
