# Advanced RAG

Cassandra Maldonado

In [11]:
!pip -q install -U langchain langchain-openai

In [12]:
import os

def _get_secret(name: str):
    v = os.getenv(name)
    if v:
        return v
    try:
        from google.colab import userdata
        v = userdata.get(name)
        if v:
            return v
    except Exception:
        pass
    return None

OPENAI_API_KEY = _get_secret("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Missing OPENAI_API_KEY."
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


In [13]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.0,
    max_retries=0
)
print("OpenAI model:", llm.model_name)

OpenAI model: gpt-4o-mini


In [6]:
!pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
!pip install langchain langchain-community langchain-chroma langchain-openai langchain-core langchain-google-genai langchain-experimental
!pip install rank_bm25 sentence-transformers pypdf python-dotenv  scikit-learn numpy

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-chroma
  Downloading langchain_chroma-0.2.5-py3-none-any.whl.metadata (1.1 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.9-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain-experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting chromadb>=1.0.9 (from langchain-chroma)
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rank_bm25, pypdf
Successfully installed pypdf-6.0.0 rank_bm25-0.2.2


In [17]:
import os
import glob
from typing import List, Dict, Optional
from pathlib import Path
import logging
import time

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
import getpass
from sklearn.metrics.pairwise import cosine_similarity
import torch
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings

In [21]:
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

RAG_PROMPT = PromptTemplate.from_template(
    "Use ONLY the context to answer. If unsure, say you don't know.\n\n"
    "Question: {question}\n\n"
    "Context:\n{context}\n\n"
    "Answer:"
)

def format_docs(docs):
    return "\n\n".join(getattr(d, "page_content", str(d)) for d in docs)

print("RAG with OpenAI.")

RAG with OpenAI.


In [None]:
import os
import glob
from typing import List, Dict, Optional
from pathlib import Path
import logging
import time

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
import getpass
from sklearn.metrics.pairwise import cosine_similarity
import torch
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
DEFAULT_MODEL_PROVIDER = "openai"
DEFAULT_MODEL_NAME = "gpt-4o-mini"
DEFAULT_TEMPERATURE = 0.3
DEFAULT_CHUNK_SIZE = 2000
DEFAULT_CHUNK_OVERLAP = 400
DEFAULT_RETRIEVAL_K = 3

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [25]:
class AcademicRAG:
    SUPPORTED_PROVIDERS = ["openai", "google_genai"]

    PROVIDER_ENV_VARS = {
        "openai": "OPENAI_API_KEY",
        "google_genai": "GOOGLE_API_KEY"
    }

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME, model_provider: str = DEFAULT_MODEL_PROVIDER,
                 temperature: float = DEFAULT_TEMPERATURE, use_fine_tuned: bool = False,
                 fine_tuned_model_path: str = "/content/drive/MyDrive/fine_tuned_model_ragqa"):
        if model_provider not in self.SUPPORTED_PROVIDERS:
            raise ValueError(f"Unsupported model provider: {model_provider}. Supported providers: {', '.join(self.SUPPORTED_PROVIDERS)}")

        self.model_name = model_name
        self.temperature = temperature
        self.model_provider = model_provider
        self.use_fine_tuned = use_fine_tuned
        self.fine_tuned_model_path = fine_tuned_model_path

        self.vectorstore = None
        self.retriever = None
        self.rag_chain = None
        self.rag_folder = None
        self.documents = []

        self.chunk_size = DEFAULT_CHUNK_SIZE
        self.chunk_overlap = DEFAULT_CHUNK_OVERLAP
        self.retrieval_k = DEFAULT_RETRIEVAL_K
        self.embeddings_model = None

    @staticmethod
    def get_api_key_for_provider(provider: str) -> bool:
        if not provider:
            print("Provider cannot be empty.")
            return False

        provider = provider.lower()

        if provider not in AcademicRAG.SUPPORTED_PROVIDERS:
            print(f"Unknown model provider '{provider}'. Supported providers: {', '.join(AcademicRAG.SUPPORTED_PROVIDERS)}")
            return False

        env_var = AcademicRAG.PROVIDER_ENV_VARS[provider]
        key = os.getenv(env_var)

        if not key:
            if provider == "openai":
                key = getpass.getpass("Enter your OpenAI API key: ")
            elif provider == "google_genai":
                key = getpass.getpass("Enter your Google API key: ")

        if provider == "openai":
            if not key or len(key) < 20 or not key.startswith('sk-'):
                print("Error: Invalid OpenAI API key.")
                print(f"Please set {env_var} environment variable.")
                return False
        elif provider == "google_genai":
            if not key or len(key) < 20:
                print("Error: Invalid Google API key.")
                print(f"Please set {env_var} environment variable.")
                return False

        os.environ[env_var] = key
        print(f"{provider.title()} API key set in environment (length: {len(key)})")
        return True

    @staticmethod
    def verify_environment_setup(provider: str) -> bool:
        if not provider:
            print("Provider cannot be empty.")
            return False

        provider = provider.lower()

        if provider not in AcademicRAG.SUPPORTED_PROVIDERS:
            print(f"X Unknown provider '{provider}'. Supported providers: {', '.join(AcademicRAG.SUPPORTED_PROVIDERS)}")
            return False

        env_var = AcademicRAG.PROVIDER_ENV_VARS[provider]
        key = os.getenv(env_var)

        if provider == "openai":
            if key and len(key) >= 20 and key.startswith('sk-'):
                print(f"OpenAI API key verified in environment (length: {len(key)})")
                return True
            else:
                print("X OpenAI API key not properly set in environment.")
                return False
        elif provider == "google_genai":
            if key and len(key) >= 20:
                print(f"Google API key verified in environment (length: {len(key)})")
                return True
            else:
                print("X Google API key not properly set in environment.")
                return False

        return False

    def setup_google_drive(self) -> Optional[str]:
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            logger.info("Google Drive accessed.")

            self.rag_folder = self._find_rag_folder()
            if self.rag_folder:
                return self.rag_folder
            else:
                logger.warning("RAG folder not found.")
                return None

        except ImportError:
            logger.info("Google Colab not detected, running in local mode.")
            return None
        except Exception as e:
            logger.error(f"Error setting up Google Drive: {e}")
            return None

    def _find_rag_folder(self) -> Optional[str]:
        possible_paths = [
            "/content/drive/MyDrive/Rag",
            "/content/drive/My Drive/Rag",
            "/content/drive/MyDrive/rag",
            "/content/drive/My Drive/rag",
            "/content/drive/MyDrive/RAG",
            "/content/drive/My Drive/RAG"
        ]

        for path in possible_paths:
            if os.path.exists(path):
                print(f"Found RAG folder at: {path}")
                return path

        print("Folder not found in locations.")
        return None

    def load_documents(self, folder_path: str) -> List:
        if not os.path.exists(folder_path):
            raise ValueError(f"Folder not found: {folder_path}")

        pdf_pattern = os.path.join(folder_path, "*.pdf")
        pdf_files = glob.glob(pdf_pattern)

        if not pdf_files:
            raise ValueError(f"No PDF files found in folder: {folder_path}")

        logger.info(f"Found {len(pdf_files)} PDF files")

        documents = []
        for pdf_file in pdf_files:
            try:
                logger.info(f"Processing: {os.path.basename(pdf_file)}")
                loader = PyPDFLoader(pdf_file)
                file_documents = loader.load()

                for doc in file_documents:
                    doc.metadata['source'] = os.path.basename(pdf_file)
                    doc.metadata['file_path'] = pdf_file

                documents.extend(file_documents)
                logger.info(f"Loaded {len(file_documents)} pages from {os.path.basename(pdf_file)}.")

            except Exception as e:
                logger.error(f"Error loading {pdf_file}: {e}")
                continue

        self.documents = documents
        logger.info(f"Total pages loaded: {len(documents)}")
        return documents

    def create_vector_store(self, documents: List = None):
        if documents:
            self.documents = documents

        if not self.documents:
            raise ValueError("No documents loaded.")

        # Deciding which embeddings to use
        if self.use_fine_tuned and os.path.exists(self.fine_tuned_model_path):
            try:
                from sentence_transformers import SentenceTransformer

                class FineTunedEmbeddings:
                    def __init__(self, model_path: str):
                        self.model = SentenceTransformer(model_path)
                    def embed_documents(self, texts: List[str]) -> List[List[float]]:
                        return self.model.encode(texts, convert_to_tensor=False).tolist()
                    def embed_query(self, text: str) -> List[float]:
                        return self.model.encode(text, convert_to_tensor=False).tolist()

                print(f"Using fine-tuned embeddings from: {self.fine_tuned_model_path}")
                self.embeddings_model = FineTunedEmbeddings(self.fine_tuned_model_path)
            except ImportError:
                print("sentence-transformers not available, falling back to default embeddings.")
                self.embeddings_model = self._get_default_embeddings()
        else:
            print("Using standard embeddings.")
            self.embeddings_model = self._get_default_embeddings()

        # Splitting the docs.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            length_function=len,
        )
        chunks = text_splitter.split_documents(self.documents)
        print(f"Created {len(chunks)} text chunks")

        # Building the vector store.
        self.vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=self.embeddings_model,
            collection_name="academic_papers"
        )
        self.retriever = self.vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": self.retrieval_k}
        )
        print("Vector store created.")

    def _get_default_embeddings(self):
        return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    def evaluate_retrieval(self, qa_pairs=None, k_values=[1,3,5]):
        # Loading the Q/A dataset.
        if qa_pairs is None:
            if not os.path.exists("/content/drive/MyDrive/datasets/qa_pairs.csv"):
                raise ValueError("No qa_pairs provided and /content/drive/MyDrive/datasets/qa_pairs.csv not found!")
            df = pd.read_csv("/content/drive/MyDrive/datasets/qa_pairs.csv")
            qa_pairs = list(zip(df["query"].tolist(), df["answer"].tolist()))

        queries = [q for q, _ in qa_pairs]
        answers = [a for _, a in qa_pairs]

        # Embed queries and answers with the retriever's embeddings.
        query_embs = self.embeddings_model.embed_documents(queries)
        doc_embs = self.embeddings_model.embed_documents(answers)

        sim_matrix = cosine_similarity(query_embs, doc_embs)

        metrics = {}
        for k in k_values:
            correct = 0
            mrr_total = 0
            for i in range(len(queries)):
                scores = sim_matrix[i]
                topk_idx = np.argsort(scores)[::-1][:k]
                if i in topk_idx:
                    correct += 1
                    rank = np.where(topk_idx == i)[0][0] + 1
                    mrr_total += 1.0 / rank
            recall = correct / len(queries)
            mrr = mrr_total / len(queries)
            metrics[f"Recall@{k}"] = recall
            metrics[f"MRR@{k}"] = mrr

        return metrics

    def compare_fine_tuned_vs_baseline(self):
        print("Fine-tuned vs Baseline")
        print("-"*60)

        # Baseline RAG.
        baseline_rag = AcademicRAG(use_fine_tuned=False)
        baseline_rag.load_documents(self.rag_folder)
        baseline_rag.create_vector_store()
        baseline_metrics = baseline_rag.evaluate_retrieval()

        # Fine-tuned RAG.
        fine_rag = AcademicRAG(use_fine_tuned=True)
        fine_rag.load_documents(self.rag_folder)
        fine_rag.create_vector_store()
        fine_metrics = fine_rag.evaluate_retrieval()

        print("\n Evaluation Metrics for the Baseline vs Fine-tuned:")
        table_data = []
        for key in baseline_metrics.keys():
            b = baseline_metrics[key]
            f = fine_metrics[key]
            diff = (f - b) * 100
            table_data.append({
                "Metric": key,
                "Baseline": round(b, 3),
                "Fine-tuned": round(f, 3),
                "Change (%)": round(diff, 1)
            })
        df = pd.DataFrame(table_data)
        print(df)

        return baseline_metrics, fine_metrics

    def _create_custom_prompt(self):
        prompt_template = """You are an expert academic research assistant. Your task is to provide comprehensive, accurate answers based on the provided context from academic papers.

IMPORTANT INSTRUCTIONS:
1. Always include specific details: Mention author names, publication years, specific numbers, percentages, and concrete examples when they appear in the context.
2. Be precise: Use exact figures, statistics, and measurements as stated in the documents.
3. Maintain academic tone: Write in a scholarly, professional manner.
4. Structure your response: Organize information logically with clear sections if appropriate.
5. Cite sources: When possible, reference which document/page the information comes from.
6. Include quantitative data: Always mention specific numbers, percentages, and metrics when available.
7. Provide context: Explain the significance and implications of the findings.
8. Be comprehensive: Don't skip important details that are present in the context.

CONTEXT INFORMATION:
{context}

QUESTION: {question}

Please provide a comprehensive answer that includes all relevant specific details, numbers, and examples from the context. Structure your response to directly address the question while incorporating all pertinent information from the provided context. If the context doesn't contain enough information to fully answer the question, say so clearly.

ANSWER:"""

        return ChatPromptTemplate.from_template(prompt_template)

    def _enhance_context(self, context_docs):
        enhanced_context = []

        for i, doc in enumerate(context_docs):
            enhanced_content = f"[Chunk {i+1} from {doc.metadata.get('source', 'Unknown')}, Page {doc.metadata.get('page', 'Unknown')}]\n{doc.page_content}"
            enhanced_context.append(enhanced_content)

        return "\n\n---\n\n".join(enhanced_context)

    def setup_rag_chain(self):
        if not self.retriever:
            raise ValueError("Vector store not created.")

        # LLM.
        llm = init_chat_model(self.model_name,
                              model_provider=self.model_provider,
                              temperature=self.temperature)

        # Custom prompt.
        prompt = self._create_custom_prompt()

        # RAG chain with enhanced context processing.
        def rag_chain(question):
            try:
                # Relevant documents.
                context_docs = self.retriever.invoke(question)
                enhanced_context = self._enhance_context(context_docs)

                # Formatting the prompt and getting a response.
                messages = prompt.format_messages(
                    context=enhanced_context, question=question)

                answer = llm.invoke(messages)

                if hasattr(answer, 'content'):
                    answer_text = answer.content
                elif isinstance(answer, str):
                    answer_text = answer
                else:
                    answer_text = str(answer)

                result = {
                    'context': context_docs,
                    'answer': answer_text,
                    'question': question
                }

                return result

            except Exception as e:
                return {
                    'context': [],
                    'answer': f"Error processing your question: {str(e)}",
                    'question': question
                }

        self.rag_chain = rag_chain
        logger.info("RAG chain setup complete with custom prompt and context.")

    def query(self, question: str) -> Dict:
        if not self.rag_chain:
            raise ValueError("RAG chain not set up.")

        try:
            logger.info(f"Processing query: {question[:50]}.")
            result = self.rag_chain(question)

            if not isinstance(result, dict) or 'context' not in result or 'answer' not in result:
                return {
                    'answer': f"Unexpected result structure from RAG chain. Got {type(result)}",
                    'sources': [],
                    'question': question,
                    'context_chunks': 0,
                    'total_context_length': 0
                }

            context_docs = result['context']

            if not isinstance(context_docs, list):
                context_docs = list(context_docs) if hasattr(
                    context_docs, '__iter__') else []

            # Formatting sources with more detail and ranking them by relevance.
            sources = []
            for i, doc in enumerate(context_docs):
                try:
                    source_name = doc.metadata.get('source', 'Unknown')
                    page = doc.metadata.get('page', 'Unknown')
                    relevance_score = len(doc.page_content) / 1000

                    sources.append({
                        'index': i + 1,
                        'source': source_name,
                        'page': page,
                        'chunk_length': len(doc.page_content),
                        'relevance_score': round(relevance_score, 2)
                    })
                except Exception as e:
                    continue

            sources.sort(key=lambda x: x['relevance_score'], reverse=True)

            return {
                'answer': result['answer'],
                'sources': sources,
                'question': question,
                'context_chunks': len(context_docs),
                'total_context_length': sum(len(doc.page_content) for doc in context_docs if hasattr(doc, 'page_content'))
            }

        except Exception as e:
            return {
                'answer': f"Error processing your question: {str(e)}",
                'sources': [],
                'question': question,
                'context_chunks': 0,
                'total_context_length': 0
            }

    def answer_query(self, q: str) -> str:
        try:
            result = self.query(q)
            return result['answer']
        except Exception as e:
            return f"[generation error] {e}"

    def run_test_all_questions(self):
        Question_Tests = [
            "1. What is the primary goal of the SELF-ROUTE method proposed by Zhuowan Li?",
            "2. Explain why the researchers believe RAG might still be useful despite the superior performance of long-context LLMs.",
            "3. Compare the reranking techniques mentioned in the Wang paper. How do they impact the retrieval quality?",
            "4. What are the trade-offs involved when using different chunking strategies in RAG systems?",
            "5. How does multimodal retrieval enhance the capabilities of RAG?",
            "6. What were the key failure cases for RAG in handling long context retrievals, as noted by Zhuowan Li?",
            "7. Why does the Zhuowan paper claim that long-context LLMs outperformed RAG in most cases? What benefits does RAG still offer?",
            "8. Describe the metrics used to evaluate the different embedding models for RAG in Wang's paper.",
            "9. Discuss the implications of using self-reflection in routing queries between RAG and long-context LLMs.",
            "10. How does query rewriting contribute to the overall efficiency of RAG according to Wang's findings?"
        ]

        for q in Question_Tests:
            print(f"\nQ: {q}")
            print(f"A: {self.answer_query(q)}")
            print("-"*60)

        print("All questions tested.")

    def interactive_query_mode(self):
        print("\n" + "-"*60)
        print("Interactive RAG query mode.")
        print("-"*60)
        print("Type your questions (type 'quit' to exit).")
        print("-" * 60)

        while True:
            try:
                question = input("\n Your question: ").strip()

                if question.lower() in ['quit', 'exit', 'q']:
                    print("Goodbye.")
                    break

                if not question:
                    print("Please enter a question.")
                    continue

                print(f"\n Processing Query...")
                answer = self.answer_query(question)
                print(f"Answer: {answer}")
                print("-" * 60)

            except KeyboardInterrupt:
                print("\n Goodbye.")
                break
            except Exception as e:
                print(f"Error: {e}")

    @staticmethod
    def setup_rag_system():
        load_dotenv()

        if not AcademicRAG.get_api_key_for_provider(DEFAULT_MODEL_PROVIDER):
            return None

        rag_system = AcademicRAG(
            model_provider=DEFAULT_MODEL_PROVIDER,
            model_name=DEFAULT_MODEL_NAME,
            temperature=DEFAULT_TEMPERATURE)

        rag_folder = rag_system.setup_google_drive()

        if not rag_folder:
            print("\n RAG folder not found.")

            print("\n" + "-"*60)
            print("Manual setup.")
            print("-"*60)
            print("Please specify the path to your documents folder:")

            while True:
                manual_path = input("\n Enter the path to your documents folder: ").strip()

                if not manual_path:
                    print("Please enter a valid path.")
                    continue

                if os.path.exists(manual_path):
                    rag_folder = manual_path
                    break
                else:
                    print(f"Path not found: {manual_path}")
                    print("Please enter a valid path.")

        try:
            print(f"\n Loading documents from: {rag_folder}")
            rag_system.load_documents(rag_folder)
            print(f"Loaded {len(rag_system.documents)} documents.")

            rag_system.create_vector_store()
            print("Vector store created.")

            rag_system.setup_rag_chain()
            print("RAG chain setup.")

            print("\n RAG system ready.")
            return rag_system

        except Exception as e:
            print(f"Error: {e}")
            return None

def main():
    print("RAG System for Academic Papers.")
    print("-"*60)

    rag_system = AcademicRAG.setup_rag_system()

    if not rag_system:
        print("Failed to setup RAG system.")
        return

    fine_tuned_path = "/content/drive/MyDrive/fine_tuned_model_ragqa"
    if os.path.exists(fine_tuned_path):
        print(f"\n Fine-tuned model found at: {fine_tuned_path}")
        print("Would you like to use the fine-tuned model for better performance? (y/n): ", end="")
        use_fine_tuned = input().strip().lower() == 'y'

        if use_fine_tuned:
            print("Using fine-tuned embeddings for improved retrieval.")
        else:
            print("Using standard embeddings.")
    else:
        print(f"\n No fine-tuned model found at: {fine_tuned_path}")
        print("To create a fine-tuned model, run finetuned.py.")

    print("\n" + "-"*60)
    print("Select Mode:")
    print("1. Interactive Query Mode.")
    print("2. Run Test For All Questions.")
    print("3. Compare the Fine-tuned vs Baseline.")
    print("4. Exit")
    print("-"*60)

    choice = input("\n Enter your choice (1-4): ").strip()

    if choice == "1":
        rag_system.interactive_query_mode()
    elif choice == "2":
        rag_system.run_test_all_questions()
    elif choice == "3":
        if os.path.exists(fine_tuned_path):
            print("\n Running performance comparison.")
            rag_system.compare_fine_tuned_vs_baseline()
        else:
            print("X No fine-tuned model available for comparison.")
            print("Run finetuned.py to create one.")
    elif choice == "4":
        print("Goodbye.")
    else:
        print("Invalid choice.")
        rag_system.interactive_query_mode()

if __name__ == "__main__":
    main()

RAG System for Academic Papers.
------------------------------------------------------------
Openai API key set in environment (length: 164)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found RAG folder at: /content/drive/MyDrive/RAG

 Loading documents from: /content/drive/MyDrive/RAG
Loaded 34 documents.
Using standard embeddings.
Created 86 text chunks
Vector store created.
Vector store created.
RAG chain setup.

 RAG system ready.

 Fine-tuned model found at: /content/drive/MyDrive/fine_tuned_model_ragqa
Would you like to use the fine-tuned model for better performance? (y/n): y
Using fine-tuned embeddings for improved retrieval.

------------------------------------------------------------
Select Mode:
1. Interactive Query Mode.
2. Run Test For All Questions.
3. Compare the Fine-tuned vs Baseline.
4. Exit
------------------------------------------------------------

 Enter your choice (1-4): 3

 Ru

In [24]:
if __name__ == "__main__":
    main()

RAG System for Academic Papers
------------------------------------------------------------
Openai API key set in environment (length: 164)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found RAG folder at: /content/drive/MyDrive/RAG

 Loading documents from: /content/drive/MyDrive/RAG
Loaded 34 documents.
Using standard embeddings.
Created 86 text chunks
Vector store created.
Vector store created.
RAG chain setup.

 RAG system ready.

 Fine-tuned model found at: /content/drive/MyDrive/fine_tuned_model_ragqa
Would you like to use the fine-tuned model for better performance? (y/n): y
Using fine-tuned embeddings for improved retrieval.

------------------------------------------------------------
Select Mode:
1. Interactive Query Mode.
2. Run Test For All Questions.
3. Compare the Fine-tuned vs Baseline.
4. Exit
------------------------------------------------------------

 Enter your choice (1-4): 2

Q: 1

In [10]:
main()

RAG System for Academic Papers
------------------------------------------------------------
Google_Genai API key set in environment (length: 39)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found RAG folder at: /content/drive/MyDrive/RAG

 Loading documents from: /content/drive/MyDrive/RAG
Loaded 34 documents.
Using standard embeddings.
Created 86 text chunks
Vector store created.
Vector store created.
RAG chain setup.

 RAG system ready.

 Fine-tuned model found at: /content/drive/MyDrive/fine_tuned_model_ragqa
Would you like to use the fine-tuned model for better performance? (y/n): y
Using fine-tuned embeddings for improved retrieval.

------------------------------------------------------------
Select Mode:
1. Interactive Query Mode.
2. Run Test For All Questions.
3. Compare the Fine-tuned vs Baseline.
4. Exit
------------------------------------------------------------

 Enter your choice (1-4): 2


domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 1. What is the primary goal of the SELF-ROUTE method proposed by Zhuowan Li?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: 

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 2. Explain why the researchers believe RAG might still be useful despite the superior performance of long-context LLMs.
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 3. Compare the reranking techniques mentioned in the Wang paper. How do they impact the retrieval quality?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 4. What are the trade-offs involved when using different chunking strategies in RAG systems?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 5. How does multimodal retrieval enhance the capabilities of RAG?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 6. What were the key failure cases for RAG in handling long context retrievals, as noted by Zhuowan Li?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}


domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 7. Why does the Zhuowan paper claim that long-context LLMs outperformed RAG in most cases? What benefits does RAG still offer?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?pr

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 8. Describe the metrics used to evaluate the different embedding models for RAG in Wang's paper.
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, local

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 9. Discuss the implications of using self-reflection in routing queries between RAG and long-context LLMs.
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Processing Question... 10. How does query rewriting contribute to the overall efficiency of RAG according to Wang's findings?
Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
,

In [None]:
main()

RAG System for Academic Papers
------------------------------------------------------------
Google_Genai API key set in environment (length: 39)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found RAG folder at: /content/drive/MyDrive/RAG

 Loading documents from: /content/drive/MyDrive/RAG
Loaded 34 documents.
Using standard embeddings.
Created 86 text chunks
Vector store created.
Vector store created.
RAG chain setup.

 RAG system ready.

 Fine-tuned model found at: /content/drive/MyDrive/fine_tuned_model_ragqa
Would you like to use the fine-tuned model for better performance? (y/n): y
Using fine-tuned embeddings for improved retrieval.

------------------------------------------------------------
Select Mode:
1. Interactive Query Mode.
2. Run Test For All Questions.
3. Compare the Fine-tuned vs Baseline.
4. Exit
------------------------------------------------------------

 Enter your choice (1-4): 1


domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable

domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj


 Processing Query...


domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry."
, links {
  description: "Google developers console API activation"
  url: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?proj

Answer: Error processing your question: 403 Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. [reason: "SERVICE_DISABLED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "serviceTitle"
  value: "Generative Language API"
}
metadata {
  key: "containerInfo"
  value: "1012481686979"
}
metadata {
  key: "consumer"
  value: "projects/1012481686979"
}
metadata {
  key: "activationUrl"
  value: "https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview?project=1012481686979"
}
, locale: "en-US"
message: "Generative Language API has not been used in project 1012481686979 before or it is disabled. Enable

Baseline: all-MiniLM-L6-v2.
Fine-tuning on in-domain QA pairs with hard negatives improved early-rank retrieval with an increaste in Recall@1 of 5.4% and on Recall@5 of 27.1% compared to the first RAG.

In Part 1 of the assignment, I implemented a RAG pipeline using all-MiniLM-L6-v2 as the baseline embedding model. I evaluated retrieval performance on academic articles using rank metrics such as Recall@k and MRR@k.

I extended the work by fine-tuning a SentenceTransformer model on the in-domain QA pairs, incorporating hard negatives. The fine-tuned model was integrated back into the same RAG setup to ensure consistent evaluation with Part 1.

The results show clear improvements over the baseline embeddings.

•	Recall@1 and MRR@1 both improved by 5.4%.

•	Recall@3 increased by 13.2% and MRR@3 by 8.4%.

•	Recall@5 achieved the largest gain at 27.1%, with MRR@5 improving by 11.6%.

These improvements show that fine-tuning domain specific embeddings can lead to stronger retrieval performance and higher early-rank accuracy.

I built a baseline RAG system, fine-tuned an embedding model on QA pairs, used a baseline other than BGE, and demonstrated improvements compared to Part 1.
