In [None]:
!pip install llama-index
!pip install llama-index-llms-gemini
!pip install llama-index-embeddings-huggingface
!pip install sentence-transformers
!pip install transformers
!pip install google-generativeai
!pip install llama-index-llms-google-genai llama-index

Collecting llama-index-embeddings-huggingface
  Using cached llama_index_embeddings_huggingface-0.5.3-py3-none-any.whl.metadata (767 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cudnn_cu12-9.1.0.7

In [None]:
# First install required packages if you haven't already
!pip install llama-index llama-index-readers-file pypdf



In [None]:
!pip install --upgrade llama-index langchain-google-genai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.4-py3-none-any.whl.metadata (5.2 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Downloading langchain_google_genai-2.1.4-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading google_ai_generativelanguage-0.6.18-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-ai-generativelanguage, langchain-google-genai
  Attempting uninstall: google-ai-generativelanguage
    Found existing installation: google-ai-generativelanguage 0.6.15
    Uninstalling google-ai-generativelanguage-0.6.15:
      Successfully uninstalled google-ai-generativelanguage-0.6.15
[31

In [None]:
import inspect
import sys

# Prepend /content to the import path before importing `utils`
# (presumably utils.py lives in /content -- confirm).
sys.path.insert(0, "/content")

from utils import get_doc_tools

# Dump the helper's source so its behaviour can be read inline.
print(inspect.getsource(get_doc_tools))

def get_doc_tools(file_path: str, name: str, gemini_api_key: str):
    """Get vector and summary tools for a document using Gemini."""
    
    llm = GoogleGenAI(api_key=gemini_api_key, model="models/gemini-2.0-flash")
    embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
    
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    
    vector_index = VectorStoreIndex(nodes, embed_model=embed_model)
    
    def vector_query(query: str, page_numbers: Optional[List[str]] = None) -> str:
        page_numbers = page_numbers or []
        metadata_dicts = [{"key": "page_label", "value": p} for p in page_numbers]
        
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.O

In [None]:
import importlib

import utils

# Re-execute the already-imported utils module, then re-import
# get_doc_tools so the name points at the reloaded definition.
utils = importlib.reload(utils)
from utils import get_doc_tools

In [None]:
# Step 1: Standard-library imports, then make /content importable
import os
import sys
from pathlib import Path

# Prepend /content so the `utils` import below can resolve against it.
sys.path.insert(0, "/content")

# Step 2: Third-party and local imports.
# All imports are grouped here so that a missing or incompatible package
# fails the cell immediately, before any paper has been processed.
import nest_asyncio
from google.colab import userdata
from llama_index.core.agent import AgentRunner, FunctionCallingAgentWorker
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.google_genai import GoogleGenAI

from utils import get_doc_tools

# Step 3: Setup
nest_asyncio.apply()
# The API key is read from the Colab secret named 'gemini_api' and also
# exported through the GEMINI_API_KEY environment variable.
gemini_api_key = userdata.get("gemini_api")
os.environ["GEMINI_API_KEY"] = gemini_api_key

# Step 4: Define paper paths
paper_paths = [
    "/content/longlora.pdf",
    "/content/selfrag.pdf",
    "/content/metagpt.pdf",
]

# Step 5: Load embedding model
# NOTE(review): embed_model is not referenced again in this cell, and the
# get_doc_tools source printed earlier in this notebook constructs its own
# HuggingFaceEmbedding -- confirm whether a later cell needs this instance.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Step 6: Set up Gemini LLM (used by the agent below)
llm = GoogleGenAI(api_key=gemini_api_key, model="models/gemini-2.0-flash")

# Step 7: Build tools for each paper
# Each paper contributes two tools (vector query + summary); the tool names
# are derived from the PDF's file stem.
all_tools = []

for paper_path in paper_paths:
    name = Path(paper_path).stem
    print(f"Processing {name}...")

    vector_tool, summary_tool = get_doc_tools(
        file_path=paper_path,
        name=name,
        gemini_api_key=gemini_api_key,
    )

    print(f"Tool name: {vector_tool.metadata.name}")
    print(f"Tool name: {summary_tool.metadata.name}")

    all_tools.extend([vector_tool, summary_tool])

# Step 8: Set up Agent with all tools
agent_worker = FunctionCallingAgentWorker.from_tools(
    tools=all_tools,
    llm=llm,
    verbose=True,
)

agent = AgentRunner(agent_worker)

# Step 9: Ask your query with verbose mode
response = agent.query(
    "Tell me summary of self rag "
)

# Print just the final response text
#print(response)

Processing longlora...
Tool name: vector_query_longlora
Tool name: summarize_longlora
Processing selfrag...
Tool name: vector_query_selfrag
Tool name: summarize_selfrag
Processing metagpt...
Tool name: vector_query_metagpt
Tool name: summarize_metagpt
Added user message to memory: Tell me summary of self rag 
=== Calling Function ===
Calling function: summarize_selfrag with args: {"input": "Tell me summary of self rag"}
=== Function Output ===
SELF-RAG is a framework that improves the quality and accuracy of large language models (LLMs) by using retrieval and self-reflection, without reducing the LLM's versatility. It trains a single LM to retrieve passages when needed and to generate and reflect on the retrieved passages and its own generations using reflection tokens. The framework uses special tokens that make the LM controllable, allowing it to adjust to different task needs. Experiments have demonstrated that SELF-RAG outperforms other LLMs and retrieval-augmented models on tasks 