In [1]:
!wget "https://openreview.net/pdf?id=VtmBAGCN7o"

--2025-02-10 14:59:57--  https://openreview.net/pdf?id=VtmBAGCN7o
Resolving openreview.net (openreview.net)... 35.184.86.251
Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16911937 (16M) [application/pdf]
Saving to: ‘pdf?id=VtmBAGCN7o.1’


2025-02-10 14:59:58 (33.7 MB/s) - ‘pdf?id=VtmBAGCN7o.1’ saved [16911937/16911937]



In [2]:
# Presumably the package behind the `llama_index.embeddings.gemini` import used further down.
%pip install llama-index-embeddings-gemini



In [3]:
!pip install llama-index
%pip install llama-index-llms-gemini llama-index



In [4]:
import nest_asyncio

# Applied once, before any llama-index call. Presumably needed so async code can
# run inside the notebook's already-running event loop — TODO confirm.
nest_asyncio.apply()

In [5]:
from llama_index.core import SimpleDirectoryReader

# load documents
# NOTE(review): this reads a hard-coded absolute path, and it is not the file
# saved by the wget cell at the top of the notebook — confirm which PDF is intended.
documents = SimpleDirectoryReader(input_files=["/content/Attention is all you need.pdf"]).load_data()

In [20]:
import getpass
import os

# Prompt for the key only when the environment does not already provide one,
# so the secret is never written into the notebook itself.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

Enter your Google AI API key: ··········


In [21]:
# Module-level copy of the key; passed explicitly as `api_key` when the
# Gemini embedding model is constructed below.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

In [22]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes, using chunk_size=1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [23]:
# imports
from llama_index.embeddings.gemini import GeminiEmbedding

In [24]:
# Gemini embedding model, authenticated with the key collected earlier.
model_name = "models/embedding-001"

# NOTE(review): `title` is a fixed placeholder string — confirm it is intended.
embed_model = GeminiEmbedding(
    model_name=model_name, api_key=GOOGLE_API_KEY, title="this is a document"
)


In [25]:
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.openai import OpenAIEmbedding

# Register Gemini 1.5 Flash as the global llama-index LLM.
Settings.llm = Gemini(
    model="models/gemini-1.5-flash",
)
# Reuse the embedding model configured earlier (model name, API key, title).
# The previous code passed that object as the `model` keyword of a second
# GeminiEmbedding, which is not a model name and dropped the explicit settings.
Settings.embed_model = embed_model

Simple tool call


In [26]:
from llama_index.core.tools import FunctionTool

def add(x: int, y: int) -> int:
    """Adds two integers together."""
    # Docstring kept verbatim: this function is wrapped as an LLM tool, and the
    # docstring is presumably what the model sees as the tool description.
    total = x + y
    return total

def mystery(x: int, y: int) -> int:
    """Mystery function that operates on top of two numbers."""
    # Docstring kept verbatim (and deliberately vague): this function is wrapped
    # as an LLM tool, and the docstring is presumably its description.
    # The result is the square of the sum of the two inputs.
    total = x + y
    return total * total


# Wrap both functions as tools so they can be offered to the LLM in the
# predict_and_call cell below.
add_tool = FunctionTool.from_defaults(fn=add)
mystery_tool = FunctionTool.from_defaults(fn=mystery)

In [27]:
llm = Gemini(model="models/gemini-1.5-flash")
# Offer both tools and let the model choose one; with verbose=True the selected
# function, its arguments and its output are printed before the final response.
response = llm.predict_and_call(
    [add_tool, mystery_tool],
    "Tell me the output of the mystery function on 2 and 9",
    verbose=True
)
print(str(response))

=== Calling Function ===
Calling function: mystery with args: {"y": 9.0, "x": 2.0}
=== Function Output ===
121.0
121.0


In [28]:
from llama_index.core.node_parser import SentenceSplitter
# NOTE(review): repeats the earlier splitter cell with the same chunk_size;
# re-running it only rebuilds `nodes` from the same `documents`.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [29]:
# Inspect the first node: its metadata fields followed by its text.
print(nodes[0].get_content(metadata_mode="all"))

page_label: 1
file_name: Attention is all you need.pdf
file_path: /content/Attention is all you need.pdf
file_type: application/pdf
file_size: 2215244
creation_date: 2025-02-10
last_modified_date: 2025-02-10

Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Parmar∗
Google Research
nikip@google.com
Jakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.com
Aidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.edu
Łukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the e

In [30]:
from llama_index.core import VectorStoreIndex

# Index the nodes for similarity search (presumably embedding them with the
# globally configured Settings.embed_model — TODO confirm).
vector_index = VectorStoreIndex(nodes)
# Baseline query engine: similarity_top_k=2, no metadata filter.
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [32]:
from llama_index.core.vector_stores import MetadataFilters

# Rebind `query_engine` to a variant restricted to nodes whose `page_label`
# metadata equals "2" (page labels are stored as strings).
query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=MetadataFilters.from_dicts(
        [
            {"key": "page_label", "value": "2"}
        ]
    )
)

# Run the question against the page-filtered engine.
response = query_engine.query(
    "What are some high-level results of the paper",
)

In [33]:
# Print the answer returned by the page-filtered query.
print(response)

The paper introduces the Transformer, a model architecture that replaces recurrence with an attention mechanism to establish global dependencies between input and output.  This allows for greater parallelization and achieves state-of-the-art translation quality with significantly less training time (twelve hours on eight P100 GPUs).



In [36]:
# Print the metadata of every source node behind the answer, to check that
# only nodes from the filtered page were used.
for n in response.source_nodes:
  print(n.metadata)

{'page_label': '2', 'file_name': 'Attention is all you need.pdf', 'file_path': '/content/Attention is all you need.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2025-02-10', 'last_modified_date': '2025-02-10'}


In [40]:
import google.generativeai as genai

# Print the name of every available model whose supported generation methods
# include "generateContent".
for m in genai.list_models():
    if "generateContent" in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental


In [41]:
# Rebind `llm` to gemini-2.0-flash-001, one of the model names listed above.
llm = Gemini(model="models/gemini-2.0-flash-001")

Enhancing Data Retrieval

In [37]:
from typing import List
from llama_index.core.vector_stores import FilterCondition


def vector_query(
    query: str,
    page_numbers: List[str]
) -> str:
    """Perform a vector search over an index.

    query (str): the string query to be embedded.
    page_numbers (List[str]): Filter by set of pages. Leave BLANK if we want to perform a vector search
        over all pages. Otherwise, filter by the set of specified pages.

    """
    # The docstring above is kept verbatim: this function is wrapped as an LLM
    # tool, and the docstring is presumably what the model sees as its description.

    # Normalise the page filter to a plain list of strings:
    #   * None / empty means "no page filter", as the docstring promises;
    #   * any iterable handed over by the tool-calling layer becomes a real list;
    #   * non-string page values are converted, since `page_label` metadata is
    #     stored as a string (e.g. '2').
    page_labels = [str(page) for page in (page_numbers or [])]

    # One equality filter per requested page, combined with OR so that a node
    # from any of the requested pages qualifies.
    metadata_dicts = [
        {"key": "page_label", "value": label} for label in page_labels
    ]

    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    # NOTE: the query engine's response object is returned as-is (the `str`
    # annotation is kept unchanged for callers); use str(...) for the text.
    response = query_engine.query(query)
    return response


# Expose vector_query to the LLM as a tool named "vector_tool".
vector_query_tool = FunctionTool.from_defaults(
    name="vector_tool",
    fn=vector_query
)

In [39]:
# Kept disabled: passing a plain string to llm.predict raised the
# ValidationError recorded below (a BasePromptTemplate is expected, not a str).
# response = llm.predict("What are the high-level results of MetaGPT as described on page 2?")
# print(response)


ValidationError: 1 validation error for LLMPredictStartEvent
template
  Input should be a valid dictionary or instance of BasePromptTemplate [type=model_type, input_value='What are the high-level ...as described on page 2?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type

In [42]:
# NOTE(review): as recorded below, this call ends in
# "TypeError: Object of type RepeatedComposite is not JSON serializable".
# Presumably the list-valued page_numbers argument chosen by the model is not a
# plain Python list when it gets serialised — TODO confirm (e.g. try verbose=False).
# NOTE(review): the question is about MetaGPT, while the indexed file is
# "Attention is all you need.pdf" — confirm the intended document.
response = llm.predict_and_call(
    [vector_query_tool],
    "What are the high-level results of MetaGPT as described on page 2?",
    verbose=True
)

TypeError: Object of type RepeatedComposite is not JSON serializable