In [1]:
# Install/upgrade (-U) the Gemini SDK (0.8.3 or newer) and ChromaDB quietly (-q).
# NOTE(review): chromadb has no version constraint here — consider pinning a tested version.
%pip install -U -q "google-generativeai>=0.8.3" chromadb

Note: you may need to restart the kernel to use updated packages.


In [27]:
import google.generativeai as genai
from IPython.display import display

In [None]:
from kaggle_secrets import UserSecretsClient

# Read the API key from Kaggle's secret store (never hard-code it) and
# hand it to the Gemini SDK.
secrets_client = UserSecretsClient()
GOOGLE_API_KEY = secrets_client.get_secret("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [29]:
# Print the name of every available model whose name contains "embedding".
embedding_model_names = (
    model.name for model in genai.list_models() if "embedding" in model.name
)
for model_name in embedding_model_names:
    print(model_name)



models/embedding-gecko-001
models/embedding-001
models/text-embedding-004


In [30]:
# Two short reference passages that make up the toy corpus for this notebook:
# DOCUMENT_1 describes self-attention, DOCUMENT_2 the Transformer architecture.
DOCUMENT_1 = "Self-attention allows a model to focus on different parts of a sequence by computing relevance scores between tokens. Each token gathers information from others, helping the model understand context and relationships within the sequence. This mechanism enables efficient handling of long-range dependencies, making it crucial for tasks like language translation and text generation."
DOCUMENT_2 = "The Transformer architecture introduced by Google in 2017 revolutionized the field of natural language processing. It introduced the concept of self-attention, which allows the model to process sequences in parallel, significantly increasing processing speed. The Transformer architecture consists of multiple layers of self-attention mechanisms, each layer processing the entire sequence at once, rather than processing one token at a time as in previous models like LSTM."

# The corpus as a list; a passage's position in this list is later used as its id.
documents = [DOCUMENT_1, DOCUMENT_2]

In [31]:
# Sanity check: echo the list of passages that will be indexed.
print(documents)

['Self-attention allows a model to focus on different parts of a sequence by computing relevance scores between tokens. Each token gathers information from others, helping the model understand context and relationships within the sequence. This mechanism enables efficient handling of long-range dependencies, making it crucial for tasks like language translation and text generation.', 'The Transformer architecture introduced by Google in 2017 revolutionized the field of natural language processing. It introduced the concept of self-attention, which allows the model to process sequences in parallel, significantly increasing processing speed. The Transformer architecture consists of multiple layers of self-attention mechanisms, each layer processing the entire sequence at once, rather than processing one token at a time as in previous models like LSTM.']


In [34]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry


class GeminiEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function that delegates to the Gemini embedding API.

    Set ``document_mode`` to True when embedding texts to be stored, and to
    False when embedding search queries; the flag selects the task type sent
    with each embedding request.
    """

    # True -> "retrieval_document" task; False -> "retrieval_query" task.
    document_mode = True

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` with ``models/text-embedding-004``.

        Args:
            input: The texts to embed.

        Returns:
            The value stored under the ``"embedding"`` key of the API response.
        """
        task = "retrieval_document" if self.document_mode else "retrieval_query"

        result = genai.embed_content(
            model="models/text-embedding-004",
            content=input,
            task_type=task,
            # Retry the request when the API reports a transient error.
            request_options={
                "retry": retry.Retry(predicate=retry.if_transient_error)
            },
        )
        return result["embedding"]

In [35]:
import chromadb

# Name of the Chroma collection that stores the passage embeddings.
DB_NAME = "transformer"

# Passages are being indexed in this cell, so the embedder must be in
# document mode (a later cell flips it to query mode).
embed_fn = GeminiEmbeddingFunction()
embed_fn.document_mode = True

# Fetch the collection if it already exists, otherwise create it.
chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(
    DB_NAME,
    embedding_function=embed_fn,
)

# Each passage's position in `documents` doubles as its string id.
document_ids = list(map(str, range(len(documents))))
db.add(documents=documents, ids=document_ids)

In [38]:
# A notebook cell only auto-renders its final expression, so the count has to
# be displayed explicitly; as a bare statement its value was silently dropped.
display(db.count())

# Peek at the first stored records (ids, embeddings, documents) to confirm
# that embeddings were generated for the added passages.
db.peek()

{'ids': ['0', '1'],
 'embeddings': array([[-0.02625349,  0.0407034 , -0.02011823, ..., -0.04470832,
          0.03691332, -0.00053415],
        [-0.03317715,  0.00780859, -0.02967615, ..., -0.05387708,
          0.07014375, -0.00397073]]),
 'documents': ['Self-attention allows a model to focus on different parts of a sequence by computing relevance scores between tokens. Each token gathers information from others, helping the model understand context and relationships within the sequence. This mechanism enables efficient handling of long-range dependencies, making it crucial for tasks like language translation and text generation.',
  'The Transformer architecture introduced by Google in 2017 revolutionized the field of natural language processing. It introduced the concept of self-attention, which allows the model to process sequences in parallel, significantly increasing processing speed. The Transformer architecture consists of multiple layers of self-attention mechanisms, each laye

In [46]:
from IPython.display import Markdown, display
# Searching now, not indexing: switch the embedder to query mode.
embed_fn.document_mode = False
query = "What is self-attention?"

result = db.query(query_texts=[query], n_results=1)

# Exactly one query with exactly one hit is expected; the single-element
# unpacking raises if the result has any other shape.
(hits_for_query,) = result["documents"]
(passage,) = hits_for_query
Markdown(passage)

Self-attention allows a model to focus on different parts of a sequence by computing relevance scores between tokens. Each token gathers information from others, helping the model understand context and relationships within the sequence. This mechanism enables efficient handling of long-range dependencies, making it crucial for tasks like language translation and text generation.

In [47]:
def _to_single_line(text):
    """Replace every newline in ``text`` with a space."""
    return text.replace("\n", " ")


passage_oneline = _to_single_line(passage)
query_oneline = _to_single_line(query)

# The prompt below is the place to steer tone and to state which topics the
# model should stick to or avoid.
prompt = f"""You are a lazy bot that answers questions using text from the reference passage included below and answer in one sentence. 
Be sure to respond in a complete sentence including all relevant background information. 
However, you are talking to technical audience, so be sure to skip easy concepts. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: {query_oneline}
PASSAGE: {passage_oneline}
"""
Markdown(prompt)

You are a lazy bot that answers questions using text from the reference passage included below and answer in one sentence. 
Be sure to respond in a complete sentence including all relevant background information. 
However, you are talking to technical audience, so be sure to skip easy concepts. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: What is self-attention?
PASSAGE: Self-attention allows a model to focus on different parts of a sequence by computing relevance scores between tokens. Each token gathers information from others, helping the model understand context and relationships within the sequence. This mechanism enables efficient handling of long-range dependencies, making it crucial for tasks like language translation and text generation.


In [48]:
# Ask Gemini to answer the question using the retrieval-augmented prompt.
model = genai.GenerativeModel("gemini-1.5-flash-latest")

# Retry transient API failures, mirroring the retry policy the embedding
# requests in this notebook already use.
answer = model.generate_content(
    prompt,
    request_options={"retry": retry.Retry(predicate=retry.if_transient_error)},
)
Markdown(answer.text)

Self-attention is a mechanism that allows a model to focus on different parts of a sequence by computing relevance scores between tokens. 
