<a href="https://colab.research.google.com/github/JoanaKnobbe/Python_GeminiAI_API_Embedded/blob/main/GeminiEmbeddedQuery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install -q -U google-generativeai

import google.generativeai as genai
import numpy as np
import pandas as pd

from google.colab import userdata
# Read the Gemini API key from the Colab secret named "SECRET_KEY" instead of
# hard-coding it in the notebook, then hand it to the SDK for all later calls.
api_key = userdata.get("SECRET_KEY")
genai.configure(api_key=api_key)

In [3]:
# Print the name of every model that advertises the 'embedContent' method.
for candidate in genai.list_models():
  if 'embedContent' not in candidate.supported_generation_methods:
    continue
  print(candidate.name)


models/embedding-001
models/text-embedding-004


In [3]:
# Embedding example: embed one short titled document and print the raw response.
title = "Hello"
sample_text = "Hello world!"

embeddings = genai.embed_content(
    model="models/embedding-001",
    content=sample_text,
    title=title,
    task_type="RETRIEVAL_DOCUMENT",
)
print(embeddings)

{'embedding': [0.06283626, -0.010903991, -0.06310342, -0.01607426, 0.050194014, 0.02689511, 0.037256084, -0.01698408, -0.0073538637, 0.026560329, 0.0037626845, 0.031305768, -0.035426993, -0.018131876, -0.017698376, -0.018551167, 0.010107553, 0.010917018, 0.011896836, 0.017212171, 0.011183023, 0.013177256, -0.03323382, -0.019199826, 0.046001088, -0.011214548, -0.0005080298, -0.03706313, 0.0025976405, 0.014842195, -0.025320403, 0.0030553949, -0.018513683, 0.02481184, 0.025873156, -0.05189631, 0.004343101, -0.0015855819, -0.0077374065, -0.006960651, 0.015160776, -0.08238759, -0.046412457, -0.008614321, 0.04986332, 0.007858687, -0.03137756, 0.028572721, 0.039814997, -0.07668484, 0.05946354, 0.0311962, 0.0961107, -0.027220953, 0.0103850765, -0.027734637, 0.074004725, -0.00096279965, -0.04874524, 0.02631272, 0.005379421, 0.016897222, 0.04594916, 0.021376036, -0.0056711375, -0.057069086, -0.07248201, 0.026729759, 0.02951505, -0.010758517, -0.011137081, -0.05284877, 0.014242772, -0.0070309592,

In [4]:
# Documents that will be searched. Each one is a dict with a "title" and a
# "content" entry, in that order.
DOCUMENT1 = dict(title="Title number 1", content="Content example number 1")
DOCUMENT2 = dict(title="Title number 2", content="Content example number 2")
DOCUMENT3 = dict(title="Title number 3", content="Content example number 3")

# Collected in one list so they can be loaded into a DataFrame in a single step.
documents = [DOCUMENT1, DOCUMENT2, DOCUMENT3]

In [5]:
# One row per document; the dict keys ("title", "content") become the columns.
df = pd.DataFrame.from_records(documents)
df

Unnamed: 0,title,content
0,Title number 1,Content example number 1
1,Title number 2,Content example number 2
2,Title number 3,Content example number 3


In [8]:
# Name of the embedding model passed to generate_and_query() for the query embedding.
model = "models/embedding-001"

In [9]:
def embed_fn(title, text, model="models/embedding-001"):
  """Return the embedding vector for a single document.

  Args:
    title: Document title, forwarded to the API as ``title``.
    text: Document body, forwarded to the API as ``content``.
    model: Name of the embedding model to use. Defaults to the value that
      used to be hard-coded here, so existing two-argument calls behave
      exactly as before. Pass the same model that is used for the query
      embedding so document and query vectors are comparable.

  Returns:
    The value stored under the ``"embedding"`` key of the API response.
  """
  response = genai.embed_content(model=model,
                                 content=text,
                                 title=title,
                                 task_type="RETRIEVAL_DOCUMENT")
  return response["embedding"]

In [10]:
# Embed every row (title + content) and keep the resulting vector in a new
# "Embeddings" column next to the document it belongs to.
df["Embeddings"] = df.apply(
    lambda doc: embed_fn(doc["title"], doc["content"]),
    axis="columns",
)
df

Unnamed: 0,title,content,Embeddings
0,Title number 1,Content example number 1,"[0.011937153, -0.059627376, -0.018696543, -0.0..."
1,Title number 2,Content example number 2,"[0.0067644655, -0.06106462, -0.015498343, -0.0..."
2,Title number 3,Content example number 3,"[0.019363392, -0.058742013, -0.013472862, -0.0..."


In [13]:
def generate_and_query(query, base, model):
  """Return the content of the document in ``base`` that best matches ``query``.

  The query is embedded with ``model`` and compared, via dot product, with
  every vector in ``base["Embeddings"]``; the row with the largest score wins.

  Args:
    query: Text to search for.
    base: DataFrame with an "Embeddings" column (one vector per row) and a
      "content" column. This frame is the one searched; no global DataFrame
      is consulted.
    model: Name of the embedding model used for the query.

  Returns:
    The "content" value of the best-scoring row.

  Raises:
    ValueError: If ``base`` has no rows.
  """
  if len(base) == 0:
    raise ValueError("base contains no documents to search")

  query_embedding = genai.embed_content(model=model,
                                        content=query,
                                        task_type="RETRIEVAL_QUERY")["embedding"]

  # Stack the per-row vectors into a (rows, dim) matrix so one dot product
  # scores every document against the query.
  document_matrix = np.stack(base["Embeddings"].to_list())
  dot_products = np.dot(document_matrix, query_embedding)

  best_position = int(np.argmax(dot_products))

  return base.iloc[best_position]["content"]

In [15]:
# Look up the document whose embedding best matches the query and show its content.
query = "Query example about anything at one of the documents"

piece = generate_and_query(query=query, base=df, model=model)
print(piece)

Content example number 1


In [18]:
# Ask a generative model to rewrite the retrieved passage in a more casual tone
# without adding information that is not in it (the prompt text is in Portuguese).
prompt = f"Reescreva esse texto de uma forma mais descontraída, sem adicionar informações que não façam parte do texto: {piece}"

# A single candidate at a moderate temperature.
generation_config = dict(temperature=0.5, candidate_count=1)

# NOTE(review): confirm that "gemini-1.0-pro" is still an available model name.
model_2 = genai.GenerativeModel("gemini-1.0-pro", generation_config=generation_config)
response = model_2.generate_content(prompt)
print(response.text)

**Texto original:**

Content example number 1

**Texto descontraído:**

Exemplo de conteúdo número 1, bora lá!
