# Imports

In [None]:
import pandas as pd
from pymongo import MongoClient
import google.generativeai as genai
from urllib.parse import quote_plus
from dotenv import load_dotenv
import os

# Criando conexão com o MongoDB

In [None]:
# Load variables from a local .env file (if one exists) BEFORE reading them.
# On a fresh top-to-bottom run nothing has loaded the .env yet at this point,
# so os.getenv() would return None and quote_plus(None) would fail with an
# opaque TypeError. load_dotenv() is safe to call more than once.
# NOTE(review): presumably the Mongo credentials live in the same .env file
# as the Gemini key - confirm.
load_dotenv()

# Fail early with a message that names exactly which credential is missing.
_missing_vars = [name for name in ("MONGO_USER", "MONGO_PWD") if not os.getenv(name)]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )

# Percent-encode the credentials so reserved characters (e.g. '@', ':', '/')
# cannot break the connection URI.
DB_USERNAME = quote_plus(os.getenv("MONGO_USER"))
DB_PASSWORD = quote_plus(os.getenv("MONGO_PWD"))

DB_URI = f"mongodb+srv://{DB_USERNAME}:{DB_PASSWORD}@nutriamdb.zb8v6ic.mongodb.net/?retryWrites=true&w=majority"

DB_NAME = "NutriaMDB"

# Client, database and collection handles; the collection is the target of
# the final insert step.
conn = MongoClient(DB_URI)
db = conn[DB_NAME]
coll_ingrediente = db["ingrediente"]

## Criando agente de embedding


In [None]:
# Load the API key from the .env file
load_dotenv()
api_key = os.getenv("GOOGLE_GEMINI_API")
genai.configure(api_key=api_key)

# Embedding model to use ('text-embedding-004' was noted as the most recent
# when this notebook was written). The fully-qualified "models/..." resource
# name is the form shown in the API documentation.
# NOTE(review): some client versions may reject the bare name - confirm
# against the installed google-generativeai version.
model = "models/text-embedding-004"
# Sample text used to try out the embedding call
texto = "O lixo eletrônico é um problema ambiental crescente no Brasil."

# API call
response = genai.embed_content(
    model=model,
    content=texto,
)

# The embedding vector
vetor = response["embedding"]

print(f"Tamanho do vetor: {len(vetor)}")
# Show only the first 10 values; printing the whole vector floods the output.
print(vetor[:10])

# Pegando os dados

In [None]:
# Read the spreadsheet into a DataFrame (path is relative to the working
# directory).
df_infos = pd.read_excel("tabela.xlsx")

# Preview the first five rows (head() defaults to 5) to sanity-check the load.
display(df_infos.head())

# Transformando os dados em documentos que possam ser inseridos no MongoDB

In [None]:
# Template for one "ingrediente" document: every field with its default value.
# The key order is significant: apart from "_id" and "cEmbedding" (both
# generated in this cell), the keys follow the spreadsheet column order and
# are used below to map columns onto fields.
# NOTE(review): the c/n prefixes look like a text/numeric naming convention -
# confirm.
dict_template = {
    "_id":0,
    "cNmIngrediente":"",
    "cEmbedding":[],
    "cCategoria":"",
    "nCaloria(kcal)":0,
    "nProteina(g)":0,
    "nCarboidrato(g)":0,
    "nAcucar(g)":0,
    "nFibra(g)":0,
    "nGorduraTotal(g)":0,
    "nGorduraSaturada(g)":0,
    "nGorduraMonoinsaturada(g)":0,
    "nGorduraPoliinsaturada(g)":0,
    "nColesterol(mg)":0,
    "nRetinol(mcg)":0,
    "nTiamina(mg)":0,
    "nRiboflavina(mg)":0,
    "nNiacina(mg)":0,
    "nVitB6(mg)":0,
    "nFolato(mcg)":0,
    "nColina(mg)":0,
    "nVitB12(mcg)":0,
    "nVitC(mg)":0,
    "nVitD(mcg)":0,
    "nVitE(mg)":0,
    "nVitK(mcg)":0,
    "nCalcio(mg)":0,
    "nFosforo(mg)":0,
    "nMagnesio(mg)":0,
    "nFerro(mg)":0,
    "nZinco(mg)":0,
    "nCobre(mg)":0,
    "nSelenio(mcg)":0,
    "nPotassio(mg)":0,
    "nSodio(mg)":0,
    "nCafeina(mg)":0,
    "nTeobromina(mg)":0,
    "nAlcool(g)":0,
    "nAgua(g)":0,
}

# Fields that come straight from the spreadsheet, in column order:
# column 1 -> "cNmIngrediente", column 2 -> "cCategoria", ...,
# column 37 -> "nAgua(g)". Column 0 is not used.
sheet_fields = [
    field for field in dict_template if field not in ("_id", "cEmbedding")
]
last_column = len(sheet_fields)

# Validate the sheet width once, up front, instead of failing on an arbitrary
# positional index in the middle of the conversion.
if df_infos.shape[1] <= last_column:
    raise ValueError(
        f"Expected at least {last_column + 1} columns in the spreadsheet, "
        f"found {df_infos.shape[1]}"
    )

# Build one fresh document per row. Starting from the template keeps the key
# order and defaults; "_id" is the 1-based row position. The template itself
# is never mutated.
list_ingredients = [
    {
        **dict_template,
        "_id": doc_id,
        **dict(zip(sheet_fields, row[1:last_column + 1])),
    }
    for doc_id, row in enumerate(df_infos.values, start=1)
]

# Second pass: attach the embedding of each ingredient name. This issues one
# embedding API request per ingredient.
for ingredient in list_ingredients:
    ingredient["cEmbedding"] = genai.embed_content(
        model=model,
        content=ingredient["cNmIngrediente"],
    )["embedding"]

# Adicionando ao banco

In [None]:
# Bulk-insert every prepared ingredient document into the "ingrediente"
# collection in a single call.
# NOTE(review): the documents carry explicit sequential "_id" values starting
# at 1, so re-running this cell against a collection that already holds them
# will presumably fail on duplicate keys - confirm whether the collection
# should be cleared first.
coll_ingrediente.insert_many(list_ingredients)