### Database Config

In [None]:
from datetime import datetime
from pymongo import MongoClient
from time import sleep
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Connect to MongoDB using the URL found in the MONGO_URL environment
# variable, then resolve the database and collection used by this script.
client = MongoClient(os.environ.get("MONGO_URL"))
db = client.get_database("dbCodCoz")
collection = db.get_collection("receitas2")

### Embedding Config

In [None]:
import requests

# Sentence-transformers model used to produce the embeddings.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"

# API token read from the environment (None when the variable is unset).
HUGGING_FACE_TOKEN = os.environ.get("HUGGING_FACE_TOKEN")

# Feature-extraction endpoint for the model above.
API_URL = (
    "https://router.huggingface.co/hf-inference/models/sentence-transformers/"
    f"{model_name}/pipeline/feature-extraction"
)

# Headers sent with every request to API_URL.
headers = {"Authorization": "Bearer {}".format(HUGGING_FACE_TOKEN)}

def embed_text(texts):
    """Request embeddings for ``texts`` from the feature-extraction endpoint.

    Args:
        texts: Value forwarded as-is in the ``inputs`` field of the JSON
            payload posted to ``API_URL``.

    Returns:
        The decoded JSON body of a successful (HTTP 200) response.

    Raises:
        requests.HTTPError: If the response status is not 200, so an error
            payload can never be mistaken for an embedding by the caller.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        # wait_for_model makes the API wait for the model when it is busy
        # or still loading, instead of returning an error right away.
        json={"inputs": texts, "options": {"wait_for_model": True}},
        # Without a timeout, requests may block forever on a stalled server.
        timeout=120,
    )

    print(f"{response.status_code} - {response.reason}")

    if response.status_code != 200:
        print(response.text)
        raise requests.HTTPError(
            f"{response.status_code} - {response.reason}", response=response
        )

    return response.json()

def gerar_texto_embedding(doc):
    """Build the text that represents a recipe document for embedding.

    The text is assembled, one section per line, from the fields that are
    present and non-empty: ``nome``, ``descricao``, the ``nome`` of each
    entry in ``ingredientes`` and the ``passo`` of each entry in
    ``modoPreparo``. Entries that are not dicts, or whose value is missing
    or empty, are ignored.

    Args:
        doc: Mapping holding the recipe fields.

    Returns:
        The assembled text, or ``None`` when no field yields usable content.
    """
    partes = []

    # name
    nome = doc.get("nome")
    if nome:
        partes.append(f"Nome: {nome}.")

    # description
    descricao = doc.get("descricao")
    if descricao:
        partes.append(f"Descrição: {descricao}.")

    # ingredients
    ingredientes = doc.get("ingredientes")
    if isinstance(ingredientes, list):
        nomes_ingredientes = [
            str(i["nome"])
            for i in ingredientes
            if isinstance(i, dict) and i.get("nome")
        ]
        if nomes_ingredientes:
            partes.append(f"Ingrediente(s): {', '.join(nomes_ingredientes)}.")

    # preparation steps
    modo_preparo = doc.get("modoPreparo")
    if isinstance(modo_preparo, list):
        # Drop empty/missing steps so they neither add stray spaces nor
        # produce a "Modo de preparo:" section with no content.
        passos = [
            str(p["passo"])
            for p in modo_preparo
            if isinstance(p, dict) and p.get("passo")
        ]
        if passos:
            partes.append(f"Modo de preparo: {' '.join(passos)}")

    # assemble the final text
    texto = "\n".join(partes).strip()
    return texto or None  # None when nothing usable exists

In [None]:
# Generate and store an embedding for every document in the collection.
# To process only documents that still lack one, pass the filter
# {"embedding": {"$exists": False}} to find().
for doc in collection.find():
    try:
        texto = gerar_texto_embedding(doc)
        if not texto:
            # Nothing usable to embed: skip instead of sending None to the API.
            print(f"Sem texto para embedding, pulando: {doc.get('nome')}")
            continue

        embedding = embed_text(texto)
        sleep(1)  # throttle after each API call that returns, to avoid the rate limit

        # Only a list is accepted as an embedding; an error payload (a dict)
        # must never be written to the document.
        if not isinstance(embedding, list):
            raise ValueError(f"resposta inesperada da API: {embedding!r}")

        collection.update_one(
            {"_id": doc["_id"]},
            {"$set": {"embedding": embedding}}
        )
        print(f"Embedding salvo para: {doc.get('nome')}")

    except Exception as e:
        # Best-effort run: report the failure and move on to the next document.
        print(f"Erro em '{doc.get('nome')}':", e)