In [1]:
from llama_index.core import Document
from llama_index.core.node_parser import JSONNodeParser
from llama_index.embeddings.openai import OpenAIEmbedding


In [2]:
# Configuration: name of the JSON file to load. The loading cell reads it from
# data/loader/ and also stores it as the document's "filename" metadata.

filename = "recipes_flat.json"


In [3]:
# Read the whole JSON file as text, then wrap it in a single Document that
# carries the source filename as metadata.
with open(f"data/loader/{filename}", "r", encoding="utf-8") as file:
    raw_text = file.read()

document = Document(text=raw_text, metadata={"filename": filename})
print(len(document.text))  # length of the loaded text, in characters


2091083


In [4]:
# Split the single JSON document into nodes with the JSON-aware parser and
# report how many nodes were produced.
splitter = JSONNodeParser()
nodes = splitter.get_nodes_from_documents(
    [document],
    show_progress=False,
)
print(len(nodes))


1397


In [5]:
# Preview the first five nodes: id, metadata, and the first 200 characters of
# text, followed by blank lines as a visual separator.
for ix, node in enumerate(nodes[:5]):
    print(
        f">>>{ix} {node.id_}",
        f"Metadata {node.metadata}",
        f"Text {node.text[:200]}",
        "\n\n",
        sep="\n",
    )


>>>0 4f491fc8-fdb8-4450-a23c-0806fbe825d2
Metadata {'filename': 'recipes_flat.json'}
Text category Bebidas
name Margarita Cristalina
source https://www.kiwilimon.com/receta/bebidas/cocteles/margarita-cristalina
prep_time 5 mins
cook_time N/A
difficulty Baja
ingredients 1 1/2 onzas de Tequi



>>>1 9b4fc15d-c5ee-47aa-9d58-6cc3c9a4f666
Metadata {'filename': 'recipes_flat.json'}
Text category Bebidas
name Gin Con Lichi
source https://www.kiwilimon.com/receta/bebidas/cocteles/gin-con-lichi
prep_time 5 mins
cook_time N/A
difficulty Baja
ingredients suficiente de hielo, en cubos
1 1/



>>>2 24ebd300-7582-4cd9-b282-15a2c700d21e
Metadata {'filename': 'recipes_flat.json'}
Text category Bebidas
name Martini 007
source https://www.kiwilimon.com/receta/bebidas/cocteles/martini-007
prep_time 5 mins
cook_time N/A
difficulty Baja
ingredients 2 onzas de ginebra
1 onza de vodka, en



>>>3 e5cd89b2-9d95-4947-a4cc-7f402be14ec0
Metadata {'filename': 'recipes_flat.json'}
Text category Bebidas
name Agua D

# Metadata filters

Let's add metadata filters so only certain nodes are retrieved.

This example is adapted from https://docs.llamaindex.ai/en/stable/examples/vector_stores/chroma_auto_retriever/


In [6]:
import re

import chromadb

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.core.vector_stores.types import MetadataInfo, VectorStoreInfo
from llama_index.vector_stores.chroma import ChromaVectorStore

from llama_index.llms.openai import OpenAI

# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding

## import settings from llama_index
from llama_index.core import Settings


In [7]:
# configure the llm and the embedding model
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.5,
)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")


In [8]:
# In-memory Chroma client (per the Chroma docs, EphemeralClient does not
# persist data to disk).
chroma_client = chromadb.EphemeralClient()

# get_or_create_collection keeps this cell re-runnable within one kernel:
# create_collection raises if a collection named "recipes" already exists.
chroma_collection = chroma_client.get_or_create_collection("recipes")

# Wrap the collection so LlamaIndex stores and queries vectors through Chroma.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index over the parsed recipe nodes, backed by the Chroma store.
index = VectorStoreIndex(nodes, storage_context=storage_context)


In [9]:
# Describe the store's content and its filterable metadata fields. This text
# is handed to the auto-retriever, so it is written as plain, correctly
# spelled English ("different occasions").
vector_store_info = VectorStoreInfo(
    content_info="Recipes from the web to eat on different occasions",
    metadata_info=[
        MetadataInfo(
            name="filename",
            type="str",
            description=(
                "The filename of the document where the recipe was extracted from"
            ),
        ),
    ],
)

# Retriever that uses the description above together with the index.
# NOTE(review): every node visible in this notebook shares the same
# "filename" value, so a filter on that field cannot narrow results here.
retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)


In [10]:
# Run the auto-retriever on a natural-language query. As the cell's last
# expression, the returned list of NodeWithScore objects is displayed in full.
retriever.retrieve("Give me a recipe with chocolate")


[NodeWithScore(node=TextNode(id_='f6e2994f-ba77-4c52-a00b-030cb194dd47', embedding=None, metadata={'filename': 'recipes_flat.json'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='c9aa72ab-4955-419e-b13c-8920da498773', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'filename': 'recipes_flat.json'}, hash='f97ac26fbb96bc6da10d33567380c98af7e8ec0589da4ace2435cd5fa39c259e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='8cd611a2-2178-48f8-89d9-0b6861bd0b33', node_type=<ObjectType.TEXT: '1'>, metadata={'filename': 'recipes_flat.json'}, hash='dca514c135bd149d0d07a540847ad6eebd83cf861e9fca795169dbb86c3b2c5e'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='81303b7d-f8b9-420e-a6b3-dbe9e770b583', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='aba24d4945562b0073af7910ff5ee7c9ddc0dea104bfcd9616737293bf1e9009')}, text='category Postres\nname Crema Batida Sabor Chocolate\nsource http

In [23]:
# Build a query engine with default settings from the same index and ask the
# same question; `result` is printed in the following cell.
engine = index.as_query_engine()
result = engine.query("Give me a recipe with chocolate")


In [24]:
# Show the query engine's response as plain text.
print(result)


Here is a recipe for Tarta De Chocolate Sin Harina:

**Ingredients:**
- 1 1/2 tazas de chocolate amargo, troceado
- 1 taza de mantequilla
- 8 Huevos San Juan®
- 1 taza de azúcar
- 50 gramos de cocoa
- 1 cucharadita de cardamomo
- 1/4 cucharaditas de sal
- 1 cucharada de esencia de vainilla
- 1 taza de nuez, finamente picada

**Preparation:**
1. Coloca el chocolate en un bowl y derrite a baño maría. Agrega la mantequilla, mezcla y reserva.
2. Separa las claras de las yemas de las piezas de Huevo San Juan®. Bate las claras con el azúcar hasta que dupliquen su tamaño y queden firmes. Reserva.
3. En un bowl, usa un batidor globo para mezclar el chocolate derretido y las piezas de Huevo San Juan® hasta integrar. Incorpora la cocoa, el cardamomo, la sal y la esencia de vainilla hasta integrar. Añade las claras de manera suave y envolvente.
4. Vierte la preparación anterior en un molde redondo previamente enharinado y engrasado. Cubre con las nueces y hornea a 175 °C por 25 minutos. Deja enfr