# Quackling — Node Transformations

In [1]:
# requirements for this example:
%pip install -qq \
    quackling \
    llama-index-embeddings-huggingface

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Hugging Face model id handed to HuggingFaceEmbedding as `model_name`.
HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"

# Source PDF location(s) handed to the reader's `load_data(file_path=...)`.
FILE_PATHS = [
    "https://arxiv.org/pdf/2408.09869",
]

In [3]:
from rich.pretty import pprint

In [4]:
import warnings

# Keep cell output readable: drop UserWarnings attributed to modules matching
# "torch" and FutureWarnings attributed to modules matching "easyocr".
warnings.filterwarnings("ignore", module="torch", category=UserWarning)
warnings.filterwarnings("ignore", module="easyocr", category=FutureWarning)

## Initialization

In [5]:
from quackling.llama_index.node_parsers import HierarchicalJSONNodeParser
from quackling.llama_index.readers import DoclingPDFReader

# Node parser used later to turn the loaded documents into nodes ("chunks").
node_parser = HierarchicalJSONNodeParser()

# PDF reader set to the JSON parse type.
# NOTE(review): presumably JSON is what HierarchicalJSONNodeParser expects — confirm.
reader = DoclingPDFReader(
    parse_type=DoclingPDFReader.ParseType.JSON,
)

In [6]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model built from the model id chosen in the configuration cell.
embed_model = HuggingFaceEmbedding(
    model_name=HF_EMBED_MODEL_ID,
)

In [7]:
# Load the configured file path(s) through the reader.
docs = reader.load_data(
    file_path=FILE_PATHS,
)

# Abbreviated preview: at most 2 items per container, strings cut at 50
# characters, nesting shown down to depth 4.
pprint(
    docs,
    max_depth=4,
    max_length=2,
    max_string=50,
)

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

In [8]:
# Run the node parser over the loaded documents to obtain the nodes ("chunks").
chunks = node_parser.get_nodes_from_documents(
    documents=docs,
)

# Abbreviated preview: at most 2 items per container, strings cut at 100
# characters, nesting shown down to depth 4.
pprint(
    chunks,
    max_depth=4,
    max_length=2,
    max_string=100,
)

In [9]:
# Apply the embedding model directly to the nodes, as a transformation step.
# NOTE(review): in LlamaIndex this call appears to set embeddings on the passed
# nodes in place and return them, so `chunks` may be mutated too — confirm.
chunks_with_emb = embed_model(
    chunks,
)

# Abbreviated preview: at most 2 items per container, strings cut at 100
# characters, nesting shown down to depth 4.
pprint(
    chunks_with_emb,
    max_depth=4,
    max_length=2,
    max_string=100,
)