# Quackling — Node Transformations

In [1]:
# requirements for this example:
%pip install -q \
    quackling \
    llama-index-embeddings-huggingface

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Identifier of the Hugging Face model used to build the embedding model.
HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"

# Documents to ingest; each entry is either a local file path or a URL.
FILE_PATHS = [
    "https://arxiv.org/pdf/2206.01062",  # URL (DocLayNet paper)
    # "/path/to/local/pdf",  # file path
]

In [3]:
import warnings

warnings.filterwarnings(
    action="ignore", category=UserWarning, module="torchvision|torch"
)

## Initialization

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from rich.pretty import pprint

# Build the embedding model from the model ID chosen in the configuration
# cell; `pprint` is imported here for the truncated previews in later cells.
embed_model = HuggingFaceEmbedding(
    model_name=HF_EMBED_MODEL_ID,
)

In [5]:
from quackling.llama_index.readers.docling_reader import DoclingReader

# Create a reader configured with the JSON parse type, then load every
# entry of FILE_PATHS into documents.
reader = DoclingReader(
    parse_type=DoclingReader.ParseType.JSON,
)
docs = reader.load_data(
    file_path=FILE_PATHS,
)

# Truncated preview of the loaded documents.
pprint(
    docs,
    max_length=2,
    max_string=250,
    max_depth=4,
)

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

In [6]:
from quackling.llama_index.node_parsers.hier_node_parser import (
    HierarchicalNodeParser,
)

# Split the loaded documents into nodes ("chunks") with the hierarchical
# node parser, using its default settings.
node_parser = HierarchicalNodeParser()
chunks = node_parser.get_nodes_from_documents(
    documents=docs,
)

# Truncated preview of the resulting chunks.
pprint(
    chunks,
    max_length=2,
    max_string=250,
    max_depth=4,
)

In [7]:
# Apply the embedding model to the chunks by calling it directly on them.
# NOTE(review): presumably this attaches an embedding to each node and returns
# the same node objects — confirm against the llama-index documentation.
chunks_with_emb = embed_model(chunks)

# Truncated preview of the embedded chunks.
pprint(
    chunks_with_emb,
    max_length=2,
    max_string=250,
    max_depth=4,
)