# Quackling — Basic Pipeline

In [1]:
# Requirements for this example (installed into the kernel's environment via %pip).
# NOTE: versions are unpinned here; pin them if you need reproducible runs.
%pip install -q \
    quackling \
    python-dotenv \
    llama-index-embeddings-huggingface \
    llama-index-llms-huggingface-api \
    llama-index-vector-stores-milvus

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from tempfile import TemporaryDirectory

from dotenv import load_dotenv
from pydantic import TypeAdapter
from rich.pretty import pprint

# Load variables from a local .env file (if one exists) so that the
# os.environ.get(...) lookups in this notebook can pick them up.
load_dotenv()

# Documents to ingest; entries may be local file paths (e.g. "/path/to/local/pdf") or URLs.
FILE_PATHS = ["https://arxiv.org/pdf/2206.01062"]  # DocLayNet paper

# Prompt used at generation time; {context_str} and {query_str} are the template placeholders.
TEXT_QA_TEMPLATE_STR = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Query: {query_str}\n"
    "Answer:\n"
)

# Question asked in the RAG and retrieval sections, and how many chunks to retrieve for it.
QUERY = "How many pages were human annotated?"
TOP_K = 3

# Whether to ingest the documents into the vector store in this run. Read from the
# INGEST environment variable (default "True") and coerced to a bool by pydantic.
# When false, the notebook only loads an index from the existing vector store.
INGEST = TypeAdapter(bool).validate_python(os.environ.get("INGEST", "True"))

In [3]:
import warnings

warnings.filterwarnings(
    action="ignore", category=UserWarning, module="torchvision|torch"
)

## Setup

### Reader

Below we use our `DoclingReader`. As our goal is to leverage Docling's native JSON format (see *Node parser* further below), we set the parse type to JSON.

In [4]:
from quackling.llama_index.readers.docling_reader import DoclingReader

# Reader configured to emit Docling's JSON format, which the node parser consumes later on.
reader = DoclingReader(parse_type=DoclingReader.ParseType.JSON)

# Keyword arguments forwarded to reader.load_data(...) at ingestion time.
load_kwargs = {"file_path": FILE_PATHS}

### Node parser

Setting up our `HierarchicalNodeParser` for extracting chunks from JSON in a way that leverages the document structure:

In [5]:
from quackling.llama_index.node_parsers.hier_node_parser import HierarchicalNodeParser

# Created with default settings; handed to the index as a transformation during ingestion.
node_parser = HierarchicalNodeParser()

### Embed model

In [6]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Hugging Face model ID of the embedding model.
HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"

# Embedding model instance; used below to size the vector store and to embed documents.
embed_model = HuggingFaceEmbedding(model_name=HF_EMBED_MODEL_ID)

### LLM

In [7]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# The API token comes from the environment (None if HF_API_KEY is not set); never hardcode it.
HF_API_KEY = os.getenv("HF_API_KEY")
HF_LLM_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# LLM used for answer generation in the RAG section.
llm = HuggingFaceInferenceAPI(token=HF_API_KEY, model_name=HF_LLM_MODEL_ID)

### Vector store

In [8]:
from llama_index.vector_stores.milvus import MilvusVectorStore

# Location of the Milvus database. Taken from the MILVUS_URL environment variable when
# set; otherwise a database file inside a fresh temporary directory is used.
#
# The temporary directory is created only when it is actually needed (a default passed
# to os.environ.get(...) would be evaluated eagerly, creating a directory even when
# MILVUS_URL is provided). `tmp_dir` is kept as a module-level reference so the
# directory is not cleaned up while the notebook is still using it; it is None when
# MILVUS_URL comes from the environment.
#
# NOTE: a temporary database starts out empty, so INGEST should stay enabled in that case.
MILVUS_URL = os.environ.get("MILVUS_URL")
if MILVUS_URL is None:
    tmp_dir = TemporaryDirectory()
    MILVUS_URL = f"{tmp_dir.name}/milvus_demo.db"
else:
    tmp_dir = None
# Collection name and extra MilvusVectorStore keyword arguments (a JSON object), both
# overridable through environment variables.
MILVUS_COLL_NAME = os.getenv("MILVUS_COLL_NAME", "quackling_basic_pipeline")
MILVUS_KWARGS = TypeAdapter(dict).validate_json(os.getenv("MILVUS_KWARGS", "{}"))

# Derive the vector dimension by embedding a short probe string with the embed model.
embedding_dim = len(embed_model.get_text_embedding("hi"))

vector_store = MilvusVectorStore(
    uri=MILVUS_URL,
    collection_name=MILVUS_COLL_NAME,
    dim=embedding_dim,
    # overwrite the collection whenever we ingest: follow-up/incremental
    # ingestions are not shown in this example
    overwrite=INGEST,
    **MILVUS_KWARGS,
)

In [9]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode

# Query mode handed to both the query engine and the retriever further below.
vector_store_query_mode = VectorStoreQueryMode.DEFAULT  # i.e. dense

## Preparing vector store index

In [10]:
from llama_index.core import StorageContext, VectorStoreIndex

if INGEST:
    # Ingestion path: read the source documents, chunk them with the node parser,
    # embed the chunks and write them into the vector store.
    docs = reader.load_data(**load_kwargs)
    pprint(docs, max_length=1, max_string=250, max_depth=4)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents=docs,
        embed_model=embed_model,
        storage_context=storage_context,
        transformations=[node_parser],
    )
else:
    # Load-only path: build the index on top of the already populated vector store.
    # The embed model must be passed explicitly here as well; otherwise the index
    # falls back to LlamaIndex's global default embedding model, and queries would
    # not be embedded with the model that was used at ingestion time.
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model=embed_model,
    )

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

## RAG

In [11]:
from llama_index.core import PromptTemplate

# Wrap the QA prompt string in a template object before handing it to the query engine.
qa_prompt = PromptTemplate(TEXT_QA_TEMPLATE_STR)

# Retrieval + generation: fetch the TOP_K most similar chunks and let the LLM answer.
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=TOP_K,
    text_qa_template=qa_prompt,
    vector_store_query_mode=vector_store_query_mode,
)

query_res = query_engine.query(QUERY)
pprint(query_res, max_length=1, max_string=250, max_depth=4)

## Retrieval

We can also just run the retrieval without generation:

In [12]:
# Retrieval only: same top-k and query mode as the query engine, but no LLM call.
retriever = index.as_retriever(similarity_top_k=TOP_K, vector_store_query_mode=vector_store_query_mode)

# Fetch the matching chunks for the query and show an abbreviated view of them.
retr_res = retriever.retrieve(QUERY)
pprint(retr_res, max_length=1, max_string=250, max_depth=4)