In [5]:
# %pip install -U llama-index-readers-file pymupdf
# %pip install -U llama-index-core
# %pip install -U llama-index-llms-azure-openai
# %pip install -U llama-index-embeddings-azure-openai
# %pip install -U llama-index-vector-stores-chroma
# %pip install -U python-dotenv

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported before each cell runs instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [7]:
from pathlib import Path

# Create data directory safely (Windows / Linux / macOS)
# Relative path, so it resolves against the notebook's working directory.
DATA_DIR = Path("data")

# Idempotent: an existing folder is left untouched, missing parents are created.
DATA_DIR.mkdir(exist_ok=True, parents=True)


### Load Data

In [None]:
from pathlib import Path

from llama_index.readers.file import PDFReader
from llama_index.readers.file import PyMuPDFReader

In [10]:
loader = PyMuPDFReader()

# Build the path from DATA_DIR so the data folder is named in exactly one place.
PDF_PATH = DATA_DIR / "GMDSS_System-IOM_Manual.pdf"

# Fail early with the absolute location, which is easier to act on than an
# error raised from inside the PDF reader.
if not PDF_PATH.is_file():
    raise FileNotFoundError(f"Source PDF not found: {PDF_PATH.resolve()}")

# The markdown below this cell states the reader yields one document per page.
docs0 = loader.load(file_path=PDF_PATH)

By default, the PDF reader creates a separate doc for each page. For the sake of this notebook, we stitch docs together into one doc. This will help us better highlight auto-merging capabilities that “stitch” chunks together later on.

In [None]:
from llama_index.core import Document

# Concatenate the per-page documents, separated by a blank line, into one string.
page_separator = "\n\n"
doc_text = page_separator.join(d.get_content() for d in docs0)

# Wrap the combined text in a single Document; downstream cells expect a list.
docs = [Document(text=doc_text)]

### Parse Chunk Hierarchy from Text, Load into Storage

By default, `HierarchicalNodeParser` splits the text into a three-level hierarchy of progressively smaller chunks:

- 1st level: chunk size 2048
- 2nd level: chunk size 512
- 3rd level: chunk size 128

In [12]:
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import SentenceSplitter

In [13]:
# Chunk sizes per hierarchy level, coarsest first. These are the values the
# notebook text documents as the defaults; stating them explicitly keeps the
# hierarchy stable if the library defaults ever change and makes them tunable.
CHUNK_SIZES = [2048, 512, 128]

node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=CHUNK_SIZES)

In [19]:
# Parse the stitched document into hierarchical nodes. Later cells pull the
# leaf and root subsets out of this list and store all of it in the docstore.
nodes = node_parser.get_nodes_from_documents(docs)

In [20]:
# Total number of nodes produced by the parser.
len(nodes)

1219

Here we import a simple helper function for fetching “leaf” nodes within a node list. These are nodes that don’t have children of their own.

In [21]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes

In [22]:
# Keep only the nodes that have no children; these are what gets embedded
# into the vector index further down.
leaf_nodes = get_leaf_nodes(nodes)

In [23]:
# Number of leaf nodes, for comparison with the total node count above.
len(leaf_nodes)

934

In [24]:
# Presumably the top-level chunks of the hierarchy (going by the helper's name).
# NOTE(review): `root_nodes` is not referenced again in the visible part of the
# notebook; confirm it is still needed.
root_nodes = get_root_nodes(nodes)

In [25]:
from dotenv import load_dotenv
import os
import warnings

# Load variables from a local .env file (if any) so the os.getenv calls below see them.
load_dotenv()

# Azure OpenAI connection settings, passed to both the chat and embedding clients.
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")

# Names of the Azure deployments used for chat completion and for embeddings.
AZURE_CHAT_DEPLOYMENT = os.getenv("AZURE_CHAT_DEPLOYMENT")
AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")

CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR")

# Report unset Azure settings here, next to where they are read, instead of
# letting a None value cause a harder-to-trace failure when the clients are built.
_missing_settings = [
    name
    for name in (
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "OPENAI_API_VERSION",
        "AZURE_CHAT_DEPLOYMENT",
        "AZURE_EMBEDDING_DEPLOYMENT",
    )
    if not os.getenv(name)
]
if _missing_settings:
    warnings.warn(
        "Missing environment variables: " + ", ".join(_missing_settings),
        stacklevel=2,
    )

# int(None) raises an opaque TypeError when the variable is unset, so fall back
# to 3072, the value the embedding-model cell notes as the expected setting.
DEFAULT_EMBEDDING_DIMENSIONS = 3072
_raw_dimensions = (os.getenv("EMBEDDING_DIMENSIONS") or "").strip()
try:
    EMBEDDING_DIMENSIONS = (
        int(_raw_dimensions) if _raw_dimensions else DEFAULT_EMBEDDING_DIMENSIONS
    )
except ValueError as exc:
    raise ValueError(
        f"EMBEDDING_DIMENSIONS must be an integer, got {_raw_dimensions!r}"
    ) from exc

In [37]:
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# Store every parsed node (all hierarchy levels) in the docstore.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

# Storage context wrapping that docstore; the index and the auto-merging
# retriever are both given this same context later on.
storage_context = StorageContext.from_defaults(docstore=docstore)

# Connection arguments common to both Azure clients.
_azure_connection = {
    "api_key": AZURE_OPENAI_API_KEY,
    "azure_endpoint": AZURE_OPENAI_ENDPOINT,
    "api_version": OPENAI_API_VERSION,
}

# Chat model: `model` is the logical model name, `deployment_name` the Azure deployment.
llm = AzureOpenAI(
    model="gpt-4o",
    deployment_name=AZURE_CHAT_DEPLOYMENT,
    temperature=0.1,
    **_azure_connection,
)

# Embedding model; the vector size comes from EMBEDDING_DIMENSIONS (3072 expected).
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-3-large",
    deployment_name=AZURE_EMBEDDING_DEPLOYMENT,
    dimensions=EMBEDDING_DIMENSIONS,
    **_azure_connection,
)

In [38]:
from llama_index.core import Settings

# Register the Azure clients on llama-index's Settings object. The index and
# query engines below are built without explicit model arguments.
Settings.llm = llm  # AzureOpenAI chat model
Settings.embed_model = embed_model  # AzureOpenAIEmbedding

In [None]:
from llama_index.core import VectorStoreIndex

# Index only the leaf nodes. The shared storage context carries the docstore
# that holds the full node hierarchy.
base_index = VectorStoreIndex(nodes=leaf_nodes, storage_context=storage_context)

2026-01-14 13:22:30,137 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 13:22:31,281 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 13:22:31,918 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 13:22:32,549 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 13:22:33,177 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/

### Define Retriever

In [40]:
from llama_index.core.retrievers import AutoMergingRetriever

In [41]:
# Baseline: plain vector retrieval over the leaf nodes, top 6 matches.
base_retriever = base_index.as_retriever(similarity_top_k=6)

# Auto-merging retriever layered on the baseline; it is given the storage
# context holding all nodes, and verbose=True is set for diagnostic output.
retriever = AutoMergingRetriever(
    vector_retriever=base_retriever,
    storage_context=storage_context,
    verbose=True,
)

In [None]:
# Question sent to both retrievers so their results can be compared.
query_str = "give me Mechanical outline drawing, SAILOR 3027 GMDSS Terminal"

# NOTE(review): `nodes` held the full parsed hierarchy until now; this rebinds
# it to the retrieval results. Re-running the docstore cell after this one would
# pick up the wrong objects; consider a distinct name in a follow-up.
nodes = retriever.retrieve(query_str)
base_nodes = base_retriever.retrieve(query_str)

2026-01-14 16:23:50,817 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 16:23:51,380 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [79]:
# Number of results returned by the auto-merging retriever.
len(nodes)

5

In [80]:
# Number of results returned by the baseline vector retriever.
len(base_nodes)

5

In [81]:
# Print each auto-merged result: a divider line, up to 10,000 characters of
# its text, then its metadata dict.
divider = "=" * 80
for node in nodes:
    preview = node.text[:10000]
    print(divider, preview, sep="\n")
    print("Metadata:", node.metadata)

: 
A1 
Date: 
20-APR-2023
TABLE OF CONTENTS 
Pos 
Description 
Model 
Make 
Page 
1 
VHF FM DSC Radio 
7222 
Sailor 
4 
2 
Inmarsat-C 
6110 Mini-C 
Sailor 
49 
3 
MF/HF Radio 
6320 
Sailor 
59 
4 
Navtex Receiver 
6390 
Sailor 
69 
5 
GMDSS Printer 
H125B 
Sailor 
96 
6 
SART 
SART 20 
Jotron
Metadata: {}
in 
order to be available as a service for the LT-3100S GMDSS system. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 247 of 307
Metadata: {}
Sailor 
96 
6 
SART 
SART 20 
Jotron 
103 
7 
EPIRB 
TRON 60S 
Jotron 
111 
8 
Iridium Terminal 
LT-3100S 
Lars
Thrane
128 
9 
Alarm Panel 
6103 
Sailor 
298 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 3 of 307


DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 4 of 307
Metadata: {}
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 152 of 307


LT-3100S GMDSS User & Installation Manual Rev.
Metadata: {}
LT-3100S GMDSS User & Installation Manual Rev.
Metadata: {}


In [82]:
# Print each baseline result in the same layout as the auto-merged results:
# a divider line, up to 10,000 characters of text, then the metadata dict.
divider = "=" * 80
for node in base_nodes:
    preview = node.text[:10000]
    print(divider, preview, sep="\n")
    print("Metadata:", node.metadata)

: 
A1 
Date: 
20-APR-2023
TABLE OF CONTENTS 
Pos 
Description 
Model 
Make 
Page 
1 
VHF FM DSC Radio 
7222 
Sailor 
4 
2 
Inmarsat-C 
6110 Mini-C 
Sailor 
49 
3 
MF/HF Radio 
6320 
Sailor 
59 
4 
Navtex Receiver 
6390 
Sailor 
69 
5 
GMDSS Printer 
H125B 
Sailor 
96 
6 
SART 
SART 20 
Jotron
Metadata: {}
in 
order to be available as a service for the LT-3100S GMDSS system. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 247 of 307
Metadata: {}
Sailor 
96 
6 
SART 
SART 20 
Jotron 
103 
7 
EPIRB 
TRON 60S 
Jotron 
111 
8 
Iridium Terminal 
LT-3100S 
Lars
Thrane
128 
9 
Alarm Panel 
6103 
Sailor 
298 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 3 of 307


DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 4 of 307
Metadata: {}
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 152 of 307


LT-3100S GMDSS User & Installation Manual Rev.
Metadata: {}
LT-3100S GMDSS User & Installation Manual Rev.
Metadata: {}


### Plug it into Query Engine

In [62]:
from llama_index.core.query_engine import RetrieverQueryEngine

In [63]:
# One query engine per retriever, so answers from auto-merging retrieval and
# from the plain vector baseline can be compared.
query_engine = RetrieverQueryEngine.from_args(retriever=retriever)
base_query_engine = RetrieverQueryEngine.from_args(retriever=base_retriever)

In [64]:
# Answer the question via the auto-merging retriever. The saved log shows one
# embedding request followed by one chat-completion request.
response = query_engine.query(query_str)

2026-01-14 14:33:38,619 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 14:33:41,924 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [65]:
# NOTE(review): identical to the preceding cell's call. It re-issues the same
# query (another embedding + chat request) and overwrites `response`.
response = query_engine.query(query_str)

2026-01-14 14:33:51,862 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 14:33:54,300 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [66]:
# print() applies str() itself, so the explicit conversion is unnecessary.
# NOTE(review): the saved output is about squelch adjustment, which does not
# match the current query_str; it looks like a leftover from an earlier run.
print(response)

When hearing noise or an unwanted signal, briefly push the Volume/Squelch button to ensure the squelch bar is visible on the display, then turn the squelch button clockwise until the radio is muted.


In [67]:
# Same question answered through the baseline vector retriever, for comparison
# with the auto-merging answer.
base_response = base_query_engine.query(query_str)

2026-01-14 14:35:09,851 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-14 14:35:12,597 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [83]:
# print() applies str() itself, so the explicit conversion is unnecessary.
# NOTE(review): the saved output is about squelch adjustment, which does not
# match the current query_str; it looks like a leftover from an earlier run.
print(base_response)

When hearing noise or an unwanted signal, briefly push the Volume/Squelch button to ensure the squelch bar is visible on the display, then turn the squelch button clockwise until the radio is muted.


### Evaluation
