In [1]:
# Dependency installation (disabled). Uncomment and run once in a fresh
# environment.
# NOTE(review): versions are unpinned (-U pulls the latest release); consider
# pinning them so the notebook stays reproducible.
# %pip install -U llama-index-readers-file pymupdf
# %pip install -U llama-index-core
# %pip install -U llama-index-llms-azure-openai
# %pip install -U llama-index-embeddings-azure-openai
# %pip install -U llama-index-vector-stores-chroma
# %pip install -U python-dotenv

In [2]:
# Reload imported modules automatically before each cell runs, so edits to
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path

# Folder, relative to the current working directory, that holds the input PDFs.
# Using pathlib keeps the path portable across Windows / Linux / macOS.
DATA_DIR = Path("data")

# Idempotent: creates missing parent folders and is a no-op when the folder
# already exists, so this cell can be re-run safely.
DATA_DIR.mkdir(exist_ok=True, parents=True)


### Load Data

In [4]:
from pathlib import Path

from llama_index.readers.file import PDFReader
from llama_index.readers.file import PyMuPDFReader

In [6]:
loader = PyMuPDFReader()

# Build the path from DATA_DIR (defined in the setup cell) instead of repeating
# the "./data" literal, so the data location is configured in one place.
pdf_path = DATA_DIR / "GMDSS_System-IOM_Manual.pdf"

# The setup cell only creates an empty folder, so fail early with an actionable
# message when the manual has not been copied into it yet.
if not pdf_path.is_file():
    raise FileNotFoundError(
        f"Input PDF not found at {pdf_path.resolve()}; "
        f"place the file in {DATA_DIR.resolve()} and re-run this cell."
    )

# One document per PDF page; the pages are stitched together in the next cell.
docs0 = loader.load(file_path=pdf_path)

By default, the PDF reader creates a separate `Document` for each page. For this notebook, we stitch the pages together into a single `Document`, which makes it easier to demonstrate how auto-merging later “stitches” retrieved chunks back together.

In [7]:
from llama_index.core import Document

# Concatenate the text of every per-page document, separated by a blank line,
# and wrap the result in a single Document.
# NOTE(review): only the text is carried over; metadata of the per-page
# documents is not copied (the printed results later show `Metadata: {}`).
page_texts = (page_doc.get_content() for page_doc in docs0)
doc_text = "\n\n".join(page_texts)
docs = [Document(text=doc_text)]

### Parse Chunk Hierarchy from Text, Load into Storage

By default, `HierarchicalNodeParser` builds a three-level hierarchy (chunk sizes are measured in tokens):

- 1st level: chunk size 2048
- 2nd level: chunk size 512
- 3rd level: chunk size 128

In [8]:
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import SentenceSplitter

In [9]:
# Three-level hierarchy, coarse to fine. These are the library defaults,
# written out so the sizes are visible and easy to tune here.
node_parser = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048, 512, 128],
)

In [10]:
# Split the stitched document into the full node hierarchy. The result is one
# flat list containing the nodes of every level (roots, intermediates, leaves).
nodes = node_parser.get_nodes_from_documents(docs)

In [11]:
# Total number of nodes across all hierarchy levels.
len(nodes)

1219

Here we import a simple helper function for fetching “leaf” nodes within a node list. These are nodes that don’t have children of their own.

In [12]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes

In [13]:
# Keep only the nodes that have no children of their own; these are the ones
# that get embedded and indexed further down.
leaf_nodes = get_leaf_nodes(nodes)

In [14]:
# Number of leaf nodes (a subset of `nodes`).
len(leaf_nodes)

934

In [15]:
# Top-level nodes of the hierarchy.
# NOTE(review): `root_nodes` is not referenced by any later cell visible here.
root_nodes = get_root_nodes(nodes)

In [16]:
from dotenv import load_dotenv
import os

# Load KEY=VALUE pairs from a local .env file into the process environment so
# that no credentials are hard-coded in the notebook.
load_dotenv()

# Azure OpenAI connection settings (each is None when the variable is unset).
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")

# Names of the Azure deployments used for chat completion and for embeddings.
AZURE_CHAT_DEPLOYMENT = os.getenv("AZURE_CHAT_DEPLOYMENT")
AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")

# NOTE(review): not referenced by any later cell visible here.
CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR")

# Embedding vector size. `int(os.getenv(...))` raised TypeError when the
# variable was unset (int(None)) and ValueError when it was empty (int("")).
# Fall back to 3072, the size noted next to the embedding model below.
EMBEDDING_DIMENSIONS = int(os.getenv("EMBEDDING_DIMENSIONS") or 3072)

In [17]:
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# --- Azure OpenAI clients ----------------------------------------------------

# Embedding model used to vectorize nodes and queries.
embed_model = AzureOpenAIEmbedding(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=OPENAI_API_VERSION,
    deployment_name=AZURE_EMBEDDING_DEPLOYMENT,
    model="text-embedding-3-large",
    dimensions=EMBEDDING_DIMENSIONS,  # 3072
)

# Chat model used to synthesize answers. `model` is the logical model name,
# `deployment_name` is the Azure deployment that serves it.
llm = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=OPENAI_API_VERSION,
    deployment_name=AZURE_CHAT_DEPLOYMENT,
    model="gpt-4o",
    temperature=0.1,
)

# --- Storage -----------------------------------------------------------------

# The docstore receives *all* nodes of the hierarchy, not just the leaves.
# NOTE(review): a later cell rebinds `nodes` to retrieval results, so this cell
# is only correct on a top-to-bottom run from a fresh kernel.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

# Storage context built around that docstore (a vector store is included by
# default as well).
storage_context = StorageContext.from_defaults(docstore=docstore)

In [18]:
from llama_index.core import Settings

# Register the Azure models as the global defaults, so later cells that do not
# pass a model explicitly use these.
Settings.llm = llm  # AzureOpenAI chat model
Settings.embed_model = embed_model  # AzureOpenAIEmbedding

In [19]:
from llama_index.core import VectorStoreIndex

# Index only the leaf nodes. Building the index sends them to the embedding
# endpoint (see the HTTP log below). The shared storage context gives the index
# the docstore that also holds the parent nodes.
base_index = VectorStoreIndex(nodes=leaf_nodes, storage_context=storage_context)

2026-01-15 12:23:01,240 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:23:05,503 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:23:08,282 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:23:09,731 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:23:11,335 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/

### Define Retriever

In [20]:
from llama_index.core.retrievers import AutoMergingRetriever

In [21]:
# Baseline: plain vector retriever returning the 6 most similar leaf nodes.
base_retriever = base_index.as_retriever(similarity_top_k=6)

# Auto-merging retriever layered on the baseline, using the same storage
# context. With verbose=True it prints a message whenever retrieved nodes are
# merged into their parent node.
retriever = AutoMergingRetriever(
    base_retriever,
    storage_context,
    verbose=True,
)

In [23]:

# Question used for every retrieval and query-engine call below.
query_str = "how to Replace the fuse in the Transceiver Unit"

# Run the same query through both retrievers so the results can be compared.
# NOTE(review): this rebinds `nodes`, which until now held the full node
# hierarchy from the parser; re-running earlier cells that read `nodes` after
# this point will see retrieval results instead.
nodes = retriever.retrieve(query_str)
base_nodes = base_retriever.retrieve(query_str)

2026-01-15 12:27:49,951 - INFO - Retrying request to /embeddings in 0.479918 seconds
2026-01-15 12:27:53,649 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:27:53,932 - INFO - > Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



> Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



2026-01-15 12:27:54,552 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [24]:
# Number of results from the auto-merging retriever. `nodes` here is the
# retrieval result from the cell above, not the parsed hierarchy.
len(nodes)

5

In [25]:
# Number of results from the baseline retriever (created with similarity_top_k=6).
len(base_nodes)

6

In [26]:
# Dump each auto-merged result: a separator rule, up to the first 10,000
# characters of its text, then its metadata.
for node in nodes:
    print("=" * 80, node.text[:10000], sep="\n")
    print("Metadata:", node.metadata)

a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L/nit
Replacing the fuse in the Transceiver Unit 
One fuse is installed in the Transceiver Unit. If the fuse is blown, do as follows: 
1.
Track down why the fuse was blown and solve the problem.
2.
Take out the old fuse.
3.
Insert the new fuse. The fuse rating is 10 A T.
Figure 3: Replacing the fuse in the SAILOR 7226 VHF Transceiver Unit 
98-171832-A
 
Chapter 3: Service & maintenanc 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 47 of 307
Metadata: {}
Replacing the fuse in theTransceiver L/nit
 
 
 
List of alerts 
The table below shows the alerts you may see in the Bridge Alert Management (BAM) 
system. 
 
ID
Instance Priority Category
Title
Description
3023
1
C
B
PS COMM Lost 
Power supply 
communication lost 
3023
2
 
 
B
BATT VOLT Low
Battery and charger. 
Voltage below limit 
3023
3
 
 
B
BATT VOLT High
Battery and charger.
Metadata: {}
Replacing the fuse in theTransceiver L/nit
Warranty and

In [28]:
# Dump each baseline result in the same format as the auto-merged results:
# a separator rule, up to the first 10,000 characters of text, then metadata.
for node in base_nodes:
    print("=" * 80, node.text[:10000], sep="\n")
    print("Metadata:", node.metadata)

a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L/nit
Replacing the fuse in the Transceiver Unit 
One fuse is installed in the Transceiver Unit. If the fuse is blown, do as follows: 
1.
Track down why the fuse was blown and solve the problem.
2.
Take out the old fuse.
3.
Insert the new fuse.
Metadata: {}
2.
Take out the old fuse.
3.
Insert the new fuse. The fuse rating is 10 A T.
Figure 3: Replacing the fuse in the SAILOR 7226 VHF Transceiver Unit 
98-171832-A
 
Chapter 3: Service & maintenanc 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 47 of 307
Metadata: {}
Replacing the fuse in theTransceiver L/nit
 
 
 
List of alerts 
The table below shows the alerts you may see in the Bridge Alert Management (BAM) 
system. 
 
ID
Instance Priority Category
Title
Description
3023
1
C
B
PS COMM Lost 
Power supply 
communication lost 
3023
2
 
 
B
BATT VOLT Low
Battery and charger. 
Voltage below limit 
3023
3
 
 
B
BATT VOLT High
Battery and charger.
Metad

### Plug it into Query Engine

In [29]:
from llama_index.core.query_engine import RetrieverQueryEngine

In [30]:
# One query engine per retriever, so the auto-merging and baseline answers can
# be compared on the same question.
base_query_engine = RetrieverQueryEngine.from_args(base_retriever)
query_engine = RetrieverQueryEngine.from_args(retriever)

In [31]:
# Answer the question with the auto-merging query engine (one embedding call
# plus one chat-completion call, per the log below).
response = query_engine.query(query_str)

2026-01-15 12:28:36,880 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:28:37,158 - INFO - > Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



> Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



2026-01-15 12:28:41,185 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [32]:
# NOTE(review): exact duplicate of the previous cell. It re-issues the same
# query (another embedding + chat-completion call) and overwrites `response`.
# Consider deleting this cell.
response = query_engine.query(query_str)

2026-01-15 12:28:42,401 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:28:42,685 - INFO - > Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



> Merging 2 nodes into parent node.
> Parent node id: 23ecde3b-a40b-4ee7-8695-7fd14e12ddc3.
> Parent node text: a CAM. 
DOCUMENT: GMDSS SYSTEM IOM MANUAL
Page 46 of 307


Replacing the fuse in theTransceiver L...



2026-01-15 12:28:44,157 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [33]:
# Show the answer produced with the auto-merging retriever.
print(str(response))

To replace the fuse in the Transceiver Unit, follow these steps:

1. Identify and resolve the issue that caused the fuse to blow.
2. Remove the old fuse from the unit.
3. Insert a new fuse with a rating of 10 A T.


In [34]:
# Answer the same question with the baseline (non-merging) query engine.
base_response = base_query_engine.query(query_str)

2026-01-15 12:28:45,657 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2026-01-15 12:28:47,344 - INFO - HTTP Request: POST https://nilup-mgaf895d-eastus.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"


In [35]:
# Show the baseline answer for comparison with the auto-merging answer above.
print(str(base_response))

To replace the fuse in the Transceiver Unit, follow these steps:

1. Identify and resolve the issue that caused the fuse to blow.
2. Remove the old fuse from the unit.
3. Insert a new fuse with a rating of 10 A T.


### Evaluation
