**Load The Document**

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the contents of ./documents; load_data() returns a list of Document objects.
documents = SimpleDirectoryReader("./documents").load_data()

# Sanity check: container type, number of loaded documents, and a preview of
# the first entry.
print(f"{type(documents)} \n")
print(f"{len(documents)} \n")
first_document = documents[0]
print(type(first_document))
print(first_document)

<class 'list'> 

23 

<class 'llama_index.core.schema.Document'>
Doc ID: bd06bca0-f4ab-40a9-921d-ca609119f2ed
Text: December  2015  Dormant assets    A factsheet of the  Swiss
Bankers Association


**Combine the text of all loaded documents into a single Document**

In [None]:
from llama_index.core import Document

# Join the text of every loaded document, separated by a blank line, and wrap
# the result in one Document. Only the text is passed on; nothing else from the
# source documents is copied into the merged Document.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

## Auto-merging retrieval setup

**Hierarchical Node Parser and Leaf Nodes**

In [None]:
from llama_index.core.node_parser import HierarchicalNodeParser

# Hierarchical node parser with three explicit chunk sizes
# (presumably one per hierarchy level, largest first -- confirm against the docs).
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=[2048, 512, 128])

In [None]:
# Run the hierarchical parser over the single merged document.
nodes = node_parser.get_nodes_from_documents([document])

In [None]:
from llama_index.core.node_parser import get_leaf_nodes

# Keep only the leaf nodes from the parsed hierarchy.
leaf_nodes = get_leaf_nodes(nodes)

# Print the text of one leaf (index 30 is an arbitrary sample) to eyeball the
# chunk granularity.
sample_leaf = leaf_nodes[30]
print(sample_leaf.text)

In addition to the main scheme, the Act also provided for an alternative s cheme. 
This section of the review will address each of these in turn.  
2.5 The main scheme allows any bank or building society regardless of size, which opts into the 
scheme, to transfer dormant account money to an authorised reclaim fund which will then give 
any surplus money to the Big Lottery Fund to distribute to good causes in the community.   
2.6 The Government welcomes the fact that the largest  banks and building societies have opted 
into the main scheme.


**Configure the GPT-3.5 LLM**

In [None]:
from llama_index.llms.openai import OpenAI

# GPT-3.5 Turbo with a low sampling temperature (0.1).
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [None]:
import os

import openai

# Never embed the API key in the notebook: anything committed here is visible
# to everyone with access to the file and its history. Read it from the
# environment instead, and fail early with a clear message when it is missing.
# NOTE: the key that was previously hard-coded in this cell must be treated as
# compromised and revoked in the OpenAI dashboard.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )
openai.api_key = api_key

In [None]:
from llama_index.core import ServiceContext

# Bundle the LLM, a local embedding model, and the hierarchical node parser
# into one service context.
# NOTE(review): the captured output of this cell shows a warning pointing at
# this call -- ServiceContext appears to be deprecated in recent llama_index
# releases; confirm and consider migrating to the global Settings object.
auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    node_parser=node_parser,
)

  auto_merging_context = ServiceContext.from_defaults(
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


## Putting It All Together

In [None]:
import os

from llama_index.core import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import get_leaf_nodes
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine

## Helpers: build the auto-merging index and the query engine on top of it


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging vector index, or reload it if already persisted.

    If ``save_dir`` exists on disk, the index is loaded from it and
    ``documents`` and ``chunk_sizes`` are NOT used: a stale directory silently
    wins over new inputs, so delete it (or pass a different ``save_dir``) to
    force a rebuild. Otherwise the documents are parsed hierarchically, all
    nodes are stored in the docstore, only the leaf nodes are embedded into
    the vector index, and the result is persisted to ``save_dir``.

    Args:
        documents: Documents to parse when the index has to be built.
        llm: LLM passed to the service context.
        embed_model: Embedding model identifier passed to the service context.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes for the hierarchical parser; a falsy value
            falls back to ``[2048, 512, 128]``.

    Returns:
        The freshly built or reloaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Fast path: reuse the persisted index. Parsing the documents here would be
    # wasted work because the loaded index brings its own storage context.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # Every node (not just the leaves) goes into the docstore; the retriever
    # built in get_automerging_query_engine is handed this storage context.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Create a query engine that retrieves, auto-merges, and reranks nodes.

    Args:
        automerging_index: Index providing the base retriever and the storage
            context handed to the auto-merging retriever.
        similarity_top_k: Number of nodes requested from the base retriever.
        rerank_top_n: Number of nodes kept by the reranker.

    Returns:
        A RetrieverQueryEngine with the reranker as its only node postprocessor.
    """
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,  # logs each merge, as seen in the query output
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
    )

**Running the query engine**

In [None]:
# Build the index over the single merged document with the helper's default
# chunk sizes, or reload it from ./merging_index if that directory exists.
# A new OpenAI instance is created here with the same model and temperature
# as the `llm` defined earlier.
index = build_automerging_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./merging_index",
)


  merging_context = ServiceContext.from_defaults(


In [None]:
# similarity_top_k=6 equals the helper's default rerank_top_n (6), so the
# reranker can reorder the retrieved nodes but has no room to drop any.
# NOTE(review): confirm this is intended.
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

**Two layers**

In [None]:
# Two-level hierarchy (2048 / 512) persisted to its own directory.
# This call passes the original loaded `documents` list rather than the merged
# `document`; the merged Document was built from text only, whereas the
# source-extraction cell further down looks for page_label / file_name in the
# response metadata.
auto_merging_index_0 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index_0",
    chunk_sizes=[2048,512],
)

  merging_context = ServiceContext.from_defaults(


In [None]:
# Query engine over the two-level index: request 12 candidates from the base
# retriever and keep 6 after reranking (same values as the helper's defaults,
# spelled out explicitly).
auto_merging_engine_0 = get_automerging_query_engine(
    auto_merging_index_0,
    similarity_top_k=12,
    rerank_top_n=6,
)

## Response from the Model

In [None]:
# The prompt is split over several lines for readability only; implicit string
# concatenation yields exactly the same text that was sent originally.
question = (
    "As an officer in the monetary authority of singapore,I would like to "
    "evaluate the key differences between the Swiss act and UK act. "
    "Can you please provide me the differences thematically by sections?"
)
response = auto_merging_engine_0.query(question)
print(response)

> Merging 2 nodes into parent node.
> Parent node id: 2dbf977c-2cfb-469c-83b8-4e68e541c342.
> Parent node text: 3 1 Introduction 
 
1.1 The Dor mant Bank and Building Society  Accounts Act 20081
1.2 The Act en...

> Merging 2 nodes into parent node.
> Parent node id: d7d7ed29-9063-4b37-8950-753162149e76.
> Parent node text: 6  2.8 The majority of participants transfer dormant account money on an annual basis to the 
Rec...

> Merging 2 nodes into parent node.
> Parent node id: ddbfda16-2568-49eb-b1ff-9d648ef0218b.
> Parent node text: 2 December  2015 | SBA | Dorm ant assets  Introduction  
Circumstances may arise in which contact...

> Merging 1 nodes into parent node.
> Parent node id: 072bf4fe-2fe0-4b4a-81dc-a74fb3125d1c.
> Parent node text: © Crown copyright 2014 
You may re-use this information (excluding logos) free of charge in any f...

> Merging 1 nodes into parent node.
> Parent node id: d3b59637-972f-46f8-be34-82d3d00a5725.
> Parent node text: 5 2 Transferring dormant 
accoun

## Context and Source Retrieval

In [None]:
import re

# Matches a page_label entry immediately followed by a file_name entry in the
# string form of the response metadata.
source_pattern = r"'page_label': '[^']*', 'file_name': '[^']*'"
separator = '\n' + '=' * 60 + '\n'

if hasattr(response, 'metadata'):
    # Search the textual representation of the metadata for source entries.
    matches = re.findall(source_pattern, str(response.metadata))

    print(separator)
    print('Context Information')
    print(str(matches))
    print(separator)



Context Information
["'page_label': '9', 'file_name': 'DS3-assessment-UK-Dormant-Assets.pdf'", "'page_label': '13', 'file_name': 'DS3-assessment-UK-Dormant-Assets.pdf'", "'page_label': '7', 'file_name': 'DS3-assessment-UK-Dormant-Assets.pdf'", "'page_label': '2', 'file_name': 'DS3-assessment-Swiss-Dormant-Assets.pdf'", "'page_label': '3', 'file_name': 'DS3-assessment-UK-Dormant-Assets.pdf'", "'page_label': '11', 'file_name': 'DS3-assessment-UK-Dormant-Assets.pdf'"]


