# Auto-Merging Retrieval - Source Group A

In [2]:
import os

from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    Settings,
    load_index_from_storage,
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build a hierarchical vector index, or reload it if already persisted.

    The global ``Settings.llm`` and ``Settings.embed_model`` are always
    overwritten with the supplied values, whether the index is built or
    loaded.

    Args:
        documents: Documents handed to ``HierarchicalNodeParser``. Ignored
            when ``save_dir`` already exists.
        llm: Value assigned to ``Settings.llm``.
        embed_model: Value assigned to ``Settings.embed_model``.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes for the hierarchical parser; falls back to
            ``[2048, 512, 128]`` when falsy. Ignored when ``save_dir``
            already exists.

    Returns:
        The index: freshly built and persisted when ``save_dir`` is absent,
        otherwise the one loaded from ``save_dir``.
    """
    # Set global settings
    Settings.llm = llm
    Settings.embed_model = embed_model

    # A persisted index is reused as-is, so check for it before doing any
    # parsing: the nodes would otherwise be computed and then discarded.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # Every node goes into the docstore, but only the leaves are indexed --
    # presumably so parent chunks can be looked up at retrieval time.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Assemble a query engine: auto-merging retrieval followed by reranking.

    Args:
        automerging_index: Index whose ``as_retriever`` and
            ``storage_context`` are used to build the retriever.
        similarity_top_k: Forwarded to ``automerging_index.as_retriever``.
        rerank_top_n: ``top_n`` given to the ``SentenceTransformerRerank``
            postprocessor (model ``BAAI/bge-reranker-base``).

    Returns:
        A ``RetrieverQueryEngine`` created with ``from_args`` from the
        auto-merging retriever and the reranker.
    """
    leaf_retriever = automerging_index.as_retriever(
        similarity_top_k=similarity_top_k,
    )
    merging_retriever = AutoMergingRetriever(
        leaf_retriever,
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
    )


In [3]:
from llama_index.llms.openai import OpenAI 
from llama_index.core import Settings

from llama_index.core import SimpleDirectoryReader

# Load the Source Group A PDFs. The list order is kept exactly as written
# because it determines the order of the loaded documents.
documents = SimpleDirectoryReader(
    input_files=[
        "./SourceGroupA/source12.pdf",
        "./SourceGroupA/source1.pdf",
        "./SourceGroupA/source11.pdf",
        "./SourceGroupA/source2.pdf",
        "./SourceGroupA/source3.pdf",
        "./SourceGroupA/source4.pdf",
        "./SourceGroupA/source5.pdf",
        "./SourceGroupA/source6.pdf",
        "./SourceGroupA/source7.pdf",
        "./SourceGroupA/source8.pdf",
        "./SourceGroupA/source9.pdf",
        "./SourceGroupA/source10.pdf",
    ]
).load_data()


In [4]:
from llama_index.core import Document

# Collapse all loaded documents into a single Document, separating their
# texts with a blank line.
document = Document(
    text="\n\n".join(doc.text for doc in documents),
)

# Build the auto-merging index over the combined document, or reload it
# from ./merging_index_GroupA if that directory already exists.
index = build_automerging_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.5),
    save_dir="./merging_index_GroupA",
)


In [None]:
from llama_index.core.response.notebook_utils import display_response

# Query engine over the Group A index, retrieving 6 candidates per query.
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

# Run each question in the file (one per line) through the engine, echoing
# the question before its rendered response.
# NOTE(review): the file is assumed to be UTF-8 -- confirm against however
# generated_questions.txt is produced.
with open('./generated_questions.txt', encoding="utf-8") as file:
    for line in file:
        question = line.strip()
        if not question:
            # Blank lines (e.g. a trailing newline at end of file) would
            # otherwise be sent to the engine as empty queries.
            continue
        print(question)
        display_response(query_engine.query(question))

**`Final Response:`** The differences between white and black people include variations in physical characteristics such as body development, brain size, teeth size and spacing, and spine curvature. There are also reported differences in intellectual performance, with average IQ scores showing a discrepancy between the two groups, even when factors like family income and childhood advantages are taken into account. Additionally, the performance gap between white and black children tends to widen as they age, with the largest disparity seen at the high school and college levels. Despite attempts to attribute these differences to environmental factors, studies have shown that improvements in environment benefit both races without significantly reducing the gap in IQ scores.