# Auto-Merging Retrieval

In [16]:
import os

from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    Settings,
    load_index_from_storage,
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging vector index, or load it from ``save_dir``.

    If ``save_dir`` already exists, the persisted index is loaded from it and
    ``documents`` / ``chunk_sizes`` are not used. Otherwise the documents are
    split into a node hierarchy, the index is built over the leaf nodes, and
    the result is persisted to ``save_dir``.

    Args:
        documents: Documents handed to the hierarchical node parser.
        llm: Assigned to ``Settings.llm``.
        embed_model: Assigned to ``Settings.embed_model``.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes for the hierarchical parser; a falsy value
            selects ``[2048, 512, 128]``.

    Returns:
        The newly built or loaded index.
    """
    # Set global settings (applies to both the build and the load path).
    Settings.llm = llm
    Settings.embed_model = embed_model

    # A persisted index takes precedence: return it without parsing the
    # documents, since the parsed nodes would be discarded anyway.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # Every node of the hierarchy goes into the docstore, while only the leaf
    # nodes are indexed for vector search.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Assemble a query engine that retrieves, auto-merges, then reranks.

    Args:
        automerging_index: Index providing the base retriever and the
            storage context handed to the auto-merging retriever.
        similarity_top_k: Passed to ``as_retriever`` for the base retriever.
        rerank_top_n: ``top_n`` for the ``BAAI/bge-reranker-base`` reranker.

    Returns:
        A ``RetrieverQueryEngine`` built from the auto-merging retriever with
        the reranker as its only node postprocessor.
    """
    leaf_retriever = automerging_index.as_retriever(
        similarity_top_k=similarity_top_k
    )
    # verbose=True makes the retriever print its merge steps while querying.
    merging_retriever = AutoMergingRetriever(
        leaf_retriever,
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
    )


In [17]:
from llama_index.llms.openai import OpenAI 
from llama_index.core import Settings

from llama_index.core import SimpleDirectoryReader

# Load the ten source PDFs, ./Infowars/source1.pdf through source10.pdf.
documents = SimpleDirectoryReader(
    input_files=[f"./Infowars/source{i}.pdf" for i in range(1, 11)]
).load_data()


In [18]:
from llama_index.core import Document

# Concatenate the text of every loaded document into a single Document,
# separating the pieces with a blank line.
document = Document(text="\n\n".join(doc.text for doc in documents))

# Build the auto-merging index over that single document, or load it from
# ./merging_index if that directory already exists.
index = build_automerging_index(
    documents=[document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./merging_index",
)


In [19]:
from llama_index.core.response.notebook_utils import display_response

query_engine = get_automerging_query_engine(index, similarity_top_k=6)

# Run every question in the file (one per line) through the query engine,
# echoing the question before displaying its response.
with open('./generated_questions.txt', encoding="utf-8") as file:
    for line in file:
        question = line.strip()
        if not question:
            # Skip blank lines (e.g. a trailing empty line) so that no empty
            # query is sent to the engine.
            continue
        print(question)
        display_response(query_engine.query(question))

What do racial disparities in police shootings say about Black people?
> Merging 1 nodes into parent node.
> Parent node id: 31f1f412-5bc1-4c28-9114-cc6746428b3a.
> Parent node text: There is no evidence of systematic anti-Black disparities in fatal shootings of unarmed citizens....



**`Final Response:`** Racial disparities in police shootings do not indicate systematic anti-Black disparities in fatal shootings of unarmed citizens or in fatal shootings involving misidentification of objects as weapons.

Why is critical race theory a poor idea?
> Merging 1 nodes into parent node.
> Parent node id: 31f1f412-5bc1-4c28-9114-cc6746428b3a.
> Parent node text: There is no evidence of systematic anti-Black disparities in fatal shootings of unarmed citizens....



**`Final Response:`** Critical race theory is considered a poor idea because it often relies on population-based comparisons to argue for racial disparity or bias, which can lead to misleading conclusions. This approach assumes that all citizens are equally likely to be exposed to certain situations, such as police use of deadly force, which may not accurately reflect the complexities of individual cases. Additionally, critical race theory's emphasis on making every story about race can sometimes result in overlooking important facts and details, potentially leading to embarrassing mistakes and misinterpretations.

Are current policing methods effective in reducing crime in minority communities?
> Merging 1 nodes into parent node.
> Parent node id: 31f1f412-5bc1-4c28-9114-cc6746428b3a.
> Parent node text: There is no evidence of systematic anti-Black disparities in fatal shootings of unarmed citizens....



**`Final Response:`** Current policing methods are being scrutinized for potential bias and racial disparities, particularly in interactions with black citizens. Despite efforts to address concerns and compare different data sets, there are ongoing debates about the effectiveness of these methods in reducing crime in minority communities. The issue of crime in these communities is complex and involves discussions around public perception, political spin, and the reality of criminal activity. The effectiveness of current policing methods in addressing crime in minority communities remains a topic of debate and further examination.

Why is there insufficient evidence to support claims of systemic racism within law enforcement agencies?
> Merging 1 nodes into parent node.
> Parent node id: 31f1f412-5bc1-4c28-9114-cc6746428b3a.
> Parent node text: There is no evidence of systematic anti-Black disparities in fatal shootings of unarmed citizens....



**`Final Response:`** There is insufficient evidence to support claims of systemic racism within law enforcement agencies because research findings have shown no evidence of systematic anti-Black disparities in fatal shootings of unarmed citizens or in fatal shootings involving misidentification of objects as weapons. Additionally, comparisons of data sets uncontaminated by police bias have consistently shown no significant racial disparity in police shootings after accounting for estimated rates of police exposure.