## Setup

In [1]:
from helper import get_openai_api_key

OPENAI_API_KEY = get_openai_api_key()

In [2]:
import nest_asyncio

nest_asyncio.apply()

## Load Data

I pass the BERT Paper through this router

In [3]:
from llama_index.core import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader(input_files=["Bert Paper.pdf"]).load_data()

Ignoring wrong pointing object 208 0 (offset 0)
Ignoring wrong pointing object 253 0 (offset 0)
Ignoring wrong pointing object 255 0 (offset 0)
Ignoring wrong pointing object 291 0 (offset 0)
Ignoring wrong pointing object 324 0 (offset 0)
Ignoring wrong pointing object 388 0 (offset 0)


## Define LLM and Embedding model

In [4]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [5]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

## Define Summary Index and Vector Index over the Same Data

In [6]:
from llama_index.core import SummaryIndex, VectorStoreIndex

summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

## Define Query Engines and Set Metadata

In [7]:
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [9]:
from llama_index.core.tools import QueryEngineTool


summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "Useful for summarization questions related to Bert"
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from the Bert paper."
    ),
)

## Define Router Query Engine

In [10]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector


query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

In [11]:
response = query_engine.query("What is the summary of the paper?")
print(str(response))

[1;3;38;5;200mSelecting query engine 0: This choice indicates that the summary is related to summarization questions, which is likely to be the main focus of the paper..
[0mThe paper introduces BERT, a language representation model that utilizes bidirectional self-attention for pre-training on unsupervised tasks like Masked Language Model (MLM) and Next Sentence Prediction (NSP). BERT can be fine-tuned with minimal changes to achieve state-of-the-art performance on various natural language processing tasks. It is based on a multi-layer bidirectional Transformer encoder architecture with two model sizes: BERT BASE and BERT LARGE. The model's effectiveness is demonstrated through comparisons with other models, ablation studies, and experiments on tasks like SQuAD, SWAG, and CoNLL-2003 Named Entity Recognition. The study emphasizes the importance of pre-training steps, model size, and masking strategies in optimizing BERT's performance across different NLP tasks.


In [12]:
print(len(response.source_nodes))

25


In [14]:
response = query_engine.query(
    "Explain the Masking process in the fine tuning of BERT"
)
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The masking process in the fine tuning of BERT is related to summarization questions, as it involves masking certain tokens in the input sequence to train the model for various tasks..
[0mThe masking process in the fine-tuning of BERT involves randomly selecting and masking a portion of the input tokens during pre-training. When a token is masked, it is replaced with either the [MASK] token most of the time, a random token occasionally, or left unchanged in some instances. This process, known as the "masked language model" (MLM) task, requires the model to predict the original token based on the context provided by the surrounding words. By training on this task, BERT learns bidirectional representations that fuse left and right context, enhancing its understanding of the input text.


In [None]:
response = query_engine.query("Tell me about the ablation study results?")
print(str(response))