### 03.03. Setting up Indexes

In [1]:
#Install prerequisite packages
!pip install python-dotenv==1.0.0

!pip install llama-index==0.10.59
!pip install llama-index-llms-openai==0.1.27
!pip install llama-index-embeddings-openai==0.1.11
!pip install llama-index-llms-azure-openai==0.1.10
!pip install llama-index-embeddings-azure-openai==0.1.11




In [2]:
#Setup Azure Open AI connection
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

from llama_index.core import Settings
import os
import nest_asyncio
from getpass import getpass

# Jupyter already runs an asyncio event loop; nest_asyncio lets code that
# starts its own loop run inside it.
nest_asyncio.apply()

# SECURITY: never store the API key in the notebook. It is read from the
# AZURE_OPENAI_API_KEY environment variable; when that is unset (or empty) the
# user is prompted for it without echoing the value.
# NOTE(review): any key that was previously committed in this notebook must be
# treated as leaked and rotated in the Azure portal.
api_key = os.environ.get("AZURE_OPENAI_API_KEY") or getpass("Azure OpenAI API key: ")

# Endpoint and API version are not secrets; they can be overridden through the
# environment and otherwise fall back to the values this notebook was written for.
azure_endpoint = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://agentic-ai-course-account.openai.azure.com/",
)
api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")

# Register the chat model as the default LLM for all LlamaIndex components.
Settings.llm=AzureOpenAI(
    model="gpt-35-turbo",
    deployment_name="agentai-gpt35",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Register the embedding model used when building and querying vector indexes.
Settings.embed_model= AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="agentai-embedding",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)


In [7]:
#Create indexes for vector search
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

# One splitter shared by both documents (chunk_size=1024).
splitter = SentenceSplitter(chunk_size=1024)


def _index_pdf(pdf_path):
    """Run the load -> split -> index -> query-engine pipeline for one file.

    Returns a 4-tuple ``(documents, nodes, index, query_engine)`` so the
    caller can keep a handle on every intermediate stage.
    """
    documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
    # Read documents into nodes
    nodes = splitter.get_nodes_from_documents(documents)
    # Create a vector store over the nodes
    index = VectorStoreIndex(nodes)
    # Create a query engine on top of the index
    return documents, nodes, index, index.as_query_engine()


#-------------------------------------------------------------------
#Setup NoSQL document index
#-------------------------------------------------------------------
(
    nosql_documents,
    nosql_nodes,
    nosql_index,
    nosql_query_engine,
) = _index_pdf("Evaluating NoSQL databases for OLAP workloads.pdf")

#-------------------------------------------------------------------
#Setup NLP document index
#-------------------------------------------------------------------
(
    nlp_documents,
    nlp_nodes,
    nlp_index,
    nlp_query_engine,
) = _index_pdf("Software Infrastructure for natural language processing.pdf")


### 03.04. Set Up the Agentic Router

In [11]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool

# Wrap the NoSQL query engine as a tool; the description is the text
# given for this choice.
nosql_tool = QueryEngineTool.from_defaults(
    query_engine=nosql_query_engine,
    name="Evaluate NoSQL Databases",
    description="Evaluate NoSQL databases for OLAP use cases.",
)

# Wrap the NLP query engine as a tool in the same way.
nlp_tool = QueryEngineTool.from_defaults(
    query_engine=nlp_query_engine,
    name="Software infra for NLP",
    description="Software infrastructure for natural language processing.",
)

# Router over the two tools, using an LLM-based single selector.
# The tool order is significant: the captured outputs refer to the engines by
# position ("query engine 0" / "query engine 1").
router_tools = [nosql_tool, nlp_tool]
single_selector = LLMSingleSelector.from_defaults()

query_engine = RouterQueryEngine(
    selector=single_selector,
    query_engine_tools=router_tools,
    verbose=True,
)

### 03.05. Route with Agentic AI

In [12]:
# Ask a question about NoSQL.
nosql_question = "What data processing components were used for NoSQL evaluation?"
response = query_engine.query(nosql_question)
# print() applies str() to its argument, so this prints the same text.
print(response)

[1;3;38;5;200mSelecting query engine 0: The first choice specifically mentions evaluating NoSQL databases, which is directly relevant to the question about data processing components used for NoSQL evaluation..
[0mThe data processing components used for NoSQL evaluation were Apache HDFS and Apache Spark. The raw data was loaded into HDFS and then transformed using PySpark before being loaded into the NoSQL databases.


In [13]:
# Ask a question about the NLP paper (GATE).
gate_question = "What are the key elements of GATE design?"
response = query_engine.query(gate_question)
# print() applies str() to its argument, so this prints the same text.
print(response)

[1;3;38;5;200mSelecting query engine 1: The key elements of GATE design involve natural language processing, so the software infrastructure for natural language processing is the most relevant choice..
[0mGATE comprises three principal elements in its design: GDM, the GATE Document Manager, CREOLE, a Collection of Reusable Objects for Language Engineering, and GGI, the GATE Graphical Interface. These elements correspond to the three key objectives identified in the paper. GDM provides a central repository for storing all information generated by an LE system, CREOLE modules or objects do the real work of analyzing texts, and GGI encapsulates GDM and CREOLE resources for interactive building and testing of LE components and systems.
