In [5]:
import nest_asyncio
import asyncio

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.tools import QueryEngineTool

nest_asyncio.apply()

In [6]:
# Load Large Language Model (LLM) and Embedding Model
Settings.llm = Ollama(model= "llama3.2", request_timeout=3500)
Settings.embedding = OllamaEmbedding(
    model_name="mxbai-embed-large")

In [8]:
# load the document
file = "source/orwellanimalfarm.pdf"
document = SimpleDirectoryReader(input_files = [file]
                                 ).load_data()
# split the document into smaller chunks
splitter = SentenceSplitter(chunk_size=1024, 
                            chunk_overlap=200)
nodes = splitter.get_nodes_from_documents(document)

In [5]:
# create the indexes
summary_index = SummaryIndex(nodes,embed_model=Settings.embedding,
                               llm=Settings.llm,)
vector_index = VectorStoreIndex(nodes, 
                               embed_model=Settings.embedding,
                               llm=Settings.llm,)

summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", use_async=True)

vector_query_engine = vector_index.as_query_engine()



In [6]:
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    # name="Summary",
    description=(
        "Useful for summarization questions of the related document."),
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    # name="Vector",
    description=(
        "Useful for retrieving specific context from the document."),
)
tools = [summary_tool, vector_tool]

In [7]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector


query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=tools,
    verbose=True)

In [9]:
response = query_engine.query("What is the summary of the document?")
print(response)

ReadTimeout: timed out

In [None]:
response = query_engine.query("Who is napolean?")
print(response)

[1;3;38;5;200mSelecting query engine 1: Retrieving specific relevant context from the document can provide biographical information about Napoleon, such as his early life, conquests, and legacy..
[0mHe is referred to as "our Leader, Comrade Napoleon" and is often given credit for every successful achievement and every stroke of good fortune. He is also addressed with various titles such as "Father of All Animals", "Terror of Mankind", and "Protector of the Sheep-fold".


### Function call

In [None]:
from llama_index.core.tools import FunctionTool

def add(x: int, y: int) -> int:
    """Adds two integers together."""
    return x + y

def mystery(x: int, y: int) -> int: 
    """Mystery function that operates on top of two numbers."""
    return (x + y) * (x + y)


add_tool = FunctionTool.from_defaults(fn=add)
mystery_tool = FunctionTool.from_defaults(fn=mystery)

llm = Ollama(model="llama3.2", request_timeout=3500)
# Create a custom LLM tool
response = llm.predict_and_call(
    [add_tool, mystery_tool], 
    "Tell me the output of the mystery function on 2 and 9", 
    verbose=True
)

In [None]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

agent_worker = FunctionCallingAgentWorker.from_tools(
    [vector_tool, summary_tool], 
    llm=llm, 
    verbose=True
)
agent = AgentRunner(agent_worker)

NameError: name 'vector_tool' is not defined

In [None]:

response = agent.chat(
    "What is the summary of the document?"
)
print(response)

response = agent.chat("So what phrase best fits it?")
print(response)

In [None]:
import dask

### Router

In [2]:
from typing import List
from llama_index.core.schema import TextNode, Node
# from llama_index.core.query_engine import RouterQueryEngine, QueryEngineTool,LLMSingleSelector
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.tools import QueryEngineTool
from llama_index.core.selectors import LLMSingleSelector

class Router:
    """
    A router class for handling document processing and querying operations.
    Manages loading documents, creating indexes, and providing query tools.
    """
    
    def __init__(self):
        """Initialize the router with models and indexes."""
        self.models()
        # self.indexing()
        
    @property
    def llm(self) -> Ollama:
        """Get the Large Language Model instance."""
        Settings.llm = Ollama(model="llama3.2", request_timeout=3500)
        return Settings.llm
        
    @property
    def embedding(self) -> OllamaEmbedding:
        """Get the Embedding Model instance."""
        Settings.embedding = OllamaEmbedding(model_name="mxbai-embed-large")
        return Settings.embedding
    
    def models(self) -> None:
        """Load and initialize required models."""
        Settings.llm = Ollama(model="llama3.2", request_timeout=3500)
        Settings.embedding = OllamaEmbedding(model_name="mxbai-embed-large")
        
    def load_doc(self, file_path: str) -> List[TextNode]:
        """
        Load a document and split it into nodes.
        
        Args:
            file_path: Path to the document file
            
        Returns:
            List of nodes representing the document chunks
        """
        self.file = file_path
        self.document = SimpleDirectoryReader(input_files=[self.file]).load_data()
        splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
        self.nodes = splitter.get_nodes_from_documents(self.document)
        return self.nodes
    
    def indexing(self) -> None:
        """Create and configure indexes for querying."""
        self.summary_index = SummaryIndex(
            self.nodes,
            embed_model=self.embedding,
            llm=self.llm
        )
        
        self.vector_index = VectorStoreIndex(
            self.nodes,
            embed_model=self.embedding,
            llm=self.llm
        )
        
        self.summary_query_engine = self.summary_index.as_query_engine(
            response_mode="tree_summarize",
            use_async=True
        )

        self.vector_query_engine = self.vector_index.as_query_engine()
        
    def tools(self) -> List[QueryEngineTool]:
        """
        Initialize and configure query tools.
        
        Returns:
            List of configured QueryEngineTools
        """
        self.summary_tool = QueryEngineTool.from_defaults(
            query_engine=self.summary_query_engine,
            description="Useful for summarization questions of the related document."
        )
        
        self.vector_tool = QueryEngineTool.from_defaults(
            query_engine=self.vector_query_engine,
            description="Useful for retrieving specific context from the document."
        )
        
        return [self.summary_tool, self.vector_tool]
    
    def query_engine(self, query: str) -> RouterQueryEngine:
        """
        Create and configure a router query engine.
        
        Args:
            query: The query string
            
        Returns:
            Configured RouterQueryEngine instance
        """
        self.query_engine = RouterQueryEngine(
            selector=LLMSingleSelector.from_defaults(),
            query_engine_tools=self.tools(),
            verbose=True
        )
        
        return self.query_engine.query(query)

In [9]:
start = Router()
splits = start.load_doc("source/orwellanimalfarm.pdf")
# start.indexing()

In [11]:
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
embed_2 = FastEmbedEmbeddings()

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/66.5M [00:00<?, ?B/s]

In [15]:
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss

# Create a FAISS index
dimension = 768  # Adjust this to match the embedding dimension
faiss_index = faiss.IndexFlatL2(dimension)

# Initialize FaissVectorStore with the FAISS index
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create the VectorStoreIndex
index = VectorStoreIndex.from_documents(
    splits, storage_context=storage_context
)


AttributeError: 'TextNode' object has no attribute 'get_doc_id'

In [None]:
# import faiss
# # from langchain_community.vectorstores import FAISS
# from llama_index.vector_stores.faiss import FaissVectorStore
# vector_store = FAISS.from_documents(
#     documents=splits,
#     embedding=embed_2,
# )

# vector_store.save_local("faiss_index")
# db = FAISS.load_local("faiss_index", embed_2, allow_dangerous_deserialization=True)

ImportError: cannot import name 'FAISS' from 'llama_index.vector_stores' (unknown location)