In [35]:
import json
import os

import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook's
# already-running loop (see the nest_asyncio README).
nest_asyncio.apply()

# Export every entry of secrets.json as an environment variable.
# The file is read and closed first; the environment is only touched once the
# whole payload has parsed successfully.
with open("secrets.json", encoding="utf-8") as f:
    secrets = json.load(f)

if not isinstance(secrets, dict):
    raise TypeError("secrets.json must contain a JSON object mapping names to string values")

for env_var_name, env_var_value in secrets.items():
    if not isinstance(env_var_value, str):
        # os.environ only accepts str values; fail with a message that names the
        # variable but never echoes the (secret) value itself.
        raise TypeError(
            f"secrets.json: value for {env_var_name!r} must be a string, "
            f"got {type(env_var_value).__name__}"
        )
    os.environ[env_var_name] = env_var_value

In [ ]:
from llama_index.core import SimpleDirectoryReader

# Load the source PDF; later cells consume the result through `documents`.
pdf_reader = SimpleDirectoryReader(input_files=["Origin_of_Species.pdf"])
documents = pdf_reader.load_data()

In [4]:
from llama_index.core.node_parser import SentenceSplitter

# Break the loaded documents into nodes using a splitter configured with
# chunk_size=1024; both indexes below are built from `nodes`.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents=documents)

In [27]:
# from llama_index.core import Settings
# from llama_index.llms.ollama import Ollama
# from llama_index.embeddings.ollama import OllamaEmbedding
# 
# Settings.llm = Ollama(model="qwen:0.5b", request_timeout=60.0)
# 
# ollama_embedding = OllamaEmbedding(
#     model_name="qwen:0.5b",
#     base_url="http://localhost:11434",
#     ollama_additional_kwargs={"mirostat": 0},
# )
# Settings.embed_model = ollama_embedding

# Running the local model above is super slow (127 times slower, to be precise), so the OpenAI models below are used instead.
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Register the OpenAI embedding model and LLM on the shared Settings object.
# The two assignments are independent of each other.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.llm = OpenAI(model="gpt-3.5-turbo")


In [28]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build both indexes from the same node list: one backs the summarization
# engine, the other backs the retrieval engine.
summary_index = SummaryIndex(nodes=nodes)
vector_index = VectorStoreIndex(nodes=nodes)

In [30]:
# Retrieval engine over the vector index, with default options.
vector_query_engine = vector_index.as_query_engine()

# Summarization engine over the summary index: tree_summarize response mode,
# with async execution enabled.
summary_engine_options = {"response_mode": "tree_summarize", "use_async": True}
summary_query_engine = summary_index.as_query_engine(**summary_engine_options)

In [31]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool. The description string is the text shown
# when the router reports which engine it selected.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for summarization questions",
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context",
)

In [32]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Keep the tool order fixed: the verbose log refers to engines by their
# position in this list (index 0 is the summary tool).
routable_tools = [summary_tool, vector_tool]

# Router that picks a single tool per query and logs its choice.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=routable_tools,
    verbose=True,
)

In [33]:
# Send a summarization-style question through the router and print the answer;
# print() applies str() to the response itself.
question = "What is the summary of the document?"
response = query_engine.query(question)
print(response)

Selecting query engine 0: Useful for summarization questions.
The document provides a comprehensive overview of various topics related to evolution, natural selection, adaptation, and the development of species. It discusses the principles of selection and breeding in domestic animals and plants, the concept of natural selection in shaping species traits, the role of environmental factors in species survival, the process of species evolution through natural selection, the inheritance of instincts and traits in organisms, the sterility of hybrids and species, the slow and gradual process of species modification and extinction, the distribution of species around the world, the factors influencing species presence on oceanic islands, the natural system of classification based on genealogical relationships, and the challenges and objections faced by the theory of descent with modification through natural selection. The document also explores specific examples and concept