In [None]:
!pip install llama-index
!pip install llama-index-llms-gemini
!pip install llama-index-embeddings-huggingface
!pip install sentence-transformers
!pip install transformers
!pip install google-generativeai

[0mCollecting llama-index-embeddings-huggingface
  Using cached llama_index_embeddings_huggingface-0.5.2-py3-none-any.whl.metadata (767 bytes)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cusparse_cu12-12.3.1.170-py3-none-m

# **Step 1** : Setting up the Gemini API key

In [None]:
import os

from google.colab import userdata

# Fetch the Gemini key from the Colab secret named 'gemini_api' and mirror it
# into the process environment under the same name.
gemini_api_key = userdata.get('gemini_api')
os.environ.update({"gemini_api": gemini_api_key})

# **Step 2** : Loading the data, splitting it into chunks, and configuring the LLM and embedding model

In [None]:
# Working environment setup: all imports for this cell live together up front.
import os
from google.colab import userdata

from llama_index.core import Settings, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini

# When running in a plain Jupyter notebook, uncomment the two lines below so
# that async code cooperates with the notebook's event loop:
# import nest_asyncio
# nest_asyncio.apply()

# Load the source PDF into documents.
pdf_reader = SimpleDirectoryReader(input_files=["/content/Data Science.pdf"])
documents = pdf_reader.load_data()

# Split the documents into nodes (chunks) using chunk_size=1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

# Register the global defaults: Gemini as the LLM (authenticated with the key
# read in Step 1) and a HuggingFace sentence-transformer as the embedding model.
gemini_llm = Gemini(api_key=gemini_api_key, model="models/gemini-1.5-flash")
sentence_embedder = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
Settings.llm = gemini_llm
Settings.embed_model = sentence_embedder

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# **Step 3** : Define a summary index and a vector index over the same data

In [None]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build both indexes from the very same `nodes`; they later back the
# summarization engine and the specific-context retrieval engine respectively.
summary_index = SummaryIndex(nodes=nodes)
vector_index = VectorStoreIndex(nodes=nodes)

# **Step 4** : Creating Query Engines

In [None]:
# Engine over the vector index, using the library's default settings.
vector_query_engine = vector_index.as_query_engine()

# Engine over the summary index: "tree_summarize" response mode with async enabled.
summary_query_engine = summary_index.as_query_engine(
    use_async=True,
    response_mode="tree_summarize",
)

# **Step 5** : Converting the query engines into query engine tools

In [None]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool. The description is the text the router's
# selector reasons about when choosing a tool, so it states what each engine is for.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to dataset",
    query_engine=summary_query_engine,
)

vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context from the dataset.",
    query_engine=vector_query_engine,
)

In [None]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router that lets an LLM-based single selector choose between the
# summarization tool and the retrieval tool for every incoming query.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True,  # print which engine was selected and the selector's reason
)


def query_and_print(question):
    """Run `question` through the router, print the answer, and return the response.

    The answer text is printed with a line break after every ". " so long
    answers are easier to read. A missing answer (`response.response is None`)
    is printed as an empty line instead of raising `AttributeError`.

    Args:
        question: The query string passed unchanged to `query_engine.query`.

    Returns:
        The response object returned by `query_engine.query`.
    """
    response = query_engine.query(question)
    answer_text = response.response or ""
    print(answer_text.replace(". ", ".\n"))
    return response


# First query: gist of the whole document
response1 = query_and_print("What is the summary of the document? \n")

# Second query
response2 = query_and_print("What are Query Engines \n")

# Third query
response3 = query_and_print("Tell me about an idealized model \n")




[1;3;38;5;200mSelecting query engine 0: The question directly asks for a summary of the document, which aligns with the description of choice 1: 'Useful for summarization questions related to dataset'..
[0mThis book covers the theory expected to be useful in data science over the next 40 years.
 It emphasizes probability, statistics, and numerical methods, and uses modern data representation as vectors with many components.
 The book covers high-dimensional spaces, singular value decomposition (SVD), Markov chains, machine learning (including algorithms like Perceptron, stochastic gradient descent, boosting, and deep learning), algorithms for massive data problems, clustering, random graphs, topic models, nonnegative matrix factorization, hidden Markov models, graphical models, and wavelets.
 The appendix provides background material and exercises.
 The book uses consistent notation, with lower-case letters for scalars, bold lower-case for vectors, and upper-case for matrices.
 It al