In [1]:
import os
import json
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import sys

# Make the repository root importable so the project-local packages resolve.
# `__file__` is not defined inside a Jupyter kernel, so fall back to the
# current working directory (the notebook's folder) in that case.
try:
    _notebook_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    _notebook_dir = os.getcwd()

project_root = os.path.dirname(_notebook_dir)
# Guard against piling up duplicate entries when the cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import MarkdownNodeParser, CodeSplitter, SemanticSplitterNodeParser
from llama_index.core import Settings
from llama_index.core.extractors import SummaryExtractor, TitleExtractor, KeywordExtractor, DocumentContextExtractor
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.schema import MetadataMode
from llama_index.core.storage.docstore import SimpleDocumentStore


In [3]:
from custom_components.custom_google_genai import CustomGoogleGenAI
from utilities import GoogleGenAIDummyTokensizer, HuggingfaceTokenizer


In [5]:
import tiktoken
import chromadb

In [6]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.postprocessor.cohere_rerank import CohereRerank

resource module not available on Windows


In [7]:
from llama_index.core.retrievers import QueryFusionRetriever

In [8]:
# Directory holding the ingestion outputs; config.json is read from here.
PROCESSED_DIR = 'processed_data/data/google_genai/api'
# NOTE(review): the two LLM_* constants below are not read by any later cell shown here;
# the LLM is built from the provider/model stored in config.json instead.
LLM_MODEL_PROVIDER = 'litellm'  # choose from ['litellm', 'ollama', 'gemini', 'groq']
LLM_MODEL = "gemini/gemini-2.5-flash-preview-05-20" # other model ids kept for reference: "cerebras/llama-3.3-70b", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", "groq/llama-3.3-70b-versatile"

In [9]:
# Credentials are read from the environment; a missing variable raises KeyError here.
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
# NOTE(review): only the 'groq' branch of get_llm uses this key, yet it is required for every provider.
GROQ_API_KEY = os.environ['GROQ_API_KEY']
COHERE_API_KEY = os.environ['COHERE_API_KEY']

In [10]:
# Load the ingestion run configuration written next to the processed data.
config_file = Path(PROCESSED_DIR) / 'config.json'
# Read explicitly as UTF-8: without it open() uses the platform default
# encoding (e.g. cp1252 on Windows), which can mis-decode non-ASCII JSON.
with open(config_file, 'r', encoding='utf-8') as f:
    config = json.load(f)

In [11]:
def verify_config(config):
    """Check that the index-defining settings agree across all run configs.

    Args:
        config: A single config dict, or a list of config dicts (one per
            ingestion run).

    Returns:
        True when every required field is present in the first config and
        carries the same value in all remaining configs; False otherwise
        (including for an empty list). The reason is printed before
        returning False.
    """
    fields_to_verify = ['llm_model_provider', 'vector_store', 'chromadb_path', 'chroma_collection', 'embedding_provider', 'embedding_model', 'tokenizer_provider', 'tokenizer_model_name']
    configs = config if isinstance(config, list) else [config]

    # An empty list has no reference entry to compare against.
    if not configs:
        print("No config entries found")
        return False

    reference = configs[0]

    # Verify each field has consistent values across all configs
    for field in fields_to_verify:
        if field not in reference:
            print(f"Field {field} not found in config")
            return False

        first_value = reference[field]
        mismatches = []

        for i, cfg in enumerate(configs[1:], 1):
            if field not in cfg:
                mismatches.append(f"Config {i}: Field missing")
            elif cfg[field] != first_value:
                mismatches.append(f"Config {i}: {cfg[field]}")

        if mismatches:
            print("Config values are not consistent across all configs")
            print(f"\nMismatch found in {field}:")
            print(f"First value: {first_value}")
            # Show which entries disagree so the report is actionable.
            for mismatch in mismatches:
                print(mismatch)
            return False
    return True

In [12]:
# Stop early when the stored run configs disagree with each other.
if not verify_config(config):
    raise ValueError("Config is not valid")

# A list of run configs collapses to its last entry.
if type(config) is list:
    config = config[-1]

In [13]:
config

{'llm_model_provider': 'litellm',
 'llm_model': 'gemini/gemini-2.5-flash-preview-05-20',
 'rate_limit': 7,
 'input_dir': 'data/google_genai/api/',
 'output_dir': 'processed_data/data/google_genai/api/',
 'file_types': ['.md', '.mdx'],
 'vector_store': 'chroma',
 'chromadb_path': 'processed_data/data/google_genai/api/chromadb',
 'chroma_collection': 'contextual_api',
 'doctsore_path': 'processed_data/data/google_genai/api/docstore.json',
 'embedding_provider': 'GoogleGenAIEmbedding',
 'embedding_model': 'models/text-embedding-004',
 'tokenizer_provider': 'tiktoken',
 'tokenizer_model_name': 'cl100k_base',
 'max_node_tokens': 2000,
 'metadata_extractors': ['CustomDocumentContextExtractor'],
 'datetime': '2025-06-10T09:59:07.250086+00:00',
 'run_time': '2025-06-10T10:00:13.096049+00:00',
 'run_nodes': 1}

In [14]:
def get_llm(llm_model_provider, llm_model):
    """Build the LLM client for the requested provider.

    Provider packages are imported inside their branch, so only the selected
    backend has to be importable.

    Args:
        llm_model_provider: One of 'groq', 'gemini', 'ollama' or 'litellm'.
        llm_model: Model identifier forwarded to the provider client.

    Returns:
        The constructed provider client.

    Raises:
        NotImplementedError: If the provider is not one of the above.
    """
    if llm_model_provider == 'groq':
        from llama_index.llms.groq import Groq

        return Groq(
            model=llm_model,
            api_key=GROQ_API_KEY,
            max_retries=2,  # Number of retry attempts
            retry_on_rate_limit=True,
        )

    if llm_model_provider == 'gemini':
        return CustomGoogleGenAI(
            model=llm_model,
            api_key=GEMINI_API_KEY,
            max_retries=2,  # Number of retry attempts
            retry_on_rate_limit=True,
        )

    if llm_model_provider == 'ollama':
        from llama_index.llms.ollama import Ollama

        return Ollama(
            model=llm_model,
            request_timeout=120.0,
            context_window=8192,
        )

    if llm_model_provider == 'litellm':
        import litellm
        from llama_index.llms.litellm import LiteLLM

        litellm.suppress_debug_info = True
        return LiteLLM(model=llm_model, max_tokens=8192, max_retries=6)

    raise NotImplementedError(f"LLM provider {llm_model_provider} invalid or not implemented")


def get_embed_model(embedding_provider, embedding_model):
    """Build the embedding model for the requested provider.

    Args:
        embedding_provider: 'HuggingFaceEmbedding' or 'GoogleGenAIEmbedding'.
        embedding_model: Model name forwarded to the embedding class.

    Returns:
        The constructed embedding model.

    Raises:
        NotImplementedError: If the provider is not one of the above.
    """
    if embedding_provider == 'HuggingFaceEmbedding':
        from llama_index.embeddings.huggingface import HuggingFaceEmbedding

        return HuggingFaceEmbedding(model_name=embedding_model)

    if embedding_provider == 'GoogleGenAIEmbedding':
        from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

        return GoogleGenAIEmbedding(
            model_name=embedding_model,
            api_key=GEMINI_API_KEY,
        )

    raise NotImplementedError(f"Embedding provider {embedding_provider} invalid or not implemented")
    

def get_tokenizer(tokenizer_provider, tokenizer_model_name, llm):
    """Return the `encode` callable of the requested tokenizer.

    Args:
        tokenizer_provider: 'gemini', 'huggingface' or 'tiktoken'.
        tokenizer_model_name: Model / encoding name; not used by the
            'gemini' branch.
        llm: LLM instance; only used by the 'gemini' branch.

    Returns:
        The bound `encode` method of the selected tokenizer.

    Raises:
        NotImplementedError: If the provider is not one of the above.
    """
    if tokenizer_provider == 'gemini':
        gemini_tokenizer = GoogleGenAIDummyTokensizer(llm=llm)
        return gemini_tokenizer.encode

    if tokenizer_provider == 'huggingface':
        hf_tokenizer = HuggingfaceTokenizer(model=tokenizer_model_name)
        return hf_tokenizer.encode

    if tokenizer_provider == 'tiktoken':
        encoding = tiktoken.get_encoding(encoding_name=tokenizer_model_name)
        return encoding.encode

    raise NotImplementedError(f"{tokenizer_provider} invalid or not implemented")

In [15]:
# ChromaDB location and collection name recorded by the ingestion run.
# NOTE(review): both names are misspelled ("choma", "colection") but the ChromaDB
# setup cell references them verbatim, so rename them together if at all.
choma_path = config['chromadb_path']
chroma_colection_name = config['chroma_collection']

In [16]:
# import litellm
# litellm.get_max_tokens("gemini/gemini-2.5-flash-preview-05-20")
# # litellm.get_max_tokens("gemini/gemini-2.0-flash")

In [17]:
# Build the LLM from the provider/model recorded in config.json.
llm = get_llm(config['llm_model_provider'], config['llm_model'])

# Disabled: manual override of the model metadata (context window / output size).
# from llama_index.core.base.llms.types import LLMMetadata
# llm_metadata = LLMMetadata(
#             context_window=32768, num_output=8192, is_chat_model=True, is_function_calling_model=False, model_name=llm.model,
#         )

# llm.metadata = llm_metadata
# Make this LLM the LlamaIndex-wide default.
Settings.llm = llm

In [18]:
# model info
llm.metadata.model_dump()

{'context_window': 65535,
 'num_output': 8192,
 'is_chat_model': True,
 'is_function_calling_model': True,
 'model_name': 'gemini/gemini-2.5-flash-preview-05-20',
 'system_role': <MessageRole.SYSTEM: 'system'>}

In [19]:
# Embedding model and tokenizer are both chosen from the ingestion config.
embed_model = get_embed_model(
    embedding_provider=config['embedding_provider'],
    embedding_model=config['embedding_model'],
)
tokenizer = get_tokenizer(
    tokenizer_provider=config['tokenizer_provider'],
    tokenizer_model_name=config['tokenizer_model_name'],
    llm=llm,
)

# Make both the LlamaIndex-wide defaults.
Settings.embed_model, Settings.tokenizer = embed_model, tokenizer

In [20]:
# setup observability
from langfuse import get_client
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor

langfuse = get_client()

# Verify connection. auth_check() may raise (bad credentials, unreachable host)
# instead of returning False; treat that as "Langfuse unavailable" so the
# notebook keeps working through the untraced code paths.
langfuse_available = False
try:
    langfuse_available = bool(langfuse.auth_check())
except Exception as exc:
    print(f"Langfuse auth check raised {type(exc).__name__}: {exc}")

if langfuse_available:
    LlamaIndexInstrumentor().instrument()
    print("Langfuse client is authenticated and ready!")
else:
    print("Authentication failed. Please check your credentials and host.")


Langfuse client is authenticated and ready!


In [21]:
def is_notebook():
    """Report whether an IPython shell is active in this process.

    Returns:
        True when IPython is importable and `get_ipython()` returns a shell
        object; False when IPython is missing or no shell is running.
    """
    try:
        from IPython import get_ipython
    except ImportError:
        return False
    active_shell = get_ipython()
    return active_shell is not None

is_jupyter_notebook = is_notebook()

# for jupyter notebooks - to fix event loop issue
if is_jupyter_notebook:
    import nest_asyncio
    nest_asyncio.apply()

# for jupyter notebook - Start ChromaDB server and talk to it over HTTP;
# otherwise open the on-disk store directly.
if is_jupyter_notebook:
    import atexit
    import subprocess
    import time

    process = subprocess.Popen(["chroma", "run", "--path", choma_path])
    # Stop the server when the kernel exits so it does not linger in the background.
    atexit.register(process.terminate)

    # The server needs a moment to start listening; retry instead of
    # failing on the first refused connection.
    remote_db = None
    last_error = None
    for _ in range(30):
        try:
            remote_db = chromadb.HttpClient()
            remote_db.heartbeat()
            break
        except Exception as exc:
            remote_db = None
            last_error = exc
            time.sleep(1)
    if remote_db is None:
        raise RuntimeError("ChromaDB server did not become reachable within 30 seconds") from last_error

    chroma_collection = remote_db.get_or_create_collection(chroma_colection_name)
else:
    db = chromadb.PersistentClient(path=choma_path)
    chroma_collection = db.get_or_create_collection(chroma_colection_name)

In [22]:
# Wrap the Chroma collection as a LlamaIndex vector store and build the index on top of it.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store, embed_model=embed_model)
print('vector index loaded')

vector index loaded


In [23]:
# node count
chroma_collection.count()

1032

In [26]:
# def retrieve(index_path, top_k=5)
# def query(query, index_name)
# def 

## Vector retrieval

In [25]:
retriever = vector_index.as_retriever(similarity_top_k=20)

In [26]:
query = "how to upload files for chat?"

In [27]:
retrieved_nodes = retriever.retrieve(query)

In [28]:
# Preview each retrieved chunk and add up the token count of their full texts.
total_context = 0
for hit in retrieved_nodes:
    chunk = hit.node
    print(chunk.node_id)
    print(hit.score)
    print(chunk.metadata['header_path'])
    print(chunk.text[:100])
    print('-' * 20)
    print()
    total_context += len(Settings.tokenizer(chunk.text))

SDK_references/Python.md-49197-55891
0.46616804207958584
/
io/python-genai/genai.html#genai.caches.Caches.get)
      * [`Caches.list()`](https://googleapis.gi
--------------------

Capabilities/Files.md-13534-14441
0.4495083274471379
/ Using files 
## Method: media.upload
  * [Endpoint](https://ai.google.dev/api/files#body.HTTP_TEMPLATE)
  * [Re
--------------------

SDK_references/Python.md-33113-33345
0.4433611053938853
/Files[¶](https://googleapis.github.io/python-genai/#files "Link to this heading")
## Upload[¶](https://googleapis.github.io/python-genai/#upload "Link to this heading")
```
file1 =
--------------------

Capabilities/Files.md-11178-13532
0.4426483252338536
/
#  Using files 
  * On this page
  * [Method: media.upload](https://ai.google.dev/api/files#method
--------------------

Capabilities/Generating_content.md-112891-119659
0.4403348517596423
/ Generating content 
curl"${upload_url}"\
-H"Content-Length: ${NUM_BYTES}"\
-H"X-Goog-Upload-Offset: 0"\
-H"X-Goog-Upl
------

In [29]:
print(f"Total context: {total_context} tokens")

Total context: 13951 tokens


In [30]:
# configure response synthesizer
# (the BM25 query engine further down is built with this same object)
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
)

# assemble query engine: the vector retriever feeding the synthesizer above
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [31]:

# Run the vector query; wrap it in a Langfuse span when the client authenticated.
if langfuse_available:
    with langfuse.start_as_current_span(name="vector retriever query"):
        response = query_engine.query(query)
    langfuse.flush()
else:
    response = query_engine.query(query)


In [32]:
print(response)

Files can be uploaded using the `media.upload` method in the REST API or the `client.files.upload` method available in the Python, Node.js, Go, and TypeScript SDKs. This process creates a `File` resource.

Once a file is uploaded, its URI and MIME type can be included as part of a message in a chat session. For example, in the Go SDK, an uploaded file's URI and MIME type can be used to create a `genai.Part` object, which is then passed to a chat method like `chat.SendMessage`.

Additionally, for real-time interactions through the Live API, the `Session.SendClientContent` method (or `BidiGenerateContentClientContent` in the API reference) allows sending content objects that can include file data. This enables incorporating files into a conversation where the history is managed by the API server, similar to a standard chat message.


## Keyword Based Retriever - BM25

In [33]:
nodes_info = chroma_collection.get()

In [34]:
nodes_info.keys()

dict_keys(['ids', 'embeddings', 'metadatas', 'documents', 'data', 'uris', 'included'])

In [35]:
(nodes_info['metadatas'][0])

{'_node_content': '{"id_": "__root__.md-0-3432", "embedding": null, "metadata": {"file_path": "__root__.md", "file_name": "__root__.md", "file_size": 11202, "creation_date": "2025-06-06", "last_modified_date": "2025-06-06", "header_path": "/", "context": "This chunk appears to be the navigation menu or sidebar of the Google AI for Developers documentation page, specifically for the Gemini API."}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "__root__.md", "node_type": "4", "metadata": {"file_path": "__root__.md", "file_name": "__root__.md", "file_size": 11202, "creation_date": "2025-06-06", "last_modified_date": "2025-06-06"}, "hash": "1125ed6c9622fbc84faec0fc0351beccb9cdaa79c76494cf5de1b86667d413ad", "class_name": "RelatedNodeInfo"},

In [36]:
all_nodes = vector_store.get_nodes(nodes_info['ids'])

In [38]:
bm25_retriever = BM25Retriever.from_defaults(nodes=all_nodes, similarity_top_k=10)

In [39]:
bm25_retrieved_nodes = bm25_retriever.retrieve(query)

In [40]:
# Preview the BM25 hits: id, score, header path and the first 100 characters.
for hit in bm25_retrieved_nodes:
    chunk = hit.node
    print(chunk.node_id)
    print(hit.score)
    print(chunk.metadata['header_path'])
    print(chunk.text[:100])
    print('-' * 20)
    print()

Capabilities/Generating_content.md-112891-119659
6.043334007263184
/ Generating content 
curl"${upload_url}"\
-H"Content-Length: ${NUM_BYTES}"\
-H"X-Goog-Upload-Offset: 0"\
-H"X-Goog-Upl
--------------------

Capabilities/Generating_content.md-30933-38003
5.922059059143066
/ Generating content 
forfile.State==genai.FileStateUnspecified||file.State!=genai.FileStateActive{
fmt.Println("Processi
--------------------

Tuning/Tuning.md-128539-135473
5.844338417053223
/ Tuning 
MIME_TYPE=$(file-b--mime-type"${VIDEO_PATH}")
NUM_BYTES=$(wc-c < "${VIDEO_PATH}")
DISPLAY_NAME=VID
--------------------

Capabilities/Generating_content.md-38003-45486
4.997640609741211
/ Generating content 
","model"),
}
chat,err:=client.Chats.Create(ctx,"gemini-2.0-flash",nil,history)
iferr!=nil{
log.
--------------------

Tuning/Tuning.md-43445-50537
4.770259857177734
/ Tuning 
while(!video.state||video.state.toString()!=='ACTIVE'){
console.log('Processing video...');
consol
--------------------

SDK_references/Pyt

In [41]:
# assemble query engine
bm25_query_engine = RetrieverQueryEngine(
    retriever=bm25_retriever,
    response_synthesizer=response_synthesizer,
)

# query - traced in Langfuse when available, like the other query cells
if not langfuse_available:
    bm25_response = bm25_query_engine.query(query)
else:
    with langfuse.start_as_current_span(name="bm25 retriever query"):
        bm25_response = bm25_query_engine.query(query)
    langfuse.flush()

In [42]:
print(bm25_response)

To use files within a chat, you first need to upload the file. This can be done using a method like `client.files.upload()`, which will provide details about the uploaded file, including its URI and MIME type.

Once the file is uploaded and its information is available, you can then include it as part of the content when sending a message in a chat session. For example, when calling `chat.send_message()`, you would specify the text of your message along with the `file_data` containing the `mime_type` and `file_uri` of the uploaded file.


## Hybrid Retriever

In [None]:
# Hand-rolled hybrid retriever (disabled; QueryFusionRetriever in the next cell is used instead)


# class HybridRetriever(BaseRetriever):
#     def __init__(self, vector_retriever, bm25_retriever):
#         self.vector_retriever = vector_retriever
#         self.bm25_retriever = bm25_retriever

#     def _retrieve(self, query, **kwargs):
#         bm25_nodes = self.bm25_retriever.retrieve(query, **kwargs)
#         vector_nodes = self.vector_retriever.retrieve(query, **kwargs)
#         all_nodes = []
#         node_ids = set()
#         for n in bm25_nodes + vector_nodes:
#             if n.node.node_id not in node_ids:
#                 all_nodes.append(n)
#                 node_ids.add(n.node.node_id)
#         return all_nodes

# hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)

In [43]:
# Combine the vector retriever and the BM25 retriever into one fused retriever.
hybrid_retriever = QueryFusionRetriever(
    [
        retriever,
        bm25_retriever,
    ],
    num_queries=4,  # NOTE(review): presumably the original query plus LLM-generated variants — confirm
    similarity_top_k = 40,
    llm=llm,
    retriever_weights=[0.7, 0.3],  # same order as the list above: vector 0.7, BM25 0.3
    mode = "dist_based_score",
    use_async=True,
)

In [44]:

import nest_asyncio
nest_asyncio.apply()

In [45]:
if not langfuse_available:
    hybrid_nodes = await hybrid_retriever.aretrieve(query)
else:
    with langfuse.start_as_current_span(name="hybrid retriever retrieve"):
        hybrid_nodes = await hybrid_retriever.aretrieve(query)
    langfuse.flush()

In [46]:
# Preview the fused hits: id, score, header path and the first 100 characters.
for hit in hybrid_nodes:
    chunk = hit.node
    print(chunk.node_id)
    print(hit.score)
    print(chunk.metadata['header_path'])
    print(chunk.text[:100])
    print('-' * 20)
    print()

Capabilities/Generating_content.md-112891-119659
0.6377292836136748
/ Generating content 
curl"${upload_url}"\
-H"Content-Length: ${NUM_BYTES}"\
-H"X-Goog-Upload-Offset: 0"\
-H"X-Goog-Upl
--------------------

SDK_references/Python.md-49197-55891
0.5566185045992408
/
io/python-genai/genai.html#genai.caches.Caches.get)
      * [`Caches.list()`](https://googleapis.gi
--------------------

Capabilities/Files.md-13534-14441
0.44560408168886334
/ Using files 
## Method: media.upload
  * [Endpoint](https://ai.google.dev/api/files#body.HTTP_TEMPLATE)
  * [Re
--------------------

SDK_references/Go.md-131815-139027
0.4271812618126451
/genai
####  type [Chat](https://github.com/googleapis/go-genai/blob/v1.10.0/chats.go#L38) [¶](https://pkg.
--------------------

Capabilities/Live_API.md-10504-11007
0.3578793920492212
/Send messagesebSockets API reference 
### Supported client messages
See the supported client messages in the following table:
Message | 
--------------------

Capabilities/Caching

In [47]:
hybrid_query_engine = RetrieverQueryEngine(hybrid_retriever)

In [48]:
# Full hybrid query (retrieval + synthesis). The span gets its own name so it is
# not confused with the retrieval-only "hybrid retriever retrieve" span in Langfuse.
if not langfuse_available:
    hybrid_resp = hybrid_query_engine.query(query)
else:
    with langfuse.start_as_current_span(name="hybrid retriever query"):
        hybrid_resp = hybrid_query_engine.query(query)
    langfuse.flush()

In [50]:
print(hybrid_resp)

To upload files for use in a chat, you first need to upload the file to the API. This process creates a `File` resource.

Here's the general approach:
1.  **Upload the file**: Use the `media.upload` method or the equivalent SDK function (e.g., `client.files.upload` in Python). This method allows you to upload various media types such as images, audio, text, video, and PDF files.
2.  **Obtain the file URI/name**: After a successful upload, the API returns information about the created file, including its URI or name.
3.  **Reference the file in your chat message**: When sending a message in a chat session, include the uploaded file's URI and MIME type as part of the content. For example, in Python, you would upload the file and then pass the file object directly into the `send_message` call along with your text.

This allows the model to process the file content as part of the conversation.


In [51]:
len(hybrid_resp.source_nodes)

40

In [52]:
# Preview the source nodes attached to the hybrid response.
for hit in hybrid_resp.source_nodes:
    chunk = hit.node
    print(chunk.node_id)
    print(hit.score)
    print(chunk.metadata['header_path'])
    print(chunk.text[:100])
    print('-' * 20)
    print()

Capabilities/Generating_content.md-112891-119659
0.6377292836136748
/ Generating content 
curl"${upload_url}"\
-H"Content-Length: ${NUM_BYTES}"\
-H"X-Goog-Upload-Offset: 0"\
-H"X-Goog-Upl
--------------------

SDK_references/Python.md-49197-55891
0.5566185045992408
/
io/python-genai/genai.html#genai.caches.Caches.get)
      * [`Caches.list()`](https://googleapis.gi
--------------------

Capabilities/Files.md-13534-14441
0.44560408168886334
/ Using files 
## Method: media.upload
  * [Endpoint](https://ai.google.dev/api/files#body.HTTP_TEMPLATE)
  * [Re
--------------------

SDK_references/Go.md-131815-139027
0.4271812618126451
/genai
####  type [Chat](https://github.com/googleapis/go-genai/blob/v1.10.0/chats.go#L38) [¶](https://pkg.
--------------------

Capabilities/Live_API.md-10504-11007
0.3578793920492212
/Send messagesebSockets API reference 
### Supported client messages
See the supported client messages in the following table:
Message | 
--------------------

Capabilities/Caching

## Rerank

In [53]:
cohere_rerank = CohereRerank(
    top_n=10, model="rerank-v3.5", api_key=COHERE_API_KEY
)

In [54]:
sorted_nodes = cohere_rerank.postprocess_nodes(nodes=hybrid_nodes, query_str=query)

In [55]:
for node in sorted_nodes:
    print(node.node.node_id, node.score)

SDK_references/Python.md-49197-55891 0.59457535
Tuning/Tuning.md-128539-135473 0.5377946
Capabilities/Generating_content.md-30933-38003 0.5271467
Capabilities/Generating_content.md-112891-119659 0.5192538
Tuning/Tuning.md-43445-50537 0.48147875
Capabilities/Generating_content.md-38003-45486 0.45551404
Capabilities/Caching.md-28429-34409 0.41577685
SDK_references/Go.md-14345-21402 0.40584955
SDK_references/TypeScript.md-6251-7531 0.39508808
Capabilities/Files.md-11178-13532 0.33561262


In [56]:
for node in hybrid_nodes:
    print(node.node.node_id, node.score)

Capabilities/Generating_content.md-112891-119659 0.6377292836136748
SDK_references/Python.md-49197-55891 0.5566185045992408
Capabilities/Files.md-13534-14441 0.44560408168886334
SDK_references/Go.md-131815-139027 0.4271812618126451
Capabilities/Live_API.md-10504-11007 0.3578793920492212
Capabilities/Caching.md-32565-34569 0.348034525466761
SDK_references/Python.md-33113-33345 0.29787802781866296
Capabilities/Generating_content.md-36016-36024 0.27809983974744
Capabilities/Files.md-11178-13532 0.2520385870007634
SDK_references/Go.md-367781-375865 0.2520024166659487
Tuning/Tuning.md-128539-135473 0.2516451281250523
SDK_references/TypeScript.md-6251-7531 0.24464727167624428
All_methods.md-17624-17807 0.23634453553871465
Capabilities/Generating_content.md-30933-38003 0.23464739901635034
Capabilities/Caching.md-28429-34409 0.2320866655994635
SDK_references/Python.md-32392-32753 0.21892444365870062
Capabilities/Tokens.md-17120-17128 0.21348721514402308
SDK_references/Python.md-32083-32388 0.1

In [57]:
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.core.response_synthesizers.type import ResponseMode
from llama_index.core.schema import NodeWithScore, QueryBundle, QueryType

# Rebinds response_synthesizer to a COMPACT-mode synthesizer for the reranked nodes.
response_synthesizer = get_response_synthesizer(
    llm=llm,
    response_mode=ResponseMode.COMPACT,
)
query_bundle = QueryBundle(query)


In [58]:
# Synthesize an answer from the reranked nodes; traced in Langfuse when available.
if langfuse_available:
    with langfuse.start_as_current_span(name="reranked query"):
        reranked_response = response_synthesizer.synthesize(query_bundle, sorted_nodes)
    langfuse.flush()
else:
    reranked_response = response_synthesizer.synthesize(query_bundle, sorted_nodes)


In [59]:
print(reranked_response)

To upload files for use in a chat, you first use the `files` client or module to upload the file. Once the file is uploaded, you can then include the uploaded file object or its URI as part of the content when sending a message in a chat session.

For example:

*   **In Python:**
    1.  Upload the file: `document = client.files.upload(file=media / "your_file.txt")`
    2.  Send it in a chat message: `response = chat.send_message(message=["Your prompt text here", document])`

*   **In Node.js:**
    1.  Upload the file: `const document = await ai.files.upload({ file: filePath, config: { mimeType: "text/plain" } });`
    2.  Send it in a chat message: `let response = await chat.sendMessage({ message: createUserContent(["Your prompt text here", createPartFromUri(document.uri, document.mimeType)]) });`

*   **In Go:**
    1.  Upload the file: `document, err := client.Files.UploadFromPath(ctx, filepath.Join(getMedia(), "your_file.txt"), &genai.UploadFileConfig{ MIMEType: "text/plain" })`
 

## Hybrid + Rerank

In [65]:
hybrid_query_engine = RetrieverQueryEngine(hybrid_retriever, node_postprocessors=[cohere_rerank])

In [67]:
import nest_asyncio
nest_asyncio.apply()

In [68]:
# Query through the hybrid engine with the rerank postprocessor; traced in Langfuse when available.
if langfuse_available:
    with langfuse.start_as_current_span(name="hybrid and rerank query"):
        hybrid_resp = hybrid_query_engine.query(query)
    langfuse.flush()
else:
    hybrid_resp = hybrid_query_engine.query(query)

In [70]:
len(hybrid_resp.source_nodes)

10

In [69]:
print(hybrid_resp)

To upload files for use in a chat, you first upload the file using the `files` submodule, and then reference the uploaded file when sending a message within a chat session.

For example, in Python, you would:
1. Upload the file: `document = client.files.upload(file=media / "your_file.txt")`
2. Send the message in a chat, including the uploaded document: `response = chat.send_message(message=["Your message here.", document])`

Similarly, in Node.js, you would:
1. Upload the file: `const document = await ai.files.upload({ file: filePath, config: { mimeType: "text/plain" }, });`
2. Send the message, referencing the file's URI and MIME type: `let response = await chat.sendMessage({ message: createUserContent(["Your message here.", createPartFromUri(document.uri, document.mimeType),]), });`

In Go, the process involves:
1. Uploading the file: `document, err := client.Files.UploadFromPath(ctx, filepath.Join(getMedia(), "your_file.txt"), &genai.UploadFileConfig{ MIMEType: "text/plain", }, )`


## ReAct Agent Chat Engine

In [60]:
# The dangling comma after ReActAgent made this line a SyntaxError
# (a trailing comma is only allowed inside parentheses).
from llama_index.core.agent import ReActAgent
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.tools import QueryEngineTool, RetrieverTool
from llama_index.core.tools.types import ToolMetadata

In [87]:
# Expose the hybrid query engine as an agent tool. The name and description
# are what the agent sees when deciding whether to call it.
query_engine_tool = QueryEngineTool(
    query_engine=hybrid_query_engine,
    metadata=ToolMetadata(
        name="google_genai_api",
        description="API documentation for Google GenAI",
        return_direct=False,
    ),
)

In [None]:
# RetrieverTool expects `node_postprocessors` (plural) and tool metadata;
# from_defaults builds the metadata from the given name and description.
retrieval_tool = RetrieverTool.from_defaults(
    retriever=hybrid_retriever,
    node_postprocessors=[cohere_rerank],
    name="google_genai_api_retriever",
    description="Retrieves API documentation passages for Google GenAI",
)

In [None]:
# `react_prompt` is not a from_tools parameter; it appears to be swallowed by
# **kwargs, so the instruction never reached the agent. `context` is the
# supported way to add instructions to the ReAct system prompt.
# NOTE(review): this agent is configured identically to `agent` defined in a later cell.
retrieval_agent = ReActAgent.from_tools(
    tools=[query_engine_tool],
    llm=llm,
    verbose=True,
    context="Always use one of the available tools to answer the question.",
)

In [93]:
# `react_prompt` is not a from_tools parameter; it appears to be swallowed by
# **kwargs, so the instruction never reached the agent (the recorded run
# answered without calling the tool). `context` is the supported way to add
# instructions to the ReAct system prompt.
agent = ReActAgent.from_tools(
    tools=[query_engine_tool],
    llm=llm,
    verbose=True,
    context="Always use one of the available tools to answer the question.",
)

In [94]:
query_agent = 'how to upload files for chat in google genai?'

In [95]:
# Ask the ReAct agent; traced in Langfuse when the client authenticated.
if langfuse_available:
    with langfuse.start_as_current_span(name="React agent query"):
        agent_resp = agent.chat(query_agent)
    langfuse.flush()
else:
    agent_resp = agent.chat(query_agent)

> Running step 1b84d790-a836-4477-94d4-98c08dc73e17. Step input: how to upload files for chat in google genai?
[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Answer: Observation: The Google GenAI API does not directly support file uploads for chat. The `google_genai_api` tool is for interacting with the GenAI API itself, not for managing file uploads for a chat interface. File uploads for a chat interface would typically be handled by the application or platform that is *using* the GenAI API, not by the API directly.
The Google GenAI API itself does not directly handle file uploads for chat. File uploads for a chat interface would typically be managed by the application or platform that is integrating with the GenAI API, not by the API directly. You would need to refer to the documentation or features of the specific chat application or platform you are using to understand how to upload files within that context.
[0m

## Chat Engine

In [None]:
from llama_index.core.chat_engine import ContextChatEngine, CondensePlusContextChatEngine
# ChatMemoryBuffer was used without being imported anywhere (NameError).
from llama_index.core.memory import ChatMemoryBuffer

# Conversation memory capped at 8000 tokens.
memory = ChatMemoryBuffer.from_defaults(token_limit=8000)
# Chat engine over the vector retriever, with Cohere reranking of retrieved nodes.
chat_engine = CondensePlusContextChatEngine(
    retriever=retriever,
    llm=llm,
    memory=memory,
    node_postprocessors=[cohere_rerank],
    verbose=True,
)

In [None]:
resp1 = chat_engine.chat(query)
print(resp1)

In [None]:
resp2 = chat_engine.chat('Can you answer in short?')
print(resp2)

## Router for Query Engine

In [120]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import PydanticSingleSelector, LLMSingleSelector

In [121]:
# initialize router query engine (single selection, LLM-based selector)
# NOTE(review): this rebinds `query_engine`, which earlier held the vector
# RetrieverQueryEngine; re-running the vector query cell after this one would
# go through the router instead.
query_engine = RouterQueryEngine(
    # selector=PydanticSingleSelector.from_defaults(llm=llm),
    selector = LLMSingleSelector.from_defaults(llm=llm),
    query_engine_tools=[
        query_engine_tool,
    ],
    llm=llm
)

In [127]:
if not langfuse_available:
    router_resp = await query_engine.aquery(query)
else:
    with langfuse.start_as_current_span(name="Router query"):
        router_resp = await query_engine.aquery(query)
    langfuse.flush()

In [129]:
print(router_resp)

To upload files for use in a chat, you first upload the file using the `files` module. Once the file is uploaded, the resulting file object can then be included directly as part of the content in your chat messages.

For instance, in Python, you would use `client.files.upload()` to upload a file, and then pass the uploaded file object within the `message` argument of `chat.send_message()`.
