In [22]:
from llama_index.llms.gemini import Gemini
from dotenv import load_dotenv

In [23]:
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Prefer GEMINI_API_KEY; fall back to GOOGLE_API_KEY so setups that only define
# the latter keep working.
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if not api_key:
    # Fail here with a clear message instead of an opaque authentication
    # error on the first model call.
    raise RuntimeError(
        "No Gemini API key found: set GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "in the environment or in a .env file."
    )

# The llama-index Gemini integrations look up GOOGLE_API_KEY when no key is
# passed explicitly (per their documentation), so expose the key under that
# name as well. setdefault() leaves an already-exported value untouched.
os.environ.setdefault("GOOGLE_API_KEY", api_key)

In [24]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,Settings
from llama_index.embeddings.gemini import GeminiEmbedding


# Configure the Gemini chat model and embedding model.
# The key loaded into `api_key` is passed explicitly; when it is None the
# clients fall back to their own environment lookup, so this stays compatible
# with the previous behaviour.
gemini_llm = Gemini(model="models/gemini-1.5-pro", api_key=api_key)

# GeminiEmbedding documents its model parameter as `model_name` (not `model`);
# the value is unchanged.
gemini_embedding = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=api_key,
)

# Make these the global defaults so every LlamaIndex component created later
# uses Gemini for generation and embedding unless told otherwise.
Settings.llm = gemini_llm
Settings.embed_model = gemini_embedding


  gemini_llm = Gemini(model="models/gemini-1.5-pro")
  gemini_embedding = GeminiEmbedding(model="models/embedding-001")


In [25]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex

In [26]:
# Load the source PDF. `input_files` restricts the reader to this single file.
reader = SimpleDirectoryReader(
    input_files=["./data/ragintro.pdf"],
)
documents = reader.load_data()

In [27]:
# Split the loaded documents into nodes using a chunk size of 100 and an
# overlap of 20 between consecutive chunks.
parser = SentenceSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
# NOTE: despite its name, `all_documents` holds the nodes returned by the
# splitter.
all_documents = parser.get_nodes_from_documents(documents)


In [28]:
# `all_documents` contains the nodes already produced by the SentenceSplitter,
# not Document objects. Build the index directly from those nodes so the
# chunking chosen above is used as-is; `from_documents` is meant for raw
# Documents and applies the default transformations to its input again.
index = VectorStoreIndex(nodes=all_documents)

# Show the index object and its generated identifier as a sanity check.
print(f"Vectors Store: {index}")
print(f"Vectors Store ID: {index.index_id}")

Vectors Store: <llama_index.core.indices.vector_store.base.VectorStoreIndex object at 0x00000174C350DBD0>
Vectors Store ID: ae42d334-ce4f-4d1d-bec4-2523ad3265a6


In [29]:
# Create a chat engine on top of the index in "context" mode, using the Gemini
# LLM and a system prompt that tells the model to answer "I don't know" for
# questions unrelated to the loaded documents.
chat_engine = index.as_chat_engine(
    system_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about an pdf loaded in this context."
        "If the question is not relevant to the context and in the loaded"
        " documents, just say I don't know."
    ),
    llm=gemini_llm,
    chat_mode="context",
)

In [30]:
# Smoke test: send a plain greeting to confirm the engine answers ordinary
# conversation.
response = chat_engine.chat(message="hi")
print(response)

Hi! How can I help you today?



In [31]:
# Ask a question that should be answered from the indexed PDF content.
response = chat_engine.chat(
    message="various strategies of chunking from the pdf",
)
print(response)

One strategy mentioned in the PDF is **file format-based chunking**.  This means respecting the natural structure of the file type.  Specifically, it gives these examples:

* **Code files:** Chunk by whole functions or classes.
* **HTML files:** Keep `<table>` and base64 encoded `<img>` elements intact.
* **PDF files:**  Similar considerations should be applied (although the specific considerations for PDFs aren't detailed in this excerpt).

