In [2]:
from llama_index.llms.gemini import Gemini
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
# Pull variables from a local .env file into the process environment, then
# read the Gemini key from it. The lookup yields None when the variable is
# not set.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

In [4]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,Settings
from llama_index.embeddings.gemini import GeminiEmbedding


# Configure the Gemini models.
# The key read from the environment (`api_key`) is passed explicitly so the
# GEMINI_API_KEY value is actually used instead of relying on whatever
# environment lookup the integrations perform on their own. When `api_key`
# is None this is equivalent to not passing it at all.
gemini_llm = Gemini(model="models/gemini-1.5-pro", api_key=api_key)

# NOTE: GeminiEmbedding names its model parameter `model_name` (the LLM class
# uses `model`); passing `model=` there only worked because the value matched
# the default embedding model.
gemini_embedding = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=api_key,
)

# Register both as the global LlamaIndex defaults so that indexes, retrievers
# and synthesizers created later pick them up without explicit wiring.
Settings.llm = gemini_llm
Settings.embed_model = gemini_embedding



  gemini_llm = Gemini(model="models/gemini-1.5-pro")
  gemini_embedding = GeminiEmbedding(model="models/embedding-001")


In [5]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

In [7]:
# Read every .txt file found under the local "data" directory.
reader = SimpleDirectoryReader(
    input_dir="data",
    required_exts=[".txt"],
)
documents = reader.load_data()

In [8]:
# Node parser: split the loaded documents into small, overlapping chunks
# ("nodes") so that each embedding covers a focused piece of text.
#
# SentenceSplitter measures chunk_size and chunk_overlap in TOKENS, not
# characters, and it prefers to break on sentence boundaries. Chunks are
# therefore *at most* about chunk_size tokens long rather than exactly that
# size, and boundaries do not fall at fixed offsets.
#
# chunk_overlap is the amount of trailing text (up to ~10 tokens here) that
# may be repeated at the start of the next chunk, which helps preserve
# context across chunk boundaries.

parser = SentenceSplitter(chunk_size=100, chunk_overlap=10)

# Despite its name, `all_documents` holds the resulting nodes (chunks),
# not Document objects.
all_documents = parser.get_nodes_from_documents(documents)


In [9]:
# Store the pre-split nodes in a vector store index.
# `all_documents` contains nodes produced by the splitter, so they are passed
# through the constructor's `nodes=` argument. `from_documents` is intended
# for raw Document objects and applies the default transformations to its
# input, which would re-process chunks that have already been split.
index = VectorStoreIndex(nodes=all_documents)
print(f"Vectors Store: {index}")
print(f"Vectors Store ID: {index.index_id}")

Vectors Store: <llama_index.core.indices.vector_store.base.VectorStoreIndex object at 0x000002D2D60670E0>
Vectors Store ID: 1d2b2a40-6695-4c30-ae2f-c5f20e99b5d3


In [10]:
# Retriever over the vector index; each query returns the 2 most similar nodes.
retriever = VectorIndexRetriever(index=index, similarity_top_k=2)

In [11]:
from llama_index.core import get_response_synthesizer
# Build the component that turns retrieved nodes into a final answer,
# using the "tree_summarize" response mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

In [12]:
# Wire retrieval and answer synthesis together into a single query engine.
query_engine = RetrieverQueryEngine(retriever=retriever, response_synthesizer=response_synthesizer)

In [13]:
# Ask the engine for a summary and show the synthesized answer.
# The answer is built only from the nodes the retriever returns for this query.
response = query_engine.query(
    "Give me the summary of this file"
)
print(response)

Chat engines use memory to create natural-flowing conversations, similar to human dialogue.  Memory is crucial because it provides context continuity (allowing for pronoun resolution), enables personalization, manages long conversations through summarization, and prevents exceeding token limits by controlling conversation history.  In query engines, memory helps refine queries over multiple turns.

