In [1]:
from llama_index.llms.gemini import Gemini
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")
pinecone_api_key=os.getenv("PINECONE_API_KEY")

In [3]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,Settings
from llama_index.embeddings.gemini import GeminiEmbedding


# Configure Gemini models
gemini_llm = Gemini(model="models/gemini-1.5-pro")
gemini_embedding = GeminiEmbedding(model="models/embedding-001")

# Tell LlamaIndex to use Gemini everywhere
Settings.llm = gemini_llm
Settings.embed_model = gemini_embedding


  gemini_llm = Gemini(model="models/gemini-1.5-pro")
  gemini_embedding = GeminiEmbedding(model="models/embedding-001")


In [4]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

In [5]:
from llama_index.core import SimpleDirectoryReader,VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore


In [6]:
from pinecone import Pinecone,ServerlessSpec
from llama_index.core import StorageContext
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline

In [7]:
reader=SimpleDirectoryReader(input_dir="data",required_exts=[".txt"])
documents=reader.load_data()
print(f"Documents:{documents}")

Documents:[Document(id_='32b913be-539c-4e5d-9bda-7b6a13fe3206', embedding=None, metadata={'file_path': 'c:\\Users\\divyakirant\\Documents\\llamaIndex\\data\\sample.txt', 'file_name': 'sample.txt', 'file_type': 'text/plain', 'file_size': 1134, 'creation_date': '2025-08-30', 'last_modified_date': '2025-08-30'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Memory in LlamaIndex – Key Concepts\r\n\r\nPurpose of Memory\r\n\r\nMakes interactions stateful (remembers past turns).\r\n\r\nWithout memory → each query is independent.\r\n\r\nWith memory → conversation has continuity.\r\n\r\nHow Memory Works\r\n\r\nStores chat history (messages, queri

In [8]:
pc=Pinecone(api_key=pinecone_api_key)

In [11]:
pc.create_index(name="llndex",dimension=768,spec=ServerlessSpec(cloud="aws",region="us-east-1"))

{
    "name": "llndex",
    "metric": "cosine",
    "host": "llndex-cjawalj.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 768,
    "deletion_protection": "disabled",
    "tags": null
}

In [14]:
pinecone_index=pc.Index("llndex")

In [15]:
vector_store=PineconeVectorStore(pinecone_index=pinecone_index)

In [16]:
storage_context=StorageContext.from_defaults(vector_store=vector_store)

index=VectorStoreIndex.from_documents(documents,storage_context)

Upserted vectors: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]


In [17]:
pipeline=IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25,chunk_overlap=0),
        TitleExtractor(),
        GeminiEmbedding()
    ],
    vector_store=vector_store
)

  GeminiEmbedding()


In [18]:
pipeline.run(documents)
pinecone_index.describe_index_stats()

Parsing nodes: 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Generating embeddings: 0it [00:00, ?it/s]


{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 1}},
 'total_vector_count': 1,
 'vector_type': 'dense'}

In [19]:
all_ids=list(pinecone_index.list())
print("All vector IDs:",all_ids)

All vector IDs: [['32b913be-539c-4e5d-9bda-7b6a13fe3206#82ee5742-f752-480e-a3e4-597f01992695']]


In [20]:
query_engine=index.as_query_engine()
response=query_engine.query("do i have divya in the text file?")
print(f"Response:{response}")

Response:Yes, the user's name is Divya.

