## *Documents*

In [1]:
from langchain_core.documents import Document

docs = [
    Document(
        page_content="The quick brown fox jumps over the lazy dog.",
        metadata={"source": "example.txt"}
    ),
    Document(
        page_content="A journey of a thousand miles begins with a single step.",
        metadata={"source": "example1.txt"}
    ),
    Document(
        page_content="To be or not to be, that is the question.",
        metadata={"source": "example2.txt"}
    ),
    Document(
        page_content="All that glitters is not gold.",
        metadata={"source": "example3.txt"}
    ),
    Document(
        page_content="The only thing we have to fear is fear itself.",
        metadata={"source": "example4.txt"}
    )
]

docs

[Document(metadata={'source': 'example.txt'}, page_content='The quick brown fox jumps over the lazy dog.'),
 Document(metadata={'source': 'example1.txt'}, page_content='A journey of a thousand miles begins with a single step.'),
 Document(metadata={'source': 'example2.txt'}, page_content='To be or not to be, that is the question.'),
 Document(metadata={'source': 'example3.txt'}, page_content='All that glitters is not gold.'),
 Document(metadata={'source': 'example4.txt'}, page_content='The only thing we have to fear is fear itself.')]

## *Vector Stores*

In [None]:

import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

load_dotenv()

gemini_api_key = os.getenv("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

# LLM Initialisation...
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=gemini_api_key)

In [None]:

from langchain_huggingface import HuggingFaceEmbeddings

# Embeddings initialisations...
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

from langchain_chroma import Chroma

# Vector store initialisations...
vector_store = Chroma.from_documents(docs, embedding=embeddings)

vector_store.similarity_search_with_score("What is the first step in a journey of a thousand miles?")


[(Document(id='b6ea2bfa-b116-4935-af62-7cd272d5e0dd', metadata={'source': 'example1.txt'}, page_content='A journey of a thousand miles begins with a single step.'),
  0.20588961243629456),
 (Document(id='873cee4a-8059-478a-b301-72a1dd2754a3', metadata={'source': 'example2.txt'}, page_content='To be or not to be, that is the question.'),
  1.7181787490844727),
 (Document(id='5a27e1e2-9fab-4c61-a27c-b65e523b7a16', metadata={'source': 'example3.txt'}, page_content='All that glitters is not gold.'),
  1.7797877788543701),
 (Document(id='d9b41e55-662d-4645-805f-124341e77443', metadata={'source': 'example4.txt'}, page_content='The only thing we have to fear is fear itself.'),
  1.8255505561828613)]

## *Retrievers*

In [5]:

from typing import List


retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriever.batch(["first step", "glitters gold"])


[[Document(id='b6ea2bfa-b116-4935-af62-7cd272d5e0dd', metadata={'source': 'example1.txt'}, page_content='A journey of a thousand miles begins with a single step.')],
 [Document(id='5a27e1e2-9fab-4c61-a27c-b65e523b7a16', metadata={'source': 'example3.txt'}, page_content='All that glitters is not gold.')]]

# RAG - Pipeline

In [14]:

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer the question using the provided context only.

{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages( [ ("system", "You are a friendly assistant."), ( "human", message ) ] )

rag_chain = {"context": retriever, "question": RunnablePassthrough() } | prompt | llm

In [15]:

response = rag_chain.invoke("can we say all gold is glitter?")
response.content

'Based on the provided context, we cannot say that all gold is glitter. The context states, "All that glitters is not gold," which means that just because something glitters, it doesn\'t mean it\'s gold. It does not provide information about whether all gold glitters.'

In [13]:

response = rag_chain.invoke("What is capital of Telangana?")
response.content

'Based on the provided context, the capital of Telangana is not mentioned.'