### RAG Chain

In [4]:
sample_docs = [
  """Deep learning is a subfield of machine learning that uses neural networks with multiple layers. It is widely applied in image recognition, natural language processing, and speech recognition.""",

  """Machine learning is the process of teaching computers to make decisions without being explicitly programmed. It includes supervised, unsupervised, and reinforcement learning approaches.""",

  """Retrieval-Augmented Generation (RAG) combines a retrieval system with a generative model. It improves accuracy by retrieving relevant documents from a knowledge base before generating answers.""",

  """LangChain is a framework designed to build applications powered by large language models. It provides abstractions for chains, agents, and memory, making it easier to integrate LLMs with external data.""",

  """When using RAG with LangChain, the pipeline typically involves embedding documents, storing them in a vector database, retrieving relevant chunks, and passing them into the LLM for context-aware generation.""",

  """Popular vector databases used with LangChain for RAG pipelines include Pinecone, Weaviate, FAISS, and Milvus.""",

  """Deep learning models often require GPUs for efficient training because of their high computational requirements.""",

  """LangChain supports integration with multiple LLM providers, including OpenAI, Anthropic, and Hugging Face.""",

  """In a RAG pipeline, embeddings are numerical vector representations of text that allow semantic similarity search.""",

  """Fine-tuning machine learning models involves adjusting model parameters on domain-specific datasets to improve accuracy."""
]


### Document Loading
- But as I am using a dummy data over here, we can skip this part , or store it in a folder data and use directoryloader to import them

In [5]:
from langchain.docstore.document import Document
documents = [Document(page_content=doc) for doc in sample_docs] 
documents 

[Document(metadata={}, page_content='Deep learning is a subfield of machine learning that uses neural networks with multiple layers. It is widely applied in image recognition, natural language processing, and speech recognition.'),
 Document(metadata={}, page_content='Machine learning is the process of teaching computers to make decisions without being explicitly programmed. It includes supervised, unsupervised, and reinforcement learning approaches.'),
 Document(metadata={}, page_content='Retrieval-Augmented Generation (RAG) combines a retrieval system with a generative model. It improves accuracy by retrieving relevant documents from a knowledge base before generating answers.'),
 Document(metadata={}, page_content='LangChain is a framework designed to build applications powered by large language models. It provides abstractions for chains, agents, and memory, making it easier to integrate LLMs with external data.'),
 Document(metadata={}, page_content='When using RAG with LangChain,

### Document Splitting

In [6]:
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    separators=[" "],
    chunk_size=100,
    chunk_overlap=10,
    length_function=len
)
chunks = text_splitter.split_documents(documents)
print(f"Number of chunks: {len(chunks)}")
print(chunks[4])  

Number of chunks: 23
page_content='Retrieval-Augmented Generation (RAG) combines a retrieval system with a generative model. It'


### Embeddings using HuggingFace

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") 
vector = embeddings.embed_query("Hello world")
print(vector)  

  from .autonotebook import tqdm as notebook_tqdm


[-0.03447727486491203, 0.03102317824959755, 0.006734970025718212, 0.026108985766768456, -0.03936202451586723, -0.16030244529247284, 0.06692401319742203, -0.006441489793360233, -0.0474504791200161, 0.014758856035768986, 0.07087527960538864, 0.05552763119339943, 0.019193334504961967, -0.026251312345266342, -0.01010954286903143, -0.02694045566022396, 0.022307461127638817, -0.022226648405194283, -0.14969263970851898, -0.017493007704615593, 0.00767625542357564, 0.05435224249958992, 0.0032543970737606287, 0.031725890934467316, -0.0846213847398758, -0.02940601296722889, 0.05159561336040497, 0.04812406003475189, -0.0033148222137242556, -0.058279167860746384, 0.04196927323937416, 0.022210685536265373, 0.1281888335943222, -0.022338971495628357, -0.011656315997242928, 0.06292839348316193, -0.032876335084438324, -0.09122604131698608, -0.031175347045063972, 0.0526994913816452, 0.04703482985496521, -0.08420311659574509, -0.030056199058890343, -0.02074483036994934, 0.009517835453152657, -0.0037217906

### Initalize ChromaDB

In [9]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

persist_dir = "./chroma_db"

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    collection_name="rag_collection",
    persist_directory=persist_dir,
)
vector_db.persist()

print(f"Number of documents in the collection: {vector_db._collection.count()}")


Number of documents in the collection: 69


### Testing Similarity Search

In [18]:
query = "What is Fine tuning?"
result = vector_db.similarity_search(query,k=3)
result

[Document(metadata={}, page_content='Fine-tuning machine learning models involves adjusting model parameters on domain-specific datasets'),
 Document(metadata={}, page_content='datasets to improve accuracy.'),
 Document(metadata={}, page_content='programmed. It includes supervised, unsupervised, and reinforcement learning approaches.')]

### Advance Similarity Search with Scores
- Lower the value closer to the result

In [None]:
results = vector_db.similarity_search_with_score(query,k=3)
results

[(Document(metadata={}, page_content='Fine-tuning machine learning models involves adjusting model parameters on domain-specific datasets'),
  1.1072428226470947),
 (Document(metadata={}, page_content='datasets to improve accuracy.'),
  1.422195315361023),
 (Document(metadata={}, page_content='programmed. It includes supervised, unsupervised, and reinforcement learning approaches.'),
  1.5049494504928589)]

### Initalizing LLM 

In [40]:
import os

from dotenv import load_dotenv
load_dotenv()
from langchain.chat_models import init_chat_model
llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

llm

ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001BE10A27A00>, default_metadata=(), model_kwargs={})

In [41]:
llm.invoke("What is LLM?")

AIMessage(content='An **LLM** stands for **Large Language Model**.\n\nIt is a type of artificial intelligence (AI) model that has been trained on a massive amount of text data to understand, generate, and process human language.\n\nLet\'s break down each part of the name:\n\n1.  **Large:** This refers to two main things:\n    *   **Parameters:** LLMs have billions, sometimes even trillions, of parameters (the internal variables the model learns during training). More parameters generally allow the model to capture more complex patterns and nuances in language.\n    *   **Training Data:** They are trained on incredibly vast datasets, often comprising a significant portion of the internet (web pages, books, articles, code, etc.).\n\n2.  **Language:** This indicates their primary domain. LLMs are designed to work with human language in various forms:\n    *   **Understanding:** Interpreting the meaning, context, and intent of text.\n    *   **Generating:** Producing coherent, relevant, an

### RAG Modern Chain

In [42]:
from langchain.prompts import ChatPromptTemplate 
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [43]:
vector_retriever = vector_db.as_retriever(
    search_kargs={"k":3}
)
vector_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001BE5CAB6830>, search_kwargs={})

### A chatprompt template 
- Basically why I am usin this is because we need to talk to LLM at the end, so we need a prompt for it

In [44]:
system_prompt = """ You are a assistant for question-answering tasks. 
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum to answer and keep it concise.
Context : {context}"""

In [46]:
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

- Stuffing releveant context from db in create stuff document chain

In [47]:
document_chain = create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template=" You are a assistant for question-answering tasks. \nUse the following pieces of context to answer the question at the end.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nUse three sentences maximum to answer and keep it concise.\nContext : {context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), clie

In [50]:
## Final RAG Chain
rag_chain = create_retrieval_chain(vector_retriever,document_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001BE5CAB6830>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template=" You are a assistant for question-answering tasks. \nUse the following pieces of context to answer the question at the end.\nIf you

- Invoking Final rag chain

In [52]:
results = rag_chain.invoke({"input":"Where is MLR Institute of Technology located?"})
results

{'input': 'Where is MLR Institute of Technology located?',
 'context': [Document(metadata={}, page_content='LLMs with external data.'),
  Document(metadata={}, page_content='requirements.'),
  Document(metadata={}, page_content='It provides abstractions for chains, agents, and memory, making it easier to integrate LLMs with'),
  Document(metadata={}, page_content='It is widely applied in image recognition, natural language processing, and speech recognition.')],
 'answer': "I'm sorry, but the provided context does not contain information about the location of the MLR Institute of Technology. Therefore, I don't know the answer to your question."}

### RAG Chain - Using LangChain Expression Language 

In [53]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough,RunnableParallel 

In [54]:
custom_prompt = ChatPromptTemplate.from_template(
    """" 
    You are a assistant for question-answering tasks. 
    Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use three sentences maximum to answer and keep it concise.
    Context : {context}
    Question : {input} 
    Answer : """
)

In [56]:
def stuff_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [57]:
rag_chain_lcel = (
    {"context" : vector_retriever | stuff_docs,
    "question" : RunnablePassthrough()
    }
    | custom_prompt
    | llm
    | StrOutputParser()
)

In [61]:
results = rag_chain.invoke({"input" : "What is RAG?"})
results['answer']

'RAG, or Retrieval-Augmented Generation, combines a retrieval system with a generative model. This approach typically involves embedding documents and storing them in a vector database. Embeddings are numerical vector representations of text that allow semantic comparisons.'

### Adding new data to VectorDB

In [11]:
new_document = """ 
    NodeJs is a JavaScript runtime built on Chrome's V8 JavaScript engine. It allows developers to run JavaScript code on the server side, enabling the creation of scalable network applications. NodeJs is known for its event-driven, non-blocking I/O model, which makes it efficient and suitable for real-time applications.
"""

In [12]:
new_doc = Document(
    page_content=new_document,
    metadata={"source":"copilot","topic": "NodeJs"}
)

In [13]:
new_chunks = text_splitter.split_documents([new_doc])

In [15]:
vector_db.add_documents(new_chunks)

['e724673b-68ba-41a3-85c1-b1eee0e33d94',
 '8f4e1689-9ac1-4e92-a522-82c5fd2628c4',
 'aef3678c-0dd4-402a-b418-55c9195818a6',
 'db58bfad-5f7d-4d15-80bf-f3ae0cbb888f']