<a href="https://colab.research.google.com/github/Mayank2177/Ask.ai/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installing Frameworks

In [1]:
# Install the packages this notebook imports: LangChain text splitters and
# community integrations, the Milvus and Google GenAI connectors, and pyPDF
# (presumably the PDF backend for PyPDFLoader used later — confirm).
# NOTE(review): versions are unpinned and --upgrade is set, so a re-run may pull
# newer releases than the ones that produced the saved outputs.
%pip install --quiet --upgrade langchain-text-splitters langchain-community langchain_milvus langchain_google_genai pyPDF

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.3/254.3 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.3/55.3 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.4/57.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not c

In [2]:
!pip install LangSmith



In [3]:
import getpass
import os

# Switch on the LangSmith tracing flag read from the environment.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt only when the key is not already configured, mirroring how the Google
# API key is handled in the cells below. This keeps re-runs from asking again
# and lets the key be supplied through the environment. getpass hides the
# typed value so it never appears in the saved notebook output.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

··········


### Using Gemini as Chat Model

In [4]:
import getpass
import os

from langchain.chat_models import init_chat_model

# Reuse a Google API key that is already present in the environment;
# otherwise ask for it with hidden input so it is not echoed into the notebook.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

# Chat model used later as the `llm` of the document-combining chain.
model = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

Enter API key for Google Gemini: ··········


### Using Gemini as Embedding Model

In [9]:
import getpass
import os

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# The key check is repeated here so this cell also works when it is run
# without the chat-model cell: prompt (hidden input) only if the key is missing.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

# Embedding model handed to the vector store as its embedding function.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

### Using Milvus as VectorStore

In [10]:
import tempfile

from langchain_milvus import Milvus

# Reserve a uniquely named "milvus_*.db" file in the temp directory and keep
# only its path; delete=False leaves the file on disk after the handle closes.
db_file = tempfile.NamedTemporaryFile(prefix="milvus_", suffix=".db", delete=False).name
print(f"The vector database will be saved to {db_file}")

# Collect the constructor arguments first so the store configuration is easy
# to read and tweak in one place.
milvus_settings = dict(
    embedding_function=embeddings_model,
    connection_args=dict(uri=db_file),
    auto_id=True,
    index_params=dict(index_type="AUTOINDEX"),
)
vector_db = Milvus(**milvus_settings)

The vector database will be saved to /tmp/milvus_6h5w4q_p.db


### Load document from Drive

In [None]:
import os
from google.colab import drive

# Mount Google Drive at /content/drive. `google.colab` is only importable
# inside Colab, so this cell is Colab-specific. `os` is imported here and is
# also used by the path check in the following cell.
drive.mount('/content/drive')

In [14]:
# Imported locally so this cell does not depend on the Colab-only mount cell
# having been executed first.
import os

# Get the file path from the user; strip whitespace picked up by copy/paste.
file_path = input("Please enter the path to the text file in your Google Drive: ").strip()

# Stop here when the path is wrong. Merely printing a message would leave
# `filename` undefined (or stale from an earlier run), and the following cells
# would then fail with a NameError or silently process the previous file.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"Error: File not found at {file_path}")

# Name consumed by the document-loading cell.
filename = file_path

Please enter the path to the text file in your Google Drive: /dl-curriculum.pdf


### Download document from a web source

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader

# Configure a loader for the web page. Parsing is restricted to elements whose
# CSS class is "post-content", "post-title" or "post-header".
# NOTE(review): this cell only constructs the loader; no cell shown in this
# notebook calls loader.load(), and `loader` is reassigned by the splitting
# cell — confirm whether this section is still needed.
# NOTE(review): `hub` is imported but not used in the cells shown here.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

### Split the document into chunks

In [21]:
import os

# Loaders come from langchain_community, the same package the web-loader cell
# imports from, instead of the deprecated `langchain.document_loaders` path.
from langchain_community.document_loaders import (
    Docx2txtLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredEmailLoader,
)
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Supported file extensions and the loader class that reads each of them.
LOADERS_BY_EXTENSION = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".eml": UnstructuredEmailLoader,
    ".txt": TextLoader,
}

if not os.path.exists(filename):
    raise FileNotFoundError(f"File not found: {filename}")

# Match the extension case-insensitively so "notes.PDF" is treated like
# "notes.pdf".
filename_lower = filename.lower()
loader_cls = next(
    (cls for ext, cls in LOADERS_BY_EXTENSION.items() if filename_lower.endswith(ext)),
    None,
)

# An unknown extension used to fall through every branch and silently yield
# zero chunks; raise instead so the problem is visible where it occurs.
if loader_cls is None:
    supported = ", ".join(sorted(LOADERS_BY_EXTENSION))
    raise ValueError(f"Unsupported file type: {filename} (supported: {supported})")

loader = loader_cls(filename)
documents = list(loader.load())

# Split the loaded documents into chunks of at most 1000 characters, with
# 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Tag every chunk with a sequential, 1-based id in its metadata.
for doc_id, text in enumerate(texts, start=1):
    text.metadata["doc_id"] = doc_id

print(f"{len(texts)} text document chunks created")

22 text document chunks created


### Populate the database

In [22]:
# Embed the chunks and insert them into the vector store; the call returns one
# id per inserted chunk.
# NOTE(review): re-running this cell presumably inserts the same chunks again
# — confirm before executing it twice against the same database.
ids = vector_db.add_documents(texts)
inserted_count = len(ids)
print(f"{inserted_count} documents added to the vector database")

22 documents added to the vector database


### Conduct a similarity search

In [31]:
# Read the question. input() already returns a str, so no conversion is
# needed; surrounding whitespace is dropped before the text is embedded.
query = input("Enter your Question: ").strip()

# An empty question cannot produce a meaningful similarity search, so stop
# early with a clear message instead of sending it to the embedding model.
if not query:
    raise ValueError("The question must not be empty.")

# Fetch the stored chunks most similar to the question.
docs = vector_db.similarity_search(query)
print(f"{len(docs)} documents returned")

Enter you Question: What is Backpropagation
4 documents returned


### Automate the RAG pipeline

In [32]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# Question-answering prompt with two placeholders: {context} and {input}.
# The chain is invoked with the user's question under the "input" key.
QA_PROMPT_TEXT = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
prompt_template = ChatPromptTemplate.from_template(QA_PROMPT_TEXT)

# Chain that feeds the prompt (with the documents inserted) to the chat model.
combine_docs_chain = create_stuff_documents_chain(llm=model, prompt=prompt_template)

# Full RAG pipeline: retrieve from the vector store, then answer with the
# document-combining chain.
retriever = vector_db.as_retriever()
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

### Generate a retrieval-augmented response to a question

In [33]:
# Run the RAG chain on the question entered earlier and show only the
# generated answer from the result mapping.
rag_request = {"input": query}
output = rag_chain.invoke(rag_request)
print(output["answer"])

Backpropagation involves:
*   Derivation using the chain rule.
*   Computing gradients for each layer.
*   Updating weights and biases.
*   Understanding computational graphs.

For Recurrent Neural Networks, there is also "Backpropagation Through Time (BPTT)", which involves unfolding the RNN, treating the RNN as a deep network over time, calculating gradients, and applying the chain rule through timesteps.
