### 1. Basic RAG

In [29]:
## 1. Setting up Environment Variables
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCAHIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')

os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN')

In [30]:
## 2. Importing Libraries
from bs4 import BeautifulSoup, SoupStrainer
import requests
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

In [31]:
llm = ChatGroq(
    model = "gemma2-9b-it",
    temperature=0.6
)
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
print(llm)
print(embeddings)

client=<groq.resources.chat.completions.Completions object at 0x7090340e79b0> async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7090340e6ba0> model_name='gemma2-9b-it' temperature=0.6 model_kwargs={} groq_api_key=SecretStr('**********')
model_name='BAAI/bge-small-en' cache_folder=None model_kwargs={'device': 'cpu'} encode_kwargs={'normalize_embeddings': True} multi_process=False show_progress=False


In [16]:
## 3. Loading, Splitting, Embedding, Prompt

## not working with this url
# loader = WebBaseLoader(
#     web_path=("https://huggingface.co/blog/Kseniase/mcp",),
#     bs_kwargs= dict(
#         parse_only = bs4.SoupStrainer(
#             class_=("post-content", "post-title","post-header")
#         )
#     )
# )

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()
# print(docs)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs_split = text_splitter.split_documents(docs)
vector_store = Chroma.from_documents(documents = docs_split, embedding = embeddings)
retriever = vector_store.as_retriever()

In [32]:
## 4. Retriever & Generation

prompt = hub.pull("rlm/rag-prompt")

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm 
    | StrOutputParser()
)

In [23]:
rag_chain.invoke("What is agent and what are 2 components important for agents?")

'An agent is an autonomous entity capable of observing, thinking, and acting within an environment. Two important components for agents are Memory, which helps in retaining past interactions, and Reasoning/Planning, which aids in making decisions and planning actions.'

### 2. Indexing

In [4]:
question = "Where did I go yesterday? With whom i went? Where we went ?"
document = "On 21st of March 2025, I and Vaishnavi went to Ellora and Bhadra Maroti"

In [33]:
## Keeping count of tokens to stay in limit

from transformers import AutoTokenizer

def num_tokens_from_string (text: str, model_name: str= "BAAI/bge-small-en") -> int:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokens = tokenizer(text, return_tensors="pt", truncation=False)["input_ids"]
    return tokens.shape[1]

In [12]:
print(num_tokens_from_string(text="What can be the length of this sentence ?", model_name="BAAI/bge-small-en"))
print(num_tokens_from_string(text=question, model_name="BAAI/bge-small-en"))
print(num_tokens_from_string(text=document, model_name="BAAI/bge-small-en"))

11
17
27


In [8]:
query_result = embeddings.embed_query(question)
document_result = embeddings.embed_query(document)
print(len(query_result))
print(len(document_result))

384
384


In [13]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.7531669078147833


In [34]:
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k":3})

In [35]:
retriever = vectorstore.as_retriever(search_kwargs={"k":1})
docs = retriever.invoke("What is Task Decomposition ?")

In [22]:
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{"task": task, "id", task_id, "dep": dependency_task_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}]. The "dep" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag "-task_id" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.\n\n(2) Model selection: LLM distributes th

In [36]:
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on following context:

{context}:

Question: {question}"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on following context:\n\n{context}:\n\nQuestion: {question}'), additional_kwargs={})])

In [37]:
chain = prompt | llm
chain.invoke({"context":docs,"question":"What is Task Decomposition?"})


AIMessage(content='Based on the provided text, Task Decomposition refers to the process of breaking down a user\'s input into several smaller tasks that can be handled by different expert models.  \n\nThe text explains that an AI assistant parses user input into tasks, each with specific details like "task", "id", "dep" (dependency on previous tasks), and "args" (text, image, audio, video URLs).  The LLM then distributes these tasks to appropriate expert models. \n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 100, 'prompt_tokens': 328, 'total_tokens': 428, 'completion_time': 0.181818182, 'prompt_time': 0.017223568, 'queue_time': 0.23281741000000003, 'total_time': 0.19904175}, 'model_name': 'gemma2-9b-it', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-0c84583c-c558-4693-b80d-b014695578b8-0', usage_metadata={'input_tokens': 328, 'output_tokens': 100, 'total_tokens': 428})

In [38]:
prompt = hub.pull("rlm/rag-prompt")

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm 
    | StrOutputParser()
)
rag_chain.invoke("What is Task Decomposition?")

"Task Decomposition is the process of breaking down a user's input into smaller, manageable tasks. \n\nEach task has a specific ID, dependencies on previous tasks, and required arguments.  This allows the AI to process complex requests efficiently. \n"