In [1]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [41]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'advanced-rag'
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")

# Part 1: Overview

In [10]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import ChatPromptTemplate

#### INDEXING ####

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2017-06-21-overview/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

##1 - 0 - 1000 , 800 - 1800

# Split - Chunking
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
# model_name = "BAAI/bge-small-en"

model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
vectorstore = FAISS.from_documents(documents=splits, 
                                    embedding=hf_embeddings)

retriever = vectorstore.as_retriever(search_kwargs={"k": 2}) # Dense Retrieval - Embeddings/Context based

#### RETRIEVAL and GENERATION ####

prompt= """
You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.

Question: {question} 
Context: {context} 
Answer:
"""


prompt_template = ChatPromptTemplate.from_template(prompt)


# LLM
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    })
    | prompt_template 
    | llm
    | StrOutputParser()
)
# Question
result = rag_chain.invoke(
    "What is Deep Learning?",
    config={"timeout": 30}
)
print(result)

Deep Learning is a type of artificial intelligence that uses large and deep artificial neural networks to analyze and interpret data. These neural networks are composed of multiple layers of interconnected nodes or "neurons" that process and transmit information.


# Part 2: Indexing

In [11]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

## Document Loaders

In [12]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

## Splitter

In [13]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

## Text embedding models

In [14]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
query_result = hf_embeddings.embed_query(question)
document_result = hf_embeddings.embed_query(document)
len(query_result)

384

In [15]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.902305234006825


## Vectorstores

In [16]:
# Index
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(documents=splits, 
                                    embedding=hf_embeddings)

retriever = vectorstore.as_retriever()

In [17]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000029F27620D70>, search_kwargs={})

# Part 3: Retrieval

In [25]:
from langchain.schema import Document

# Let's say your retriever returned docs
retrieved_docs = retriever.invoke("What is Task Decomposition?")

# Pretty print each doc
for i, doc in enumerate(retrieved_docs, 1):
    print(f"\n--- Document {i} ---")
    print(f"Metadata: {doc.metadata}")
    print(f"Content:\n{doc.page_content[:2000]}")  # show first 500 chars
    print("-" * 50)



--- Document 1 ---
Metadata: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
Content:
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.
Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by 

# Part 4: Generation


In [27]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """
Answer the question based only on the following context:{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\nAnswer the question based only on the following context:{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [35]:
# LLM
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

In [36]:
# Chain
chain = prompt | llm

In [37]:
# Run
chain.invoke({"context":retrieved_docs,"question":"What is Task Decomposition?"})

AIMessage(content='According to the provided context, Task Decomposition is a technique used by an agent to break down a complicated task into smaller and simpler steps. This is achieved by instructing the model to "think step by step" using techniques such as Chain of Thought (CoT) and Tree of Thoughts (Yao et al. 2023). Task decomposition can be done using Large Language Models (LLM) with simple prompting, task-specific instructions, or human inputs.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 94, 'prompt_tokens': 1142, 'total_tokens': 1236, 'completion_time': 0.066561507, 'prompt_time': 0.126913199, 'queue_time': 0.270219761, 'total_time': 0.193474706}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_59f3b579f6', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--42db7fb3-ea3c-4472-8ee6-5c831a0192e0-0', usage_metadata={'input_tokens': 1142, 'output_tokens': 94, 'total_tokens': 1236})

## RAG chains

In [38]:
from langchain import hub
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [39]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [40]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_hub_rag
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is the process of breaking down a complicated task into smaller and simpler steps, allowing an agent to plan ahead and utilize more test-time computation. This can be done through techniques such as Chain of Thought, Tree of Thoughts, or simple prompting like "Steps for XYZ."'