# RAG APP using Groq API and Langchain

### Imports 

In [1]:
import os 
import getpass
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
import warnings 

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Langsmith api key")

### Rag with web loader

load an llm

In [2]:
groq_llm = ChatGroq(
                groq_api_key = os.environ["GROQ_API_KEY"],
                model="llama-3.1-8b-instant",
                temperature = 0.6
                )
#test
groq_llm.invoke("Hello").content

'Hello. What would you like to talk about or ask?'

load an embedder 

In [3]:
embedder = HuggingFaceEmbeddings( 
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)
#test
embedder.embed_query("Hello")

[-0.0627717524766922,
 0.05495881289243698,
 0.05216474458575249,
 0.08579003810882568,
 -0.08274892717599869,
 -0.07457295060157776,
 0.06855470687150955,
 0.018396392464637756,
 -0.08201132714748383,
 -0.037384744733572006,
 0.01212496217340231,
 0.0035183527506887913,
 -0.004134288523346186,
 -0.043784428387880325,
 0.021807368844747543,
 -0.0051027447916567326,
 0.0195466298609972,
 -0.04234875738620758,
 -0.11035964637994766,
 0.005424490198493004,
 -0.055734846740961075,
 0.028052441775798798,
 -0.023158719763159752,
 0.028481436893343925,
 -0.053709618747234344,
 -0.05260162800550461,
 0.03393922746181488,
 0.045388590544462204,
 0.02371845208108425,
 -0.07312081009149551,
 0.05477769672870636,
 0.017047269269824028,
 0.08136036992073059,
 -0.0028626802377402782,
 0.011958098970353603,
 0.07355853170156479,
 -0.0942375510931015,
 -0.0813620314002037,
 0.04001542925834656,
 0.0006920791929587722,
 -0.013393252156674862,
 -0.0545380525290966,
 0.005151392892003059,
 -0.02613978832

Vector store 

In [None]:
embedding_dim = len(embedder.embed_query("test"))

In [None]:
index = faiss.IndexFlatL2(embedding_dim)

In [None]:
vectore_store = FAISS(
    embedding_function = embedder,
    index = index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id = {}
)

In [None]:
vectore_store

RAG

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain.prompts import PromptTemplate
from typing_extensions import TypedDict, List
from langgraph.graph import StateGraph, START

In [None]:
#load contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

In [None]:
docs = loader.load()

In [None]:
docs

In [None]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
docs_splitted = splitter.split_documents(docs)

In [None]:
#Add docs to vectore store
_ = vectore_store.add_documents(documents = docs_splitted)

In [None]:
#prompt template
prompt = """
Answer the following question : {question}
using the relevant informations bellow :
{context}
"""
prompt = PromptTemplate(
    template = prompt,
    input_features = ["question", "context"]
    )

In [None]:
#State of application 
class State(TypedDict):
    question : str
    context : List
    answer : str

In [None]:
#test
context = vectore_store.similarity_search("what is sensory memory ?")

In [None]:
context[0].page_content

In [None]:
def retrieve (state : State):
    retrieved_docs = vectore_store.similarity_search(state["question"])
    return {"context": retrieved_docs}

In [None]:
def generate(state: State):
    context = "\n\n".join([chunk.page_content for chunk in state["context"]])
    messages = prompt.invoke({"question": state["question"], "context": context})
    response = groq_llm.invoke(messages)
    return {"answer": response.content}

In [None]:
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
response = graph.invoke({"question": "What is short term memory capacity ?"})

In [None]:
response["answer"]

Display graph

In [None]:
from IPython.display import display, Image

In [None]:
display(Image(graph.get_graph().draw_mermaid_png()))

stream 

In [None]:
for step in graph.stream(
    {"question": "What is short term memory capacity ?"}, 
    stream_mode = "updates"
):
    print (f"{step} \n\n ............ \n")

In [None]:
for message, metadata in graph.stream(
    {"question": "What is short term memory capacity ?"},
    stream_mode = "messages"
):
    print (message.content, end = '|')

### RAG with local documents 

In [None]:
#Imports 
import faiss
from langchain_groq                         import ChatGroq
from langchain_huggingface                  import HuggingFaceEmbeddings
from langchain.vectorstores                 import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.text_splitter                import RecursiveCharacterTextSplitter
from langchain_core.documents               import Document
from typing_extensions                      import TypedDict, List
from langchain.prompts import PromptTemplate
from langgraph.graph import StateGraph
import pdfplumber

In [None]:
#LLM
groq_llm = ChatGroq(
    groq_api_key = os.environ["GROQ_API_KEY"],
    model="llama-3.1-8b-instant",
    temperature = 0.5
)

In [None]:
embedder = HuggingFaceEmbeddings(
    model_name ="sentence-transformers/all-MiniLM-L6-v2"
)

In [None]:
#Vector database 
embeddings_ex = embedder.embed_query("hi")
index = faiss.IndexFlatL2(len(embeddings_ex))
vectore_store = FAISS(
    embedding_function = embedder,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [None]:
#fix directory 
os.chdir("..")
print(os.getcwd())

In [None]:
documents = []
with pdfplumber.open("documents/No More Mr. Nice Guy by Robert Glover.pdf") as book:
    for page in book.pages:
        documents.append(page.extract_text())
documents = "\n".join(documents)

In [None]:
documents_structured = Document(page_content=documents)

In [None]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 100)
documents_splitted = splitter.split_documents( [documents_structured])

In [None]:
#Add documents to our vectore store 
_ = vectore_store.add_documents(documents_splitted)

In [None]:
for source in vectore_store.similarity_search(query="How a person can get help from others ?"):
    print (source.page_content)
    print ("---------------")

In [None]:
class State(TypedDict):
    question : str
    context : List
    answer : str

In [None]:
def retrieve(state : State):
    retrieved_docs = vectore_store.similarity_search(state["question"]) 
    return {"context": retrieved_docs}

prompt = """
    Use informations bellow extracted from No more Mr. Nice Guy book written by Robert Glover, to answer the following question : 
    {question}
    If you don't know the answer, don't make up one, simply say I don't know.
    relevant informations : 
    {context}
"""
prompt = PromptTemplate(template = prompt,
               input_variables = ["question", "context"])

def generate(state : State):
    context = "\n\n relevant information :\n".join(text.page_content for text in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": context})
    response = groq_llm.invoke(messages)
    return {"answer": response}

In [None]:
#Compile my graph 
workflow = StateGraph(State).add_sequence([retrieve, generate])
workflow.add_edge(START, "retrieve")
book_rag = workflow.compile()

In [None]:
response = book_rag.invoke({"question": "How a person can get help from others ?"})
response 

In [None]:
print(response["answer"].content)

### Retrieval PART 2, conversational style RAG

In [46]:
from langchain_core.vectorstores import InMemoryVectorStore
from langgraph.graph import MessagesState, StateGraph
from langchain_core.tools import tool
import pdfplumber
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition
from langgraph.graph import END

In [30]:
vector_store_2 = InMemoryVectorStore(embedding=embedder)

In [18]:
#Importer un document 
documents = []
with pdfplumber.open("../documents/I-Will-Teach-You-to-Be-Rich-Book-Summary.pdf") as book:
    for page in book.pages:
        documents.append(page.extract_text())
documents = "\n".join(documents)

In [22]:
documents = Document(documents)

In [28]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents_splitted = splitter.split_documents([documents])

In [31]:
vector_store_2.add_documents(documents_splitted)

['3e42c180-dbd3-4474-ad0c-cd6a36cfbdba',
 '8b71f44d-4698-4aa2-afba-77c6a738d103',
 '4b1b4aaa-d7b1-4582-8a32-24ad2a7406ec',
 'e3148d8e-89e2-4d2f-a7e0-9e9fe25e46b9',
 '347fb7f0-c81f-4da0-abe8-48e14f3ac871',
 '62fbf805-4ea6-47ae-b809-2081a9a8e568',
 '4e8fe2f6-f7e3-430d-a10c-d66fd970b548',
 'd3f51d2c-fb24-4b72-bd7f-87b5ae012bba',
 '4d23940d-28b8-4b64-bd2f-56525553c730',
 '93e343b2-00e8-460c-a011-fc6879c4a3d4',
 '2058e9c5-3bf3-4e84-94ae-ef2345dd40cd',
 '4fe681c2-fde4-44a2-bb7b-0baa0ecb71fc',
 'b4dd504b-3046-4b9c-8fdf-91da71b2ac59',
 '6a408522-66fe-401f-9e2a-8cf69eb18624',
 '14d595c5-32cf-41a1-84b6-f53900087269',
 '56c03f14-980b-40b1-b59d-4f1c315fed08',
 '0d95543c-466b-46cf-a0a3-3259e85288f9',
 'c020217b-25fc-4a36-b572-cb1f22f2cc7a',
 '7c98fae2-5785-4aa8-8400-1ba3c799d2df',
 'ecaa130a-59ce-4973-9531-964b48ebdbf1',
 '3eda2473-5b11-4b2f-bf62-2c69f1f9d398',
 'abfdb901-2832-4a10-9d33-3c07ee06a178',
 '3916d400-3c27-48d7-9955-c1bf5fce3440']

In [6]:
# Add langsmith tracing 
os.environ["LANGSMITH_TRACING"] = "true"
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Input Langsmith API")

In [39]:
@tool(response_format="content_and_artifact")
def retrieve(query:str):
    """ Retrieve relevant informations relative to query """
    retrieved_docs = vector_store_2.similarity_search(query, k=2)
    summary = "\n\n".join (
        (f"Source : {doc.metadata}. \nContent : {doc.page_content}.") for doc in retrieved_docs
        )
    return summary, retrieved_docs

In [41]:
# Node 1 in our graph 
def query_or_respond(state:MessagesState):
    """Tool call for either formulate user query or respond directly"""
    groq_llm_with_tools = groq_llm.bind_tools([retrieve])
    response = groq_llm_with_tools.invoke(state["messages"])
    print ("query_or_respond function :", response)
    return {"messages": [response]}

In [42]:
#Node 2
tools = ToolNode([retrieve])

In [44]:
#Node 3
def generate(state: MessagesState):
    """Generate an answer to the query"""
    #Extract previous context
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type == "tool":
            recent_tool_messages.append(message)
        else :
            break
    
    tool_messages = recent_tool_messages[::-1]
    docs_content = "\n\n".join(doc.content for doc in tool_messages)
    #Prompt 
    system_prompt = f"""
        You are a Q&A assistant.
        Use informations bellow to answer the given question.
        If you don't know the answer, don't make up one, simply say I don't know.
        relevant informations : 
        {docs_content}
    """
    conversation_history = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_prompt)] + conversation_history
    response = groq_llm.invoke(prompt)    
    return {"messages": [response]}

In [56]:
#Build nodes and edges of the graph
#Initiate graph
graph_builder = StateGraph(MessagesState)
# Nodes
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)
# Edges
graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END:END, "tools": "tools"}
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)
rag_app = graph_builder.compile()

In [61]:
#Test the app 
input = {"messages": [{"role": "user", "content": "Hello, my name is Hamza."}]}
rag_app.invoke(input)

query_or_respond function : content='Nice to meet you, Hamza. Is there anything I can help you with today?' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 218, 'total_tokens': 237, 'completion_time': 0.02831595, 'prompt_time': 0.011958099, 'queue_time': 0.087884563, 'total_time': 0.040274049}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_ab04adca7d', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None} id='run--1216afb4-7088-46e8-b693-5a6bff23ad15-0' usage_metadata={'input_tokens': 218, 'output_tokens': 19, 'total_tokens': 237}


{'messages': [HumanMessage(content='Hello, my name is Hamza.', additional_kwargs={}, response_metadata={}, id='be844197-c09e-4d5c-aa15-c541e7296170'),
  AIMessage(content='Nice to meet you, Hamza. Is there anything I can help you with today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 218, 'total_tokens': 237, 'completion_time': 0.02831595, 'prompt_time': 0.011958099, 'queue_time': 0.087884563, 'total_time': 0.040274049}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_ab04adca7d', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--1216afb4-7088-46e8-b693-5a6bff23ad15-0', usage_metadata={'input_tokens': 218, 'output_tokens': 19, 'total_tokens': 237})]}

In [64]:
#Test the app 
input = {"messages": [{"role": "user", "content": "Hello, my name is Hamza."}]}
for step in rag_app.stream(
    input,
    stream_mode = "values"
):
    print ("---------------------------")
    step["messages"][-1].pretty_print()

---------------------------

Hello, my name is Hamza.
query_or_respond function : content="Hello Hamza, it's nice to meet you. Is there anything I can help you with today?" additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 218, 'total_tokens': 240, 'completion_time': 0.035025722, 'prompt_time': 0.012083691, 'queue_time': 0.087643364, 'total_time': 0.047109413}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_e32974efee', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None} id='run--5e93bed3-aa0f-4815-92ff-28cb650dd4e9-0' usage_metadata={'input_tokens': 218, 'output_tokens': 22, 'total_tokens': 240}
---------------------------

Hello Hamza, it's nice to meet you. Is there anything I can help you with today?


In [66]:
#Test the app 
input_2 = {"messages": [{"role": "user", "content": "What is the difference between Lifecycle and index funds ?"}]}
for step in rag_app.stream(
    input_2,
    stream_mode = "values"
):
    print ("---------------------------")
    step["messages"][-1].pretty_print()

---------------------------

What is the difference between Lifecycle and index funds ?
query_or_respond function : content='' additional_kwargs={'tool_calls': [{'id': 'qn7h4r8pt', 'function': {'arguments': '{"query":"Difference between Lifecycle and index funds"}', 'name': 'retrieve'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 220, 'total_tokens': 239, 'completion_time': 0.026616606, 'prompt_time': 0.011957453, 'queue_time': 0.088675201, 'total_time': 0.038574059}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_e32974efee', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None} id='run--d39d37a2-684f-47bd-a440-86dbe2522ccd-0' tool_calls=[{'name': 'retrieve', 'args': {'query': 'Difference between Lifecycle and index funds'}, 'id': 'qn7h4r8pt', 'type': 'tool_call'}] usage_metadata={'input_tokens': 220, 'output_tokens': 19, 'total_tokens': 239}
---------------------------
Tool Calls:
  retrie

In [78]:
#Test the app 
input = {"messages": [{"role": "user", "content": "Hello, my name is Hamza."}]}
response = rag_app.invoke(input)

query_or_respond function : content="Hello Hamza, it's nice to meet you. Is there something I can help you with today?" additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 218, 'total_tokens': 240, 'completion_time': 0.028971967, 'prompt_time': 0.0120202, 'queue_time': 0.086725526, 'total_time': 0.040992167}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_33e8adf159', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None} id='run--9efc450d-fae3-40dc-9866-314f9100651a-0' usage_metadata={'input_tokens': 218, 'output_tokens': 22, 'total_tokens': 240}


In [82]:
response["messages"][-1].content

"Hello Hamza, it's nice to meet you. Is there something I can help you with today?"