# RAG APP using Groq API and Langchain

### Imports 

In [None]:
import os 
import getpass
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
import warnings 

# Ignore every warning category from here on (presumably to keep cell output readable).
warnings.filterwarnings("ignore")

In [None]:
# Set the LangSmith tracing flag and ask for the API key interactively so the
# secret is never written into the notebook.
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Langsmith api key")

### RAG with web loader

Load an LLM

In [None]:
# Nothing earlier in this notebook sets GROQ_API_KEY, so ask for it when it is
# missing (or empty) instead of failing with a KeyError on the lookup below.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Groq api key")

# Chat model used by the generation step of the web-loader RAG graph.
groq_llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model="llama-3.1-8b-instant",
    temperature=0.6,
)
# Smoke test: send a trivial prompt and show the text of the reply.
groq_llm.invoke("Hello").content

Load an embedder

In [None]:
# Embedding model; it is handed to the FAISS store as its embedding function.
embedder = HuggingFaceEmbeddings( 
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)
# Smoke test: embed a short query and display the resulting vector.
embedder.embed_query("Hello")

Vector store 

In [None]:
# Measure the embedding length by embedding a throwaway string; the FAISS
# index created next must be sized with it.
embedding_dim = len(embedder.embed_query("test"))

In [None]:
# Flat L2-distance FAISS index with one dimension per embedding component.
index = faiss.IndexFlatL2(embedding_dim)

In [None]:
# Start from an empty store: a fresh in-memory docstore and no
# index-position -> document-id mappings yet.
vectore_store = FAISS(
    embedding_function = embedder,
    index = index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id = {}
)

In [None]:
# Echo the store object as the cell output.
vectore_store

RAG

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain.prompts import PromptTemplate
from typing_extensions import TypedDict, List
from langgraph.graph import StateGraph, START

In [None]:
# Loader for the blog post. The SoupStrainer restricts HTML parsing to the
# elements whose CSS class is post-content, post-title or post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

In [None]:
# Run the loader; `docs` is what the splitter consumes below.
docs = loader.load()

In [None]:
# Echo the loaded documents as the cell output for a quick visual check.
docs

In [None]:
# Cut the loaded documents into chunks (chunk_size=1000, chunk_overlap=200)
# so that each chunk can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
docs_splitted = splitter.split_documents(docs)

In [None]:
# Add the chunks to the vector store. The return value is bound to `_` so it
# is not echoed as the cell output.
_ = vectore_store.add_documents(documents = docs_splitted)

In [None]:
# Prompt template with two placeholders: the user's question and the
# retrieved context that the model should answer from.
prompt = """
Answer the following question : {question}
using the relevant informations bellow :
{context}
"""
# `input_variables` is the PromptTemplate field that names the placeholders;
# the previous `input_features` keyword is not a PromptTemplate field.
prompt = PromptTemplate(
    template=prompt,
    input_variables=["question", "context"],
)

In [None]:
# State of the application
class State(TypedDict):
    """Data carried through the retrieve -> generate graph."""
    # Question supplied when the graph is invoked.
    question : str
    # Documents written by the `retrieve` step.
    context : List
    # Value written by the `generate` step.
    answer : str

In [None]:
# Smoke test: query the vector store directly, outside the graph.
context = vectore_store.similarity_search("what is sensory memory ?")

In [None]:
# Show the text of the first returned document.
context[0].page_content

In [None]:
def retrieve(state: State):
    """Fetch the stored chunks that best match the question.

    Reads ``state["question"]``, runs a similarity search on the module-level
    ``vectore_store`` and returns the hits as a partial state update under
    the ``"context"`` key.
    """
    question = state["question"]
    matching_chunks = vectore_store.similarity_search(question)
    return {"context": matching_chunks}

In [None]:
def generate(state: State):
    """Answer the question from the retrieved chunks.

    Joins the ``page_content`` of every item in ``state["context"]`` with a
    blank line, fills the module-level ``prompt`` with that text and the
    question, sends the result to ``groq_llm`` and returns the reply's
    ``content`` as a partial state update under the ``"answer"`` key.
    """
    chunk_texts = (chunk.page_content for chunk in state["context"])
    joined_context = "\n\n".join(chunk_texts)
    prompt_input = {"question": state["question"], "context": joined_context}
    llm_reply = groq_llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}

In [None]:
# Wire the two steps into a linear graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
# Run the whole graph once; only the question has to be provided up front.
response = graph.invoke({"question": "What is short term memory capacity ?"})

In [None]:
# Show the answer produced by the `generate` step.
response["answer"]

Display graph

In [None]:
from IPython.display import display, Image

In [None]:
# Render the compiled graph as a Mermaid PNG and show it inline.
display(Image(graph.get_graph().draw_mermaid_png()))

Stream the graph output

In [None]:
# Stream the run with stream_mode="updates" and print every yielded item,
# each followed by a dotted separator.
updates = graph.stream(
    {"question": "What is short term memory capacity ?"},
    stream_mode="updates",
)
for update in updates:
    print(f"{update} \n\n ............ \n")

In [None]:
# Stream the run with stream_mode="messages": each yielded item is a
# (message, metadata) pair. Only the message content is printed, with "|"
# marking the boundary between consecutive items.
message_stream = graph.stream(
    {"question": "What is short term memory capacity ?"},
    stream_mode="messages",
)
for chunk, _metadata in message_stream:
    print(chunk.content, end="|")

### RAG with local documents 

In [None]:
#Imports 
import faiss
from langchain_groq                         import ChatGroq
from langchain_huggingface                  import HuggingFaceEmbeddings
from langchain.vectorstores                 import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.text_splitter                import RecursiveCharacterTextSplitter
from langchain_core.documents               import Document
from typing_extensions                      import TypedDict, List
from langchain.prompts import PromptTemplate
from langgraph.graph import StateGraph
import pdfplumber

In [None]:
#LLM
# GROQ_API_KEY is read from the environment but never set by this notebook;
# ask for it when it is missing (or empty) instead of failing with a KeyError.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Groq api key")

# Chat model used by the generation step of the book RAG graph.
groq_llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model="llama-3.1-8b-instant",
    temperature=0.5,
)

In [None]:
# Embedding model; it is handed to the FAISS store as its embedding function.
embedder = HuggingFaceEmbeddings(
    model_name ="sentence-transformers/all-MiniLM-L6-v2"
)

In [None]:
# Vector database
# Embed a throwaway string only to learn the vector length, then build an
# empty FAISS store around a flat L2 index of that dimension.
embeddings_ex = embedder.embed_query("hi")
index = faiss.IndexFlatL2(len(embeddings_ex))
vectore_store = FAISS(
    embedding_function = embedder,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [None]:
# Move the working directory one level up (presumably so the relative
# "documents/..." path opened below resolves) and print where we ended up.
# NOTE(review): the target is relative, so re-running this cell moves up one
# more level each time — run it once per kernel session.
os.chdir("..")
print(os.getcwd())

In [None]:
# Read the whole book into a single string, pages separated by a newline.
with pdfplumber.open("documents/No More Mr. Nice Guy by Robert Glover.pdf") as book:
    # extract_text() may give None for a page without extractable text in
    # some pdfplumber releases; fall back to "" so the join cannot raise
    # TypeError.
    page_texts = [page.extract_text() or "" for page in book.pages]
documents = "\n".join(page_texts)

In [None]:
# Wrap the full text in a single Document, the type split_documents() is given below.
documents_structured = Document(page_content=documents)

In [None]:
# Cut the book into chunks (chunk_size=500, chunk_overlap=100) so that each
# chunk can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 100)
documents_splitted = splitter.split_documents( [documents_structured])

In [None]:
# Add the book chunks to the vector store. The return value is bound to `_`
# so it is not echoed as the cell output.
_ = vectore_store.add_documents(documents_splitted)

In [None]:
# Smoke test: query the store directly and print each hit followed by a
# dashed separator line.
hits = vectore_store.similarity_search(query="How a person can get help from others ?")
for hit in hits:
    print(hit.page_content, "---------------", sep="\n")

In [None]:
class State(TypedDict):
    """Data carried through the retrieve -> generate graph."""
    # Question supplied when the graph is invoked.
    question : str
    # Documents written by the `retrieve` step.
    context : List
    # Value written by the `generate` step.
    answer : str

In [None]:
def retrieve(state: State):
    """Fetch the book chunks that best match the question.

    Reads ``state["question"]``, runs a similarity search on the module-level
    ``vectore_store`` and returns the hits as a partial state update under
    the ``"context"`` key.
    """
    question = state["question"]
    matching_chunks = vectore_store.similarity_search(question)
    return {"context": matching_chunks}

# Prompt template for the book RAG: it names the source book, tells the model
# to say "I don't know" rather than invent an answer, and exposes two
# placeholders, {question} and {context}.
prompt = """
    Use informations bellow extracted from No more Mr. Nice Guy book written by Robert Glover, to answer the following question : 
    {question}
    If you don't know the answer, don't make up one, simply say I don't know.
    relevant informations : 
    {context}
"""
prompt = PromptTemplate(template = prompt,
               input_variables = ["question", "context"])

def generate(state: State):
    """Answer the question from the retrieved book chunks.

    Joins the ``page_content`` of every item in ``state["context"]`` with a
    "relevant information" separator, fills the module-level ``prompt`` with
    that text and the question, and sends the result to ``groq_llm``.

    The value returned under ``"answer"`` is the object that
    ``groq_llm.invoke`` returns, not its text, even though ``State.answer``
    is annotated as ``str``; the final cell reads ``.content`` from it.
    """
    separator = "\n\n relevant information :\n"
    passages = [text.page_content for text in state["context"]]
    joined_context = separator.join(passages)
    prompt_value = prompt.invoke(
        {"question": state["question"], "context": joined_context}
    )
    return {"answer": groq_llm.invoke(prompt_value)}

In [None]:
# Build and compile the graph: START -> retrieve -> generate.
# START is not imported in this section's import cell; it comes from the
# `from langgraph.graph import StateGraph, START` cell in the web-loader
# section.
workflow = StateGraph(State)
workflow.add_sequence([retrieve, generate])
workflow.add_edge(START, "retrieve")
book_rag = workflow.compile()

In [None]:
# Run the book RAG graph once and echo the full final state as the cell output.
response = book_rag.invoke({"question": "How a person can get help from others ?"})
response 

In [None]:
# `answer` holds the model's response object here, so print its text content.
print(response["answer"].content)