In [12]:
import os 
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Keep the key in a notebook-level name so later cells can pass it explicitly,
# and fail fast with a readable message: assigning None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["GROQ_API_KEY"] = groq_api_key


In [6]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model shared by the vector store built later in the notebook.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [16]:
# build index
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq


In [9]:
# Source articles that make up the knowledge base.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load: each URL yields its own list of documents; flatten them into one list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [page for loaded in docs for page in loaded]

# Split: chunk sizes are measured in tiktoken (BPE) tokens, with no overlap.
# NOTE(review): all-MiniLM-L6-v2 is documented to truncate long inputs
# (256 word pieces by default), so 500-token chunks may be only partially
# embedded -- confirm and consider a smaller chunk_size.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=0,
)
doc_splits = text_splitter.split_documents(docs_list)

# Index: embed every chunk into an in-memory FAISS store and expose it
# through the retriever interface.
vectorstore = FAISS.from_documents(doc_splits, embeddings)
retriever = vectorstore.as_retriever()


In [15]:
# s=[
#     [1,2,3],[4,5,6],[7,8,9]
# ]
# q=[l for m in s for l in m]
# q
# [1,2,3,4,5,6,7,8,9]

In [14]:
## ROUTER
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel,Field

class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # A real docstring (rather than a `#` comment) is part of the model's
    # schema, so it is available as the schema description when this class is
    # used for structured output.
    datasource: Literal["vectorstore","wiki_search"]=Field(
        ...,  # Ellipsis marks the field as required (no default).
        description="Given a user question choose to route it to wikipedia or a vectorstore.",
    )


In [21]:
# Read the API key from the environment rather than from a notebook variable,
# so this cell also works on a freshly restarted kernel.
llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="Gemma2-9b-It",
)

# Constrain the model's reply to the RouteQuery schema, i.e. a choice between
# the vectorstore and wiki search.
structured_llm_router = llm.with_structured_output(RouteQuery)

In [19]:
# Router prompt: tells the model which topics the vectorstore covers so it can
# pick a datasource. ("vectorstore" was previously misspelled in this prompt.)
system='''you are an expert at routing user question to vectorstore or wikipedia.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use wiki-search.
'''
route_prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system),
        ("human","{question}"),
    ]
)

# Chain: fill the prompt with the question, then get a structured routing
# decision from the router LLM.
question_router=route_prompt | structured_llm_router


In [20]:
# Smoke-test the router with one general-knowledge question and one question
# about a topic covered by the indexed articles.
for sample_question in (
    "Who is DR sanduk ruit",
    "What is prompt engineering?",
):
    print(question_router.invoke({"question": sample_question}))

datasource='wiki_search'
datasource='vectorstore'


In [22]:
## Retrieval Grader


The "retrieval grader" is crucial for ensuring the relevance of retrieved documents to the user's question. It filters out irrelevant or erroneous results before generating an answer.

In [28]:
class GradeDocuments(BaseModel):
    """Binary score for the relevance check on retrieved documents."""

    binary_score: str =Field(
        ...,  # Required, stated explicitly to match RouteQuery's field style.
        description="Documents are Relevant to the question 'yes' or 'no' "
    )

# Grader prompt: asks for a lenient yes/no relevance judgement on one document.
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_prompt=ChatPromptTemplate.from_messages([
    ("system", system),
    ("human","Retrieved  document : \n \n {document} \n \n User question :{question}")
])

# Chain: fill the prompt, then get a structured GradeDocuments verdict.
structured_llm_grader=llm.with_structured_output(GradeDocuments)
retreival_grader=grade_prompt | structured_llm_grader

# Sample off-topic question used to exercise the grader.
question="Who is sharukhan"
# `invoke` is the current retriever entry point; `get_relevant_documents`
# is deprecated in recent LangChain releases.
docs=retriever.invoke(question)


In [29]:
# Grade the second retrieved document against the sample question.
doc_txt = docs[1].page_content

print(
    retreival_grader.invoke(dict(document=doc_txt, question=question))
)

binary_score='no'


In [None]:
### Generate