In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

import os 

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "langchain-tutorial"
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [3]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

urls = [
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/", 
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/", 
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/"
]   

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalization": True}

hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs
)

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split 
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 250, 
    chunk_overlap = 50
)

docs_split = text_splitter.split_documents(docs_list)

# Add to vector store
vectore_store = Chroma.from_documents(
    docs_split, 
    collection_name="rag-chroma", 
    embedding=hf_embeddings
)

retriever = vectore_store.as_retriever()

In [5]:
# Router

from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

# Data Modelc
class RouteQuery(BaseModel):
    """Route a query to relevant datasource"""

    datasource: Literal["vectore_store", "web_search"] = Field(
        ..., 
        description="Given a user question choose to route it  to either web search or a vector store."
    )

# LLM with function call
llm = ChatGroq(temperature=0)

structure_llm_router = llm.with_structured_output(RouteQuery)

# Prompt
system_prompt = """You are an expert at routing a user question to a vector store or a websearch. The vectorstore contains documents related to agents. Prompt engineering and adversarial attacks. Use the vectorstore for questions on these topics, otherwise use web-search."""

route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt), 
        ("human", "{question}")
    ]
)

question_router = route_prompt | structure_llm_router

print(question_router.invoke({
    "question": "What are different types of LLM hallucinations"
}))

datasource='web_search'


In [6]:
# Retrieval Grader

# DataModel
class GradeDocuments(BaseModel):
    """Binary score for relevance chack on retrieved documents."""

    binary_score: str = Field(
        description="Documents retrieved are relevant to the question, 'yes' or 'no'"
    )

llm = ChatGroq(temperature=0)
structure_llm_grader = llm.with_structured_output(GradeDocuments)

system_prompt = """You are a hrader assessing relevance of a retrived documents to a user question\n. If the documents contains keyword(s) or semantic meaning related to the user question. grade it as relevant. If it does not need to be a stringent test. The goal is to filter out erroneous retrievals. Give a binary score 'yes' or 'no'. score to indicate whether the document is relevant to the question or not."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "Retrieved documents: \n\n {document} \n\n relevant question: {question}") 
    ]
)

reteival_grader = grade_prompt | structure_llm_grader
question = "llm-halluncatios"

docs = retriever.get_relevant_documents(question)

doc_text = docs[1].page_content

print(reteival_grader.invoke(
    {
        "question": question, 
        "document": doc_text
    }
))

  docs = retriever.get_relevant_documents(question)


binary_score='yes'


In [7]:
# Generate 

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# prompt 
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatGroq(temperature=0)

def format_docs(docs):
    return '\n\n'.join(doc.page_content for doc in docs)

# Chain
rag_chain = prompt | llm | StrOutputParser()

generation = rag_chain.invoke(
    {
        "context": docs, 
        "question": question
    }
)

generation

"Hallucination in large language models (LLMs) refers to the model generating unfaithful, fabricated, inconsistent, or nonsensical content. The focus here is on extrinsic hallucination, where the model output is fabricated and not grounded by the provided context or world knowledge. To avoid hallucination, LLMs should be factual and acknowledge when they don't know the answer."