## Self Reflection

In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

import os
import warnings

# LangSmith tracing configuration for this tutorial project.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "langchain-tutorial"

# The API keys are expected to already be in the process environment (for
# example via the .env file loaded above). Re-assigning os.getenv(...) back
# into os.environ does nothing when the key exists and raises an opaque
# TypeError when it does not, so report missing keys explicitly instead.
_missing_keys = [
    key for key in ('LANGCHAIN_API_KEY', 'GROQ_API_KEY') if os.getenv(key) is None
]
if _missing_keys:
    warnings.warn(
        "Missing environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Add them to your .env file or export them before running the notebook.",
        stacklevel=2,
    )

In [2]:
!pip install chromadb



In [3]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma

# Blog posts that make up the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
]

# Load each URL separately (one list of documents per URL), then flatten
# the per-URL lists into a single list.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = [page_doc for page_docs in docs for page_doc in page_docs]

# Split the loaded documents with chunk_size=250 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=0)
doc_splits = text_splitter.split_documents(docs_list)

# Embedding model settings: BGE small English model, run on CPU, with
# normalize_embeddings enabled.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Index the chunks in a Chroma collection named "rag-chroma" and expose it as a retriever.
vector_store = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=hf_embeddings,
)
retriever = vector_store.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.
  from tqdm.autonotebook import tqdm, trange


In [5]:
# Retrieval Grader
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

# Data Model
class GradeDocument(BaseModel):
    """Binary score for relevance check on retrieved documents"""

    # Expected to hold 'yes' or 'no'; typed as plain `str`, so the value is not
    # restricted to those two strings by the model itself.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# LLM with structured output bound to the GradeDocument schema
llm = ChatGroq(temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocument)

# Prompt (spelling fixed: this text is sent to the model verbatim)
system_prompt = """You are a grader assessing relevance of a retrieved document to a user's question.\n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
If the document contains keyword(s) or semantic meaning related to the user's question, grade it as relevant\n
Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question or not.
"""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "Retrieved document:\n\n {document}\n\n User Question: {question}")
    ]
)

# The prompt output is piped straight into the structured grader.
retrieval_grader = grade_prompt | structured_llm_grader

# Smoke test: grade one retrieved chunk against a sample question.
question = "LLM hallucination"
# `invoke` is the Runnable entry point; `get_relevant_documents` is deprecated.
docs = retriever.invoke(question)

# Grades the second retrieved chunk (index 1); assumes at least two results came back.
doc_text = docs[1].page_content

print(retrieval_grader.invoke({
    "question": question, 
    "document": doc_text
}))



binary_score='yes'


In [6]:
# Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt: pull the "rlm/rag-prompt" template from the hub.
prompt = hub.pull("rlm/rag-prompt")

# LLM: a fresh ChatGroq instance with temperature=0. This rebinds the `llm`
# name that the retrieval-grader cell created with the same arguments.
llm = ChatGroq(temperature=0)

def format_docs(docs):
    """Join the `page_content` of every document into one string, separated by blank lines."""
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# CHAIN: prompt -> LLM -> plain string output
rag_chain = prompt | llm | StrOutputParser()

# Pass the retrieved chunks through format_docs so the prompt receives their
# text content rather than the raw list of document objects.
generation = rag_chain.invoke({
    "context": format_docs(docs),
    "question": question
})

print(generation)

Hallucination in large language models (LLMs) refers to the model generating unfaithful, fabricated, inconsistent, or nonsensical content. This response focuses on extrinsic hallucination, which occurs when the model output is fabricated and not grounded by either the provided context or world knowledge. It is differentiated from in-context hallucination, where the model output should be consistent with the source content. Extrinsic hallucinations can be caused by pre-training data issues, such as out-of-date, missing, or incorrect information, and by introducing new knowledge during the fine-tuning stage.


In [7]:
# Hallucination Grader

# Data Model
class GradeHallucinations(BaseModel):
    """Binary score for grading hallucination"""

    # Expected to hold 'yes' or 'no'. The docstring and description are kept
    # verbatim because they are presumably forwarded to the model as part of
    # the output schema (verify against the structured-output docs).
    binary_score: str = Field(description="Answer is grounded in the facts, 'yes' or 'no'")

llm = ChatGroq(temperature=0)

# Bind the GradeHallucinations schema to the model's output.
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt wording fixed: the grader assesses the LLM *generation*, and the
# quotes around 'yes' are now matched.
system_prompt = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n Give a binary score 'yes' or 'no'. 'yes' means that the answer is grounded in / supported by the set of facts."""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt), 
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}")
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader

# Supply the facts as plain text (via format_docs) rather than the raw list of
# document objects, so the grader sees the same content the answer was built from.
hallucination_grader.invoke({
    "documents": format_docs(docs), 
    "generation": generation
})

GradeHallucinations(binary_score='no')

In [None]:
# Answer Grader

class AnswerGrader(BaseModel):
    """Binary score to assess answers"""

    # Expected to hold 'yes' or 'no'. The docstring and description are kept
    # verbatim because they are presumably forwarded to the model as part of
    # the output schema (verify against the structured-output docs).
    binary_score: str = Field(description="Answer addresses the question, 'yes' or 'no'")

system_prompt = """You are a grader assessing whether an answer addresses/resolves a question\n Give a binary score 'yes' or 'no'. 'yes' means the answer resolves the problem"""

llm = ChatGroq(temperature=0)

# Bind the AnswerGrader schema under its own name. The previous code reused
# `structured_llm_grader`, which at this point is bound to the hallucination
# schema, so AnswerGrader was never applied.
structured_answer_grader = llm.with_structured_output(AnswerGrader)

answer_grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt), 
        ("human", """User question: \n\n {question} \n\n LLM Generation: {generation}""")
    ]
)

# Pipe the prompt directly into the structured grader. An extra plain `llm`
# step in between would make the grader score the model's free-form reply
# instead of the question/answer pair.
answer_grader_chain = answer_grader_prompt | structured_answer_grader

answer_grader_chain.invoke({
    "question": question, 
    "generation": generation
})