In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

import os 

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "langchain-tutorial"
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

url = "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/"

loader = WebBaseLoader(url)

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size = 500, 
    chunk_overlap = 100
)

all_splits = text_splitter.split_documents(docs)

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"nodmalization": True}

hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs
)

vectorstore = FAISS.from_documents(
    documents=all_splits, 
    embedding=hf_embeddings
)

retriever = vectorstore.as_retriever(
    search_kwargs={
        "k": 3
    }
)

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Retrieval Grader
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="llama3-70b-8192", 
    temperature=0
)

prompt = PromptTemplate(
    template="""You are a grader assessing relevance of a retrieved document to a user question\n
    Here is the retireved document: \n\n {document} \n\n
    If the document contains keywords related to the user question. grade it as relevant\n
    It does not need to be a stringgent test. The goal is to filter out erroneous retrievals.\n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question\n
    provide the binary score as a JSON with a single key 'score' and no premable or explanation and no extra text.""", 
    input_variables=["question", "document"]
)

retrieval_grader = prompt | llm | JsonOutputParser()
question = "Tell me about LLM hallucinations"
docs = retriever.get_relevant_documents(question)

doc_Text = docs[1].page_content

print(retrieval_grader.invoke({
    "question": question, 
    "document": doc_Text
}))

OutputParserException: Invalid json output: Please provide the user question, and I'll assess the relevance of the retrieved document.

(Note: I'll respond with a JSON object containing a single key 'score' with a value of 'yes' or 'no')
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE