In [28]:
LLM_name = "llama3"

In [29]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate


from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

from langchain_community.tools.tavily_search import TavilySearchResults


In [30]:
urls = [
    "https://en.wikipedia.org/wiki/The_House_in_Fata_Morgana",
    "https://en.wikipedia.org/wiki/Muv-Luv",
    "https://en.wikipedia.org/wiki/YU-NO:_A_Girl_Who_Chants_Love_at_the_Bound_of_this_World",
]

#Load documents from the web
documents = [WebBaseLoader(url).load() for url in urls]


#Flatten list
documents_list = [items for sublist in documents for items in sublist]

#Use RecursiveCharacterTextSplitter to split the text into chunks using the TikToken encoder
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,
    chunk_overlap=0,
)

#Split the documents into chunks
document_chunks = text_splitter.split_documents(documents_list)

#Create a vector store
vectorstore = Chroma.from_documents(
    documents=document_chunks,
    collection_name="rag-chroma",
    embedding=GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf"),
)

retriever = vectorstore.as_retriever()


In [31]:
#Temparature is how much the model will answer creatively
llm = ChatOllama(model=LLM_name, format="json", temperature=0)

#Promt to ask the user to grade the relevance of a document to a question
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)

# Pipe | instantiates promt outputs it to the LLM model and then parses the output
retrieval_grader = prompt | llm | JsonOutputParser()
question = "House in Fata Morgana"
documents = retriever.invoke(question)
doc_text = documents[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_text}))


{'score': 'yes'}


In [41]:
#Normal generic prompt for llama3
llm = ChatOllama(model=LLM_name, temperature=0)

prompt = PromptTemplate(
    template="""<|eof_id|><|start_header_id|>user<|end_header_id|> {question} <|eof_id|><|start_header_id|>assistant<|end_header_id>""",
    input_variables=["question"],
)

normalLLm = prompt | llm | StrOutputParser()

print(normalLLm.invoke({"question": "How tall is the Eiffel Tower?"}))

The Eiffel Tower stands at an impressive height of 324 meters (1,063 feet) above ground level.


In [42]:
#This section just checks if the LLM can answer a question only using the retrieved documents
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "document"],
)

llm = ChatOllama(model=LLM_name, temperature=0)


rag_chain = prompt | llm | StrOutputParser()

question = "How tall is the Eiffel Tower?"

documents = retriever.invoke(question)
print(documents)
generation = rag_chain.invoke({"context": documents, "question": question})
print(generation)


[Document(page_content='Retrieved from "https://en.wikipedia.org/w/index.php?title=The_House_in_Fata_Morgana&oldid=1222610050"', metadata={'language': 'en', 'source': 'https://en.wikipedia.org/wiki/The_House_in_Fata_Morgana', 'title': 'The House in Fata Morgana - Wikipedia'}), Document(page_content='Retrieved from "https://en.wikipedia.org/w/index.php?title=The_House_in_Fata_Morgana&oldid=1222610050"', metadata={'language': 'en', 'source': 'https://en.wikipedia.org/wiki/The_House_in_Fata_Morgana', 'title': 'The House in Fata Morgana - Wikipedia'}), Document(page_content='On a nearby beach, there is a tower-like rock formation going by the names Sword Cape and Triangle Mountain, at the foot of which stand two rocks dated as 8000 years old with unknown, 400 years old writing on them. Geo Technics, the company Ayumi works for, is occupying the beach as a construction survey site, but the workers are constantly injured by inexplicable lightning strikes. Kanna habitually warns people to sta

In [33]:
# Hallucination Grader

llm = ChatOllama(model=LLM_name, format="json",temperature=0)

prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"documents": documents, "generation": generation})

{'score': 'no'}

In [34]:
# Answer Grader

llm = ChatOllama(model=LLM_name, format="json",temperature=0)

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"question": question, "generation": generation})

{'score': 'yes'}

In [54]:
from typing_extensions import TypedDict
from typing import List
from langchain_core.documents import Document

### State


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
    """

    question: str
    generation: str
    documents: List[str]
    generationNormal: bool


### Nodes


def retrieve(state):
    """
    Retrieve documents from vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}


def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
        
    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}

def generate_normal(state):
    """
    Generate answer using normal LLM

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    
    # Normal generation
    generation = normalLLm.invoke({"question": question})
    return {"documents": documents, "question": question, "generation": generation}

def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score["score"]
        # Document relevant
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        # Document not relevant
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    return {"documents": filtered_docs, "question": question}



### Conditional edge




def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    question = state["question"]
    filtered_documents = state["documents"]

    # We have relevant documents, so generate answer
    print("---DECISION: GENERATE---")
    return "generate"


### Conditional edge


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score["score"]

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        grade = score["score"]
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not useful"


from langgraph.graph import END, StateGraph

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # retrieve
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)  # generate
workflow.add_node("generate_normal", generate_normal)  # generate


In [55]:
# Build graph
workflow.set_entry_point("retrieve")

workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "generate": "generate",
    },
)
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "generate_normal",
    },
)

workflow.add_conditional_edges(
    "generate_normal",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "generate_normal",
    },
)

In [57]:


# Compile
app = workflow.compile()

# Test
from pprint import pprint
question = "Who is Michel"
inputs = {"question": question}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])



---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'
('Michel is a character from the story "The House in Fata Morgana". He was '
 'originally named Michelle and assigned female at birth, but he renamed '
 'himself Michel after puberty caused his voice to deepen and his body to '
 'become more masculine.')
