## Setup

In [47]:
import os

# LangSmith tracing configuration. The variable names must be spelled exactly
# as LangChain reads them; a misspelled name is simply never looked up, which
# leaves tracing disabled without any error.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# API keys: 'xxxx' is only a placeholder. setdefault keeps a real key that is
# already exported in the environment instead of overwriting it.
os.environ.setdefault('LANGCHAIN_API_KEY', 'xxxx')
os.environ.setdefault('FIRECRAWL_API_KEY', 'xxxx')
os.environ.setdefault('TAVILY_API_KEY', 'xxxx')

In [58]:
# Launch the Ollama server from a shell. The recorded output of this cell
# ("address already in use") shows that a server was already listening on
# 127.0.0.1:11434 the last time it ran.
# NOTE(review): `ollama serve` presumably runs in the foreground, so on a
# machine without a running server this cell may never return — consider
# starting the server outside the notebook. Confirm before relying on Run All.
! ollama serve
# Model name passed to every ChatOllama instance created in this notebook.
local_llm = 'llama3'

Error: listen tcp 127.0.0.1:11434: bind: address already in use


## Retrieve Documents

- Use FireCrawl to scrape the contents of the URLs
- Filter out complex metadata, as FireCrawl returns some metadata values as arrays by default
- Create a vector database using GPT4AllEmbeddings and the filtered documents

In [49]:
# Index
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.document_loaders import FireCrawlLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.docstore.document import Document

# Add as many URLs as you want
urls = [
    "https://research.google/blog/advancing-personal-health-and-wellness-insights-with-ai/"
]

# Scrape every URL with FireCrawl. The key is read from the environment
# (set in the Setup cell) so the secret lives in exactly one place.
docs = [
    FireCrawlLoader(
        api_key=os.environ["FIRECRAWL_API_KEY"], url=url, mode="scrape"
    ).load()
    for url in urls
]

# Each loader returns a list of documents; flatten the list of lists
docs_list = [item for sublist in docs for item in sublist]

# Split documents into token-sized chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Keep only metadata values of type str, int, float or bool; list/dict values
# (which FireCrawl can return) are dropped before the chunks are stored.
filtered_docs = filter_complex_metadata(doc_splits)

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=filtered_docs,
    collection_name="rag-chroma",
    embedding=GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf"),
)
retriever = vectorstore.as_retriever()

## Grade Documents
Llama3 has a specific prompt style we need to follow.
[Llama3 Prompt Style doc](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3)

This is our first checkpoint: determine whether each retrieved document is relevant to the user's question.



In [50]:
### Retrieval Grader
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM in JSON mode so the grader's answer can be parsed by JsonOutputParser
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Llama 3 chat format: the special tokens must be spelled exactly
# (<|begin_of_text|>, <|start_header_id|>, <|end_header_id|>, <|eot_id|>),
# otherwise the model sees them as plain text instead of role markers.
# NOTE: the template variable is spelled "docunment" on purpose here; the
# grade_document node invokes this chain with that exact key.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance
    of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {docunment} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "docunment"],
)

retrieval_grader = prompt | llm | JsonOutputParser()

# Smoke test: grade the second retrieved chunk (needs at least two hits)
question = "how to use llm for healthcare"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "docunment": doc_txt}))

{'score': 'yes'}


## Generate Answer


In [51]:
### Generate

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt for answer generation in Llama 3 chat format. The template consumes
# exactly two variables, {question} and {context}, and input_variables must
# list those same names.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)

# Free-text generation, so no JSON output mode on this model instance
llm = ChatOllama(model=local_llm, temperature=0)

# Post-processing
def format_docs(docs):
    """Return the page_content of every item in docs, separated by blank lines."""
    page_texts = (entry.page_content for entry in docs)
    return "\n\n".join(page_texts)

# Chain: prompt -> local LLM -> plain string
rag_chain = prompt | llm | StrOutputParser()

# Run
question = "how to use llm for healthcare"
docs = retriever.invoke(question)
# Pass the chunk texts joined by format_docs rather than the raw Document
# list, whose repr (including metadata) would otherwise end up in the prompt.
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)

Based on the provided context, it seems that Large Language Models (LLMs) can be used in healthcare by analyzing a broader range of health information, including medical records, nutrition data, and user-provided journal entries. This can potentially offer deeper insights and more effective guidance for personal health management as LLMs continue to advance.


## Web Search via Tavily

In [52]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults
# Limit the search to three results. The tool's field is `max_results`; an
# unknown keyword such as `k` is not a field of the tool, so the limit of
# three would not be applied.
web_search_tool = TavilySearchResults(max_results=3)

## Answer and Hallucination Graders

In [53]:
### Answer Grader
# LLM in JSON mode. The variable must be named `llm` (lower-case) because the
# chain below is built from `llm`; a differently-cased name would leave the
# chain using whatever `llm` an earlier cell defined.
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt (Llama 3 chat format; variables: {generation}, {question})
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    an answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the
    answer is useful to resolve a question. Provide the binary score as a JSON with a single key 'score'
    and no preamble or explanation.<|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the answer:
    \n ------ \n
    {generation}
    \n ------ \n
    Here is the question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"question": question, "generation": generation})

{'score': 'yes'}

In [54]:
### Hallucination Grader

# LLM in JSON mode so the verdict can be parsed by JsonOutputParser
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt (Llama 3 chat format). The template consumes {documents} and
# {generation}; input_variables must list those same names.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' to indicate whether the
    answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a single key 'score'
    and no preamble or explanation.<|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------ \n
    {documents}
    \n ------ \n
    Here is the answer: {generation} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"documents": docs, "generation": generation})

{'score': 'yes'}

## LangGraph - Set up state & nodes

In [55]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: the user question
        generation: LLM generation
        web_search: whether to add search ("Yes" / "No" flag set by the grading node)
        documents: list of documents
    """
    question: str
    generation: str
    web_search: str
    # NOTE(review): annotated as List[str], but the nodes in this file read
    # `.page_content` from these items and append Document objects, so the
    # list presumably holds Document instances at runtime — confirm and
    # consider tightening the annotation.
    documents: List[str]

from langchain.schema import Document

### Nodes

def retrieve(state):
    """
    Look up documents for the current question in the vector store.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        state (dict): The question together with a "documents" key holding
        whatever the retriever returned for it
    """
    print("----RETRIEVE----")
    user_question = state["question"]
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
    }


def grade_document(state):
    """
    Determines whether the retrieved documents are relevant to the question.
    If any document is not relevant (or there are no relevant documents at
    all), a flag is set so that a web search is run.

    Args:
        state (dict): The current graph state; reads "question" and "documents"

    Returns:
        state (dict): The question, only the documents graded relevant, and
        the "web_search" flag ("Yes" or "No")
    """

    print("----CHECK DOCUMENT RELEVANCE TO THE QUESTION----")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "docunment": d.page_content})
        # The grader is an LLM: tolerate a missing key, odd casing or a
        # non-dict payload and treat anything that is not "yes" as irrelevant.
        raw_grade = score.get("score", "no") if isinstance(score, dict) else "no"
        if str(raw_grade).strip().lower() == "yes":
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            # The document is dropped and a web search is requested
            web_search = "Yes"

    # Nothing relevant to generate from (e.g. empty retrieval): search the web
    if not filtered_docs:
        web_search = "Yes"

    return {"documents": filtered_docs, "question": question, "web_search": web_search}


def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state; reads "question" and "documents"

    Returns:
        state (dict): The question and documents unchanged, plus a new
        "generation" key that contains the LLM generation
    """
    print("----GENERATE----")
    question = state["question"]
    documents = state["documents"]

    # RAG generation. The prompt receives the joined chunk texts (via
    # format_docs) rather than the repr of the Document list; the state
    # keeps the original document objects for the downstream graders.
    generation = rag_chain.invoke({"context": format_docs(documents), "question": question})
    return {"documents": documents, "question": question, "generation": generation}


def web_search(state):
    """
    Web search based on the question

    Args:
        state (dict): The current graph state; reads "question" and,
        if present, "documents"

    Returns:
        state (dict): The question and the documents with the web results
        appended as one extra Document
    """

    print("----WEB SEARCH----")
    question = state["question"]
    # Copy instead of appending in place so the list held by the incoming
    # state is not mutated; a missing or None entry becomes an empty list.
    documents = list(state.get("documents") or [])

    # Web search
    docs = web_search_tool.invoke({"query": question})
    # Presumably a list of dicts with a "content" field; anything else
    # (e.g. an error string returned by the tool) is skipped, not indexed.
    contents = [d["content"] for d in docs if isinstance(d, dict) and "content" in d]
    if contents:
        documents.append(Document(page_content="\n".join(contents)))
    return {"documents": documents, "question": question}


### Conditional edge (Routing)

def decide_to_generate(state):
    """
    Decide whether to generate an answer, or add web search first

    Args:
        state (dict): The current graph state; only "web_search" is read

    Returns:
        str: "websearch" when the web_search flag is "yes" (any casing),
        otherwise "generate"
    """
    print("----ASSESS GRADED DOCUMENTS----")

    if str(state["web_search"]).strip().lower() == "yes":
        # The grading node raises this flag as soon as one document is
        # judged irrelevant, so supplement the context with a web search.
        print("---DECISION: NOT ALL DOCUMENTS ARE RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"

    # Every retrieved document was relevant, so generate the answer
    print("---DECISION: GENERATE---")
    return "generate"
    
### Conditional edge

def _score_is_yes(score):
    """Return True when a grader's parsed output carries a 'yes' score (any casing)."""
    raw = score.get("score", "") if isinstance(score, dict) else ""
    return str(raw).strip().lower() == "yes"


def grade_generation_v_documents_and_question(state):
    """
    Check the generation against the documents (hallucination check) and,
    if it is grounded, against the question (usefulness check).

    Args:
        state (dict): The current graph state; reads "question",
        "documents" and "generation"

    Returns:
        str: "useful" when the answer is grounded and addresses the question,
        "not useful" when it is grounded but does not address the question,
        "not supported" when it is not grounded in the documents
    """
    print("----CHECK HALLUCINATIONS----")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})

    # Check hallucinations
    if not _score_is_yes(score):
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check the question-answer pair
    print("---CHECK GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    if _score_is_yes(score):
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


# Backward-compatible alias: this router was previously defined under the
# name `grade_documents`, while the graph wiring refers to it as
# `grade_generation_v_documents_and_question`.
grade_documents = grade_generation_v_documents_and_question
    
from langgraph.graph import END, StateGraph
# Graph builder whose state schema is the GraphState TypedDict
workflow = StateGraph(GraphState)

# Define the nodes: each call registers a node name together with the
# function that runs for it. Note that the node named "grade_documents"
# runs the function `grade_document` (singular).
workflow.add_node("websearch", web_search) # web search
workflow.add_node("retrieve", retrieve) # retrieve documents
workflow.add_node("grade_documents", grade_document) # grade documents
workflow.add_node("generate", generate) # generate answer

In [56]:
# Build graph
# Flow: retrieve -> grade_documents -> (websearch ->) generate -> END / retry
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
# After grading, decide_to_generate returns "websearch" or "generate";
# the mapping below turns that return value into the next node.
workflow.add_conditional_edges(
    "grade_documents", 
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate",
    },
)
workflow.add_edge("websearch", "generate")
# After generation, the router's return value picks the next step:
# "not supported" re-runs generate, "useful" ends the run, and
# "not useful" goes to websearch.
# NOTE(review): `grade_generation_v_documents_and_question` must already be
# defined in the kernel under exactly this name when this cell runs — confirm.
# NOTE(review): "not supported" loops back to "generate" with no visible retry
# cap in this cell — confirm a recursion limit applies.
workflow.add_conditional_edges(
    "generate", 
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)


In [57]:
# Compile
app = workflow.compile()

# Test
from pprint import pprint

inputs = {"question": "how to use llm for healthcare"}

# Track the latest node update explicitly instead of relying on the loop
# variable still being defined after the loop (it would not be if the
# stream yielded nothing).
final_state = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        final_state = value

if final_state and "generation" in final_state:
    print(final_state["generation"])
else:
    print("No generation was produced.")


----RETRIEVE----
'Finished running: retrieve:'
----CHECK DOCUMENT RELEVANCE TO THE QUESTION----
----ASSESS GRADED DOCUMENTS----
---DECISION: GENERATE---
'Finished running: grade_documents:'
----GENERATE----
----CHECK HALLUCINATIONS----
---DECISION: GENERATION IS GROUNDED IN DOCUMNETS---
---CHECK GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'
Based on the provided context, it seems that Large Language Models (LLMs) can be used in healthcare by analyzing a broader range of health information, including medical records, nutrition data, and user-provided journal entries. This can potentially offer deeper insights and more effective guidance for personal health management as LLMs continue to advance.
