In [1]:
! pip install requests beautifulsoup4 langchain chromadb gpt4all langchain-community tiktoken typing langgraph google-search-results

Collecting typing
  Downloading typing-3.7.4.3.tar.gz (78 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: typing, google-search-results
  Building wheel for typing (pyproject.toml): started
  Building wheel for typing (pyproject.toml): finished with stat

In [2]:
import getpass
import os

# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = 'True'
os.environ["LANGCHAIN_ENDPOINT"] = 'https://api.smith.langchain.com'

# Never commit API keys to the notebook: take the key from the environment,
# and only prompt for it interactively when it is not already set.
# NOTE(review): the key that was previously hardcoded here has been exposed and should be revoked.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

In [3]:
# Model name passed as `model=` to the ChatOllama instances created below.
local_llm = 'llama3'

In [4]:
import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import GPT4AllEmbeddings

# Pages to scrape: each URL listed here is fetched and its text is indexed
# into the vector store further down in this cell.
urls = [
    "https://www.medscape.com/radiology",
    "https://pubmed.ncbi.nlm.nih.gov/",
    "https://radiopaedia.org/",
    "https://www.myesr.org/",
    "https://www.bmj.com/specialties/radiology"
]

# Function to scrape the content of a given URL
def scrape_content(url, timeout=10, headers=None):
    """
    Download a web page and wrap its text content in a Document.

    Args:
        url (str): Address of the page to fetch.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout a stalled server would block the cell forever.
        headers (dict | None): Optional HTTP headers merged over the defaults.

    Returns:
        Document | None: Document whose page_content is the page text and whose
        metadata holds the source under the "url" key, or None when the request
        fails (the error is printed, not raised).
    """
    # Some sites reject the default python-requests headers (radiopaedia.org
    # answered 406 in an earlier run of this notebook).
    # NOTE(review): whether these defaults are sufficient for that site is untested.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (compatible; rag-notebook/1.0)",
        "Accept": "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8",
    }
    if headers:
        request_headers.update(headers)

    try:
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Extract text content from the page
        text = soup.get_text()
        return Document(page_content=text, metadata={"url": url})
    except requests.exceptions.RequestException as e:
        # Best-effort scraping: report the failure and let the caller skip this URL.
        print(f"Error fetching content from {url}: {e}")
        return None

# Scrape every URL exactly once and drop the ones that failed
# (scrape_content returns None on a request error).
docs = [scraped for scraped in map(scrape_content, urls) if scraped is not None]

# Chunk the scraped pages by token count (250 tokens per chunk, no overlap)
text_spliter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=0)
docs_list = text_spliter.split_documents(docs)

# Rebuild each chunk keeping only scalar metadata values (str / int / float / bool)
filtered_docs = [
    Document(
        page_content=chunk.page_content,
        metadata={
            key: value
            for key, value in chunk.metadata.items()
            if isinstance(value, (str, int, float, bool))
        },
    )
    for chunk in docs_list
    if isinstance(chunk, Document) and hasattr(chunk, 'metadata')
]

# Embed the chunks into a Chroma collection and expose it as a retriever
vectorstore = Chroma.from_documents(
    documents=filtered_docs,
    collection_name="rag-chroma",
    embedding=GPT4AllEmbeddings(),
)
retriever = vectorstore.as_retriever()


Error fetching content from https://radiopaedia.org/: 406 Client Error: Not Acceptable for url: https://radiopaedia.org/


In [5]:
#Document grader

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM for relevance grading: JSON-formatted output, temperature 0.
llm = ChatOllama(
    model=local_llm,
    format="json",
    temperature=0,
    # NOTE(review): confirm that ChatOllama actually forwards these model_kwargs.
    model_kwargs={
        'device': 'cuda',  # Use GPU if available
        'max_new_tokens': 2048,
        }
    )

# Prompt asking for a {"score": "yes"|"no"} verdict on one document for one question.
# The assistant header must be spelled `<|start_header_id|>`; the leading `<|` was missing.
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    a document is useful to resolve a question. Give a binary score 'yes' or 'no' score to indicate
    wheater the document is useful to resolve the question. Provide the binary score as a JSON with a
    single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the document:
    \n ------- \n
    {document}
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    # The keyword is `input_variables` (plural); it lists the placeholders used in the template.
    input_variables=["question","document"]
)
# Chain: prompt -> LLM -> JSON parser (yields a dict with the 'score' key requested above)
retrival_grader = prompt | llm | JsonOutputParser()
# question = "what is radiogy"

#Run
# question = "X-ray in radiology"
# docs = retriever.invoke(question)
# doc_txt = docs[1].page_content
# print(retrival_grader.invoke({"question":question, "document":doc_txt}))

In [7]:
### Generate answer

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt for answer generation: the model receives the question and the retrieved context.
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use six sentences maximum to keep the answer concise
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    # The keyword is `input_variables` (plural) and it must name the placeholders
    # that the template actually uses: {question} and {context}.
    input_variables=["question","context"]
)

# Free-text generation LLM (no JSON format), with temperature 0.5.
llm = ChatOllama(model=local_llm, temperature=0.5)

#Post processing
def format_docs(docs):
  """Return the page_content of every document, separated by blank lines."""
  contents = [entry.page_content for entry in docs]
  return "\n\n".join(contents)

# Chain: the prompt feeds the LLM, and the LLM output is passed through StrOutputParser.
# It is invoked with a dict holding the "context" and "question" keys.
rag_chain = prompt | llm | StrOutputParser()
# generation = rag_chain.invoke({"context":docs, "question":question})

#Run
# question = "X-ray in radioogy"
# docs = retriever.invoke(question)
# generation = rag_chain.invoke({"context":docs, "question":question})
# print(generation)

In [8]:
# Search
# Never commit API keys to the notebook: take the Tavily key from the environment,
# and only prompt for it interactively when it is not already set.
# NOTE(review): the key that was previously hardcoded here has been exposed and should be revoked.
import getpass
if not os.environ.get('TAVILY_API_KEY'):
    os.environ['TAVILY_API_KEY'] = getpass.getpass("Tavily API key: ")
from langchain_community.tools.tavily_search import TavilySearchResults
# NOTE(review): confirm that `k` is honored by TavilySearchResults for limiting the result count.
web_search_tool = TavilySearchResults(k=3)

In [9]:
### Hallucination Grader

# LLM for hallucination grading: JSON-formatted output, temperature 0.
# The keyword was misspelled `temparature`, so the intended temperature was never set under the correct name.
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt asking for a {"score": "yes"|"no"} verdict on whether the answer is supported by the facts.
# The assistant header must be spelled `<|start_header_id|>`; the leading `<|` was missing.
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' score to indicate
    wheater the answer is grounded in / supported by facts. Provide the binary score as a JSON with a
    single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents}
    \n ------- \n
    Here is the answer: {generation} <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    # The keyword is `input_variables` (plural) and it must name the placeholders
    # that the template actually uses: {documents} and {generation}.
    input_variables=["documents","generation"]
)

# Chain: prompt -> LLM -> JSON parser (yields a dict with the 'score' key requested above)
hallucination_grader = prompt | llm | JsonOutputParser()
# hallucination_grader.invoke({"documents": docs, "generation": generation})

In [10]:
### Answer Grader

# LLM for answer grading: JSON-formatted output, temperature 0.
# The keyword was misspelled `temparature`, so the intended temperature was never set under the correct name.
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt asking for a {"score": "yes"|"no"} verdict on whether the answer resolves the question.
# The assistant header must be spelled `<|start_header_id|>`; the leading `<|` was missing.
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    an answer is useful to resolve a question. Give a binary score 'yes' or 'no' score to indicate
    wheater the answer is useful to resolve the question. Provide the binary score as a JSON with a
    single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the answer:
    \n ------- \n
    {generation}
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    # The keyword is `input_variables` (plural) and it must name the placeholders
    # that the template actually uses: {generation} and {question}.
    input_variables=["generation","question"]
)

# Chain: prompt -> LLM -> JSON parser (yields a dict with the 'score' key requested above)
answer_grader = prompt | llm | JsonOutputParser()
# answer_grader.invoke({"question": question, "generation": generation})

In [11]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
  """
  Shared state handed from node to node while the graph runs.

  Attributes:
    question: the question being answered
    generation: the answer produced by the LLM
    web_search: flag saying whether a web search should be added
    documents: the list of documents gathered so far
  """
  question: str
  generation: str
  web_search: str
  documents: List[str]

from langchain.schema import document

### Nodes

def retrieve(state):
  """
  Fetch documents for the question from the vector store retriever.

  Args:
    state (dict): The current graph state; its "question" entry is read.

  Returns:
    dict: "documents" holding the retriever results and "question" passed through unchanged.
  """
  print("---RETRIEVE---")
  user_question = state["question"]
  return {"documents": retriever.invoke(user_question), "question": user_question}

def grade_documents(state):
  """
  Grade each retrieved document against the question and keep the relevant ones.
  As soon as one document is graded irrelevant, the web search flag is raised.

  Args:
    state (dict): The current graph state; "question" and "documents" are read.

  Returns:
    dict: "documents" with only the relevant documents, the unchanged "question",
    and "web_search" set to "Yes" or "No".
  """
  print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
  question = state["question"]
  candidates = state["documents"]

  relevant = []
  needs_search = False
  for candidate in candidates:
    verdict = retrival_grader.invoke({"question": question, "document": candidate.page_content})
    if verdict['score'].lower() == "yes":
      print("---GRADE: DOCUMENT RELEVANT---")
      relevant.append(candidate)
      continue
    # Any other verdict counts as irrelevant and triggers the web search fallback
    print("---GRADE: DOCUMENT IRRELEVANT---")
    needs_search = True

  return {
    "documents": relevant,
    "question": question,
    "web_search": "Yes" if needs_search else "No",
  }

def generate(state):
  """
  Produce an answer by running the RAG chain over the documents held in the state.

  Args:
    state (dict): The current graph state; "question" and "documents" are read.

  Returns:
    dict: The unchanged "documents" and "question" plus "generation", the chain output.
  """
  print("---GENARATE---")
  question, documents = state["question"], state["documents"]

  # The documents are handed to the chain as-is under the "context" key
  answer = rag_chain.invoke({"context": documents, "question": question})
  return {"documents": documents, "question": question, "generation": answer}

def web_search(state):
  """
  Run a web search for the question and add the results to the documents.

  Args:
    state (dict): The current graph state; "question" and "documents" are read.

  Returns:
    dict: "documents" with one extra Document that holds the concatenated
    search results, and the unchanged "question".
  """

  print("---WEB SEARCH---")
  question = state["question"]
  documents = state["documents"]

  # Web search: merge the "content" of every hit into a single Document
  docs = web_search_tool.invoke({"query": question})
  web_results = "\n".join([d["content"] for d in docs])
  web_results = Document(page_content=web_results)
  if documents is not None:
    documents.append(web_results)
  else:
    # Previously assigned to the misspelled name `docuemnts`, so None was returned here
    documents = [web_results]

  return {"documents": documents, "question": question}

### Conditional Nodes

def decide_to_generate(state):
  """
  Choose the next node after document grading: web search or answer generation.

  Only the "web_search" flag is consulted; the previously unused reads of
  "question" and "documents" were removed so those keys are no longer required.

  Args:
    state (dict): The current graph state; must contain "web_search".

  Returns:
    str: "websearch" when the flag equals "Yes", otherwise "generate".
  """

  print("---ASSESS GRADED DOCUMENTS---")

  if state["web_search"] == "Yes":
    print("---DESION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION WEB SEARCH---")
    return "websearch"

  print("---DESION: GENERATE---")
  return "generate"

def grade_generation_v_documents_and_question(state):
  """
  Check the generated answer twice: is it grounded in the documents, and does
  it address the question?

  Grader verdicts are compared case-insensitively (as grade_documents does),
  so a "Yes" reply from the model is not treated as a failure.

  Args:
    state (dict): The current graph state; "question", "documents" and
      "generation" are read.

  Returns:
    str: "useful" when the answer is grounded and addresses the question,
    "not useful" when it is grounded but does not address the question,
    "not supported" when it is not grounded in the documents.
  """

  print("---CHECK HALLUCINATION---")
  question = state["question"]
  documents = state["documents"]
  generation = state["generation"]

  # First check: is the answer supported by the documents?
  score = hallucination_grader.invoke({"documents": documents, "generation": generation})
  grounded = str(score['score']).strip().lower() == "yes"

  if not grounded:
    print("---DECISION: generation IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
    return "not supported"

  print("---DECISION: generation IS GROUNDED IN DOCUMENTS---")
  print("---GRADE generation vs QUESTION---")
  # Second check: does the answer resolve the question?
  score = answer_grader.invoke({"question": question, "generation": generation})
  if str(score['score']).strip().lower() == "yes":
    print("---DECISION: GEANRATION ADDRESSES QUESTION---")
    return "useful"

  print("---DECISION: generation DOSE NOT ADDRESS QUESTION---")
  return "not useful"

from langgraph.graph import END, StateGraph
workflow = StateGraph(GraphState)

# Register every node under the name used by the edges of the graph
for node_name, node_fn in (
    ("websearch", web_search),
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("generate", generate),
):
    workflow.add_node(node_name, node_fn)



In [12]:
# Build graph: wire the nodes registered on `workflow` into the RAG flow.
# First, try to clear an existing start point and its edges (apparently meant
# to let this cell be re-run on the same `workflow` object).
start_node = '__start__'

# If a path from `__start__` exists, remove it.
# NOTE(review): confirm that StateGraph exposes `remove_edge` / `remove_node` and that
# `workflow.edges` supports `.keys()`; this branch only runs when '__start__' is in
# `workflow.nodes`, so errors here would not show up on a fresh kernel.
if start_node in workflow.nodes:
    connected_edges = list(workflow.edges.keys())
    for edge in connected_edges:
        if edge[0] == start_node:
            workflow.remove_edge(edge[0], edge[1])
    workflow.remove_node(start_node)

# Set new entry point and build the workflow
workflow.set_entry_point("retrieve") # "retrieve" is the starting node
workflow.add_edge("retrieve", "grade_documents")
# After grading, decide_to_generate returns "websearch" or "generate";
# the mapping sends each return value to the node of the same name.
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate"
    }
)
workflow.add_edge("websearch", "generate")
# After generation, grade_generation_v_documents_and_question picks the route:
# "not supported" goes back to "generate", "not useful" goes to "websearch",
# and "useful" ends the run.
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch"
    }
)


In [13]:
# Compile the graph into a runnable app
app = workflow.compile()

# Test: stream the run and report each node as it finishes
from pprint import pprint
inputs = {"question": "who won the last test match between Sri Lanka and India"}
value = None  # output of the last node seen; stays None if the stream yields nothing
for output in app.stream(inputs):
  for key, value in output.items():
    pprint(f"Finished running: {key}:")

# Print the final answer only when a node actually produced one, instead of
# relying on the loop variable being defined after the loop.
if value is not None and "generation" in value:
  print(value["generation"])
else:
  print("No generation was produced.")


---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---


ValueError: Ollama call failed with status code 500. Details: {"error":"model requires more system memory (4.1 GiB) than is available (3.9 GiB)"}