<a href="https://colab.research.google.com/github/Shen-Kuo/Self-Rag_Citation-plus/blob/main/Generative_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [65]:
! pip install -U langchain_community tiktoken langchain-openai langchainhub chromadb langchain langgraph



In [66]:
import getpass
import os


def _set_env(key: str):
    if key not in os.environ:
        os.environ[key] = getpass.getpass(f"{key}:")


_set_env("OPENAI_API_KEY")

In [67]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

urls = [
    "https://en.wikipedia.org/wiki/Sun",
    "https://en.wikipedia.org/wiki/Sun_sign_astrology",
    "https://en.wikipedia.org/wiki/Solar_System",
    "https://en.wikipedia.org/wiki/Solar_flare",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)



In [68]:
# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever(search_kwargs = {"k": 10})

In [69]:
### Retrieval Grader


from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI


# Data model
class GradeDocuments(BaseModel):
    """Score for relevance check on retrieved documents."""

    score: float = Field(
        description="Relevance score of the document to the question, from 0 (least relevant) to 1 (most relevant)"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# sets output as a structure we defined
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt for retrieval gradr
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a score 0 to 1 to indicate whether the document is relevant to the question.\n
    0 being completely irrelevent to the question\n
    0.2 beieng moderately irrelevant\n
    0.5 being slightly related \n
    0.7 being related but does not completely answer the question\n
    1 being completely relevant and answers the question.
    """
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader
question = "What happens near the photosphere of the? Which part of it is cooler, the upper or lower part?"
print(f"question: {question}")
docs = retriever.get_relevant_documents(question)
for idx in range(len(docs)):
  doc_txt = docs[idx].page_content

  print(f"document: {doc_txt}")
  print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
  print("\n")

question: What happens near the photosphere of the? Which part of it is cooler, the upper or lower part?
document: The photosphere is tens to hundreds of kilometers thick, and is slightly less opaque than air on Earth. Because the upper part of the photosphere is cooler than the lower part, an image of the Sun appears brighter in the center than on the edge or limb of the solar disk, in a phenomenon known as limb darkening.[76] The spectrum of sunlight has approximately the spectrum of a black-body radiating at 5,772 K (9,930 ¬∞F),[12] interspersed with atomic absorption lines from the tenuous layers above the photosphere. The photosphere has a particle density of ~1023 m‚àí3 (about 0.37% of the particle number per volume of Earth's atmosphere at sea level). The photosphere is not fully ionized‚Äîthe extent of ionization is about 3%, leaving almost all of the hydrogen in atomic form.[79]
score=0.7


document: The photosphere is tens to hundreds of kilometers thick, and is slightly less

In [70]:
### Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"context": docs, "question": question})
print(f"question: {question}")
print(f"generation: {generation}")



question: What happens near the photosphere of the? Which part of it is cooler, the upper or lower part?
generation: Near the photosphere of the Sun, the upper part is cooler than the lower part. This temperature difference causes an image of the Sun to appear brighter in the center than on the edge or limb of the solar disk. The photosphere is slightly less opaque than air on Earth.


In [71]:
### Groundedness


# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    score: str = Field(
        description="Answer is grounded in the facts, from 0 (least relevant) to 1 (most relevant)"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
     Give a score from 0 to 1. \n
     0 meaning that the answer is not grounded and supported by the set of facts, \n
     0.2 meaning that the you are unsure of whether it is grounded or not but seems believable, \n
     0.5 meaning that the answer is slightly grounded, \n
     0.7 meaning that the answer is relaively grounded and supported by the set of facts, \n
     1 meaning that the answer is most definitely grounded and supported by the set of facts"""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader
hallucination_grader.invoke({"documents": docs, "generation": generation})

GradeHallucinations(score='0.7')

In [72]:
### Answer Grader

# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    score: str = Field(
        description="Answer addresses the question, 0 to 1"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question \n
     Give a score between 0 and 1. \n
     0 meaning that the answer does not address the question whatsoever. \n
     0.2 meaning that the answer dances around the question and does not address the question. \n
     0.5 meaning that the answer gives an okay answer but does not fully address the question. \n
     0.7 meaning that the answer given adrresses the question. But could be more complete. \n
     1 meaning that the answer completely addresses the question, and even provides extra related information.
     """

answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

answer_grader = answer_prompt | structured_llm_grader
answer_grader.invoke({"question": question, "generation": generation})

GradeAnswer(score='1')

In [73]:
### Question Re-phrasing

# LLM
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Prompt
system = """You a question re-writer that improves an input question to one that is optimized \n
     for vectorstore retrieval. Look at the initial and formulate an improved question. \n
     Here is the initial question: \n\n {question}. Improved question with no preamble: \n
     """

rewrite_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} "),
    ]
)

question_rewriter = rewrite_prompt | llm | StrOutputParser()
question_rewriter.invoke({"question": question})

'Improved question: \n\nWhat occurs near the photosphere of the sun, and is the upper or lower part cooler?'

In [83]:
### Combined

docs = retriever.get_relevant_documents(question)

questions = ["What happens near the photosphere of the? Which part of it is cooler, the upper or lower part?",
            "How does the sun affect the moon?",
            "What groundbreaking event happened to the sun recently? "
            ]

relevant_docs = []
for question in questions:
  for i in range(5):
    for idx in range(len(docs)):
      retrieval_score = retrieval_grader.invoke({"question": question, "document": docs[idx].page_content})
      if retrieval_score.score > 0.5:
        relevant_docs.append(docs[idx])

    docs = "\n\n".join(doc.page_content for doc in relevant_docs)

    generation = rag_chain.invoke({"context": docs, "question": question})

    print(f"question {i+1}: {question}")
    print(f"generation {i+1}: {generation}")

    hallucination_score = hallucination_grader.invoke({"documents": docs, "generation": generation})
    if float(hallucination_score.score) <= 0.7:
      print(f"Hallucination present, score: {hallucination_score.score}")
    answer_score = answer_grader.invoke({"question": question, "generation": generation})
    if float(answer_score.score) <= 0.7:
      print(f"Answer does not address question, score: {answer_score.score}")

    if float(hallucination_score.score)==1 and float(answer_score.score)==1:
      break

    question = question_rewriter.invoke({"question": question})
    if i == 4:
      print("no suitable docs used through retrieval")

  print(f"final question: {question}")
  print(f"final generation: {generation}")

  print("------------------------------------------------------------------------------------------------------------------------------")


question 1: What happens near the photosphere of the? Which part of it is cooler, the upper or lower part?
generation 1: Near the photosphere of the sun, the temperature drops significantly. The upper part of the photosphere is cooler than the lower part.
Hallucination present, score: 0.7
question 2: Improved question: 

What occurs near the photosphere of the sun, and is the upper or lower part cooler?
generation 2: Near the photosphere of the sun, the chromosphere occurs. The upper part of the chromosphere is cooler than the lower part.
final question: Improved question: 

What occurs near the photosphere of the sun, and is the upper or lower part cooler?
final generation: Near the photosphere of the sun, the chromosphere occurs. The upper part of the chromosphere is cooler than the lower part.
------------------------------------------------------------------------------------------------------------------------------
question 1: How does the sun affect the moon?
generation 1: The s