<a href="https://colab.research.google.com/github/ChowchowWorks/PN_RagPipeline/blob/main/Rag_Pipeline_Prototype_Version_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 1: Setting up Environment

In [None]:
from IPython import get_ipython
from IPython.display import display
import os

!pip install "transformers==4.49.0"
!pip install -U langchain langchain-community langchainhub langchain-huggingface tiktoken langchain-cohere
!pip install -U chromadb sentence-transformers
!pip install -U pypdf
!pip install -U ragatouille


# Credentials and client settings are exposed to the rest of the notebook through
# environment variables. HUGGINGFACEHUB_API_TOKEN is read when the InferenceClient
# is created and CO_API_KEY is read when the Command R chat model is created.
# NOTE(review): "API_KEY" looks like a placeholder — supply real keys at runtime
# (e.g. getpass or a secrets manager) and never commit real values to the notebook.
os.environ['LANGCHAIN_API_KEY'] = "API_KEY"
os.environ['LANGSMITH_API_KEY'] = "API_KEY"
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "API_KEY"
os.environ['CO_API_KEY'] = "API_KEY"
# Custom user-agent string made available to libraries that read USER_AGENT.
os.environ['USER_AGENT'] = 'MyColabApp/1.0 (Python/3.9; GoogleColab)'

# Section 2: Loading Documents

In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# This test version reads every PDF found in a single directory.
PDF_SOURCE_DIR = "/content/RAG tester"

loader = PyPDFDirectoryLoader(PDF_SOURCE_DIR)
documents = loader.load()

# Section 3: Indexing using ColBERT

(a) Setting Up RAGatouille and Index

In [None]:
from ragatouille import RAGPretrainedModel

# Load the pretrained ColBERT checkpoint that performs indexing and retrieval.
RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

# Index the page text of every loaded document under the "Behavioral" index name;
# documents are split, with a maximum document length of 180.
page_texts = [doc.page_content for doc in documents]
RAG.index(
    collection=page_texts,
    index_name="Behavioral",
    max_document_length=180,
    split_documents=True,
)

(b) Set up the retrieval process

In [None]:
# Expose the "Behavioral" index as a LangChain retriever, requesting k=5 results per query.
retriever = RAG.as_langchain_retriever(
    index_name="Behavioral",
    k=5,
)

# Section 4: Set up the Generator

In [None]:
from huggingface_hub import InferenceClient
# Hugging Face inference client for the Mistral instruct model, authenticated
# with the token stored in the HUGGINGFACEHUB_API_TOKEN environment variable.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.environ['HUGGINGFACEHUB_API_TOKEN'],
)

from langchain_core.runnables import Runnable

class HuggingFaceChatRunnable(Runnable):
    """Runnable that renders a prompt template and sends it to a chat-completion client.

    The rendered prompt is sent as a single "user" message; the text of the
    first returned choice is the result.
    """

    def __init__(self, client, prompt_template, temperature, max_tokens):
        """Store the client, the template and the sampling settings.

        Args:
            client: object exposing ``chat_completion(messages=..., temperature=..., max_tokens=...)``.
            prompt_template: object exposing ``format(**inputs)`` (a plain string also works).
            temperature: sampling temperature forwarded to the client.
            max_tokens: generation cap forwarded to the client.
        """
        self.client, self.prompt_template = client, prompt_template
        self.temperature, self.max_tokens = temperature, max_tokens

    def invoke(self, inputs: dict, config: dict = None) -> str:
        """Fill the template with ``inputs`` and return the model's reply text.

        ``config`` is accepted for interface compatibility and is not used.
        """
        rendered_prompt = self.prompt_template.format(**inputs)
        user_turn = {"role": "user", "content": rendered_prompt}

        completion = self.client.chat_completion(
            messages=[user_turn],
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        first_choice = completion.choices[0]
        return first_choice.message["content"]

# Section 5: Query Analysis (Routing)

In [None]:
from typing import Literal
from langchain.prompts import ChatPromptTemplate

# System instruction for the router: the model must answer with exactly one of
# the two destination labels named in the text.
system = "You are an expert at routing user questions to a vectorstore or a Command R chatbot. The vectorstore contains documents related to behavioral economics, behavioral sciences and psychology by Daniel Kahneman, Richard Thaler and Cass R Sunstein. Use the vectorstore for questions related to these topics, otherwise route the question to the Command R chatbot. Your answer should be literally be either CommandR or vectorstore. Do not output anything else."

# Pair the fixed system instruction with the user's question.
route_prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Temperature 0 with a 5-token cap, since only a short label is expected back.
analysis_router = HuggingFaceChatRunnable(
    client=client,
    prompt_template=route_prompt,
    temperature=0,
    max_tokens=5,
)

# Section 6: Grading Documents

In [None]:
# System instruction for the relevance grader: a lenient yes/no check on
# whether a retrieved document relates to the question.
grading = "You are an expert at determining whether a document is related to a user question. If the document contains keyword(s) or contain semantic meaning that is related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. Do not output anything else."

# The human turn carries the document text followed by the question.
grade_prompt = ChatPromptTemplate.from_messages([
    ("system", grading),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
])

# Temperature 0 with a 5-token cap, since only 'yes' or 'no' is expected.
grader = HuggingFaceChatRunnable(
    client=client,
    prompt_template=grade_prompt,
    temperature=0,
    max_tokens=5,
)

#question = "Policy making"
#doc1 = retriever.invoke(question)
# print(grader.invoke({"document": doc1[2], "question": question}))

# Section 7: Intent Detection

(a) Few-shot examples

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import FewShotChatMessagePromptTemplate

# Few-shot examples: one sample question for each of five intent labels.
examples = [
    {"input": sample_question, "output": intent_label}
    for sample_question, intent_label in (
        ("What is creatine?", "DEFINE"),
        ("Why do athletes take protein after workouts?", "EXPLAIN"),
        ("How do I calculate my calorie needs?", "PROCEDURE"),
        ("Should I take whey or casein protein?", "COMPARISON"),
        ("What is the best way to embark on my weight loss journey?", "ADVICE"),
    )
]

# Each example is rendered as a human turn followed by the AI's label.
example_prompt = ChatPromptTemplate.from_messages([("human", "{input}"), ("ai", "{output}")])

few_shot_examples = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

(b) Intent detection prompt

In [None]:
# System text listing the six intent labels the classifier may return.
_INTENT_SYSTEM_TEXT = """You are an intent classifier for the field of interest in the query.
Given a question, classify it into one of the following intents:
- DEFINE: Asking for a definition or description
- EXPLAIN: Asking for reasoning or why something is the case
- PROCEDURE: Asking for how-to or steps
- ADVICE: Asking for personalized or practical suggestions
- COMPARISON: Asking to compare options
- GENERAL: Anything else
Return only the intent, nothing else.
Here are a few examples:"""

# Message order: system instruction, the few-shot examples, then the new question.
intent_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _INTENT_SYSTEM_TEXT),
        few_shot_examples,
        ("user", "{question}"),
    ]
)

(c) Intent detector

In [None]:
# Intent classifier: temperature 0.0 with a 10-token cap, since only a label is expected.
intent_router = HuggingFaceChatRunnable(
    client=client,
    prompt_template=intent_prompt,
    temperature=0.0,
    max_tokens=10,
)

# Section 8: Step-back Translation

In [None]:
# These examples show the LLM what a step-back rewrite looks like:
# a specific question paired with a more generic one.
examples = [
    {"input": specific_question, "output": generic_question}
    for specific_question, generic_question in (
        (
            "Could the members of The Police perform lawful arrests?",
            "what can the members of The Police do?",
        ),
        (
            "Jan Sindel’s was born in what country?",
            "what is Jan Sindel’s personal history?",
        ),
    )
]

# Render each example as a human turn followed by the AI's rewrite.
example_prompt = ChatPromptTemplate.from_messages([("human", "{input}"), ("ai", "{output}")])

few_shot = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

# System instruction describing the step-back task.
_STEP_BACK_SYSTEM_TEXT = """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Take into consideration the intent of the user and their objective of asking the question. Here are a few examples:"""

# Message order: system instruction, few-shot examples, then the new question
# together with its detected intent.
step_back_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _STEP_BACK_SYSTEM_TEXT),
        few_shot,
        ("user", "Intent: {intent}\nQuestion: {question}"),
    ]
)

stepback = HuggingFaceChatRunnable(
    client=client,
    prompt_template=step_back_prompt,
    temperature=0.0,
    max_tokens=1024,
)

# Section 9: Generating an Output

(a) Intent routing

In [None]:
# Prompt used for questions whose intent is DEFINE.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}.
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
defineprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: DEFINE.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- Comprehensive, but concise (1–3 sentences max)
- Factually correct and aligned with the provided context
- Free of speculation, advice, or subjective judgment
- Focused only on essential information—no unnecessary background or examples unless they resolve ambiguity
- Adjusted for multiple meanings if applicable
- Written in terminology appropriate to the user's domain or field


# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""

# Prompt used for questions whose intent is EXPLAIN.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}.
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
explainprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: EXPLAIN.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- Clear and logically structured
- Focused on cause, reasoning, background, or significance
- Factually correct and aligned with the provided context
- Neutral in tone—avoid persuasion, speculation, or personal opinions
- Examples are welcome from the context provided, if it helps to improve understanding.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""

# Prompt used for questions whose intent is PROCEDURE.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}. The string ends with an empty "1. 2. 3."
# scaffold after "# Answer:".
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
# NOTE(review): the bullets "based on reliable knowledge" and "ignore context if
# irrelevant" appear to conflict with "only source of truth" — confirm the intent.
procedureprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: PROCEDURE.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- Structured as a clear, ordered list of steps (e.g., 1, 2, 3...)
- Focused on how-to instructions or best-practice sequences
- Specific, practical, and applicable to the user’s likely context
- Factually accurate and based on reliable knowledge
- Aligned with the provided context; ignore context if irrelevant

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:
1.
2.
3."""

# Prompt used for questions whose intent is ADVICE.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}.
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
# NOTE(review): the bullet "if not relevant, ignore the context" appears to
# conflict with "only source of truth" — confirm the intended behavior.
adviceprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: ADVICE.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- Actionable and practical, tailored to a general user (not personalized)
- Fact-based, but sensitive to nuance, caution, or best practices
- Free from subjective judgment or emotional language
- Respectful of varying conditions or assumptions
- Aligned with the provided context; if not relevant, ignore the context

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""

# Prompt used for questions whose intent is COMPARISON.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}. The string ends with an empty
# "Option A: / Option B: " scaffold after "# Answer:".
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
# NOTE(review): the bullet "ignore it if irrelevant" appears to conflict with
# "only source of truth" — confirm the intended behavior.
comparisonprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: COMPARISON.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- A neutral, side-by-side analysis of options or alternatives
- Factually grounded—avoid personal recommendations unless one option is clearly superior based on evidence
- Clearly structured with bullet points or short paragraphs
- Helpful in illustrating pros and cons, similarities, and differences
- Consistent with the context provided; ignore it if irrelevant

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:
Option A:
Option B: """

# Prompt used for questions whose intent is GENERAL.
# It is a plain string template with three placeholders: {normal_context},
# {step_back_context} and {question}.
# The leading "# " on the placeholder lines is part of the string itself and is
# sent to the model verbatim.
generalprompt = """You are an expert of world knowledge. I am going to ask you a question. Answer the question using only the information in the provided context blocks.
You are responding to a query with the intent: GENERAL.
Your answer should be:
- Use the retrieved context as your only source of truth
- Do not rely on external or prior knowledge, even if you think it’s correct
- If the context does not contain enough information, say "The information is not available in the context provided."
- Informative and contextually aware
- Concise but flexible in length (aim for clarity)
- Objective and based on verifiable information
- Avoid speculation or personal opinion
- Aligned with the provided context if relevant

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""


In [None]:
from langchain_core.runnables import RunnableLambda

# One generator per intent label, built once instead of on every call.
# Every generator uses temperature 0.0 and a 1024-token cap.
_INTENT_GENERATORS = {
    "DEFINE": HuggingFaceChatRunnable(client, defineprompt, 0.0, 1024),
    "EXPLAIN": HuggingFaceChatRunnable(client, explainprompt, 0.0, 1024),
    "PROCEDURE": HuggingFaceChatRunnable(client, procedureprompt, 0.0, 1024),
    "ADVICE": HuggingFaceChatRunnable(client, adviceprompt, 0.0, 1024),
    "COMPARISON": HuggingFaceChatRunnable(client, comparisonprompt, 0.0, 1024),
    "GENERAL": HuggingFaceChatRunnable(client, generalprompt, 0.0, 1024),
}


def _select_generator(x):
    """Return the generator matching ``x["intent"]``.

    The label is trimmed of surrounding whitespace, quotes and full stops and
    upper-cased before lookup. Labels outside the six known intents fall back
    to the GENERAL generator instead of raising KeyError, matching the
    classifier's "GENERAL: Anything else" category.
    """
    label = str(x["intent"]).strip().strip("'\".").upper()
    return _INTENT_GENERATORS.get(label, _INTENT_GENERATORS["GENERAL"])


# The lambda returns a runnable, which is then invoked with the same input dict.
generator = RunnableLambda(_select_generator)

# Section 10: Hallucination Detection

In [None]:
# System instruction for the groundedness grader: 'yes' means the answer is
# supported by the retrieved facts.
hallucinate = "You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts. Do not output anything else."

# The human turn carries the retrieved facts followed by the generated answer.
hallucinator_prompt = ChatPromptTemplate.from_messages([
    ("system", hallucinate),
    ("human", "Retrieved facts: \n\n {documents} \n\n LLM answer: {answer}"),
])

# Temperature 0 with a 5-token cap, since only 'yes' or 'no' is expected.
hallucinator = HuggingFaceChatRunnable(
    client=client,
    prompt_template=hallucinator_prompt,
    temperature=0,
    max_tokens=5,
)


# Section 11: Answer Grader

In [None]:
# System instruction for the answer grader: a yes/no check on whether the
# generation resolves the question.
answerings = "You are a grader assessing whether an answer addresses / resolves a question. Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question. Do not output anything else."

# The human turn carries the question followed by the generated answer.
answering_prompt = ChatPromptTemplate.from_messages([
    ("system", answerings),
    ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
])

# Temperature 0 with a 5-token cap, since only 'yes' or 'no' is expected.
answer_grader = HuggingFaceChatRunnable(
    client=client,
    prompt_template=answering_prompt,
    temperature=0,
    max_tokens=5,
)

# Section 12: Question Rewriter

In [None]:
# System instruction for the question rewriter: produce a retrieval-friendly
# version of the question, folding in any follow-up exchange.
rewrittings = "You are a expert in framing questions to enable good retrieval from the ColBERT vector store. You will rewrite the given question such that it is able to retrieve more relevant documents from the vector store. If there is a follow up question asked and an answer provided by the user, incorporate the additional information into the rewritten question. Output only the question and do not output anything else. The rewritten question should be approximately the same length as the original question."

# The human turn needs three variables: question, follow_up and info.
rewritting_prompt = ChatPromptTemplate.from_messages([
    ("system", rewrittings),
    ("human", "Original question: \n\n {question} \n\n Follow-up Question: \n\n {follow_up} \n\n User response to follow-up Question: \n\n {info}"),
])

rewritter = HuggingFaceChatRunnable(
    client=client,
    prompt_template=rewritting_prompt,
    temperature=0,
    max_tokens=1024,
)

# Section 13: Reviewer

In [None]:
# System instruction for the query reviewer: a yes/no check on whether the
# question holds enough information for meaningful retrieval.
reviewings = "You are an expert in query analysis for a RAG pipeline. Your task is to examine the question posed by the user and determine if there is enough information in the query to engage in meaningful retrieval. Give a binary score 'yes' or 'no'. Do not output anything else."

# The existing spelling of this variable name is kept so references still resolve.
reviweing_prompt = ChatPromptTemplate.from_messages([
    ("system", reviewings),
    ("human", "User question: \n\n {question}"),
])

# Temperature 0 with a 5-token cap, since only 'yes' or 'no' is expected.
reviewer = HuggingFaceChatRunnable(
    client=client,
    prompt_template=reviweing_prompt,
    temperature=0,
    max_tokens=5,
)

# System instruction for the follow-up asker. The final sentence now refers to
# the follow-up question this model writes; it previously said "rewritten
# question", wording that belongs to the rewriter prompt.
askings = "You are an expert at prompting additional information from users. Your task is to examine the question posed by the user and ask a follow up question that would make retrieval more meaningful. Output only the question and do not output anything else. The follow-up question should be approximately the same length as the original question."

# The human turn carries only the user's question.
asking_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", askings),
        ("human", "User question: \n\n {question}"),
    ]
)

asker = HuggingFaceChatRunnable(client, asking_prompt, 0, 1024)

# Section 14: Helper Functions

In [None]:
from langchain_cohere import ChatCohere

def routing(question):
  """Decide whether a question goes to the vectorstore or to Command R.

  Args:
    question: the user's question.

  Returns:
    True for the vectorstore, False for Command R, None when the router
    fails or answers with anything else.
  """
  # pass the question into the router; get back vectorstore or CommandR
  try:
    route = analysis_router.invoke({"question": question})
  except Exception as e:
    print("---Error: Router failed to route query---")
    print(f"Reason: {e}")
    return None
  print(f"--- Routed to {route}---")
  # Compare case-insensitively and ignore surrounding whitespace, quotes and full stops.
  label = route.strip().strip("'\".").lower() if isinstance(route, str) else ""
  if label == "vectorstore":
    return True
  if label == "commandr":
    return False
  print(f"---Error: Router routed to {route}---")
  return None

def intentions(question):
  """Classify the intent of a question.

  Returns the raw reply of the intent classifier, or None when the call fails.
  """
  print("---Determining Intent")
  try:
    detected = intent_router.invoke({"question": question})
    print(f"---Intent identified as {detected}---")
  except Exception as err:
    print("---Error: Intent detection failed---")
    print(f"Reason: {err}")
    return None
  return detected

def CmdRchain(question):
  """Answer a question with the Cohere Command R chat model.

  Args:
    question: the user's question, passed to the chat model as-is.

  Returns:
    Whatever the chat model's ``invoke`` returns on success, or the string
    "Error Code 1: Command R failed to generate output." on any failure.
  """
  try:
    # Built inside the try so a missing CO_API_KEY or a client setup error is
    # reported like any other failure instead of escaping as a raw exception.
    CmdR = ChatCohere(model="command-r", co_api_key=os.environ['CO_API_KEY'])
    response = CmdR.invoke(question)
  except Exception as e:
    print("---Error: CommandR failed to generate output---")
    print(f"Reason: {e}")
    response = "Error Code 1: Command R failed to generate output."
  return response

def stepback_translation(question, intent):
  """Produce a more generic step-back version of a question.

  Returns the raw reply of the step-back model, or None when the call fails.
  """
  print("---Step-back Translation---")
  try:
    broader_question = stepback.invoke({"question": question, "intent": intent})
    print(f"---Step-back Translation: {broader_question}")
  except Exception as err:
    print("---Error: Step-back translation failed---")
    print(f"Reason: {err}")
    return None
  return broader_question

def bad_grading(question):
  """Rewrite a question whose retrieved documents were graded as irrelevant.

  The reviewer is asked whether the question holds enough information for
  retrieval. On 'yes' the question is rewritten directly. On 'no' a follow-up
  question is put to the user and the reply is passed to the rewriter.

  Args:
    question: the question to rewrite.

  Returns:
    The rewritten question, or None when the reviewer, asker or rewriter fails
    or the reviewer answers with something other than yes/no.
  """
  print("---Determining if additional information is required.")
  try:
    verdict = reviewer.invoke({'question': question})
  except Exception as e:
    print("---Error: Reviewer failed to determine if additional information is required---")
    print(f"Reason: {e}")
    return None
  # Compare case-insensitively and ignore surrounding whitespace, quotes and full stops.
  enough_info = verdict.strip().strip("'\".").lower() if isinstance(verdict, str) else ""
  if enough_info == "yes":
    # The question is already specific enough: rewrite without a follow-up.
    # Both keys are still supplied because the rewriter template requires them.
    follow_up, info = None, None
  elif enough_info == "no":
    try:
      follow_up = asker.invoke({'question': question})
    except Exception as e:
      print("---Error: Asker failed to generate a follow-up question---")
      print(f"Reason: {e}")
      return None
    info = input(f"{follow_up}")
  else:
    return None
  try:
    return rewritter.invoke({'question': question, 'follow_up': follow_up, 'info': info})
  except Exception as e:
    print("---Error: Rewriter failed to rewrite question---")
    print(f"Reason: {e}")
    return None

def retrieval(question, retries): # retrieve and evaluate retrieved documents
  """Retrieve documents for a question and check that they are relevant.

  One sample document is graded (the third, or the last one when fewer than
  three come back). If it is graded irrelevant, the question is rewritten and
  retrieval is retried.

  Args:
    question: the question to retrieve for.
    retries: number of rewrites already attempted; more than 3 aborts.

  Returns:
    The retrieved documents, or None when retrieval or grading fails, the
    retry budget is exhausted, or no usable question is available.
  """
  if retries > 3:
    print("--- Error: Too many retries, ask another question!---")
    return None
  if not question:
    # A failed rewrite yields None; there is nothing to retrieve with.
    print("---Error: No question available for retrieval---")
    return None
  try:
    context = retriever.invoke(question)
  except Exception as e:
    print("---Error: Retriever failed to retrieve documents")
    print(f"Reason: {e}")
    return None
  print("---Grading documents---")
  if context:
    # Clamp the sample index so short result lists do not raise IndexError.
    sample = context[min(2, len(context) - 1)]
    try:
      # Grade the document text when available rather than the object's repr.
      grade = grader.invoke({"document": getattr(sample, "page_content", sample), "question": question})
    except Exception as e:
      print("---Error: Grader failed to grade---")
      print(f"Reason: {e}")
      return None
    # Compare case-insensitively and ignore surrounding whitespace, quotes and full stops.
    verdict = grade.strip().strip("'\".").lower() if isinstance(grade, str) else ""
  else:
    # Nothing was retrieved: treat it like an irrelevant result and rewrite.
    grade, verdict = None, "no"
  if verdict == "yes":
    print("---Documents are relevant---")
    return context
  elif verdict == "no":
    print("---Documents are not relevant")
    print("---Rewritting Question---")
    new_question = bad_grading(question)
    return retrieval(new_question, retries + 1)
  else:
    print(f"---Error: Grader graded documents as {grade}")
    return None

def generation(question, normal_context, step_back_context, intent):
  """Generate an answer from the question, both context sets and the intent.

  Returns the generator's output, or None when the generator raises.
  """
  print("---Generating the output---")
  payload = {
    'question': question,
    'normal_context': normal_context,
    "step_back_context": step_back_context,
    'intent': intent,
  }
  try:
    return generator.invoke(payload)
  except Exception as err:
    print("---Error:Generator failed to generate output---")
    print(f"Reason: {err}")
    return None

def hallucation_detector(documents, answer):
  """Check whether an answer is grounded in the retrieved documents.

  Args:
    documents: the retrieved facts shown to the grader.
    answer: the generated answer to check.

  Returns:
    True when the grader says 'yes' (grounded), False when it says 'no',
    None when the grader fails or answers with anything else.
  """
  print("---Detecting Hallucination---")
  try:
    hallucinate = hallucinator.invoke({'documents': documents, 'answer': answer})
  except Exception as e:
    print("---Error: Hallucinator failed in detecting hallucination---")
    print(f"Reason: {e}")
    # Return here: without a reply there is no verdict to inspect below.
    return None
  # Compare case-insensitively and ignore surrounding whitespace, quotes and full stops.
  verdict = hallucinate.strip().strip("'\".").lower() if isinstance(hallucinate, str) else ""
  if verdict == 'yes':
    return True
  elif verdict == 'no':
    return False
  else:
    print(f"---Error: Hallucinator returned {hallucinate}")
    return None

def accuracy_checker(question, answer):
  """Check whether an answer resolves the question.

  Args:
    question: the user's question.
    answer: the generated answer to grade.

  Returns:
    True when the grader says 'yes', False when it says 'no', None when the
    grader fails or answers with anything else.
  """
  print("---Checking OutputAccuracy---")
  try:
    grade = answer_grader.invoke({'question':question, 'generation': answer})
  except Exception as e:
    print("---Error: Answer Grader failed to Grade answer")
    print(f"Reason:{e}")
    # Return here: without a reply there is no grade to inspect below.
    return None
  # Compare case-insensitively and ignore surrounding whitespace, quotes and
  # full stops, as the other yes/no graders in this notebook do.
  verdict = grade.strip().strip("'\".").lower() if isinstance(grade, str) else ""
  if verdict == 'yes':
    return True
  elif verdict == 'no':
    return False
  else:
    print(f"---Error: Answer Grader returned {grade}")
    return None

def vectoring(question):
  """Answer a question from the vectorstore.

  Steps: detect the intent, build a step-back question, retrieve context for
  both questions, then generate an answer and check that it is grounded,
  regenerating up to three more times when it is not.

  Args:
    question: the user's question.

  Returns:
    The generated answer. If no attempt is judged grounded, the last attempt
    is returned.

  Raises:
    Exception: with an "Error code 2x" message when a pipeline step fails.
  """
  # intent detection step
  intent = intentions(question)
  if not intent:
    # Covers None and an empty reply, which would leave no intent to route on.
    raise Exception("Error code 2a: Intent detection failed")
  # step back translation
  step_back_question = stepback_translation(question, intent)
  if step_back_question is None:
    raise Exception("Error code 2b: Failed to generate stepback question")
  # get normal context and stepback context
  normal_context = retrieval(question, 0)
  if normal_context is None:
    raise Exception("Error code 2c: Failed to retreive normal context")
  step_back_context = retrieval(step_back_question, 0)
  if step_back_context is None:
    raise Exception("Error code 2d: Failed to retreive stepback context")
  # Generate the output and validate
  max_retries = 3
  for _ in range(max_retries + 1):
    output = generation(question, normal_context, step_back_context, intent)
    if output is None:
      raise Exception("Error code 2f: Failed to generate output")
    grounded = hallucation_detector(normal_context + step_back_context, output)
    if grounded is None:
      raise Exception("Error code 2e: Failed to run hallucination detector")
    if grounded:
      # The detector returns True when the answer is supported by the context.
      break
  return output

def redo(question, output):
  """Return the answer if it resolves the question, otherwise regenerate once.

  Args:
    question: the user's question.
    output: the answer produced so far.

  Returns:
    ``output`` when the accuracy check passes, otherwise a fresh answer from
    ``vectoring(question)``.

  Raises:
    Exception: "Error code 2g" when the accuracy check gives no verdict.
  """
  # Call the checker once and branch on its result; the earlier code tested
  # the function object itself, so only the first branch could ever run.
  verdict = accuracy_checker(question, output)
  if verdict is None:
    raise Exception("Error code 2g: Accuracy checker failed")
  if verdict:
    return output
  return vectoring(question)


def initializer(question):
  """Entry point: route a question and return the answer.

  A truthy route sends the question through the vectorstore pipeline and its
  accuracy check. Any falsy route, including None from a failed routing call,
  is answered by Command R.
  """
  route = routing(question)
  if route:
    return redo(question, vectoring(question))
  return CmdRchain(question)

# Section 15: Run Everything

In [None]:
# Ask for a question, run it through the pipeline, and show the result.
question = input("Enter your question: ")
answer = initializer(question)
print(answer)