<a href="https://colab.research.google.com/github/ChowchowWorks/Customer_service_rag/blob/main/Rag_Pipeline_Prototype_Version_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 1: Set up the environment

In [None]:
from IPython import get_ipython
from IPython.display import display
import os

!pip install "transformers==4.49.0"
!pip install -U langchain langchain-community langchainhub langchain-huggingface tiktoken
!pip install -U chromadb sentence-transformers
!pip install -U pypdf
!pip install -U ragatouille


os.environ['LANGCHAIN_API_KEY'] = "API_KEY"
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "API_KEY"
os.environ['USER_AGENT'] = 'MyColabApp/1.0 (Python/3.9; GoogleColab)'

# Section 2: Loading Documents

In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# For the purpose of testing, this version uses a pdf loader
PDF_DIRECTORY = "/content/RAG tester"
loader = PyPDFDirectoryLoader(PDF_DIRECTORY)
documents = loader.load()

# Fail here, with a clear message, rather than later inside the indexing step
# when there is nothing to index (wrong path, empty folder, unreadable PDFs).
if not documents:
    raise RuntimeError(
        f"No PDF pages were loaded from {PDF_DIRECTORY!r}; "
        "check that the folder exists and contains readable PDF files."
    )

# Section 3: Indexing

(a) Indexing algorithm

In [None]:
from ragatouille import RAGPretrainedModel
!pip install 'faiss-gpu'

# Set up the indexing model
RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

# Index the documents
RAG.index(collection = [doc.page_content for doc in documents], index_name = "Behavioral", max_document_length= 180, split_documents= True )

(b) Set up the retriever

In [4]:
# Wrap the "Behavioral" index as a LangChain retriever, requested with k = 5
# (presumably the number of passages returned per query - confirm in the
# RAGatouille docs).
retriever = RAG.as_langchain_retriever(
    index_name="Behavioral",
    k=5,
)

# Section 4: Setting up the Generator

In [5]:
from huggingface_hub import InferenceClient
# Hugging Face inference client bound to the Mistral instruct model; the
# access token is read from the environment variable set in Section 1.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.environ['HUGGINGFACEHUB_API_TOKEN'],
)

from langchain_core.runnables import Runnable

class HuggingFaceChatRunnable(Runnable):
    """Runnable that renders a prompt template and sends it as one user chat message.

    Attributes:
        client: object exposing ``chat_completion(messages=..., temperature=...,
            max_tokens=...)``.
        prompt_template: any object with a ``format(**inputs)`` method that
            returns the prompt text (a plain string or a prompt template).
        temperature: sampling temperature forwarded on every request.
        max_tokens: generation limit forwarded on every request.
    """

    def __init__(self, client, prompt_template, temperature, max_tokens):
        self.client, self.prompt_template = client, prompt_template
        self.temperature, self.max_tokens = temperature, max_tokens

    def invoke(self, inputs: dict, config: dict = None) -> str:
        """Fill the template with ``inputs`` and return the model's reply text.

        Args:
            inputs: mapping whose items are passed as keyword arguments to
                ``prompt_template.format``.
            config: accepted for compatibility with the Runnable call
                convention; it is not used here.

        Returns:
            The ``"content"`` of the first choice in the chat completion.
        """
        rendered = self.prompt_template.format(**inputs)

        # The whole rendered prompt travels as a single "user" turn.
        chat = [{"role": "user", "content": rendered}]
        completion = self.client.chat_completion(
            messages=chat,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

        first_choice = completion.choices[0]
        return first_choice.message["content"]

# Section 5: Routing Prompts

(a) Fewshot examples of intent detection

In [7]:
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import FewShotChatMessagePromptTemplate

# Labelled sample queries, one per intent, used as few-shot demonstrations
# for the intent classifier: (user question, expected intent label).
_INTENT_SAMPLES = (
    ("What is creatine?", "DEFINE"),
    ("Why do athletes take protein after workouts?", "EXPLAIN"),
    ("How do I calculate my calorie needs?", "PROCEDURE"),
    ("Should I take whey or casein protein?", "COMPARISON"),
    ("What is the best way to embark on my weight loss journey?", "ADVICE"),
)

examples = [
    {"input": sample_question, "output": sample_label}
    for sample_question, sample_label in _INTENT_SAMPLES
]

# Each example is rendered as a human turn followed by the AI's answer.
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)

few_shot_examples = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

(b) Set up the intent detection prompt

In [8]:
# System instructions for the intent classifier, kept as one line per entry so
# the list of supported intents is easy to scan and extend.
_INTENT_SYSTEM_MESSAGE = "\n".join(
    [
        "You are an intent classifier for the field of interest in the query.",
        "Given a question, classify it into one of the following intents:",
        "- DEFINE: Asking for a definition or description",
        "- EXPLAIN: Asking for reasoning or why something is the case",
        "- PROCEDURE: Asking for how-to or steps",
        "- ADVICE: Asking for personalized or practical suggestions",
        "- COMPARISON: Asking to compare options",
        "- GENERAL: Anything else",
        "Return only the intent, nothing else.",
        "Here are a few examples:",
    ]
)

# Message order: instructions, then the few-shot demonstrations, then the
# question that has to be classified.
_intent_messages = [
    ("system", _INTENT_SYSTEM_MESSAGE),
    few_shot_examples,
    ("user", "{question}"),
]
intent_prompt = ChatPromptTemplate.from_messages(_intent_messages)

(c) Set up routing

In [9]:
# Intent classifier: temperature 0.0 and a 10-token limit, since only a
# single label is expected back.
router = HuggingFaceChatRunnable(
    client=client,
    prompt_template=intent_prompt,
    temperature=0.0,
    max_tokens=10,
)

# Section 6: Step-back Translation

(a) Step-back prompts

In [10]:
# These examples show the LLM what a step-back rewrite looks like:
# (specific question, more generic step-back question).
_STEP_BACK_SAMPLES = (
    (
        "Could the members of The Police perform lawful arrests?",
        "what can the members of The Police do?",
    ),
    (
        "Jan Sindel’s was born in what country?",
        "what is Jan Sindel’s personal history?",
    ),
)

examples = [
    {"input": specific, "output": generic}
    for specific, generic in _STEP_BACK_SAMPLES
]

# Render every example as a human turn followed by the AI's rewrite.
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)

few_shot = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

_STEP_BACK_SYSTEM_MESSAGE = (
    "You are an expert at world knowledge. "
    "Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. "
    "Here are a few examples:"
)

# Message order: instructions, few-shot demonstrations, then the new question
# together with the intent detected for it.
_step_back_messages = [
    ("system", _STEP_BACK_SYSTEM_MESSAGE),
    few_shot,
    ("user", "Intent: {intent}\nQuestion: {question}"),
]
step_back_prompt = ChatPromptTemplate.from_messages(_step_back_messages)

(b) Set up the step-back runnable

In [11]:
# Step-back question generator: temperature 0.0, up to 1024 tokens.
stepback = HuggingFaceChatRunnable(
    client=client,
    prompt_template=step_back_prompt,
    temperature=0.0,
    max_tokens=1024,
)

# Section 7: Routing Chains

(a) Manual Chain prompts

In [18]:
# Every intent-specific answer prompt shares the same opening sentence, the
# same three grounding rules and the same context/question footer. Only the
# intent label, the style rules and an optional answer scaffold differ, so the
# prompts are assembled line by line from those shared pieces.
_PROMPT_OPENING = (
    "You are an expert of world knowledge. I am going to ask you a question. "
    "Answer the question using only the information in the provided context blocks."
)
_GROUNDING_RULES = (
    "- Use the retrieved context as your only source of truth",
    "- Do not rely on external or prior knowledge, even if you think it’s correct",
    '- If the context does not contain enough information, say "The information is not available in the context provided."',
)
_PROMPT_FOOTER = (
    "# {normal_context}",
    "# {step_back_context}",
    "",
    "# Original Question: {question}",
    "# Answer:",
)


def _compose_prompt(intent_label, style_rules, answer_scaffold=(), spacer_lines=1):
    """Assemble one answer prompt as newline-joined lines.

    Args:
        intent_label: intent name inserted into the second line of the prompt.
        style_rules: intent-specific bullet lines placed after the grounding rules.
        answer_scaffold: optional lines appended after "# Answer:" to seed the
            shape of the answer.
        spacer_lines: number of blank lines between the rules and the context
            placeholders.

    Returns:
        The prompt text, still containing the {normal_context},
        {step_back_context} and {question} placeholders.
    """
    lines = [
        _PROMPT_OPENING,
        "You are responding to a query with the intent: " + intent_label + ".",
        "Your answer should be:",
        *_GROUNDING_RULES,
        *style_rules,
        *([""] * spacer_lines),
        *_PROMPT_FOOTER,
        *answer_scaffold,
    ]
    return "\n".join(lines)


# DEFINE: short, factual definitions (two blank lines precede the context).
defineprompt = _compose_prompt(
    "DEFINE",
    (
        "- Comprehensive, but concise (1–3 sentences max)",
        "- Factually correct and aligned with the provided context",
        "- Free of speculation, advice, or subjective judgment",
        "- Focused only on essential information—no unnecessary background or examples unless they resolve ambiguity",
        "- Adjusted for multiple meanings if applicable",
        "- Written in terminology appropriate to the user's domain or field",
    ),
    spacer_lines=2,
)

# EXPLAIN: reasoning, causes and background.
explainprompt = _compose_prompt(
    "EXPLAIN",
    (
        "- Clear and logically structured",
        "- Focused on cause, reasoning, background, or significance",
        "- Factually correct and aligned with the provided context",
        "- Neutral in tone—avoid persuasion, speculation, or personal opinions",
        "- Examples are welcome from the context provided, if it helps to improve understanding.",
    ),
)

# PROCEDURE: ordered steps; the answer is seeded with a numbered list.
procedureprompt = _compose_prompt(
    "PROCEDURE",
    (
        "- Structured as a clear, ordered list of steps (e.g., 1, 2, 3...)",
        "- Focused on how-to instructions or best-practice sequences",
        "- Specific, practical, and applicable to the user’s likely context",
        "- Factually accurate and based on reliable knowledge",
        "- Aligned with the provided context; ignore context if irrelevant",
    ),
    answer_scaffold=("1.", "2.", "3."),
)

# ADVICE: practical, general-purpose suggestions.
adviceprompt = _compose_prompt(
    "ADVICE",
    (
        "- Actionable and practical, tailored to a general user (not personalized)",
        "- Fact-based, but sensitive to nuance, caution, or best practices",
        "- Free from subjective judgment or emotional language",
        "- Respectful of varying conditions or assumptions",
        "- Aligned with the provided context; if not relevant, ignore the context",
    ),
)

# COMPARISON: side-by-side analysis; the answer is seeded with two options.
comparisonprompt = _compose_prompt(
    "COMPARISON",
    (
        "- A neutral, side-by-side analysis of options or alternatives",
        "- Factually grounded—avoid personal recommendations unless one option is clearly superior based on evidence",
        "- Clearly structured with bullet points or short paragraphs",
        "- Helpful in illustrating pros and cons, similarities, and differences",
        "- Consistent with the context provided; ignore it if irrelevant",
    ),
    answer_scaffold=("Option A:", "Option B: "),
)

# GENERAL: fallback for anything that fits no other intent.
generalprompt = _compose_prompt(
    "GENERAL",
    (
        "- Informative and contextually aware",
        "- Concise but flexible in length (aim for clarity)",
        "- Objective and based on verifiable information",
        "- Avoid speculation or personal opinion",
        "- Aligned with the provided context if relevant",
    ),
)


(b) Load the prompts into the router

In [19]:
import re

from langchain_core.runnables import RunnableLambda

# Finds the first known intent label anywhere in the classifier's reply, so
# answers such as "DEFINE.", "Intent: define" or "EXPLAIN\n" are still routed.
_INTENT_LABEL_PATTERN = re.compile(
    r"\b(DEFINE|EXPLAIN|PROCEDURE|ADVICE|COMPARISON|GENERAL)\b"
)


def _select_intent_runnable(x):
    """Return the answer-generating runnable that matches ``x["intent"]``.

    Args:
        x: chain state; ``x["intent"]`` holds the raw text returned by the
            intent classifier.

    Returns:
        A HuggingFaceChatRunnable configured with the prompt of the detected
        intent. Replies that contain no known label fall back to the GENERAL
        prompt instead of raising ``KeyError``.
    """
    templates = {
        "DEFINE": defineprompt,
        "EXPLAIN": explainprompt,
        "PROCEDURE": procedureprompt,
        "ADVICE": adviceprompt,
        "COMPARISON": comparisonprompt,
        "GENERAL": generalprompt,
    }
    found = _INTENT_LABEL_PATTERN.search(str(x["intent"]).upper())
    label = found.group(1) if found else "GENERAL"
    # Only the runnable that is actually needed is constructed.
    return HuggingFaceChatRunnable(client, templates[label], 0.0, 1024)


# The function returns a runnable rather than a final value; the chain relies
# on RunnableLambda invoking a returned runnable with the same input.
intent_router = RunnableLambda(_select_intent_runnable)

(c) Build the RAG chain

In [20]:
from langchain_core.runnables import RunnableMap, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

def _format_docs(docs):
    """Join the text of retrieved documents into one plain-text context block.

    Formatting the raw list into a prompt would insert the Python repr of each
    document (metadata, quoting, escaped newlines) instead of its text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Pipeline stages:
#   1. classify the intent of the question;
#   2. retrieve context for the original question and generate a step-back question;
#   3. retrieve context for the step-back question;
#   4. answer with the prompt that matches the intent and return plain text.
chain = (
    RunnableMap({
        "question": lambda x: x["question"],
        "step_back_question": lambda x: x["question"],
        "intent": lambda x: router.invoke({"question": x["question"]}),
    })
    | RunnableLambda(lambda x: {
        "normal_context": _format_docs(retriever.invoke(x["question"])),
        "step_back_q": stepback.invoke({"intent": x["intent"], "question": x["step_back_question"]}),
        "question": x["question"],
        "intent": x["intent"],
    })
    | RunnableLambda(lambda x: {
        "step_back_context": _format_docs(retriever.invoke(x["step_back_q"])),
        "normal_context": x["normal_context"],
        "question": x["question"],
        "intent": x["intent"],
    })
    | intent_router
    | StrOutputParser()
)

# Section 8: Testing the RAG Pipeline

In [None]:
# Read one question from the user.
question = input("Ask me anything! \n")

# Run the question through the full pipeline and show the generated answer.
payload = {"question": question}
response = chain.invoke(payload)
print(response)