<a href="https://colab.research.google.com/github/ChowchowWorks/Customer_service_rag/blob/main/Rag_Pipeline_Prototype_Version_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 1: Importing Libraries

(a) Environment Toggle

In [None]:
import os

!pip install langchain_community langchain chromadb transformers sentence-transformers
!pip install -U langchain-huggingface

!pip install pypdf

os.environ['LANGCHAIN_API_KEY'] = "API_KEY"
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "API_KEY"
os.environ['USER_AGENT'] = 'MyColabApp/1.0 (Python/3.9; GoogleColab)'

(b) Libraries

In [None]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate

# Name of the embedding model handed to HuggingFaceEmbeddings; the resulting
# `embedding` object is what the vector store uses when indexing the chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Section 2: Load Documents

(a) Load the Documents into a Global Variable

In [None]:
# Testing setup: this version reads its source material through a PDF
# directory loader pointed at the folder below.
PDF_SOURCE_DIR = "/content/RAG tester"
loader = PyPDFDirectoryLoader(PDF_SOURCE_DIR)
documents = loader.load()

(b) Splitting the document into more manageable chunks

In [None]:
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 50

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# `texts` holds the chunked documents that get indexed in Section 3.
texts = splitter.split_documents(documents)

# Section 3: Indexing

(a) Embedding text chunks into the vector store

In [None]:
# Build the Chroma vector store from the chunks, embedding them with `embedding`.
# NOTE(review): re-running this cell in the same kernel may index the same
# chunks again — confirm against Chroma's default collection behaviour.
vectorstore = Chroma.from_documents(documents=texts, embedding=embedding)

(b) Declaring the number of chunks required to generate response

In [None]:
# Number of chunks the retriever is asked to return per query.
k = 5
retriever = vectorstore.as_retriever(search_kwargs=dict(k=k))

# Section 4: Implementing Step-Back Generator

(a) Chatbot

In [None]:
from huggingface_hub import InferenceClient
# Take the token from the environment (set in Section 1) instead of embedding
# a credential in the notebook source.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
)

from langchain_core.runnables import Runnable

class HuggingFaceChatRunnable(Runnable):
    """Runnable that renders a prompt template and sends it to a chat-completion client.

    The rendered prompt is submitted as a single "user" message and the text
    of the first returned choice is handed back to the caller.
    """

    def __init__(self, client, prompt_template, temperature, max_tokens):
        """Remember the client, the prompt template and the generation settings.

        Args:
            client: Object exposing ``chat_completion(messages=..., temperature=..., max_tokens=...)``.
            prompt_template: Object whose ``format(**inputs)`` yields the prompt text.
            temperature: Forwarded unchanged to ``chat_completion``.
            max_tokens: Forwarded unchanged to ``chat_completion``.
        """
        self.client, self.prompt_template = client, prompt_template
        self.temperature, self.max_tokens = temperature, max_tokens

    def invoke(self, inputs: dict, config: dict = None) -> str:
        """Format the template with ``inputs`` and return the model's reply text.

        Args:
            inputs: Mapping expanded as keyword arguments into the template.
            config: Accepted but not used by this implementation.

        Returns:
            The ``"content"`` entry of the first choice's message.
        """
        rendered_prompt = self.prompt_template.format(**inputs)

        # The whole rendered prompt travels as one user turn.
        user_turn = {"role": "user", "content": rendered_prompt}
        completion = self.client.chat_completion(
            messages=[user_turn],
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

        first_choice = completion.choices[0]
        return first_choice.message["content"]

(b) Step Back Prompt

In [None]:
from langchain.prompts import FewShotChatMessagePromptTemplate

# Few-shot pairs that show the LLM what a step-back rewrite looks like:
# a specific question ("input") and its more generic form ("output").
examples = [
    dict(
        input="Could the members of The Police perform lawful arrests?",
        output="what can the members of The Police do?",
    ),
    dict(
        input="Jan Sindel’s was born in what country?",
        output="what is Jan Sindel’s personal history?",
    ),
]

# Each example is rendered as a human turn followed by an AI turn.
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)

few_shot = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

# System instruction that introduces the few-shot examples.
step_back_system_message = (
    "You are an expert at world knowledge. Your task is to step back and "
    "paraphrase a question to a more generic step-back question, which is "
    "easier to answer. Here are a few examples:"
)

# Final prompt layout: system instruction, few-shot examples, then the new question.
step_back_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", step_back_system_message),
        few_shot,
        ("user", "{question}"),
    ]
)

(c) Engineering the response prompt

In [None]:
# Answer prompt: instructions, then the two retrieved contexts, then the
# original question. Placeholders: normal_context, step_back_context, question.
template = (
    "You are an expert of world knowledge. I am going to ask you a question. "
    "Your response should be comprehensive and not contradicted with the "
    "following context if they are relevant. Otherwise, ignore them if they "
    "are not relevant.\n"
    "\n"
    "# {normal_context}\n"
    "# {step_back_context}\n"
    "\n"
    "# Original Question: {question}\n"
    "# Answer:"
)
response_prompt = ChatPromptTemplate.from_template(template)

(d) Implement the dual LLM

In [None]:
# Both runnables share the same client and the same generation settings;
# only the prompt template differs.
generation_settings = {"temperature": 0, "max_tokens": 1024}

# Rewrites the user's question into its step-back form.
stepback = HuggingFaceChatRunnable(
    client,
    prompt_template=step_back_prompt,
    **generation_settings,
)

# Produces the final answer from the question and the retrieved contexts.
chat = HuggingFaceChatRunnable(
    client,
    prompt_template=response_prompt,
    **generation_settings,
)

(e) Building the response chain

In [None]:
from langchain_core.runnables import RunnableMap
from langchain_core.output_parsers import StrOutputParser

# Chain for full RAG pipeline with step-back
def _format_docs(docs):
    """Join the text of retrieved documents into one prompt-ready string.

    Without this, the document list would be formatted into the prompt as its
    Python repr (metadata, escaped newlines) instead of readable context.
    """
    return "\n\n".join(doc.page_content for doc in docs)


def _retrieve_and_step_back(inputs):
    """Fetch context for the original question and generate its step-back question."""
    question = inputs["question"]
    return {
        "normal_context": retriever.invoke(question),
        "step_back_q": stepback.invoke({"question": question}),
        "question": question,
    }


def _build_prompt_inputs(state):
    """Fetch context for the step-back question and render both contexts as text."""
    return {
        "step_back_context": _format_docs(retriever.invoke(state["step_back_q"])),
        "normal_context": _format_docs(state["normal_context"]),
        "question": state["question"],
    }


# Input: {"question": <str>}. Output: the answer text produced by `chat`.
chain = (
    RunnableLambda(_retrieve_and_step_back)
    | RunnableLambda(_build_prompt_inputs)
    | chat
    | StrOutputParser()
)

# Section 5: Running the Query

In [None]:
import textwrap

# Start with the question
question = input("Ask me anything! \n")

# Generate the Response
response = chain.invoke({"question": question})

# Wrap each line separately: calling textwrap.fill on the whole answer would
# replace every newline with a space and merge paragraphs and list items
# into a single block of text.
wrapped_output = "\n".join(
    textwrap.fill(line, width=150) for line in response.splitlines()
)
print(wrapped_output)