<a href="https://colab.research.google.com/github/ChowchowWorks/Customer_service_rag/blob/main/Rag_Pipeline_Prototype_Version_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 1: Import Libraries

In [None]:
import os
from google.colab import userdata

!pip install langchain_community langchain chromadb transformers sentence-transformers
!pip install -U langchain-huggingface
!pip install wikipedia
!pip install pypdf

os.environ['LANGCHAIN_API_KEY'] = "API_KEY"
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "API_KEY"
os.environ['USER_AGENT'] = 'MyColabApp/1.0 (Python/3.9; GoogleColab)'

In [None]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate

# Embedding model used to vectorise the document chunks when they are indexed.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Section 2: Load Documents

(a) Load the documents into a global variable

In [None]:
# Test corpus: Wikipedia results for the query below stand in for real documents.
loader = WikipediaLoader(
    query="National University of Singapore",
    lang="en",
)
docs = loader.load()

(b) Split the documents into more manageable chunks

In [None]:
# Split the loaded documents into chunks (size 300) that overlap their
# neighbours by 50, so text cut at a chunk boundary appears in both chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
texts = splitter.split_documents(docs)

# Section 3: Indexing

(a) Initialise the pipeline by indexing the documents

In [None]:
# Embed every chunk with the embedding model and index the vectors in Chroma.
# NOTE(review): re-running this cell in the same kernel may add the chunks to an
# existing collection a second time - confirm before relying on re-runs.
vectorstore = Chroma.from_documents(documents=texts, embedding=embedding)

(b) Choose the number of similar documents to be retrieved from the pipeline

In [None]:
# Number of similar chunks the retriever is asked to return for each query.
k = 3
retriever = vectorstore.as_retriever(search_kwargs=dict(k=k))

# Section 4: Implementing the Generator

(a) Defining the prompt template

In [None]:
# Prompt skeleton: the retrieved context is placed first, then the user's question.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

(b) Hugging Face Inference Client

In [None]:
from huggingface_hub import InferenceClient
# Reuse the Hugging Face token that the setup cell exports to the environment
# rather than repeating a credential literal in this cell.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)

(c) Runnable Class and Initialisation

In [None]:
from langchain_core.runnables import Runnable

class HuggingFaceChatRunnable(Runnable):
    """Runnable that fills a prompt template and sends it to a chat-completion client.

    Args:
        client: Object exposing ``chat_completion(messages=..., temperature=...,
            max_tokens=...)``.
        prompt_template: Object whose ``format(**inputs)`` produces the text sent
            as the user message.
        temperature: Value forwarded as the ``temperature`` argument of each request.
        max_tokens: Value forwarded as the ``max_tokens`` argument of each request.
    """

    def __init__(self, client, prompt_template, temperature, max_tokens):
        self.client = client
        self.prompt_template = prompt_template
        self.temperature = temperature
        self.max_tokens = max_tokens

    def invoke(self, inputs: dict, config=None, **kwargs) -> str:
        """Format the prompt with ``inputs`` and return the model's reply text.

        Args:
            inputs: Mapping of template variable names to their values.
            config: Accepted for compatibility with ``Runnable.invoke``, which
                passes a config when the runnable is used inside a chain; unused.
            **kwargs: Accepted for the same reason; unused.

        Returns:
            The ``content`` of the first choice's message in the response.
        """
        prompt_str = self.prompt_template.format(**inputs)

        response = self.client.chat_completion(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt_str},
            ],
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        return response.choices[0].message["content"]

# Generator instance: wraps the inference client and the prompt template with
# the generation settings used for every request.
chat = HuggingFaceChatRunnable(
    client,
    prompt_template=prompt,
    temperature=0.4,
    max_tokens=1024,
)

# Section 5: Retrieval and Response

(a) Retrieval

In [None]:
from logging import exception
# Start by asking the question
question = input("Ask me anything!\n")
# Retrieve the relevant documents
relevant = retriever.invoke(question)
# Sanity check: the retriever should hand back exactly k documents.
# `logging.exception` is a logging function that returns None, not an exception
# class, so raising its result fails with a TypeError; raise a real exception.
if len(relevant) != k:
  raise RuntimeError(f"Wrong number of relevant documents: expected {k}, got {len(relevant)}")

(b) Response

In [None]:
# Pass only the text of each retrieved chunk to the prompt. Formatting the raw
# list of documents would place its Python repr (metadata included) in the
# context instead of readable text.
context_text = "\n\n".join(doc.page_content for doc in relevant)
response = chat.invoke({"context": context_text, "question": question})
print(response)