## Simple RAG for Exam Preparation

Load PDF documents, notes, and other study materials, then ask questions to get relevant answers.

In [None]:
!pip install -q langchain langchain-community chromadb transformers accelerate bitsandbytes sentence-transformers pypdf gradio

In [None]:
# Necessary imports
import os
import re
import torch
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import HuggingFaceHub
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

### Add HuggingFace API Token & Document to Retrieve From

In [None]:
# Hugging Face credentials: the token is exported through the process
# environment and then read back into HF_TOKEN.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "<HUGGINGFACEHUB_API_TOKEN>"  # <--- Paste hf_api_token here
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# PDF source that the retrieval index is built from
loader = PyPDFLoader("<content(pdf)>")

In [None]:
# models used
# Model id handed to SentenceTransformer and HuggingFaceEmbeddings when the
# embedding model is created.
EMBEDDING_MODEL = "mixedbread-ai/mxbai-embed-large-v1"
# Model id handed to AutoModelForCausalLM / AutoTokenizer when the local
# generator model and tokenizer are loaded.
LLM = "HuggingFaceH4/zephyr-7b-beta"


# Compiled once at import time so repeated calls reuse the same pattern object.
# Each range is listed explicitly: a single span from U+24C2 to U+1F251 would
# also swallow CJK text, PDF ligatures (e.g. U+FB01), bullets and box-drawing
# characters, which are legitimate document content.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    "\U00002702-\U000027B0"  # dingbats
    "\U000024C2"  # circled latin capital letter M
    "\U0001F200-\U0001F251"  # enclosed ideographic supplement
    "]+",
    flags=re.UNICODE,
)


def remove_emojis(string):
    """Remove any emojis present in the string.

    Args:
        string: Text to clean.

    Returns:
        A copy of ``string`` with every run of characters matched by the
        emoji ranges above deleted. All other characters, including
        non-Latin scripts and typographic ligatures, are left untouched.
    """
    return _EMOJI_PATTERN.sub("", string)


# Read the PDF through the loader, then scrub emojis from each page in place
pages = loader.load_and_split()

for page in pages:
    page.page_content = remove_emojis(page.page_content)


# Separators tried in list order; the splitter falls through to the next entry
# when a piece is still too large. Several entries are regular expressions
# (markdown headings, `***` / `---` / `___` rules), so the splitter below must
# be told to treat separators as regexes or those entries never match.
SEPARATORS = [
    "\t",
    "\n#{1,6} ",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",
    "",
]

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(
    # chunk_size is counted in characters (the splitter's default length
    # function), which keeps each chunk small enough for the embedding model.
    chunk_size=512,
    chunk_overlap=64,
    add_start_index=True,
    separators=SEPARATORS,
    # Without this flag every separator is escaped and matched literally,
    # which silently disables the regex entries in SEPARATORS.
    is_separator_regex=True,
)
all_splits = text_splitter.split_documents(pages)

# Standalone SentenceTransformer instance of the embedding model.
# NOTE(review): nothing else visible in this file references
# `model_embeddings`; the vector store below is built from `hf_embeddings`,
# so this looks like a second load of the same model — confirm before removing.
model_embeddings = SentenceTransformer(EMBEDDING_MODEL)

# LangChain embedding wrapper that is handed to Chroma. The device is
# hard-coded to "cuda" and embeddings are normalized at encode time.
hf_embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

# store in vectordb
vectorstore = Chroma.from_documents(documents=all_splits, embedding=hf_embeddings)
# Similarity retriever configured to return k=5 chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# quantization config
# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# Load the generator model (quantized with the config above) and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(LLM, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(LLM)

# Local text-generation pipeline: sampled decoding at temperature 0.2, returns
# only the newly generated text, capped at 512 new tokens.
# NOTE(review): the rag chain in this file is wired to `llm`, not to
# `READER_LLM`, so this pipeline appears to be unused here — confirm.
READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=512,
)

# Hosted text-generation model used by the rag chain. The repo id comes from
# the LLM constant so the hosted model and the locally loaded model/tokenizer
# cannot drift apart when the constant is changed.
llm = HuggingFaceHub(
    repo_id=LLM,
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 1024,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

In [None]:
# prompt template
# Two placeholders are filled at run time: {context} and {question}.
# The text ends with "Answer:\n", which is the marker generate_text splits the
# model output on to isolate the answer.
# NOTE(review): the role tags here are capitalised (<|System|>, <|User|>,
# <|Assistant|>) and the question sits under the assistant tag; this may not
# match the chat format the model was trained on — verify against the model card.
template = """
<|System|>
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
You must answer in detail.

<|User|>
{context}
---

<|Assistant|>
Question: {question}
Answer:
"""
custom_rag_prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    """Concatenate the text of each document, separated by a blank line.
    """
    texts = [doc.page_content for doc in docs]
    return "\n\n".join(texts)

# rag chain
# Input stage: the incoming question is passed through unchanged as "question"
# and also sent through the retriever + format_docs to build "context".
prompt_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Remaining stages: fill the prompt, call the model, parse the output to a string.
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser()

### Run Gradio Interface

In [None]:
def generate_text(question):
    """Invoke the rag chain and return the generated answer.

    The chain output is split on the prompt's "Answer:\\n" marker and the text
    following the first marker is returned. If the output does not contain
    the marker, the whole output is returned instead of raising IndexError.

    Args:
        question: The user's question, passed as-is to ``rag_chain.invoke``.

    Returns:
        The answer text as a string. It is also printed to stdout.
    """
    output = rag_chain.invoke(question)
    segments = output.split("Answer:\n")
    # segments[1] only exists when the marker is present in the output.
    answer = str(segments[1]) if len(segments) > 1 else str(output)
    print(answer)
    return answer

# Gradio UI: one text box for the question, a Markdown pane for the answer
question_input = gr.Textbox()
answer_output = gr.Markdown()

iface = gr.Interface(
    fn=generate_text,
    inputs=question_input,
    outputs=answer_output,
    title="RAG Example",
)

# debug=True is forwarded to launch() unchanged
iface.launch(debug=True)