# QA Bot

 ## AI RAG Assistant Using LangChain

In [7]:
# first installing necessary packages

#for user interface
! pip install gradio==4.44.0 

#for generative llm model and embedding model
! pip install ibm-watsonx-ai==1.1.2  

#for using relevant features from LangChain
! pip install langchain==0.2.11 
! pip install langchain-community==0.2.10 
! pip install langchain-ibm==0.1.11 

#to construct vectordatabase 
! pip install chromadb==0.4.24  

! pip install pypdf==4.3.1 
! pip install pydantic==2.9.1



In [95]:
#import all required libraries in one place
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
from ibm_watsonx_ai import Credentials
from langchain_ibm import WatsonxLLM, WatsonxEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
import gradio as gr

In [96]:
# Silence every warning raised by the code in this notebook.
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments and does nothing."""
    return None


# Two layers of suppression: an 'ignore' filter, and replacing the emitter itself.
warnings.filterwarnings('ignore')
warnings.warn = warn


In [136]:
# following function defines the foundational LLM

def get_llm():
    """Build the watsonx.ai text-generation model used to answer questions.

    Connection settings are read from environment variables so that no secret
    is stored in the notebook:

    * ``WATSONX_APIKEY`` (required): IBM Cloud API key.
    * ``WATSONX_PROJECT_ID`` (optional): watsonx.ai project id; defaults to the
      project this notebook was written against.
    * ``WATSONX_URL`` (optional): service endpoint; defaults to the eu-de
      endpoint this notebook was written against.

    Returns:
        WatsonxLLM: wrapper around ``mistralai/mixtral-8x7b-instruct-v01``
        configured with at most 256 new tokens and temperature 0.5.

    Raises:
        RuntimeError: if ``WATSONX_APIKEY`` is not set.
    """
    import os  # local import keeps this cell runnable on its own

    # Validate the credential before anything else so a missing key fails fast
    # with an actionable message instead of an authentication error later on.
    # NOTE(review): an API key used to be hardcoded here; treat that key as
    # compromised and revoke/rotate it.
    apikey = os.environ.get("WATSONX_APIKEY")
    if not apikey:
        raise RuntimeError(
            "WATSONX_APIKEY is not set. Export your IBM Cloud API key in the "
            "WATSONX_APIKEY environment variable before starting the notebook."
        )

    # mixtral-8x7b-instruct-v01 is used as the base foundational LLM
    model_id = 'mistralai/mixtral-8x7b-instruct-v01'
    parameters = {
        GenParams.MAX_NEW_TOKENS: 256,  # maximum number of tokens in the generated output
        GenParams.TEMPERATURE: 0.5,  # randomness or creativity of the model's responses
    }
    project_id = os.environ.get("WATSONX_PROJECT_ID", "b0f04176-fca0-442c-8ec6-a1a07eaf8527")
    url = os.environ.get("WATSONX_URL", "https://eu-de.ml.cloud.ibm.com")

    watsonx_llm = WatsonxLLM(
        model_id=model_id,
        url=url,
        apikey=apikey,
        project_id=project_id,
        params=parameters,
    )
    return watsonx_llm

In [98]:
# this function takes the pdf file and loads the document
def document_loader(file):
    """Load a PDF with ``PyPDFLoader`` and return the loaded documents.

    Args:
        file: The PDF to load. May be a path (``str`` or ``os.PathLike``) or
            any object that exposes its path through a ``.name`` attribute.

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns for the file.

    Raises:
        ValueError: if ``file`` is ``None`` or an empty string, e.g. when the
            question is submitted before a PDF has been uploaded.
    """
    import os  # local import keeps this cell runnable on its own

    if file is None or file == "":
        raise ValueError("No PDF file was provided; upload a document first.")

    # Path-like values are used directly. ``pathlib.Path.name`` is only the
    # basename, so ``.name`` must not be consulted for them.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    loader = PyPDFLoader(pdf_path)
    return loader.load()

In [99]:
# Split the document with a recursive character splitter, which is more effective for generic text.
def text_splitter(data):
    """Break the loaded documents into overlapping chunks.

    Args:
        data: The documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter_options = dict(
        chunk_size=1000,      # controls chunk size (measured with length_function)
        chunk_overlap=50,     # size of the overlap between chunks
        length_function=len,
    )
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(data)

In [100]:
# function that embeds the chunks using a defined embedding model and stores the embeddings in a ChromaDB vector store
def vector_database(chunks):
    """Embed ``chunks`` and index them in their own Chroma collection.

    Args:
        chunks: Non-empty sequence of document chunks to embed and store.

    Returns:
        The ``Chroma`` vector store created by ``Chroma.from_documents``.

    Raises:
        ValueError: if ``chunks`` is empty, since an empty index cannot answer
            any query.
    """
    import uuid  # local import keeps this cell runnable on its own

    if not chunks:
        raise ValueError(
            "No text chunks to index; the document may not contain any extractable text."
        )

    embedding_model = watsonx_embedding()

    # Without an explicit name every call targets LangChain's default
    # collection. With no persist directory, Chroma's in-memory client is
    # shared across the process, so repeated calls would keep appending to the
    # same collection and mix chunks from earlier uploads and queries into the
    # results. A unique name gives each call an isolated index.
    # NOTE(review): collections are never dropped, so memory grows with use.
    collection_name = f"pdf-{uuid.uuid4().hex}"

    # Store the document's embeddings into a vector database
    vectordb = Chroma.from_documents(
        chunks,
        embedding_model,
        collection_name=collection_name,
    )
    return vectordb

In [146]:
# Embedding model
# needed to convert chunks of text into vector representations
def watsonx_embedding():
    """Build the watsonx.ai embedding model used to vectorise document chunks.

    Connection settings are read from environment variables so that no secret
    is stored in the notebook:

    * ``WATSONX_APIKEY`` (required): IBM Cloud API key.
    * ``WATSONX_PROJECT_ID`` (optional): watsonx.ai project id; defaults to the
      project this notebook was written against.
    * ``WATSONX_URL`` (optional): service endpoint; defaults to the eu-de
      endpoint this notebook was written against.

    Returns:
        WatsonxEmbeddings: wrapper around ``ibm/slate-125m-english-rtrvr``.

    Raises:
        RuntimeError: if ``WATSONX_APIKEY`` is not set.
    """
    import os  # local import keeps this cell runnable on its own

    # Validate the credential before anything else so a missing key fails fast.
    # NOTE(review): an API key used to be hardcoded here; treat that key as
    # compromised and revoke/rotate it.
    apikey = os.environ.get("WATSONX_APIKEY")
    if not apikey:
        raise RuntimeError(
            "WATSONX_APIKEY is not set. Export your IBM Cloud API key in the "
            "WATSONX_APIKEY environment variable before starting the notebook."
        )

    embed_params = {
        # This was 3, which cut every chunk down to its first three tokens
        # before embedding and made similarity search nearly meaningless.
        # NOTE(review): 512 is believed to be the maximum input length of the
        # slate-125m model; confirm against the watsonx.ai model card.
        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: 512,
        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
    }
    project_id = os.environ.get("WATSONX_PROJECT_ID", 'b0f04176-fca0-442c-8ec6-a1a07eaf8527')
    url = os.environ.get("WATSONX_URL", 'https://eu-de.ml.cloud.ibm.com')

    embedding_model = WatsonxEmbeddings(
        model_id="ibm/slate-125m-english-rtrvr",  # ibm/slate-125m-english-rtrvr embedding model is used
        url=url,
        project_id=project_id,
        apikey=apikey,
        params=embed_params,
    )
    return embedding_model

In [102]:
# Builds the interface that returns documents for an unstructured query.
def retriever(file):
    """Turn an uploaded file into a vector-store retriever.

    The file is loaded, split into chunks and indexed, in that order.

    Args:
        file: The uploaded file, passed unchanged to ``document_loader``.

    Returns:
        The object returned by ``as_retriever()`` on the vector store.
    """
    documents = document_loader(file)
    vector_store = vector_database(text_splitter(documents))
    # a vector store-based retriever that retrieves information using a simple similarity search
    return vector_store.as_retriever()

In [103]:
# QA Chain
# performs natural-language question-answering over a data source using retrieval-augmented generation (RAG)
def retriever_qa(file, query):
    """Answer ``query`` from the contents of the uploaded PDF.

    Args:
        file: The uploaded PDF as delivered by the Gradio file input.
        query: The user's question.

    Returns:
        The ``'result'`` entry of the ``RetrievalQA`` chain's response.

    Raises:
        gr.Error: if no file was uploaded or the question is blank, so the
            message is shown to the user in the interface.
    """
    # Validate the inputs before building any remote client, so an obviously
    # incomplete request costs nothing and gives a readable message.
    if file is None:
        raise gr.Error("Please upload a PDF file before asking a question.")
    if not query or not query.strip():
        raise gr.Error("Please type a question.")

    llm = get_llm()
    retriever_obj = retriever(file)
    # use RetrievalQA from langchain
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever_obj,
        return_source_documents=False,
    )
    response = qa.invoke(query)
    return response['result']

In [112]:
# Create Gradio interface: a PDF upload and a question box as inputs, one answer box as output.

# Drag and drop file upload
pdf_upload = gr.File(
    label="Upload PDF File",
    file_count="single",
    file_types=['.pdf'],
    type="filepath",
)

# Input textbox where the question can be asked
question_box = gr.Textbox(
    label="Input Query",
    lines=2,
    placeholder="Type your question here...",
)

# Output textbox where the answer is shown
answer_box = gr.Textbox(label="Output")

rag_application = gr.Interface(
    fn=retriever_qa,
    inputs=[pdf_upload, question_box],
    outputs=answer_box,
    allow_flagging="never",
    title="RAG Chatbot",
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document."
)


In [148]:
# Launch the app on all network interfaces, on a port drawn at random from 3000-5000 (inclusive).
import random
port = random.randrange(3000, 5001)  # same range as randint(3000, 5000)
rag_application.launch(server_port=port, server_name="0.0.0.0")

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----

To create a public link, set `share=True` in `launch()`.


