# Gradio app that answers questions about a provided PDF file

Required packages

In [7]:
from ctransformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.document_loaders import PyPDFLoader
import chromadb
import gradio as gr

The model is a GGUF-quantized version of Zephyr 7B Beta (HuggingFace H4's fine-tune of Mistral 7B)

In [None]:
# Quantized Zephyr weights (4-bit "Q4_K_M" GGUF file) loaded through ctransformers.
# hf=True is passed so that the returned object can be handed to the
# transformers pipeline created below.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/zephyr-7B-beta-GGUF",
    model_type="mistral",
    model_file="zephyr-7b-beta.Q4_K_M.gguf",
    hf=True,
)

# The tokenizer is taken from the HuggingFaceH4 Zephyr repository rather than
# from the GGUF repository used for the weights.
tokenizer = AutoTokenizer.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    use_fast=True,
)

# Text-generation pipeline shared by the helper functions defined further down.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

## Auxiliary functions which process the document and feed the relevant information into the model

In [5]:
#Prompt template for inference. We pass the question which is fed into the template for the tokenizer. The output is the prompt for the model
def Prompt(question: str)->str:
    """Render *question* as a chat prompt string for the model.

    The conversation is a fixed system message followed by *question* as the
    user turn. It is passed through the chat template of the pipeline's
    tokenizer with ``tokenize=False`` (the result is text, not token ids) and
    ``add_generation_prompt=True``.
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in a polite manner",
        },
        {"role": "user", "content": question},
    ]
    return pipe.tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

# We load a pdf file with PyPDFLoader. The function returns a list of documents (one per page)
def document_loader(file)->list:
    """Load a PDF with PyPDFLoader and return its documents.

    Args:
        file: Path of the PDF file to read.

    Returns:
        The list of documents produced by ``PyPDFLoader.load()`` (one per
        page with the loader's default settings).
    """
    # load() is used instead of load_and_split(): the latter additionally runs
    # a default text splitter over the pages, which is redundant here because
    # the result is re-chunked by text_splitter() anyway, and it contradicts
    # the "one document per page" contract of this function.
    return PyPDFLoader(file).load()

#We split the loaded pdf into chunks of text. Returns a list of documents (one for each chunk)
def text_splitter(data):
    """Split the loaded documents into small overlapping chunks.

    *data* is the list of documents to split. The splitter is configured for
    chunks of at most 350 characters (measured with ``len``) and an overlap
    of 20 characters between chunks. Returns the list of chunk documents
    produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        # Custom separators instead of the defaults, to improve chunk overlaps.
        separators=["\n\n", ""],
        chunk_size=350,
        chunk_overlap=20,
        length_function=len,
    )
    return splitter.split_documents(data)

#Returns the 3 chunks of text most relevant for the query, as determined by chromadb
def doc_database(chunks: list, query: str)->list[str]:
    """Return the text of the chunks most relevant to *query*.

    Args:
        chunks: Chunk documents; only their ``page_content`` attribute is
            used, metadata is dropped.
        query: The question used for the similarity search.

    Returns:
        Up to 3 chunk texts as ranked by the Chroma query, or an empty list
        when *chunks* is empty.
    """
    import uuid  # local import: only needed to name the per-call collection

    # Nothing to index (e.g. a PDF without extractable text).
    if not chunks:
        return []

    # Keep only the text of each chunk; the ids are the positions as strings.
    texts = [chunk.page_content for chunk in chunks]
    ids = [str(position) for position in range(len(texts))]

    client = chromadb.Client()
    # A fresh collection per call: with a fixed name, the in-memory collection
    # can outlive a single call, so chunks of a previously processed (longer)
    # PDF would remain stored and could be returned for the current one.
    collection_name = f"doc_collection_{uuid.uuid4().hex}"
    collection = client.get_or_create_collection(
        name=collection_name,
        configuration={
            "hnsw": {
                "space": "cosine"  # The default metric is L^2 but cosine is more suited for text similarity
            }
        },
    )
    try:
        collection.upsert(ids=ids, documents=texts)
        # Never ask for more results than there are chunks.
        results = collection.query(
            query_texts=[query],
            n_results=min(3, len(texts)),
        )
    finally:
        # Drop the temporary collection so that calls do not accumulate data.
        client.delete_collection(collection_name)

    # results['documents'] holds one list per query text; there is a single query.
    return results['documents'][0]

def text_question(query: str, database: list[str])->str:
    """Ask the chatbot *query* using the retrieved text as context.

    Args:
        query: The question we ask to the chatbot.
        database: The list of relevant text chunks from doc_database.

    Returns:
        The answer generated by the chatbot.

    Raises:
        ValueError: If the prompt leaves no room for generated tokens in the
            512-token context window.
    """
    # Separate the chunks with a newline so that the last word of one chunk
    # does not fuse with the first word of the next.
    context = "\n".join(database)

    # Spaces/newline around the inserted parts keep the question readable for
    # the model (plain concatenation produced "explain<query>text:<context>").
    template = f"Considering the following text, can you explain {query}\ntext: {context}"
    prompt = Prompt(template)  # built once, reused for counting and generation

    # Budget for generated tokens so that prompt + answer fit in the context
    # window (512 tokens).
    limit = 510 - len(tokenizer.encode(prompt))
    if limit <= 0:
        raise ValueError(
            "The question and the retrieved text do not fit in the 512-token "
            "context window; please ask a shorter question."
        )

    output = pipe(
        prompt,
        max_new_tokens=limit,
        temperature=0.2,
        do_sample=True,
        # Only return the newly generated text. Splitting the full text on
        # '<|assistant|>' breaks when the retrieved text contains that marker.
        return_full_text=False,
    )
    return output[0]['generated_text'].strip()

#Function which combines all the previous ones to generate a chatbot answer about the pdf file from a query
def doc_qa(query: str, file)->str:
    """Answer *query* about the PDF *file*.

    Combines the helper functions: the PDF is loaded, split into chunks, the
    chunks most relevant to the query are retrieved, and the chatbot is asked
    the question with those chunks as context.

    Example: query='your question' file='filename'
    print(doc_qa(query, file))

    Args:
        query: The question about the document.
        file: Path of the PDF file; falsy (e.g. None) when nothing was uploaded.

    Returns:
        The chatbot answer, or a short instruction for the user when the
        file or the question is missing.
    """
    # Guard clauses: without them a missing upload or a blank question only
    # fails later, inside the loader or the model, with an obscure error.
    if not file:
        return "Please upload a PDF file before asking a question."
    if not query or not query.strip():
        return "Please type a question about the document."

    data = document_loader(file)
    chunks = text_splitter(data)
    database = doc_database(chunks, query)
    return text_question(query, database)

## Gradio User Interface architecture

In [None]:
#Gradio interface
# Two inputs (the question and the uploaded PDF) are passed, in this order,
# to doc_qa; its return value is shown in a single output textbox.
demo = gr.Interface(
    title='QA Chatbot',
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document.",
    fn=doc_qa,
    inputs=[
        gr.Textbox(
            label="Input Query",
            placeholder="Type your question here...",
            lines=2,
        ),
        # Drag and drop file upload, restricted to a single .pdf file
        gr.File(
            label="Upload PDF File",
            type="filepath",
            file_types=['.pdf'],
            file_count="single",
        ),
    ],
    outputs=gr.Textbox(label='Output'),
    allow_flagging='never',
)

demo.launch()