In [5]:
import contextlib
import html
import io
import os
import time

import pandas as pd
from tqdm import tqdm
# from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.document_loaders import PyPDFLoader
# from langchain.chains import LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEndpoint


In [6]:
# llm = HuggingFaceEndpoint(
#     repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
#     max_length=128,
#     temperature=0.5,
#     huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],  # never commit a real token
# )
# llm.invoke("Tell me a joke")

In [7]:
class RAGPDFBot:
    """Question answering over a PDF, optionally grounded with retrieved context.

    Expected call order: ``load_model`` -> ``build_vectordb`` -> ``retrieval``
    -> ``inference``. ``retrieval`` prepares ``self.prompt``; ``inference``
    pipes that prompt into the model and returns the model's response.

    Attributes:
        file_path: Path of the PDF that ``build_vectordb`` indexes.
        user_input: The most recent question passed to ``retrieval``.
        sec_id: Hugging Face API token handed to the endpoint (may be ``None``).
        repo_id: Hugging Face model repository queried by ``load_model``.
        context_verbosity: When true, retrieval/inference print debug details.
        index: Vector index built by ``build_vectordb`` (``None`` until then).
        llm: Endpoint client created by ``load_model`` (``None`` until then).
        prompt: Prompt template prepared by ``retrieval`` (``None`` until then).
    """

    # Kept as the default so ``RAGPDFBot()`` keeps indexing the same document
    # as before; pass ``file_path=...`` to point at a different PDF.
    DEFAULT_PDF_PATH = "C:/Users/chirsh/OneDrive - Capgemini/Desktop/Python/DocumentQnAChatBot/ChatGPTEBook.pdf"
    DEFAULT_REPO_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
    # Environment variable consulted for the API token when none is passed in.
    TOKEN_ENV_VAR = "HUGGINGFACEHUB_API_TOKEN"

    # Prompt used when RAG is switched off: the bare question only.
    _PLAIN_TEMPLATE = (
        "Question: {question}\n"
        "Answer: This is the response:\n"
    )
    # Prompt used when retrieved context is injected ahead of the question.
    _RAG_TEMPLATE = (
        "Don't just repeat the following context, use it in combination with "
        "your knowledge to improve your answer to the question: {context}\n"
        "Question: {question}\n"
    )

    def __init__(self, file_path=None, api_token=None, repo_id=None):
        """Create an unloaded bot.

        Args:
            file_path: PDF to index; defaults to ``DEFAULT_PDF_PATH``.
            api_token: Hugging Face API token. When omitted, the value of the
                ``HUGGINGFACEHUB_API_TOKEN`` environment variable is used so
                that no secret has to live in the notebook.
            repo_id: Model repository id; defaults to ``DEFAULT_REPO_ID``.
        """
        self.file_path = file_path or self.DEFAULT_PDF_PATH
        self.user_input = ""
        self.sec_id = api_token or os.environ.get(self.TOKEN_ENV_VAR)
        self.repo_id = repo_id or self.DEFAULT_REPO_ID
        # Initialised here so that calling methods out of order fails with a
        # clear message instead of an AttributeError.
        self.context_verbosity = False
        self.index = None
        self.llm = None
        self.prompt = None

    def build_vectordb(self, chunk_size, overlap):
        """Load ``self.file_path``, split it into chunks and build the vector index.

        Args:
            chunk_size: Passed to the text splitter as ``chunk_size``.
            overlap: Passed to the text splitter as ``chunk_overlap``.
        """
        loader = PyPDFLoader(self.file_path)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
        index_creator = VectorstoreIndexCreator(
            embedding=HuggingFaceEmbeddings(),
            text_splitter=text_splitter,
        )
        self.index = index_creator.from_loaders([loader])

    def load_model(self, n_threads, max_tokens, repeat_penalty, n_batch, top_k, temp):
        """Create the Hugging Face endpoint client used by ``inference``.

        Args:
            n_threads: Accepted for backward compatibility; not forwarded to
                the endpoint.
            max_tokens: Forwarded as ``max_new_tokens``.
            repeat_penalty: Forwarded as ``repetition_penalty``.
            n_batch: Accepted for backward compatibility; not forwarded to
                the endpoint.
            top_k: Forwarded as the sampling ``top_k``.
            temp: Forwarded as ``temperature``.
        """
        callbacks = [StreamingStdOutCallbackHandler()]

        # The generation settings come from the caller rather than being
        # fixed literals, so the arguments of this method actually take effect.
        self.llm = HuggingFaceEndpoint(
            repo_id=self.repo_id,
            max_new_tokens=max_tokens,
            temperature=temp,
            top_k=top_k,
            repetition_penalty=repeat_penalty,
            huggingfacehub_api_token=self.sec_id,
            callbacks=callbacks,
        )

    def retrieval(self, user_input, top_k=1, context_verbosity=False, rag_off=False):
        """Store the question and prepare ``self.prompt`` for ``inference``.

        Args:
            user_input: The user's question.
            top_k: Number of similar chunks to fetch from the vector store.
            context_verbosity: When true, print the retrieved context (and,
                later in ``inference``, the prompt).
            rag_off: When true, skip retrieval and build a question-only prompt.

        Raises:
            RuntimeError: If RAG is on and ``build_vectordb`` has not been run.
        """
        self.user_input = user_input
        self.context_verbosity = context_verbosity

        if rag_off:
            # No context is needed, so the vector store is not queried at all.
            self.prompt = PromptTemplate(template=self._PLAIN_TEMPLATE, input_variables=["question"])
            return

        if self.index is None:
            raise RuntimeError("Vector index not built; call build_vectordb() before retrieval().")

        result = self.index.vectorstore.similarity_search(self.user_input, k=top_k)
        context = "\n".join(document.page_content for document in result)

        if self.context_verbosity:
            print("Retrieving information related to your question...")
            print(f"Found this content which is most similar to your question:{context}")

        # The context is bound now; only ``question`` remains to be filled in.
        self.prompt = PromptTemplate(
            template=self._RAG_TEMPLATE,
            input_variables=["context", "question"],
        ).partial(context=context)

    def inference(self):
        """Run the prepared prompt through the model and return its response.

        Raises:
            RuntimeError: If ``load_model`` or ``retrieval`` has not been run.
        """
        if self.llm is None:
            raise RuntimeError("Model not loaded; call load_model() before inference().")
        if self.prompt is None:
            raise RuntimeError("No prompt prepared; call retrieval() before inference().")

        if self.context_verbosity:
            print(f"Your Query: {self.prompt}")

        llm_chain = self.prompt | self.llm
        print("Processing the information...\n")
        response = llm_chain.invoke({"question": self.user_input})

        return response

In [8]:
import ipywidgets as widgets
from IPython.display import display, HTML

# Output area that the click handler renders into
output = widgets.Output()

# Tracks whether the model and vector index have been set up yet
model_loaded = False

# Single bot instance shared by every button click
bot = RAGPDFBot()

def process_inputs(b):
    """Handle a click on the Submit button: answer the query and render it.

    On the first successful call the model is loaded and the vector index is
    built; later calls reuse them. The answer is rendered as a styled HTML
    bubble inside the shared ``output`` widget.

    Args:
        b: The button instance supplied by ``on_click``; unused.
    """
    global model_loaded

    with output:
        output.clear_output()
        # Suppress stdout/stderr chatter produced while loading and generating
        sink = io.StringIO()
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):

            # Gather values from the widgets
            query = query_text.value
            top_k = 2
            chunk_size = 500
            overlap = 50
            threads = 64
            max_tokens = 50
            rag_off = rag_off_checkbox.value
            temp = 0.7

            # Nothing to ask: avoid a pointless model call.
            if not query.strip():
                display(HTML("<i>Please enter a query.</i>"))
                return

            if not model_loaded:
                print("loading model due incorporate new parameters")
                bot.load_model(n_threads=threads, max_tokens=max_tokens, repeat_penalty=1.50, n_batch=threads, top_k=top_k, temp=temp)
                #build the vector database
                print("rebuilding vector DB due to changing dataset, overlap, or chunk")
                bot.build_vectordb(chunk_size = chunk_size, overlap = overlap)
                # Only mark setup as done once BOTH steps succeeded; otherwise a
                # failed index build would never be retried on the next click.
                model_loaded = True

            bot.retrieval(user_input = query, rag_off = rag_off)
            response = bot.inference()

            # Model output is untrusted text: escape it before embedding in HTML.
            safe_response = html.escape(str(response))

            styled_response = f"""
            <div style="
                background-color: lightblue;
                border-radius: 15px;
                padding: 10px;
                font-family: Arial, sans-serif;
                color: black;
                max-width: 600px;
                word-wrap: break-word;
                margin: 10px;
                font-size: 14px;">
                {safe_response}
            </div>
            """
            display(HTML(styled_response))

def create_chat_interface():
    """Build the chat widgets, wire the Submit button and display the UI.

    Publishes ``query_text`` and ``rag_off_checkbox`` as module globals so the
    click handler can read their current values.
    """
    global query_text, rag_off_checkbox

    # Toggle that disables retrieval and queries the raw model only
    rag_off_checkbox = widgets.Checkbox(
        value=False,
        description='RAG OFF?',
        disabled=False,
        indent=False,
        tooltip='Turns off RAG and Performs Inference with Raw Model and Prompt Only',
    )

    # Free-text box for the user's question
    query_text = widgets.Text(
        placeholder='Type your query here',
        description='Query:',
        disabled=False,
        layout=widgets.Layout(width='400px', height='200px'),
    )

    # Button that triggers processing of the current query
    send_btn = widgets.Button(description="Submit")
    send_btn.on_click(process_inputs)

    # Left: options; right: query box stacked above the button
    options_box = widgets.VBox([rag_off_checkbox])
    query_box = widgets.VBox([query_text, send_btn])
    root = widgets.HBox([options_box, query_box])

    # Render the controls followed by the shared output area
    display(root, output)

# Build the widgets and display the chat UI as this cell's output.
create_chat_interface()

HBox(children=(VBox(children=(Checkbox(value=False, description='RAG OFF?', indent=False, tooltip='Turns off R…

Output()