In [4]:
import requests
import contextlib
import pandas as pd
import time 
import io

from tqdm import tqdm
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.document_loaders import PyPDFLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import HuggingFaceEmbeddings

In [17]:
class RAGPDFBot:
    """Question answering over a PDF with a local GPT4All model and optional retrieval.

    Call order that the methods rely on:
      * ``get_model`` must run before ``load_model`` (it sets ``model_path``).
      * ``build_vectordb`` must run before ``retrieval`` unless ``rag_off=True``.
      * ``load_model`` and ``retrieval`` must both run before ``inference``.

    Supported model names are the keys of ``MODEL_FILES``:
    'Falcon', 'Snoozy 13B', 'Mistral 7B', 'Nous Hermes Llama 2 13B'.
    """

    # Directory that holds the GGUF files. Machine-specific: override on the class
    # or on an instance when the models live somewhere else.
    MODEL_DIR = "E:/Programming/Machine Learning/LLM/GPT4All_llms"

    # Display name -> model file name inside MODEL_DIR.
    MODEL_FILES = {
        "Falcon": "gpt4all-falcon-q4_0.gguf",
        "Snoozy 13B": "gpt4all-13b-snoozy-q4_0.gguf",
        "Mistral 7B": "mistral-7b-openorca.Q4_0.gguf",
        "Nous Hermes Llama 2 13B": "nous-hermes-llama2-13b.Q4_0.gguf",
    }

    # PDF indexed by build_vectordb() when no other path has been supplied.
    DEFAULT_PDF_PATH = "C:/Users/win10/Desktop/Materials/Deep Learning/Deep Learning Notes.pdf"

    def __init__(self):
        self.model_path=""
        self.file_path=""
        self.user_input=""
        self.model=""
        # Everything below is filled in by later calls; initialising it here lets the
        # methods report a clear error instead of an AttributeError when used too early.
        self.context_verbosity = False
        self.index = None   # set by build_vectordb()
        self.llm = None     # set by load_model()
        self.prompt = None  # set by retrieval()

    def get_model(self,model,chunk_size:int=10000):
        """Select the model to use and resolve the path of its file.

        Args:
            model: One of the keys of ``MODEL_FILES``.
            chunk_size: Unused; kept so existing callers keep working.

        Raises:
            ValueError: If ``model`` is not a known model name. The previously
                selected model and path are left untouched in that case.
        """
        if model not in self.MODEL_FILES:
            known = ", ".join(repr(name) for name in self.MODEL_FILES)
            raise ValueError(f"Unknown model {model!r}; expected one of: {known}")
        self.model = model
        # Plain string join (not pathlib) so the path keeps its forward slashes.
        self.model_path = f"{self.MODEL_DIR}/{self.MODEL_FILES[model]}"

    def build_vectordb(self,chunk_size,overlap,file_path=None):
        """Load a PDF, split it into chunks and build the vector index in ``self.index``.

        Args:
            chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.
            file_path: PDF to index. When omitted, ``self.file_path`` is used if it
                is set, otherwise ``DEFAULT_PDF_PATH``.
        """
        pdf_path = file_path or self.file_path or self.DEFAULT_PDF_PATH
        loader = PyPDFLoader(pdf_path)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,chunk_overlap=overlap)
        self.index = VectorstoreIndexCreator(
            embedding=HuggingFaceEmbeddings(),
            text_splitter=text_splitter,
        ).from_loaders([loader])
        # Remember the source only once indexing succeeded.
        self.file_path = pdf_path

    def load_model(self,n_threads,max_tokens,repeat_penalty,n_batch,top_k,temp):
        """Instantiate the GPT4All model at ``self.model_path`` and store it in ``self.llm``.

        Args:
            n_threads: Forwarded to ``GPT4All`` as ``n_threads``.
            max_tokens: Forwarded to ``GPT4All`` as ``n_predict``.
            repeat_penalty: Forwarded to ``GPT4All`` as ``repeat_penalty``.
            n_batch: Forwarded to ``GPT4All`` as ``n_batch``.
            top_k: Forwarded to ``GPT4All`` as ``top_k``.
            temp: Forwarded to ``GPT4All`` as ``temp``.

        Raises:
            RuntimeError: If no model has been selected with ``get_model`` yet.
        """
        if not self.model_path:
            raise RuntimeError("No model selected; call get_model() before load_model().")

        callbacks = [StreamingStdOutCallbackHandler()]

        self.llm = GPT4All(model=self.model_path,callbacks=callbacks,verbose=False,
                           n_threads=n_threads,n_predict=max_tokens,repeat_penalty=repeat_penalty,
                           n_batch=n_batch,top_k=top_k,temp=temp)

    def retrieval(self,user_input,top_k=1,context_verbosity = False,rag_off=False):
        """Store the question and prepare ``self.prompt`` for ``inference``.

        Args:
            user_input: The user's question.
            top_k: Number of chunks requested from the vector store (``k``).
            context_verbosity: When true, print the retrieved context here and the
                prompt in ``inference``.
            rag_off: When true, no vector search is done and the prompt contains
                only the question.

        Raises:
            RuntimeError: If retrieval is requested before ``build_vectordb`` ran.
        """
        self.user_input = user_input
        self.context_verbosity = context_verbosity

        if rag_off:
            # No context is needed, so the vector store is not touched at all; this
            # also lets the raw-model mode work before any index has been built.
            template = """Question: {question}
            Answer: This is the response:
            """
            self.prompt = PromptTemplate(template=template,input_variables=["question"])
            return

        if self.index is None:
            raise RuntimeError("Vector index has not been built; call build_vectordb() before retrieval().")

        result = self.index.vectorstore.similarity_search(self.user_input,k=top_k)
        context = "\n".join(document.page_content for document in result)

        if self.context_verbosity:
            print("Retrieving information related to your question...")
            print(f"Found this content which is most similar to your question:{context}")

        # NOTE(review): the prompt text below contains typos ("Dont't", "conbination");
        # it is left unchanged on purpose so the text sent to the model stays the same.
        template="""Dont't just repeat  the following context, use it in conbination with your knowledge to improve your answer to the question: {context}
            Question: {question}
            """
        # The context is bound up front, leaving "question" as the only open variable.
        self.prompt = PromptTemplate(template=template,input_variables=["context","question"]).partial(context=context)

    def inference(self):
        """Run the prepared prompt through the loaded model and return its response.

        Raises:
            RuntimeError: If ``retrieval`` or ``load_model`` has not been called yet.
        """
        if self.prompt is None:
            raise RuntimeError("No prompt prepared; call retrieval() before inference().")
        if self.llm is None:
            raise RuntimeError("No model loaded; call load_model() before inference().")

        if self.context_verbosity:
            print(f"Your Query: {self.prompt}")

        llm_chain = LLMChain(prompt=self.prompt,llm=self.llm)
        print("Processing the information...\n")
        response = llm_chain.run(self.user_input)

        return response

In [18]:
import ipywidgets as widgets
from IPython.display import display, HTML

# One bot instance shared by every click on the Submit button.
bot = RAGPDFBot()

# Settings used on the previous submit. None means "nothing applied yet", so the
# first submit always differs and triggers both the model load and the index build.
previous_threads = previous_max_tokens = previous_top_k = previous_temp = None
previous_dataset = previous_chunk_size = previous_overlap = None

# Output area that process_inputs() renders into.
output = widgets.Output()

def process_inputs(b):
    """Handle a click on Submit: apply the widget settings, run the query, show the answer.

    The model is reloaded when the selected model or any of threads / max tokens /
    top k / temperature changed since the previous submit. The vector DB is rebuilt
    when dataset, chunk size or overlap changed. The answer is rendered as an HTML
    bubble inside the shared ``output`` widget.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; not used.
    """
    import html  # local import: only needed to escape the model's reply below

    global previous_threads, previous_max_tokens, previous_top_k, previous_dataset, previous_chunk_size, previous_overlap, previous_temp

    with output:
        output.clear_output()
        # Swallow everything the bot prints (status messages, streamed tokens) so
        # that only the styled answer ends up in the output widget.
        sink = io.StringIO()
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):

            # Gather values from the widgets
            model = model_dropdown.value
            query = query_text.value
            top_k = top_k_slider.value
            chunk_size = chunk_size_input.value
            overlap = overlap_input.value
            dataset = dataset_dropdown.value
            threads = threads_slider.value
            max_tokens = max_token_input.value
            rag_off = rag_off_checkbox.value
            temp = temp_slider.value

            # Compare BEFORE get_model() overwrites bot.model: picking another model
            # must reload the LLM even when no other setting changed.
            model_changed = model != bot.model
            bot.get_model(model = model)

            model_settings_changed = (
                threads != previous_threads
                or max_tokens != previous_max_tokens
                or top_k != previous_top_k
                or temp != previous_temp
            )
            if model_changed or model_settings_changed:
                print("loading model due incorporate new parameters")
                # NOTE(review): n_batch is tied to the thread count and the Top K
                # slider also drives the model's sampling top_k - confirm both are intended.
                bot.load_model(n_threads=threads, max_tokens=max_tokens, repeat_penalty=1.50, n_batch=threads, top_k=top_k, temp=temp)
                # Update previous values
                previous_threads = threads
                previous_max_tokens = max_tokens
                previous_top_k = top_k
                previous_temp = temp

            # NOTE(review): `dataset` only triggers a rebuild here; it is not passed
            # to build_vectordb(), so every choice indexes the same document.
            if dataset != previous_dataset or chunk_size != previous_chunk_size or overlap != previous_overlap:
                print("rebuilding vector DB due to changing dataset, overlap, or chunk")
                bot.build_vectordb(chunk_size = chunk_size, overlap = overlap)
                previous_dataset = dataset
                previous_chunk_size = chunk_size
                previous_overlap = overlap

            # Hand the slider value to the vector search as well; without it the
            # search always used its default of a single chunk.
            bot.retrieval(user_input = query, top_k = top_k, rag_off = rag_off)
            response = bot.inference()

            # The reply is free text from the model: escape it so characters such as
            # "<" or "&" are shown literally instead of being parsed as markup.
            safe_response = html.escape(str(response))

            styled_response = f"""
            <div style="
                background-color: lightblue;
                border-radius: 15px;
                padding: 10px;
                font-family: Arial, sans-serif;
                color: black;
                max-width: 600px;
                word-wrap: break-word;
                margin: 10px;
                font-size: 14px;">
                {safe_response}
            </div>
            """
            display(HTML(styled_response))

def create_chat_interface():
    """Create the chat widgets, publish them as module globals and display the UI.

    The settings widgets are stacked in a left column; the query box and the Submit
    button form the right column. Clicking Submit calls ``process_inputs``, which
    reads the widgets through the globals declared here. The two columns and the
    shared ``output`` widget are displayed at the end.
    """
    global model_dropdown, query_text, top_k_slider, rag_off_checkbox
    global chunk_size_input, overlap_input, dataset_dropdown
    global threads_slider, max_token_input, repeat_penalty_input, temp_slider

    # Options shared by all three sliders.
    slider_common = dict(
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
    )

    # --- model and query -------------------------------------------------
    model_dropdown = widgets.Dropdown(
        options=['Falcon','Snoozy 13B','Mistral 7B','Nous Hermes Llama 2 13B'],
        description='Model:',
        disabled=False,
    )

    query_text = widgets.Text(
        placeholder='Type your query here',
        description='Query:',
        disabled=False,
        layout=widgets.Layout(width='400px', height='400px'),
    )

    # --- sliders and toggle ----------------------------------------------
    top_k_slider = widgets.IntSlider(
        value=2, min=1, max=4, step=1,
        description='Top K:',
        readout_format='d',
        **slider_common,
    )

    temp_slider = widgets.FloatSlider(
        value=0.7, min=0.1, max=1.4, step=0.1,
        description='Temperature:',
        readout_format='.1f',
        **slider_common,
    )

    rag_off_checkbox = widgets.Checkbox(
        value=False,
        description='RAG OFF?',
        disabled=False,
        indent=False,  # flush left, no label indent
        tooltip='Turns off RAG and Performs Inference with Raw Model and Prompt Only',
    )

    # --- vector DB settings ----------------------------------------------
    chunk_size_input = widgets.BoundedIntText(
        value=500, min=5, max=5000, step=1,
        description='Chunk Size:',
        disabled=False,
    )

    overlap_input = widgets.BoundedIntText(
        value=50, min=0, max=1000, step=1,
        description='Overlap:',
        disabled=False,
    )

    dataset_dropdown = widgets.Dropdown(
        options=['robot maintenance', 'basketball coach', 'physics professor', 'grocery cashier'],
        description='Dataset:',
        disabled=False,
    )

    # --- generation settings ---------------------------------------------
    threads_slider = widgets.IntSlider(
        value=64, min=2, max=200, step=1,
        description='Threads:',
        readout_format='d',
        **slider_common,
    )

    max_token_input = widgets.BoundedIntText(
        value=50, min=5, max=500, step=5,
        description='Max Tokens:',
        disabled=False,
    )

    # --- layout ----------------------------------------------------------
    settings_panel = widgets.VBox([
        model_dropdown,
        top_k_slider,
        temp_slider,
        rag_off_checkbox,
        chunk_size_input,
        overlap_input,
        dataset_dropdown,
        threads_slider,
        max_token_input,
    ])

    submit = widgets.Button(description="Submit")
    submit.on_click(process_inputs)

    query_panel = widgets.VBox([query_text, submit])

    # Settings on the left, query and button on the right, answers underneath.
    display(widgets.HBox([settings_panel, query_panel]), output)

# Build the widgets and display the chat UI as this cell's output.
create_chat_interface()

HBox(children=(VBox(children=(Dropdown(description='Model:', options=('Falcon', 'More Models Coming Soon!'), v…

Output()