In [2]:
! pip install -q -U pypdf faiss-cpu
! pip install -q -U InstructorEmbedding
! pip install huggingface_hub -q
! pip install gradio -q
! pip install langchain==0.1.2 
! pip install sentence_transformers==2.2.2





In [29]:
import warnings
warnings.filterwarnings("ignore")

In [30]:
import os
import glob
import textwrap
import time

In [31]:
import langchain

In [32]:
from langchain.document_loaders import PyPDFLoader

In [33]:
from langchain.document_loaders import DirectoryLoader

In [34]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [35]:
from langchain import PromptTemplate, LLMChain

In [36]:
from langchain.llms import HuggingFacePipeline

In [37]:
from InstructorEmbedding import INSTRUCTOR

In [38]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

In [39]:
from langchain.chains import RetrievalQA

In [40]:
import torch

In [41]:
from langchain.vectorstores import FAISS

In [42]:
class CFG:
    """Single place for every tunable value read by the cells below."""

    # --- LLM: repo id plus the generation settings sent as model_kwargs ---
    model_name = 'mistralai/Mistral-7B-Instruct-v0.1'
    temperature = 0.5
    top_p = 0.95
    repetition_penalty = 1.15
    do_sample = True
    max_new_tokens = 400
    num_return_sequences = 1

    # --- Text splitting: chunk_size / chunk_overlap of the text splitter ---
    split_chunk_size = 800
    split_overlap = 0

    # --- Embeddings: model name handed to the embeddings wrapper ---
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'

    # --- Retrieval: value of "k" in the retriever's search_kwargs ---
    k = 3

    # --- Paths: PDF directory to load, and FAISS index folder to reload ---
    PDFs_path = './Python/'
    Embeddings_path = './faiss_index_py'

In [43]:
import os
from getpass import getpass

# Credentials must not be written into the notebook source. Reuse a token that
# is already exported in the environment (instead of overwriting it with an
# empty string); only when none is set, ask for it interactively without echo.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

In [44]:
from langchain.llms import HuggingFaceHub

# LLM wrapper for the repo named in CFG; every generation setting is taken
# from CFG and forwarded unchanged through model_kwargs.
llm = HuggingFaceHub(
    repo_id=CFG.model_name,
    model_kwargs=dict(
        max_new_tokens=CFG.max_new_tokens,
        temperature=CFG.temperature,
        top_p=CFG.top_p,
        repetition_penalty=CFG.repetition_penalty,
        do_sample=CFG.do_sample,
        num_return_sequences=CFG.num_return_sequences,
    ),
)

In [45]:
# Load the PDFs matched by the glob under CFG.PDFs_path, one PyPDFLoader per
# file, with a progress bar and multithreaded loading enabled.
loader = DirectoryLoader(
    CFG.PDFs_path,
    loader_cls=PyPDFLoader,
    glob="./*.pdf",
    use_multithreading=True,
    show_progress=True,
)
documents = loader.load()

100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.74s/it]


In [46]:
# Split the loaded documents into chunks; size and overlap come from CFG.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CFG.split_overlap,
    chunk_size=CFG.split_chunk_size,
)
texts = text_splitter.split_documents(documents)

In [47]:
# Embedding model, configured to run on the CPU.
embeddings = HuggingFaceInstructEmbeddings(
    model_name=CFG.embeddings_model_repo,
    model_kwargs={"device": "cpu"},
)

# Embed every chunk and build the FAISS index from them.
vectordb = FAISS.from_documents(
    documents=texts,
    embedding=embeddings,
)

# Persist to the same configured folder that FAISS.load_local reads from, so
# the save and load locations cannot drift apart.
vectordb.save_local(CFG.Embeddings_path)

load INSTRUCTOR_Transformer
max_seq_length  512



KeyboardInterrupt



In [None]:
# Re-create the embedding model (CPU) and reload the FAISS index saved under
# CFG.Embeddings_path; this cell does not depend on the index-building cell.
embeddings = HuggingFaceInstructEmbeddings(
    model_kwargs=dict(device="cpu"),
    model_name=CFG.embeddings_model_repo,
)
vectordb = FAISS.load_local(CFG.Embeddings_path, embeddings)

In [48]:
# Instruction prompt wrapped in [INST] ... [/INST] tags. It is assembled from
# one string per line so that the leading newline and the trailing space after
# "[INST]" are explicit; the resulting text is unchanged.
prompt_template = (
    "\n"
    "<s>[INST] \n"
    "Don't try to make up an answer, if you don't know just say that you don't know.\n"
    "Answer in the same language the question was asked.\n"
    "Answer the code in python language.\n"
    "Use only the following pieces of context to answer the question at the end.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:[/INST]"
)

# {context} and {question} are the two placeholders filled in at query time.
PROMPT = PromptTemplate(
    input_variables=["question", "context"],
    template=prompt_template,
)

In [49]:
# Plain prompt + LLM chain. NOTE(review): no later cell in this notebook
# references llm_chain; the retrieval chain is built separately.
llm_chain = LLMChain(llm=llm, prompt=PROMPT)

In [50]:
# search_type is an argument of as_retriever itself; search_kwargs carries only
# the options of the search call (here the number of chunks to return).
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": CFG.k},
)

# "stuff" chain: the retrieved chunks are inserted into PROMPT's {context}.
# The source documents are returned alongside the answer so they can be cited.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
    verbose=False,
)

In [51]:
def wrap_text_preserve_newlines(text, width=700):
    """Wrap each line of ``text`` to ``width`` columns, keeping its line breaks.

    Args:
        text: String to wrap; it is split on '\\n' and each piece is wrapped
            on its own with ``textwrap.fill``.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The wrapped pieces joined back together with '\\n'.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )


def process_llm_response(llm_response):
    """Format a retrieval-QA response as answer text plus a list of sources.

    Args:
        llm_response: Mapping with a 'result' string and a 'source_documents'
            iterable; each document exposes ``metadata['source']`` (a file
            path) and ``metadata['page']``.

    Returns:
        The wrapped answer, followed by a "Sources:" section with one
        "<file name without extension> - page: <page>" entry per document.
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])

    source_lines = []
    for doc in llm_response['source_documents']:
        # basename/splitext remove the directory and the actual extension,
        # rather than blindly cutting the last four characters of the name.
        file_stem = os.path.splitext(os.path.basename(doc.metadata['source']))[0]
        source_lines.append(f"{file_stem} - page: {doc.metadata['page']}")

    return answer + '\n\nSources: \n' + ' \n'.join(source_lines)

In [52]:
def llm_ans(query):
    """Run ``query`` through the QA chain and return the formatted answer.

    Args:
        query: Question passed to ``qa_chain``.

    Returns:
        The stripped output of ``process_llm_response`` followed by a
        "Time elapsed: N s" footer, where N is the wall-clock time spent on
        the chain call and the formatting, rounded to whole seconds.
    """
    started_at = time.time()
    formatted = process_llm_response(qa_chain(query))
    elapsed_seconds = int(round(time.time() - started_at, 0))
    return f'{formatted.strip()}\n\nTime elapsed: {elapsed_seconds} s'

In [53]:
def extract_text_after_inst(input_string):
    """Return the text that follows the first "[/INST]" marker.

    Args:
        input_string: Text that may contain the "[/INST]" marker.

    Returns:
        Everything after the first marker with surrounding whitespace
        stripped, or an empty string when the marker does not occur.
    """
    _, marker, remainder = input_string.partition("[/INST]")
    # partition yields an empty separator when the marker is absent.
    if not marker:
        return ""
    return remainder.strip()

In [1]:
import gradio as gr

def predict(message, history):
    """Chat callback: answer ``message`` with the retrieval-QA pipeline.

    Args:
        message: The user's latest chat message.
        history: Earlier chat turns supplied by the chat UI; not used.

    Returns:
        The part of the pipeline output after the "[/INST]" marker. If the
        output has no marker (or nothing follows it), the whole output is
        returned so the chat never shows an empty reply.
    """
    output = llm_ans(message)
    answer = extract_text_after_inst(output)
    # extract_text_after_inst returns "" when the marker is missing; fall back
    # to the unfiltered output rather than dropping the answer entirely.
    return answer or output


# Chat UI: a ChatInterface driven by predict(), declared inside a Blocks app.
# The title reads CFG.model_name, so the configuration cell must have been run
# in the current kernel before this cell.
with gr.Blocks() as demo:
    gr.ChatInterface(
        title=f' Open-Source LLM ({CFG.model_name}) for Python Question Answering',
        fn=predict,
    )

demo.launch()


NameError: name 'CFG' is not defined