In [1]:
#Libraries

# !pip install pandoc langchain gradio chromadb tiktoken clean-text
# !pip install "unstructured[local-inference]"
# !pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
# !pip install layoutparser pypdf unidecode

import os
import glob
import pandoc
from io import StringIO
import gradio as gr
import re
import time

from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.document_loaders import UnstructuredEPubLoader
from langchain.document_loaders import PyPDFLoader


from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


from html.parser import HTMLParser
import chromadb    

  from .autonotebook import tqdm as notebook_tqdm


In [46]:
# File extensions (without the leading dot) that are searched for in the books folder.
extensions = ["txt", "md", "pdf", "doc", "docx"]


In [3]:
from auto_gptq.modeling import LlamaGPTQForCausalLM
from langchain import HuggingFacePipeline
from transformers import LlamaForCausalLM

# Directory holding the quantized checkpoint, and the base name of the weights file.
model_path = "../models/gpt4-x-vicuna-13B-GPTQ"
model_name = "GPT4-x-Vicuna-13B-GPTQ-4bit-128g-compat-act-order"

# Load the GPTQ-quantized LLaMA weights (safetensors format) onto cuda:0 with Triton enabled.
model = LlamaGPTQForCausalLM.from_quantized(
    model_path,
    model_basename=model_name,
    device="cuda:0",
    use_triton=True,
    use_safetensors=True,
)

use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
The safetensors archive passed at ../models/gpt4-x-vicuna-13B-GPTQ/GPT4-x-Vicuna-13B-GPTQ-4bit-128g-compat-act-order.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
100%|██████████| 12/12 [00:29<00:00,  2.48s/it]


In [4]:
from transformers import AutoTokenizer, pipeline
# Load the tokenizer from the same directory (`model_path`) the model was loaded from.
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [5]:
from langchain.prompts import PromptTemplate
# Prompt applied to one document portion at a time: it asks whether any of the
# text in {context} is relevant to {question}. Laid out in the
# "### Instruction / ### Input / ### Response" format.
QUESTION_PROMPT_TEMPLATE = """### Instruction:
Use the following portion of a long document to see if any of the text is relevant to answer the question.

Question: {question}

### Input:
{context}

### Response:"""

# Template object exposing the two placeholders used above.
QUESTION_PROMPT = PromptTemplate(template=QUESTION_PROMPT_TEMPLATE, input_variables=["question","context"])

In [6]:
# Prompt that turns the extracted parts ({summaries}) and the {question} into a
# final answer, telling the model to admit when it does not know.
# NOTE(review): "Question:{question}" has no space after the colon, unlike
# QUESTION_PROMPT_TEMPLATE - confirm whether that is intentional.
COMBINE_PROMPT_TEMPLATE = """### Instruction:
Given the following extracted parts of a long document and a question, create a final answer. 
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

Question:{question}

### Input:
{summaries}

### Response:"""
# Template object exposing the two placeholders used above.
COMBINE_PROMPT = PromptTemplate(template=COMBINE_PROMPT_TEMPLATE, input_variables=["question","summaries"])

In [27]:
from torch.cuda import current_device
# Text-generation pipeline around the loaded model and tokenizer, placed on the
# currently selected CUDA device and limited to 512 newly generated tokens.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=current_device(),
    max_new_tokens=512,
    temperature=0,
)

# LangChain wrapper so the pipeline can be used wherever an LLM is expected.
local_llm = HuggingFacePipeline(pipeline=pipe)

The model 'LlamaGPTQForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

In [59]:
# Document names offered to the UI. examinelibrary() empties this list on every
# scan; nothing in that function fills it again.
all_docs = []

def examinelibrary():
    """Find and load every supported document in ``../books``.

    For each entry in the module-level ``extensions`` list, every file matching
    ``../books/*.<extension>`` is counted and loaded with the loader that fits
    its extension.

    Returns:
        tuple: ``(library, counter)``. ``library`` is a dict mapping each
        loaded file path to the value returned by its loader's ``load()``;
        ``counter`` is the number of files matched across all extensions.
    """
    all_docs.clear()

    library = {}
    counter = 0
    for extension in extensions:
        # A single glob pass per extension serves both counting and loading.
        for file_name in glob.glob("../books/*." + extension):
            counter += 1
            if extension in ['txt', 'md']:
                loader = TextLoader(file_name)
            elif extension in ['doc', 'docx']:
                loader = UnstructuredWordDocumentLoader(file_name)
            elif extension == 'pdf':
                loader = PyPDFLoader(file_name)
            elif extension == 'epub':
                loader = UnstructuredEPubLoader(file_name)
            else:
                # No loader for this extension: the file is counted but there
                # is nothing to load it with, so skip it.
                continue
            # Load and store inside the per-file loop so that every matched
            # file ends up in the library, and so that no loader or file name
            # is carried over when an extension matches no files.
            library[file_name] = loader.load()

    return (library, counter)

In [9]:
# Split the texts in the library into chunks
def processtext(library):
    """Split every document held in ``library`` into chunks.

    Args:
        library: mapping of file name to the documents loaded from that file.

    Returns:
        list: the chunks of all files, concatenated in the iteration order of
        ``library``.
    """
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = []
    for documents in library.values():
        chunks.extend(splitter.split_documents(documents))
    return chunks

In [43]:
# Produce a librarian (database of embedded texts)
def prodlibrarian():
    """Scan the library, chunk it, and embed the chunks into a Chroma store.

    Returns:
        tuple: ``(db, counter)`` - the Chroma vector store built from the
        chunks, and the file count reported by ``examinelibrary``.
    """
    library, counter = examinelibrary()
    chunks = processtext(library)
    # Embeddings are computed with the intfloat/e5-large-v2 HuggingFace model.
    embedder = HuggingFaceEmbeddings(model_name="intfloat/e5-large-v2")
    store = Chroma.from_documents(chunks, embedder)
    return (store, counter)


In [51]:
# Init on first run: build the vector store and file count once at cell
# execution. `db` is the module-level store that scan() later replaces.
db,counter = prodlibrarian()


# Call prodlibrarian and display progress
def scan(progress=gr.Progress()):
    """Rebuild the module-level ``db`` and report how many documents were found.

    Args:
        progress: Gradio progress tracker used to announce each stage.

    Returns:
        str: a summary message containing the number of documents.
    """
    global db

    # (fraction shown, message, pause in seconds) - all three stages are
    # announced, each followed by its pause, before the rebuild itself runs.
    stages = (
        (0.2, "Examining /books folder ...", 1),
        (0.4, "Counting documents ...", 1.5),
        (0.6, "Generating embeddings ...", 1.5),
    )
    for fraction, message, pause in stages:
        progress(fraction, desc=message)
        time.sleep(pause)

    # Only `db` is global; the count stays local to this call.
    db, found = prodlibrarian()
    return "Librarian found and embedded " + str(found) + " documents."

Exception ignored in: <function DuckDB.__del__ at 0x7fbecfaf96c0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/chromadb/db/duckdb.py", line 387, in __del__
    self.reset_indexes()
  File "/usr/local/lib/python3.10/dist-packages/chromadb/db/clickhouse.py", line 610, in reset_indexes
    delete_all_indexes(self._settings)
AttributeError: 'DuckDB' object has no attribute '_settings'


convert /app/books/mediations-marcus-aurelius.doc -> /tmp/tmpguv1_g7j/mediations-marcus-aurelius.docx using filter : MS Word 2007 XML
convert /app/books/mediations-marcus-aurelius.doc -> /tmp/tmpfo2d9o_o/mediations-marcus-aurelius.docx using filter : MS Word 2007 XML


Created a chunk of size 1999, which is longer than the specified 1000
Created a chunk of size 1358, which is longer than the specified 1000
Created a chunk of size 1121, which is longer than the specified 1000
Created a chunk of size 1124, which is longer than the specified 1000
Created a chunk of size 1414, which is longer than the specified 1000
Created a chunk of size 1213, which is longer than the specified 1000
Created a chunk of size 1761, which is longer than the specified 1000
Created a chunk of size 1007, which is longer than the specified 1000
Created a chunk of size 1100, which is longer than the specified 1000
Created a chunk of size 1134, which is longer than the specified 1000
Created a chunk of size 1168, which is longer than the specified 1000
Created a chunk of size 1394, which is longer than the specified 1000
Created a chunk of size 2411, which is longer than the specified 1000
Created a chunk of size 1079, which is longer than the specified 1000
Created a chunk of s

In [25]:
def check_metadata_page(docs):
    """Give every document a ``'page'`` metadata entry.

    Documents whose metadata has no ``'page'`` key get the placeholder
    ``"Not applicable"``; existing values are left untouched. The documents
    are modified in place.

    Args:
        docs: sequence of objects exposing a ``metadata`` dict.

    Returns:
        The same ``docs`` object that was passed in.
    """
    for doc in docs:
        if 'page' in doc.metadata:
            continue
        doc.metadata['page'] = "Not applicable"
    return docs

In [58]:
# Initialize language model and qa chain
# llm = OpenAI(temperature=0, openai_api_base=OPENAI_API_BASE, model="gpt4-x-vicuna-13B-GPTQ")
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from time import perf_counter

# Map-reduce "QA with sources" chain on the local model, using the two custom
# prompts; verbose output is switched on.
chain = load_qa_with_sources_chain(
    local_llm,
    chain_type="map_reduce",
    question_prompt=QUESTION_PROMPT,
    combine_prompt=COMBINE_PROMPT,
    verbose=True,
)

# Main query code.
def ask(query, search_filter=None, progress=gr.Progress()):
    """Answer ``query`` from the embedded documents and time the request.

    Args:
        query: the question to search for and to put to the chain.
        search_filter: forwarded as ``filter`` to the similarity search.
        progress: Gradio progress tracker used to announce each stage.

    Returns:
        tuple: ``(output, query_time)`` - the answer followed by the cited
        sources, and the elapsed time formatted as ``"<seconds> seconds"``.
    """
    started = perf_counter()

    progress(0.1, desc="Scanning embedded documents for matches ...")

    # Retrieve the two best-matching chunks and make sure each has a 'page' entry.
    matches = check_metadata_page(
        db.similarity_search(query, k=2, filter=search_filter)
    )

    progress(0.2, desc="Assembling request...")
    progress(0.4, desc="Appending citations and metadata ...")
    # One citation line per retrieved chunk: its source and page.
    citations = "".join(
        match.metadata['source'] + " in page: " + str(match.metadata['page']) + "\n"
        for match in matches
    )

    progress(0.5, desc="Talking to LLM for answers ...")

    # Run the chain on the question together with the retrieved chunks.
    librarianoutput = chain({"input_documents": matches, "question": query})
    answer = librarianoutput["output_text"]
    output = "Answer: \n" + answer + "\n\nI found this in: \n" + citations

    elapsed = round(perf_counter() - started, 2)
    query_time = str(elapsed) + " seconds"
    return (output, query_time)

#Gradio UI
# Layout: a header row with the rescan button, then the question box, the
# response box, the ask button, the timing box and the document checkbox group.
with gr.Blocks() as app:


    with gr.Row():
        gr.Markdown("# Welcome to your Natassistant!")
        scan_btn = gr.Button("Scan the library again.")
        
    query = gr.Textbox(label="What can I help you find?")
    output = gr.Textbox(label="Response:")
    ask_btn = gr.Button("Ask Librarian")
    performance_box = gr.Textbox(label="Time to complete query:")
    selected_docs = gr.CheckboxGroup(all_docs, label="Documents used as input for the model")

    # ask() returns two values (answer text, elapsed time), so the handler needs
    # one output component per value; with a single output component the
    # response box would receive the whole tuple.
    ask_btn.click(fn=ask, inputs=query, outputs=[output, performance_box])
    # scan() returns a single status message, shown in the response box.
    scan_btn.click(fn=scan, outputs=output)
    # gr.Markdown("*...a private library is not an ego-boosting appendage but a research tool. The library should contain as much of what you do not know ... You will accumulate more knowledge and more books as you grow older, and the growing number of unread books on the shelves will look at you menacingly. Indeed, the more you know, the larger the rows of unread books. Let us call this collection of unread books an antilibrary.* \n - Nassim Nicholas Taleb, The Black Swan")
# Queue requests with a concurrency of one, then start the server.
app.queue(concurrency_count=1).launch()

Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.




In [36]:
# Sanity check: show the metadata of the first result returned for the query
# "Machiavelli" when searching with the "mmr" search type.
db.search("Machiavelli" ,"mmr")[0].dict()['metadata']

{'source': '../books/the-prince-machiavelli.txt'}

In [14]:
# # Initialize language model and qa chain
# # llm = OpenAI(temperature=0, openai_api_base=OPENAI_API_BASE, model="gpt4-x-vicuna-13B-GPTQ")
# from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# # chain_type_kwargs = {"prompt": prompt}
# chain = load_qa_with_sources_chain(local_llm, chain_type="map_reduce", question_prompt=prompt)

# # Main query code.
# def ask(query):
#     # progress(0.1, desc="Scanning embedded documents for matches ...")
#     # time.sleep(1)
#     # progress(0.2, desc="Assembling request...")
#     # time.sleep(1.5)
#     # progress(0.4, desc="Appending citations and metadata ...")
#     # time.sleep(1.5)
#     # progress(0.5, desc="Talking to LLM for answers ...")
#     # time.sleep(1.5)
#     #generate docs which are texts relevant to the query
#     print("Similarity search!")
#     docs = db.similarity_search(query,k=2) 
#     #some muckwork to log sources
#     x = 0
#     citations = ""

#     print("Collecting relevant documents")
#     for x in range(len(docs)): 
#         citations += docs[x].metadata['source']  + "\n"
#         citations += docs[x].metadata['source'] + " in page: " + str(docs[x].metadata['page'])  + "\n"
    
#     print("Asking the LLM")
#     #calls llm with the query and relevant docs in hand and returns both the response and the sources    
#     librarianoutput = chain({"input_documents":docs, "question": query})
#     # output = "Answer: \n" + librarianoutput + "\n\nI found this in: \n" + citations
#     return (librarianoutput["output_text"])



In [15]:
# Ask the LLM wrapper how many tokens a sample passage amounts to.
local_llm.get_num_tokens("The conclusion of the document states that a distributed infrastructure has been used to train and apply large-scale language models to machine translation. The results show that increasing the amount of training data up to 2 trillion tokens results in a 5-gram language model of up to 300 billion n-grams. The technique is made efficient by batching score requests by the decoder in a server-client architecture. A simple smoothing technique suitable for distributed computation was proposed and performed as well as more sophisticated methods. Additionally, translation quality, as indicated by BLEU score, continued to improve with increasing language model size at even the largest sizes considered.")

128