In [1]:
import os
import re

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS

from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
import time


In [2]:
# Answer prompt for the RetrievalQA chain (wrapped by set_custom_prompt and passed in via
# chain_type_kwargs). It exposes exactly two placeholders: {context} and {question}.
# NOTE(review): the template text contains typos ("qeustions", "Only returns"). They are
# left as-is because this string is sent to the model verbatim and editing it may change
# the generated answers.
custom_prompt_template = """ Use the following pieces of information to answer the user's question.
If you don't know the answer, please just say that you don't know the answer, don't try to make up
an answer. 

Context : {context}
Question : {question}

The answer should consist of at least 1 sentence for short questions or 7 sentences for more detailed qeustions. Only returns the helpful and reasonable answer below and nothing else.
No need to return the question. I just want answer. Please don't show unhelpful answers.
Helpful answer:
"""

In [3]:
def set_custom_prompt(custom_prompt_template):
    """Wrap a raw template string in a ``PromptTemplate``.

    Args:
        custom_prompt_template: Template text using the ``{context}`` and
            ``{question}`` placeholders, which are declared as input variables.

    Returns:
        The constructed ``PromptTemplate`` instance.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

In [4]:
def load_llm(
    model_path="/home/sira/sira_project/meta-Llama2/llama-2-7b-chat.ggmlv3.q8_0.bin",
    n_gpu_layers=32,
    n_batch=512,
):
    """Create the local Llama 2 chat model through ``LlamaCpp``.

    All parameters default to the values that were previously hard-coded, so
    ``load_llm()`` behaves exactly as before.

    Args:
        model_path: Path to the model file handed to ``LlamaCpp``.
        n_gpu_layers: Number of layers to offload to the GPU; tune this to the
            model and the available VRAM.
        n_batch: Batch size; should be between 1 and ``n_ctx`` and sized with
            the GPU's VRAM in mind.

    Returns:
        A configured ``LlamaCpp`` instance that streams generated tokens to
        stdout.
    """
    # Stream tokens to stdout as they are generated.
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    # NOTE(review): max_tokens equals n_ctx, which leaves no headroom in the
    # context window for the prompt itself - confirm this is intended.
    return LlamaCpp(
        model_path=model_path,
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        callback_manager=callback_manager,
        verbose=True,
        n_ctx=4096,
        temperature=0.1,
        max_tokens=4096,
    )

def load_embeddings():
    """Build the ``thenlper/gte-base`` HuggingFace embedding model on the CPU.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with
        ``model_kwargs={'device': 'cpu'}``.
    """
    device_options = {'device': 'cpu'}
    return HuggingFaceEmbeddings(
        model_name="thenlper/gte-base",
        model_kwargs=device_options,
    )

In [5]:
def check_duplicate(source_list):
    """Return the items of ``source_list`` without duplicates, in first-seen order.

    Args:
        source_list: Iterable of items (URL strings in this notebook).

    Returns:
        A new list holding the first occurrence of every distinct item.
    """
    items = list(source_list)
    try:
        # dicts preserve insertion order, so this de-duplicates in linear time
        # instead of scanning the result list for every item.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable items (e.g. lists): fall back to the equality-based scan.
        unique = []
        for item in items:
            if item not in unique:
                unique.append(item)
        return unique

def convert_to_website_format(urls):
    """Normalise stored document sources into browsable website URLs.

    For every entry the function:
      1. drops a trailing ``.html``;
      2. prepends ``https://`` unless the entry already starts with ``www.``,
         ``http://`` or ``https://``;
      3. cuts the entry at the first ``/index`` occurrence;
      4. keeps only the part before the first space.

    Args:
        urls: Iterable of source strings.

    Returns:
        A list of normalised URL strings, in input order.
    """
    convert_urls = []
    for url in urls:
        # Remove any '.html' at the end of the URL
        url = re.sub(r'\.html$', '', url)
        # Only add a scheme when none is present. 'https://' must be recognised
        # as well, otherwise such URLs end up as 'https://https://...'.
        if not re.match(r'(www\.|https?://)', url):
            url = 'https://' + url
        # NOTE(review): this truncates at ANY '/index' substring (e.g.
        # '/indexing'), not only at a trailing '/index' segment - confirm.
        if '/index' in url:
            url = url.split('/index')[0]
        # Keep only the leading run of non-space characters.
        match = re.match(r'^([^ ]+)', url)
        if match:
            url = match.group(1)
        convert_urls.append(url)
    return convert_urls

def regex_source(answer):
    """Extract the distinct source URLs mentioned in a chain response.

    The response is stringified and scanned for ``'source': '...'`` fragments;
    the captured values are normalised with ``convert_to_website_format`` and
    de-duplicated with ``check_duplicate``.

    Args:
        answer: Any object whose ``str()`` form contains the source entries.

    Returns:
        List of unique, normalised URLs in order of first appearance.
    """
    raw_sources = re.findall(r"'source': '(.*?)'", str(answer))
    normalised = convert_to_website_format(raw_sources)
    # Optional post-filter, currently disabled:
    #   filter_similar_url(...)
    return check_duplicate(source_list=normalised)

def filter_similar_url(urls):
    """Drop URLs that appear on a fixed exclusion list.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A new list with every URL that is not on the exclusion list, in the
        original order. Matching is by exact string equality.
    """
    urls_remove = [
        "www.omniscien.com/aboutus/company",
        "www.omniscien.com/lsev6/asr/automatic-speech-recognition-overview",
        "www.omniscien.com/lsev6/features/asr/autonomous-speech-recognition-overview",
        "www.omniscien.com/lsev6/asr",
    ]
    kept = []
    for candidate in urls:
        if candidate in urls_remove:
            continue
        kept.append(candidate)
    return kept

In [6]:
def filter_search(db_similarity, diff_val):
    """Keep the search hits whose score is within ``diff_val`` of the first hit.

    Args:
        db_similarity: Sequence of hits where index 1 of each hit is its
            numeric score (e.g. ``(document, score)`` pairs). The first hit is
            used as the reference score.
            NOTE(review): assumes the sequence is ordered best-first - confirm.
        diff_val: Maximum allowed ``score - first_score`` for a hit to be kept.

    Returns:
        A list of the retained hits in their original order; an empty list
        when ``db_similarity`` is empty.
    """
    # An empty result set previously raised IndexError on db_similarity[0].
    if len(db_similarity) == 0:
        return []
    top_score = db_similarity[0][1]
    return [hit for hit in db_similarity if hit[1] - top_score <= diff_val]

In [7]:
# Question-rewriting prompt: combines {chat_history} with a follow-up {question} and asks
# the model for a standalone question that ends with an instruction to answer in German.
# NOTE(review): CUSTOM_QUESTION_PROMPT is not referenced by any other cell visible in this
# notebook - presumably intended for a ConversationalRetrievalChain; confirm or remove.
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. At the end of standalone question add this 'Answer the question in German language.' If you do not know the answer reply with 'I am sorry'.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [8]:
# Build the retrieval-QA pipeline: embeddings -> FAISS index -> local LLM -> RetrievalQA chain.
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
DB_FAISS_PATH = "/home/sira/sira_project/meta-Llama2/vectorstores_clean_doc_gte-base/db_faiss"
embeddings = load_embeddings()
# The index is loaded with the embedding object built by load_embeddings().
db = FAISS.load_local(DB_FAISS_PATH, embeddings)
llm = load_llm()
qa_prompt = set_custom_prompt(custom_prompt_template)
# input_key/output_key name the chain's 'query' input and 'result' output; presumably this
# is required because the chain also returns source documents - confirm.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, input_key="query", output_key="result")
# NOTE(review): the QA prompt has no {chat_history} placeholder, so the history stored in
# `memory` does not appear in the prompt text defined in this notebook.
qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = db.as_retriever(search_kwargs = {'k':3}), 
    return_source_documents = True,
    memory = memory,
    chain_type_kwargs = {"prompt":qa_prompt}) 


# diff_val = st.slider(label ='Select a diff value',
#                    min_value = 0.00, 
#                    max_value = 1.00, 
#                    step = 0.01, value = 0.01, format = "%f")

  from .autonotebook import tqdm as notebook_tqdm
llama.cpp: loading model from /home/sira/sira_project/meta-Llama2/llama-2-7b-chat.ggmlv3.q8_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 7 (mostly Q8_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 7354.73 MB (+ 2048.0

In [32]:
# Ask one question through qa_chain, print the answer and its source URLs, and report the
# elapsed wall-clock time in whole seconds.
# NOTE(review): history_log is re-created on every run of this cell, so it never holds
# more than one snapshot.
history_log = []
query = "Who is Dion Wiggins"
start = time.time()
#db_similarity = db.similarity_search_with_score(query, k=10)
#filter_list = filter_search(db_similarity, diff_val)
response = qa_chain({'query': query})
print(response["result"])
# Source URLs are extracted from the stringified response, normalised and de-duplicated.
urls = regex_source(response)
for count, url in enumerate(urls):
    print(str(count+1)+":", url)
# The timed span covers the chain call plus the URL extraction and printing above.
end = time.time()
# NOTE(review): "Respone" is a typo for "Response" in the printed label.
print("Respone time:",int(end-start),"sec")
# Snapshot the chat history held in memory after this turn.
history_log.append(memory.load_memory_variables({})["chat_history"])

Llama.generate: prefix-match hit


Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As Chief Technology Officer and Co-Founder of Omniscien, he provides guidance on technology solutions that drive business success.Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As Chief Technology Officer and Co-Founder of Omniscien, he provides guidance on technology solutions that drive business success.
1: https://omniscien.com/about-us/company
2: https://omniscien.com/blog/hype-cycle-for-ai-technologies-in-business
Respone time: 47 sec



llama_print_timings:        load time = 135304.14 ms
llama_print_timings:      sample time =    50.43 ms /    86 runs   (    0.59 ms per token,  1705.47 tokens per second)
llama_print_timings: prompt eval time = 19456.52 ms /    68 tokens (  286.13 ms per token,     3.49 tokens per second)
llama_print_timings:        eval time = 27288.26 ms /    85 runs   (  321.04 ms per token,     3.11 tokens per second)
llama_print_timings:       total time = 47046.17 ms


In [15]:
# Display the messages currently stored under the memory's "chat_history" key.
memory.load_memory_variables({})["chat_history"]

[HumanMessage(content='Who is Dion Wiggins', additional_kwargs={}, example=False),
 AIMessage(content='Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.', additional_kwargs={}, example=False)]

In [11]:
# Display the answer text held in the current `response` dict.
response["result"]

'Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.'

In [16]:
# Second query run; same steps as the first query cell with a slightly different question.
# NOTE(review): this cell duplicates the earlier query cell almost line for line - a
# shared helper function taking `query` would remove the copy-paste.
query = "Who is Dion Wiggins 12"
start = time.time()
#db_similarity = db.similarity_search_with_score(query, k=10)
#filter_list = filter_search(db_similarity, diff_val)
response = qa_chain({'query': query})
print(response["result"])
# Source URLs are extracted from the stringified response, normalised and de-duplicated.
urls = regex_source(response)
for count, url in enumerate(urls):
    print(str(count+1)+":", url)
# The timed span covers the chain call plus the URL extraction and printing above.
end = time.time()
# NOTE(review): "Respone" is a typo for "Response" in the printed label.
print("Respone time:",int(end-start),"sec")

Llama.generate: prefix-match hit


Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.
1: https://omniscien.com/about-us/company
2: https://omniscien.com/blog/hype-cycle-for-ai-technologies-in-business
Respone time: 50 sec



llama_print_timings:        load time = 135304.14 ms
llama_print_timings:      sample time =    49.36 ms /    92 runs   (    0.54 ms per token,  1863.86 tokens per second)
llama_print_timings: prompt eval time = 19706.92 ms /    71 tokens (  277.56 ms per token,     3.60 tokens per second)
llama_print_timings:        eval time = 30011.02 ms /    91 runs   (  329.79 ms per token,     3.03 tokens per second)
llama_print_timings:       total time = 50073.92 ms


In [26]:
# Display every variable exposed by the conversation memory.
# NOTE(review): the recorded output of this cell is `KeyError: 0`, and execution counts in
# this notebook are out of order - confirm whether it still fails on a fresh run.
memory.load_memory_variables({})

KeyError: 0

In [25]:
# Character length of the stringified memory variables (rough size check).
# NOTE(review): passes [] where the other cells pass {} - presumably the argument is
# ignored by this memory type; confirm.
len(str(memory.load_memory_variables([])))

1087

In [31]:
# Display the stored chat history again (same expression as an earlier inspection cell).
memory.load_memory_variables({})["chat_history"]

[HumanMessage(content='Who is Dion Wiggins', additional_kwargs={}, example=False),
 AIMessage(content='Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.', additional_kwargs={}, example=False),
 HumanMessage(content='Who is Dion Wiggins 12', additional_kwargs={}, example=False),
 AIMessage(content='Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of en

In [23]:
# Display the stored chat history as one plain string.
str(memory.load_memory_variables({})["chat_history"])

"[HumanMessage(content='Who is Dion Wiggins', additional_kwargs={}, example=False), AIMessage(content='Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of enterprises on their ICT strategy.', additional_kwargs={}, example=False), HumanMessage(content='Who is Dion Wiggins 12', additional_kwargs={}, example=False), AIMessage(content='Dion Wiggins is a highly experienced ICT industry visionary, entrepreneur, analyst, and consultant. He has an impressive knowledge in the fields of software development, architecture, and management, as well as an in-depth understanding of Asian ICT markets. As the Chief Technology Officer and Co-Founder of Omniscien, he has advised literally hundreds of ente

In [20]:
# Display the memory object's repr (configuration plus stored messages).
memory

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[]), output_key='result', input_key='query', return_messages=True, human_prefix='Human', ai_prefix='AI', memory_key='chat_history')

In [17]:
# Reset the conversation memory; the following cell's recorded output shows an empty chat_history.
memory.clear()

In [18]:
# Display the memory variables to verify the state after clearing.
memory.load_memory_variables({})

{'chat_history': []}