In [None]:
import gradio as gr
import os
from backend.llm.baseLLM import Remote_LLM 
from backend.retrieval.ciena_retreival import CienaRetrieval
from backend.embedder.baseEmbedder import baseEmbedder
from backend.retrieval.utils import *
from backend.retrieval.rereanker import Reranker
from langchain.document_loaders import JSONLoader

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor


In [2]:
embedding_function = baseEmbedder().embedding_function
retriael_kwargs = {
    "threshold": "0.8",
    "k": 20,
    "embedder": embedding_function,
    "hybrid": True,
}
ciena_retreival = CienaRetrieval(**retriael_kwargs)
reranker = Reranker()

In [3]:
def load_db():
    """Load Ciena database."""
    
    dir = './output/'
    loaded_data = []
    for r, d, f in os.walk(dir):
        
        for file in f:
            # if "10Aug-BP_Engineering_guide" in file or "Blue_Planet_MLA_Cloud_Deployment_Guide" in file:
            if '.json' in file and file != 'structuredData.json':
                dir_ = file.split('.')[0] + '.pdf'
                file_name = os.path.join(dir, dir_, file)
                print(file_name)
                try:
                    loader = JSONLoader(
                        file_path=file_name,
                        jq_schema='.[].content[]',
                        content_key="text", 
                        metadata_func=metadata_func)

                    loaded_data.extend(loader.load())
                    print(f"Successfully loaded file {file_name}")
                except Exception as e:
                    # import pdb; pdb.set_trace()
                    print(f"error in loading  file {file_name}")
                    print(e)

    return loaded_data


In [4]:
def clean(docs):
    loaded_data = filter_empty(docs)
    loaded_data = filter_redundant(loaded_data)
    loaded_data = exclude_toc(loaded_data)
    return loaded_data


In [5]:
def get_relevant_docs(query, docs):
    """Get relevant documents from Ciena database."""
    if len(query) == 0 or len(docs) == 0:
        return []
    ciena_retrieval = CienaRetrieval(**retriael_kwargs)
    relevant_docs = ciena_retrieval.get_res(query, docs)
    reranked_res = reranker.rerank(query, relevant_docs)
    return reranked_res

In [6]:
def get_context(docs, headers):
    if len(headers) == 0:
        return [], []
    context, sources = ciena_retreival.get_context(docs, headers)
    return context, sources


In [7]:
def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)

def bot(history):
    response = "**That's cool!**"
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history

In [8]:
def main_get_src_ctx(message, seconds):
    query = message
    relevant_docs = get_relevant_docs(query, cleaned_db)
    rel_headers = relevant_headers(relevant_docs)
    rel_headers = [x for x in rel_headers if x != 'Table of Contents ']
    context, sources = get_context(loaded_db, rel_headers)
    return context, sources


In [9]:
def gt_llm_answer(question, ctx, src):
    endpoint = " http://0.0.0.0:8000/answer"
    LLM_kwargs={'max_new_tokens': 500, 'temperature': 0.5}

    llm = Remote_LLM(
        endpoint="http://0.0.0.0:8000/answer",
        generation_config=LLM_kwargs
    )
    ctx = ctx[len(ctx) // 2:]
    if len(ctx) > 2000: 
        ctx = ctx[:2000]
    prompt = f"""
    You are a powerful AI asistant that answers only based on the given contex. If the context is not enough, you can ask for more information.
    Given the following context {ctx}, answer the following question: {question}
    """

    answer = llm(prompt)
    return answer, src



In [10]:
def slow_echo(message, history):
    ctx, src = main_get_src_ctx(message, 3)
    # convert list ctx to string
    ctx =' '.join(ctx)
    answer, src = gt_llm_answer(message, ctx, src)
    return answer #

In [11]:
loaded_db = load_db()
cleaned_db = clean(loaded_db)

In [12]:
query = "give me a table for ciena's BPO Runtime License"


In [13]:
relevant_docs = get_relevant_docs(query, cleaned_db)
rel_headers = relevant_headers(relevant_docs)
rel_headers = [x for x in rel_headers if x != 'Table of Contents ']
context, sources = get_context(loaded_db, rel_headers)

In [14]:
def remove_duplicates_preserve_order(seq):
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

ctx = remove_duplicates_preserve_order(context)
ctx = '\n'.join(ctx)
print(ctx)

The following tables list the licenses to run the software application. 
Table 3. BPO Runtime Licenses 
|    | PRODUCT _x000D_             | PART NUMBER _x000D_          | PRODUCT DESCRIPTION _x000D_                                |
|---:|:----------------------------|:-----------------------------|:-----------------------------------------------------------|
|  0 | MDSO _x000D_                | S19-APP-MDSO-S1P1 _x000D_    | BP APPLICATION MDSO LICENSE _x000D_                        |
|  1 | MDSO Geo Redundancy _x000D_ | S19-APP-MDSO-GR-S1P1 _x000D_ | BP APPLICATION MDSO GEO REDUNDANCY LICENSE _x000D_         |
|  2 | NFVO _x000D_                | S19-APP-NFVO-S1P1 _x000D_    | BP APPLICATION NFVO LICENSE _x000D_                        |
|  3 | NFVO Geo Redundancy _x000D_ | S19-APP-NFVO-GR-S1P1 _x000D_ | BP APPLICATION NFVO GEO REDUNDANCY LICENSE _x000D_         |
|  4 | Bandwidth on Demand _x000D_ | S19-APP-BOD-S1P1 _x000D_     | BP ADD-ON APPLICATION BANDWIDTH ON DEMAND LICENSE _x00

In [15]:
full_prompt = f"""\
<|system|> Given a part of a lengthy markdown document, answer the following question: `{query}`. Please, follow the same format as the source document given. </s>
<|user|>
please ONLY respond with: {{not_found_response}}, if the context does not provide the answer </s>
CONTEXT: {ctx} 

<|assistant|> """

In [17]:
endpoint = " http://0.0.0.0:8000/answer"
LLM_kwargs={'max_new_tokens': 1500, 'temperature': 0.4}

llm = Remote_LLM(
        endpoint="http://0.0.0.0:8000/answer",
        generation_config=LLM_kwargs
    )

In [18]:
answer = llm(full_prompt)

In [19]:
print(answer)

<|system|> Given a part of a lengthy markdown document, answer the following question: `give me a table for ciena's BPO Runtime License`. Please, follow the same format as the source document given.  
<|user|>
please ONLY respond with: {not_found_response}, if the context does not provide the answer  
CONTEXT: The following tables list the licenses to run the software application. 
Table 3. BPO Runtime Licenses 
|    | PRODUCT _x000D_             | PART NUMBER _x000D_          | PRODUCT DESCRIPTION _x000D_                                |
|---:|:----------------------------|:-----------------------------|:-----------------------------------------------------------|
|  0 | MDSO _x000D_                | S19-APP-MDSO-S1P1 _x000D_    | BP APPLICATION MDSO LICENSE _x000D_                        |
|  1 | MDSO Geo Redundancy _x000D_ | S19-APP-MDSO-GR-S1P1 _x000D_ | BP APPLICATION MDSO GEO REDUNDANCY LICENSE _x000D_         |
|  2 | NFVO _x000D_                | S19-APP-NFVO-S1P1 _x000D_    | 

In [20]:
bot_response = answer.split('<|assistant|>')[1].split('</s>')[0]
print(bot_response)

with open('output.txt', 'w') as f:
    f.write(bot_response)

 
Table 3. BPO Runtime Licenses 
|    | PRODUCT _x000D_             | PART NUMBER _x000D_          | PRODUCT DESCRIPTION _x000D_                                |
|---:|:----------------------------|:-----------------------------|:-----------------------------------------------------------|
|  0 | MDSO _x000D_                | S19-APP-MDSO-S1P1 _x000D_    | BP APPLICATION MDSO LICENSE _x000D_                        |
|  1 | MDSO Geo Redundancy _x000D_ | S19-APP-MDSO-GR-S1P1 _x000D_ | BP APPLICATION MDSO GEO REDUNDANCY LICENSE _x000D_         |
|  2 | NFVO _x000D_                | S19-APP-NFVO-S1P1 _x000D_    | BP APPLICATION NFVO LICENSE _x000D_                        |
|  3 | NFVO Geo Redundancy _x000D_ | S19-APP-NFVO-GR-S1P1 _x000D_ | BP APPLICATION NFVO GEO REDUNDANCY LICENSE _x000D_         |
|  4 | Bandwidth on Demand _x000D_ | S19-APP-BOD-S1P1 _x000D_     | BP ADD-ON APPLICATION BANDWIDTH ON DEMAND LICENSE _x000D_  |
|  5 | 5G Network Slicing _x000D_  | S19-APP-5GSLI-S1P0 _x000D_ 

## GPU safety check

To calculate the number of needed GPU ram, we calculate the following:

- Input data size.
- Model Size.
- Intermediate Activation.
- other stuff (framework overhead + GPU overhead).

$Total GPU Memory=Model Size+Input Data Size+Intermediate Activation +Framework Overheads+GPU Overheads$

In [22]:
import torch
free, total = torch.cuda.mem_get_info()
(total - free)

5052825600

In [28]:
import pynvml
def get_memory_free_MiB(gpu_index):
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(int(gpu_index))
    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    return mem_info.free // 1024 ** 2

get_memory_free_MiB(0)

7226

In [30]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("llmware/dragon-mistral-7b-v0")
tokenized = tokenizer(full_prompt, return_tensors="pt")
print(len(tokenized["input_ids"][0]))

788


In [49]:
788 * 4 / 1024


3.078125

In [39]:
(tokenized["input_ids"][0]).dtype

torch.int64

## Langchain agents

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain, RetrievalQA, ConversationChain
from langchain.agents import initialize_agent, AgentType, Tool

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


llm = OpenAI(model="text-davinci-003", temperature=0)

In [None]:
retrival_chain = RetrievalQA.from_chain_type(
    llm= llm,
    chain_type="stuff",
    retriever=db.as_retriever()
)