In [2]:
%load_ext autoreload
%autoreload 2
import gradio as gr
import os
from backend.llm.baseLLM import Remote_LLM 
from backend.retrieval.ciena_retreival import CienaRetrieval
from backend.embedder.baseEmbedder import baseEmbedder
from backend.retrieval.utils import *
from backend.retrieval.rereanker import Reranker
from langchain.document_loaders import JSONLoader

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor


  from .autonotebook import tqdm as notebook_tqdm




In [3]:
embedding_function = baseEmbedder().embedding_function
retriael_kwargs = {
    "threshold": "0.8",
    "k": 20,
    "embedder": embedding_function,
    "hybrid": True,
}
ciena_retreival = CienaRetrieval(**retriael_kwargs)
reranker = Reranker()

In [4]:
def load_db(dir= './output/'):
    """Load Ciena database."""
    
    loaded_data = []
    for r, d, f in os.walk(dir):
        
        for file in f:
            if '.json' in file and file != 'structuredData.json':
                file_name = os.path.join(r, file)
                try:
                    loader = JSONLoader(
                        file_path=file_name,
                        jq_schema='.[].content[]',
                        content_key="text", 
                        text_content=False,
                        metadata_func=metadata_func)

                    loaded_data.extend(loader.load())
                    print(f"Successfully loaded file {file_name}")
                except Exception as e:
                    print(f"error in loading  file {file_name}")
                    print(e)

    return loaded_data


In [1]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"]


KeyError: 'PYTORCH_CUDA_ALLOC_CONF'

In [5]:
def clean(docs):
    loaded_data = filter_empty(docs)
    loaded_data = filter_redundant(loaded_data)
    loaded_data = exclude_toc(loaded_data)
    return loaded_data


In [6]:
def get_relevant_docs(query, docs):
    """Get relevant documents from Ciena database."""
    if len(query) == 0 or len(docs) == 0:
        return []
    ciena_retrieval = CienaRetrieval(**retriael_kwargs)
    relevant_docs = ciena_retrieval.get_res(query, docs)
    reranked_res = reranker.rerank(query, relevant_docs)
    return reranked_res

In [7]:
def get_context(docs, headers):
    if len(headers) == 0:
        return [], []
    context, sources = ciena_retreival.get_context(docs, headers)
    return context, sources


In [8]:
def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)

def bot(history):
    response = "**That's cool!**"
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history

In [9]:
def main_get_src_ctx(message, seconds):
    query = message
    relevant_docs = get_relevant_docs(query, cleaned_db)
    rel_headers = relevant_headers(relevant_docs)
    rel_headers = [x for x in rel_headers if x != 'Table of Contents ']
    context, sources = get_context(loaded_db, rel_headers)
    return context, sources


In [10]:
def gt_llm_answer(question, ctx, src):
    endpoint = " http://0.0.0.0:8000/answer"
    LLM_kwargs={'max_new_tokens': 500, 'temperature': 0.5}

    llm = Remote_LLM(
        endpoint="http://0.0.0.0:8000/answer",
        generation_config=LLM_kwargs
    )
    ctx = ctx[len(ctx) // 2:]
    if len(ctx) > 2000: 
        ctx = ctx[:2000]
    prompt = f"""
    You are a powerful AI asistant that answers only based on the given contex. If the context is not enough, you can ask for more information.
    Given the following context {ctx}, answer the following question: {question}
    """

    answer = llm(prompt)
    return answer, src



In [11]:
def slow_echo(message, history):
    ctx, src = main_get_src_ctx(message, 3)
    # convert list ctx to string
    ctx =' '.join(ctx)
    answer, src = gt_llm_answer(message, ctx, src)
    return answer

In [12]:
loaded_db = load_db()
cleaned_db = clean(loaded_db)

Successfully loaded file ./output/17-Dec-UAA_21.10_rhel_install_guide.pdf/17-Dec-UAA_21.json
Successfully loaded file ./output/10Aug-BP_Engineering_guide.pdf/10Aug-BP_Engineering_guide.json
Successfully loaded file ./output/uaa-admin-guide.pdf/uaa-admin-guide.json
Successfully loaded file ./output/5G_Netwrok_Slicing_Installation_Guide_21.06.pdf/5G_Netwrok_Slicing_Installation_Guide_21.json
Successfully loaded file ./output/Blue Planet Cloud Deployment Guide 20.06.pdf/Blue Planet Cloud Deployment Guide 20.json


Successfully loaded file ./output/Blue_Planet_Engineering_Guide_2208_Issue_1-0.pdf/Blue_Planet_Engineering_Guide_2208_Issue_1-0.json
Successfully loaded file ./output/Blue_Planet_Engineering_Guide_2208.pdf/Blue_Planet_Engineering_Guide_2208.json
Successfully loaded file ./output/Blue_Planet_MLA_Cloud_Deployment_Guide.pdf/Blue_Planet_MLA_Cloud_Deployment_Guide.json
Successfully loaded file ./output/BP_Engineering_Guide.pdf/BP_Engineering_Guide.json
Successfully loaded file ./output/25Aug_Blue_Planet_Engineering_Guide_2208.pdf/25Aug_Blue_Planet_Engineering_Guide_2208.json
Successfully loaded file ./output/25July-BP_Engineering_guide.pdf/25July-BP_Engineering_guide.json
Successfully loaded file ./output/24-Jan-450-3704-300-2110-UAA_21.10_rhel_install_guid_1-0.pdf/24-Jan-450-3704-300-2110-UAA_21.json
Successfully loaded file ./output/updateddraft_5g_nsinstallguide_21.06.pdf/updateddraft_5g_nsinstallguide_21.json
Successfully loaded file ./output/BP_Security_Guide_2304_1-0.pdf/BP_Security_G

In [70]:
query = "ًgive me a table for ciena's BPO Runtime License"

In [71]:
relevant_docs = get_relevant_docs(query, cleaned_db)
rel_headers = relevant_headers(relevant_docs)
context, sources = get_context(loaded_db, rel_headers)

In [72]:
def remove_duplicates_preserve_order(seq):
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

ctx = remove_duplicates_preserve_order(context)
ctx = '\n'.join(ctx)
print(ctx)

Blue Planet UAA RHEL/CentOS/Linux Installation Guide 
Blue Planet Release 21.10 
December 2021: Issue 1.0 
sudo dd if=rhel-server-7.7-x86_64-dvd.iso of=/dev/sdb bs=8M 
sudo fdisk -l 
diskutil list 
sudo dd if=rhel-server-7.7-x86_64-dvd.iso of=/dev/disk4 bs=4096 
./bpi --site /opt/ciena/loads/21.10/lineup-uaa-base-single-rhel.yml 
./bpi --site /opt/ciena/loads/21.10/lineup-uaa-base-multi-rhel.yml 
./bpi --site /opt/ciena/loads/21.10/lineup-uaa-all-single-rhel.yml 
./bpi --site /opt/ciena/loads/21.10/lineup-uaa-all-multi-rhel.yml 
If you are provisioning your own NTP timing, add the following playbook arguments: 
./bpi --site /opt/ciena/loads/21.10/<lineup_file> --playbook-args='--skip-tags ntp' 
./bpi --install /opt/ciena/loads/21.10/lineup-uaa-base-single-rhel.yml 
./bpi --install /opt/ciena/loads/21.10/lineup-uaa-base-multi-rhel.yml 
./bpi --install /opt/ciena/loads/21.10/lineup-uaa-all-single-rhel.yml 
./bpi --installs /opt/ciena/loads/21.10/lineup-uaa-all-multi-rhel.yml 
The lineup 

In [79]:
full_prompt = f"""\
<|system|> Based on the context below, answer the following question: `{query}`. </s>
<|user|>
please ONLY respond with: {{not_found_response}}, if the context is not enough </s>
CONTEXT: {ctx} 

<|assistant|> """

In [67]:
full_prompt = f"""\
<|system|> Given a part of a lengthy markdown document, answer the following question: `{query}`. Please, follow the same format as the source document given. </s>
<|user|>
please ONLY respond with: {{not_found_response}}, if the context does not provide the answer </s>
CONTEXT: {ctx} 

<|assistant|> """

In [60]:
endpoint = " http://0.0.0.0:8000/answer"
LLM_kwargs={'max_new_tokens': 1500, 'temperature': 0.4}

llm = Remote_LLM(
        endpoint="http://0.0.0.0:8000/answer",
        generation_config=LLM_kwargs
    )

In [68]:
answer = llm(full_prompt)

In [69]:
print(answer)

<|system|> Given a part of a lengthy markdown document, answer the following question: `ًHow to create the bpadmin user`. Please, follow the same format as the source document given.  
<|user|>
please ONLY respond with: {not_found_response}, if the context does not provide the answer  
CONTEXT: The bpadmin user is given full passwordless sudo privileges. For multi-host installations, use the same bpadmin password on all hosts. The bpadmin user with full sudo access is needed to install the BPUAA applications and to create the bpuser, which has limited sudo access. 
Complete the following procedure to create the bpadmin user on each host where you will install BPUAA. This is a mandatory procedure. 
Before you begin, ensure that you have a password that you want to assign for the bpadmin user. 
The bpadmin user is given full passwordless sudo privileges. For multi-host installations, use the same bpadmin password on all hosts. The bpadmin user with full sudo access is needed to install t

In [63]:
bot_response = answer.split('<|assistant|>')[1].split('</s>')[0]
print(bot_response)

with open('output.txt', 'w') as f:
    f.write(bot_response)

 
You can deploy Blue Planet products using the Blue Planet native platform, or using the Kubernetes open source container orchestration platform.


## GPU safety check

To calculate the number of needed GPU ram, we calculate the following:

- Input data size.
- Model Size.
- Intermediate Activation.
- other stuff (framework overhead + GPU overhead).

$Total GPU Memory=Model Size+Input Data Size+Intermediate Activation +Framework Overheads+GPU Overheads$

In [22]:
import torch
free, total = torch.cuda.mem_get_info()
(total - free)

5052825600

In [28]:
import pynvml
def get_memory_free_MiB(gpu_index):
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(int(gpu_index))
    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    return mem_info.free // 1024 ** 2

get_memory_free_MiB(0)

7226

In [27]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("llmware/dragon-mistral-7b-v0")
tokenized = tokenizer(full_prompt, return_tensors="pt")
print(len(tokenized["input_ids"][0]))

2099


In [49]:
788 * 4 / 1024


3.078125

In [39]:
(tokenized["input_ids"][0]).dtype

torch.int64

## Langchain agents

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain, RetrievalQA, ConversationChain
from langchain.agents import initialize_agent, AgentType, Tool

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


llm = OpenAI(model="text-davinci-003", temperature=0)

In [None]:
retrival_chain = RetrievalQA.from_chain_type(
    llm= llm,
    chain_type="stuff",
    retriever=db.as_retriever()
)