## Local RAG

#### Imports + Const

In [30]:
import os
import json
import glob
import shutil
import dotenv

from tqdm.notebook import tqdm

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

from langchain.chat_models import AzureChatOpenAI

In [24]:
# Name of the dataset to work on. It selects the folders used by the cells below:
#   ./data/{DATASET}/raw     -> input .pdf files
#   ./data/{DATASET}/parsed  -> .jsonl files (one serialized chunk per line)
#   ./db/{DATASET}           -> local Chroma vector db
DATASET = 'computer_vision_6'

#### Parsing

In [25]:
# Get list of raw .pdf file names from the specified dataset (sorted so the
# processing order is reproducible; glob.glob returns matches in arbitrary order)
pdf_files = sorted(glob.glob(f"./data/{DATASET}/raw/*.pdf"))

# Parsed output goes in a sibling "parsed" folder. Create it up front so that
# opening the output files below does not fail on a fresh dataset.
parsed_dir = f"./data/{DATASET}/parsed"
os.makedirs(parsed_dir, exist_ok=True)

# The splitter configuration is identical for every file, so build it once.
# length_function=len means chunk_size / chunk_overlap are counted in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len,
    is_separator_regex=False,
)

# For each pdf, parse it and save content + metadata as one JSON object per line
for pdf_path in pdf_files:

    # Load pdf page by page
    pages = PyPDFLoader(pdf_path).load()

    # Split pages in pseudo paragraphs
    docs = text_splitter.split_documents(pages)

    # <raw>/<name>.pdf -> <parsed>/<name>.jsonl, derived with os.path so it does
    # not depend on the path separator glob happens to return
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    jsonl_filename = os.path.join(parsed_dir, stem + ".jsonl")
    with open(jsonl_filename, "w", encoding="utf-8") as jsonl_file:
        for doc in docs:
            print(doc.json(), file=jsonl_file)

#### Vector

In [26]:
# Get list of parsed .jsonl file names from the specified dataset (sorted so the
# indexing order is reproducible; glob.glob returns matches in arbitrary order)
jsonl_files = sorted(glob.glob(f"./data/{DATASET}/parsed/*.jsonl"))

# Delete existing local vector db, if any, and start from an empty directory.
# The isdir guard keeps the first run (no db yet) from raising FileNotFoundError.
db_dir = f"./db/{DATASET}"
if os.path.isdir(db_dir):
    shutil.rmtree(db_dir)
os.makedirs(db_dir, exist_ok=True)

# Embedding model: the settings are the same for every file, so load it once
# instead of re-instantiating it on each loop iteration.
model_name = "sentence-transformers/all-mpnet-base-v2"  # By default, input text longer than 384 word pieces is truncated.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# For each .jsonl file, load and use its content for embedding in vector db
for e in jsonl_files:

    # Load .jsonl file: one serialized document per line. The handle gets its own
    # name so it does not rebind the `jsonl_files` list being iterated.
    with open(e, 'r', encoding="utf-8") as jsonl_file:
        records = [json.loads(line) for line in jsonl_file if line.strip()]
    docs = [
        Document(page_content=record['page_content'], metadata=record['metadata'])
        for record in records
    ]

    # Nothing to index for this file (e.g. a pdf with no extractable text)
    if not docs:
        print(f"Skipped (no documents).  '{e}'")
        continue

    # Append db. Chroma.from_documents embeds the documents itself through
    # `embedding`, so no separate embed_documents() call is made here (the
    # previous one computed every embedding a second time and discarded it).
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings_model,
        persist_directory=db_dir
    )
    print(f"Done.  '{e}'")

Done.  './data/covid_6/parsed/Dai_ChamNet_Towards_Efficient_Network_Design_Through_Platform-Aware_Model_Adaptation_CVPR_2019_paper.jsonl'


KeyboardInterrupt: 

#### LLM RAG QA

In [31]:
# --- Embedding model -------------------------------------------------------
# Same model name and kwargs as the ones used when the vector db was built.
model_name = "sentence-transformers/all-mpnet-base-v2"  # By default, input text longer than 384 word pieces is truncated.
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
embeddings_model = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# --- Vector db -------------------------------------------------------------
# Re-open the db stored under ./db/<DATASET>, querying it with the model above.
persist_directory = f"./db/{DATASET}"
vectordb = Chroma(
    embedding_function=embeddings_model,
    persist_directory=persist_directory,
)

# --- Azure Open AI chat model ----------------------------------------------
# All connection settings are read from the local .env file.
ENV = dotenv.dotenv_values(".env")
azure_settings = {
    "deployment_name": ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
    "openai_api_key": ENV["AZURE_OPENAI_KEY"],
    "openai_api_base": ENV["AZURE_OPENAI_ENDPOINT"],
    "openai_api_version": ENV["AZURE_OPENAI_API_VERSION"],
}
llm = AzureChatOpenAI(**azure_settings)

# --- Prompt ----------------------------------------------------------------
# Written as adjacent literals so the line breaks (and the space that precedes
# the first one) are explicit; {context} and {question} are the template inputs.
template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. "
    "Use three sentences maximum. "
    "Keep the answer as concise as possible. "
    "Always say \"thanks for asking!\" at the end of the answer. \n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# --- QA chain --------------------------------------------------------------
# The chain also returns the retrieved source documents alongside the answer.
retriever = vectordb.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)


def local_qa(question: str, qa_chain=qa_chain):
    """Ask one question to the retrieval QA chain.

    Args:
        question: Question text, passed to the chain under the "query" key.
        qa_chain: Chain to call; defaults to the module-level ``qa_chain``
            as it existed when this function was defined.

    Returns:
        Whatever the chain returns. The cells below read its "query",
        "result" and "source_documents" entries.
    """
    chain_inputs = {"query": question}
    return qa_chain(chain_inputs)

  warn_deprecated(


In [40]:
answer = local_qa("Tell me more about: 'Image importance and object saliency'")

In [42]:
answer['query']

"Tell me more about: 'Image importance and object saliency'"

In [43]:
answer['result']

'There is no information provided in the given context about "Image importance and object saliency."'

In [38]:
for e in answer["source_documents"]:
    print(e, end='\n\n')

page_content='ing [11, 29, 6, 34, 32, 7] has been a popular approachfor removing redundancy in NNs. For example, Ne-\ntAdapt [33] utilizes a hardware-aware ﬁlter pruning algo-\nrithm and achieves up to 1.2 ×speedup for MobileNetV2\non the ImageNet dataset [8]. AMC [13] employs RL for\nautomated model compression and achieves 1.53 ×speedup\nfor MobileNetV1 on a Titan XP GPU. Quantization [10, 17]\nhas also emerged as a powerful tool for signiﬁcantly cutting\ndown computation cost with no or little accuracy loss. For\nexample, Zhu et al. [36] show that there is only a 2% top-5\naccuracy loss for ResNet-18 when using a 3-bit representa-\ntion for weights compared to its full-precision counterpart.\nCompact architecture: Apart from simplifying existing\nmodels, handcrafting more efﬁcient building blocks and op-\nerators for mobile-friendly architectures can also substan-\ntially improve the accuracy-efﬁciency trade-offs [18, 30].\nFor example, at the same accuracy level, MobileNet [15]' me