# Experimental - QA 
**by Instructor Embedding, HF Local Pipeline (more model options), LangChain, ChromaDB**  


Ref:
- [Question Answering over Docs](https://python.langchain.com/en/latest/use_cases/question_answering.html)
- [Langchain Integration - GPT4All](https://python.langchain.com/en/latest/modules/models/llms/integrations/gpt4all.html)
- [Retrieval Question/Answering](https://python.langchain.com/en/latest/modules/chains/index_examples/vector_db_qa.html)
- [HKUNLP/instructor-embedding](https://github.com/HKUNLP/instructor-embedding/)
- [Hugging Face Hub](https://python.langchain.com/en/latest/modules/models/llms/integrations/huggingface_hub.html)

## Import packages

In [None]:
# The Embedding Model
from langchain.embeddings import HuggingFaceInstructEmbeddings

# The Inference LLM 
from langchain import HuggingFacePipeline

from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA

from langchain.prompts import PromptTemplate


from langchain.document_loaders import TextLoader


## Setup models

In [None]:
# For embedding: wrap the Instructor model with LangChain's HuggingFace embedding class.
import torch  # cell-local import, used only to probe for a usable GPU

model_name = "hkunlp/instructor-large"
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook can still be run on machines without a CUDA device.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
hf_instructor_embedding = HuggingFaceInstructEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs
)

In [None]:

import torch  # cell-local import, used only to probe for a usable GPU

# See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options
model_id = "bigscience/bloom-560m"   # "bigscience/bloom-1b7"

# device -1 => CPU, device > -1 => GPU (CUDA device index)
# Use the first GPU when CUDA is available; otherwise run the pipeline on the CPU
# rather than requesting a GPU index that does not exist.
llm_device = 0 if torch.cuda.is_available() else -1

# Refer to the below doc for model_kwargs
#     https://huggingface.co/docs/transformers/main_classes/text_generation
llm = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    device=llm_device,
    model_kwargs={"temperature": 0.9, "max_length": 1024},
)

## Prepare doc

In [None]:
# Read the source text file into LangChain document objects.
loader = TextLoader("dataset/kxxxxxx.txt")
documents = loader.load()

# Break the documents into chunks (chunk_size=512, no overlap between chunks).
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=512,
)
texts = text_splitter.split_documents(documents)

# Build the Chroma vector store from the chunks, embedding them with the
# Instructor embedding model configured earlier in the notebook.
docsearch = Chroma.from_documents(
    texts,
    embedding=hf_instructor_embedding,
)


## Prompt and QA module setup

In [None]:

# Prompt layout handed to the LLM: the retrieved text is substituted for
# {context} and the user's query for {question}.
prompt_template = """Here is the background:

{context}

My Question: {question}
Answer: """

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra arguments forwarded to the underlying "stuff" chain: use the custom prompt.
chain_type_kwargs = {"prompt": PROMPT}

# Retrieval QA chain: documents come from the Chroma retriever, the answer from
# the local HuggingFace pipeline; source documents are returned with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)



## Evaluation

In [None]:
%%time


# First test question; the chain's response is kept in `result` for later inspection.
query = "why should I rebuild mcpd database?"
result = qa({"query": query})


In [None]:
%%time

# Second test question; this overwrites `query` and `result` from the previous cell.
query = "How to rebuild mcpd database on version 16?"
result = qa({"query": query})

In [None]:
# Show the generated answer from the most recent query.
print(result["result"])
# Uncomment to also list the retrieved source documents:
# print('SOURCE DOC:')
# print(result["source_documents"])
