In [1]:
import os
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain

# Chroma
import chromadb 
from chromadb.utils import embedding_functions
from langchain.vectorstores import Chroma

# Sentence Transformers
from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings

import time
from IPython.display import display, HTML, clear_output

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.cuda.empty_cache()
gc.collect()

20

# Set up

In [3]:
chroma_client = client = chromadb.PersistentClient(path='Path')
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [4]:
langchain_chroma = Chroma(
    client=chroma_client,
    collection_name="PRODUCTX",
    embedding_function=embedding_function,
)
print("There are", langchain_chroma._collection.count(), "in the collection")

There are 473 in the collection


In [5]:
def get_similar_docs(question, similar_doc_count):
    retrieved_docs = langchain_chroma.similarity_search(question, k=similar_doc_count)
    return retrieved_docs
    

# Build QA chain

In [6]:
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are an assistant to answer question about system management in Company .
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in paragraph then give response "I dont't know".

  {context}

  Question: {question}

  Response:
  """

In [31]:
def build_qa_chain(model_name_key):
    model_paths = {
        "Dolly": "/Path/models/Dolly",
        "Dolly_7": "/Path/models/Dolly_7",
        "Mistral": "/Path/models/Mistral"
    }

    # Retrieve the model path using the provided key
    model_path = model_paths.get(model_name_key)
    if not model_path:
        raise ValueError(f"Model name key '{model_name_key}' is not valid. Choose from {list(model_paths.keys())}.")
    
    torch.cuda.empty_cache()
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", load_in_4bit=True)#torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token
    
    instruct_pipeline = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    return_full_text=True,
    max_new_tokens=100,
    do_sample=False,  # Greedy decoding
    num_beams=1,  # Explicitly setting to single-beam (greedy) decoding
    pad_token_id=tokenizer.eos_token_id,
    model_kwargs={'load_in_4bit': False}
)
    
    prompt = PromptTemplate(input_variables=['context', 'question'], template=template)

    hf_pipe = HuggingFacePipeline(pipeline=instruct_pipeline)
    return load_qa_chain(llm=hf_pipe, chain_type="stuff", prompt=prompt, verbose=True)# Note: if you use dolly 12B or smaller model but a GPU with less than 24GB RAM, use 8bit. This requires %pip install bitsandbytes
  # instruct_pipeline = pipeline(model=model_name, trust_remote_code=True, device_map="auto", model_kwargs={'load_in_8bit': True})
  # For GPUs without bfloat16 support, like the T4 or V100, use torch_dtype=torch.float16 below

In [37]:
qa_chain = build_qa_chain("Dolly_7")

# Answer Function

In [33]:
def displayHTML(html):
    """Display HTML in Jupyter notebook."""
    from IPython.display import HTML
    display(HTML(html))

In [34]:
def answer_question(question):
    similar_docs = get_similar_docs(question, similar_doc_count=3)
    result = qa_chain({"input_documents": similar_docs, "question": question})
    result_html = f"<p><blockquote style=\"font-size:24\">{question}</blockquote></p>"
    result_html += f"<p><blockquote style=\"font-size:18px\">{result['output_text']}</blockquote></p>"
    result_html += "<p><hr/></p>"
    for d in result["input_documents"]:
        source_id = d.metadata["source"]
        result_html += f"<p><blockquote>{d.page_content}<br/>(Source: {source_id}\">{source_id}</a>)</blockquote></p>"
    displayHTML(result_html)

In [35]:
def answer_question_stream(question):
    # Retrieve similar documents based on the question.
    similar_docs = get_similar_docs(question, similar_doc_count=3)

    # Process the question through your QA chain.
    result = qa_chain({"input_documents": similar_docs, "question": question})

    # Start building your HTML content (though this part won't be "typed" out word by word).
    result_html = f"<p><blockquote style=\"font-size:24\">{question}</blockquote></p>"

    # Here is where we will start the "typing" effect for the answer.
    # First, we split the result into words.
    words = result['output_text'].split()

    # "Type" each word with a delay.
    for word in words:
        # Append the word to your HTML content.
        result_html += word + ' '
        
        # This will create the dynamic "typing" display.
        clear_output(wait=True)
        display(HTML(result_html))  # Display current state.
        time.sleep(0.5)  # Delay between "typing" each word.

    # After the answer, continue with your HTML content.
    result_html += "<p><hr/></p>"
    for d in result["input_documents"]:
        source_id = d.metadata["source"]
        result_html += f"<p><blockquote>{d.page_content}<br/>(Source: <a href=\"URL{source_id}\">{source_id}</a>)</blockquote></p>"

    # Finally, display the full content.
    clear_output(wait=True)
    display(HTML(result_html))

# Test

In [None]:
answer_question_stream("What is The Warranty Inspection")

In [36]:
answer_question("What should i do before making a purchase")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are an assistant to answer question about system management in Company .
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in paragraph then give response "I dont't know".

  Context : (Documentation = Administrastion Manual, Title = Data & filesystems, Chapter = Disks) SDD drives are highly recommended . No RAID is required , although a RAID-0 is useful to benefit from larger volume of storage . Tool is also designed to operate on HDD , in such cases fast HDD with RAID10 is recommended . Each Tool node is designed hold up an amount of about 4TiB of data and requires a margin for maintenance operation

In [None]:
answer_question("What is Tool")

In [26]:
answer_question_stream('How to push constellation Data')

In [27]:
answer_question(' Explain me in detail how to push constellation Data')



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are an assistant to answer question about system management .
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in paragraph then give response "I dont't know".

  Context : (Documentation = Administrastion Manual, Title = Throwaway (no storage) datastore) This datastore is not saving any data . It can be used for testing performances or behaviour of Tool data points push interfaces and/or of the network under stress . It provides a single metric with a counter of datapoints received over the past hour .

Context : (Documentation = Administrastion Manual, Title = Technology for the Satellite Ground Ec

In [28]:
answer_question('What is the add data points in Tool ?')



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are an assistant to answer question about system management .
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in paragraph then give response "I dont't know".

  Context : (Documentation = Administrastion Manual, Title = Time Series Python Connector, Chapter = Send data points to server) Use DataPointBuilder and add_data_points to send data points to the Tool server . from Product1TS import Product1TSPythonConnector from Product1TS import DataPoint from datetime import datetime timestamp_Product2 = int ( datetime . now (). timestamp ()*1000 ) point_value = 12 api = Product1TSPythonConnector . API . 

In [29]:
answer_question('how do I correlate 2 datas?')



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are an assistant to answer question about system management in Company .
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in paragraph then give response "I dont't know".

  Context : (Documentation = User Manual, Title = User Interface, Chapter = Overview) The correlation UI is a web client that allows you to build correlation queries , execute them , and display their result .

Context : (Documentation = User Manual, Title = User Interface, Chapter = Analyzing results, Paragraph = Correlation search) When you execute your correlation search query , the results are returned in the form of a bar char

In [30]:
answer_question_stream('Give me a list of aggregators in syminer')