In [1]:
import chromadb
from langchain_community.document_loaders import PDFMinerLoader 
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from torch import cuda
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain import HuggingFacePipeline
from huggingface_hub import notebook_login
from torch import cuda, bfloat16
import transformers
from langchain.llms import HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from nemoguardrails import LLMRails, RailsConfig
from nemoguardrails.llm.helpers import get_llm_instance_wrapper
from nemoguardrails.llm.providers import(
    HuggingFacePipelineCompatible,
    register_llm_provider
)

In [2]:
# Authenticate against the Hugging Face Hub interactively: the token is typed
# into the login widget rather than stored in the notebook source.
# Never paste an access token into a cell or comment; a token that has been
# saved in a shared notebook must be treated as leaked and revoked.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Chroma client for this notebook. No path or settings are passed, so it runs
# with chromadb's defaults.
client = chromadb.Client()

In [4]:
# Chunking policy: 600-character chunks that overlap by 200 characters, each
# chunk recording where it starts in the source text (`start_index` metadata).
splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_overlap=200,
    chunk_size=600,
)

# Read the PDF next to the notebook and cut it into chunks for embedding.
loader = PDFMinerLoader("./ragPDF.pdf")
docs = loader.load()
doc_splitted = splitter.split_documents(docs)
doc_splitted

[Document(page_content='Math Example questions:\nWhat is the Pythagorean theorem and how is it used in geometry?\nExplain the concept of derivatives in calculus and give an example of its application.\nWhat are prime numbers, and what significance do they hold in number theory?\nHow does probability theory help in decision-making and risk assessment?\nCan you explain the concept of vectors and their applications in physics and engineering?', metadata={'source': './ragPDF.pdf', 'start_index': 0}),
 Document(page_content='Medicine Example questions:\nWhat are the major differences between viral and bacterial infections, and how are they\ntreated differently?\nDescribe the process of how vaccines work and their importance in preventing diseases.\nWhat are the main causes and risk factors associated with cardiovascular diseases?\nExplain the role of antibiotics in combating bacterial infections and the issue of antibiotic\nresistance.\nHow does the immune system function, and what are some

In [5]:
embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Prefer the currently selected CUDA device; fall back to the CPU otherwise.
if cuda.is_available():
    device = f"cuda:{cuda.current_device()}"
else:
    device = "cpu"

# The same device is used both to load the model and to run encoding;
# texts are encoded in batches of 32.
embedding_model = HuggingFaceEmbeddings(
    model_name=embed_model_name,
    model_kwargs={"device": device},
    encode_kwargs={"device": device, "batch_size": 32},
)

In [6]:
index_name = "Test"

# Create the cosine-distance collection if it is missing, or reuse it if the
# cell is re-run. `get_or_create_collection` replaces the manual
# "name in list_collections()" check, which depended on `list_collections()`
# returning objects with a `.name` attribute (not stable across chromadb
# releases).
# The result is bound to a name so the cell stays silent, as before.
collection = client.get_or_create_collection(
    name=index_name,
    metadata={"hnsw:space": "cosine"},
)

In [7]:
# Index the chunks in the collection prepared above. Without `client` and
# `collection_name`, `Chroma.from_documents` builds its own client and default
# collection, so the cosine-distance collection named by `index_name` would
# never be used.
db = Chroma.from_documents(
    doc_splitted,
    embedding=embedding_model,
    client=client,
    collection_name=index_name,
)
retriever = db.as_retriever()

# Smoke test: the medicine chunk should be ranked first for this query.
docs = retriever.get_relevant_documents("Medicine")
docs

[Document(page_content='Medicine Example questions:\nWhat are the major differences between viral and bacterial infections, and how are they\ntreated differently?\nDescribe the process of how vaccines work and their importance in preventing diseases.\nWhat are the main causes and risk factors associated with cardiovascular diseases?\nExplain the role of antibiotics in combating bacterial infections and the issue of antibiotic\nresistance.\nHow does the immune system function, and what are some common disorders related to\nimmune dysfunction?', metadata={'source': './ragPDF.pdf', 'start_index': 416}),
 Document(page_content='Math Example questions:\nWhat is the Pythagorean theorem and how is it used in geometry?\nExplain the concept of derivatives in calculus and give an example of its application.\nWhat are prime numbers, and what significance do they hold in number theory?\nHow does probability theory help in decision-making and risk assessment?\nCan you explain the concept of vectors

In [8]:
model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, need auth for these.
# `True` tells transformers to use the credentials cached by `notebook_login()`
# instead of a token literal; access tokens must never be committed in the
# notebook source.
# NOTE(review): recent transformers releases prefer the `token=` keyword over
# `use_auth_token=` — switch once the installed version is confirmed.
hf_auth = True
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# `device_map='auto'` lets the library decide where the quantized weights go.
# NOTE(review): `trust_remote_code=True` allows code shipped with the
# checkpoint to run locally; confirm it is actually needed for this model.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Load the tokenizer published with the checkpoint named by `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [10]:
# Start from the checkpoint's published generation defaults. `model_id` is
# reused (instead of repeating the repository name) so the generation config
# cannot drift from the model that was actually loaded.
generation_config = GenerationConfig.from_pretrained(model_id)
generation_config.max_new_tokens = 1024
# NOTE(review): sampling with a near-zero temperature looks like an attempt at
# deterministic output — confirm whether `do_sample = False` was intended.
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

# Bundle model, tokenizer and generation settings into one callable pipeline.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

# Wrap the pipeline in the class NeMo Guardrails provides for HF pipelines.
llm = HuggingFacePipelineCompatible(pipeline=text_pipeline)


In [11]:
# Register the wrapped pipeline with NeMo Guardrails under a single provider
# name, used both as the wrapper's `llm_type` and as the registry key.
provider_name = "hf_pipeline_llama2"
HFPipeline = get_llm_instance_wrapper(llm_instance=llm, llm_type=provider_name)
register_llm_provider(provider_name, HFPipeline)

In [12]:
# Rails model configuration (YAML text): one `main` model served by the
# `hf_pipeline_llama2` engine, with a model path and a device as parameters.
YAML_CONFIG = """
models:
    - type: main
      engine: hf_pipeline_llama2
      parameters:
        path: meta-llama/Llama-2-7b-chat-hf
        device: "cuda"
"""

# Colang dialogue definitions (text): sample user and bot utterances plus two
# anonymous flows. The first answers "ill intent" messages with a refusal; the
# second matches any user message (`user ...`), runs the `rag_response` action
# on the last user message and replies with the action's result.
# NOTE(review): `user express question` and `bot express easter egg` are
# defined but not referenced by either flow in this string.
COLANG_CONFIG = """
define user express ill intent
    "I hate you"
    "I want to harm you"
    "I want to destroy the world"

define user express question
    "How was your day?"

define bot express cannot respond
    "I am sorry but that is outside of my capabilities"

define bot express easter egg
    ":D"

#Ill intent flow
define flow
    user express ill intent
    bot express cannot respond

#Question flow
define flow
    user ...
    $answer = execute rag_response(inputs=$last_user_message)
    bot $answer
"""

In [13]:
# Llama-2 chat prompt: a fixed system message followed by the caller's text.
# Spelled out one line at a time so the exact whitespace sent to the model
# (leading newline, the single-space line, trailing newline) is visible.
template = (
    "\n"
    "<s>[INST] <<SYS>>\n"
    "    You are an interviewer that asks questions, here you have some relevant info for the topic and examples\n"
    "<</SYS>>\n"
    " \n"
    "{text} [/INST]\n"
)
 
# Wrap the raw template string; `text` is its only placeholder.
prompt = PromptTemplate(template=template, input_variables=["text"])

In [14]:
# Plain (non-retrieval) chain: the interviewer prompt fed straight to the LLM.
chain = LLMChain(prompt=prompt, llm=llm)


async def pruebaresponse(inputs: str):
    """Run the prompt-only chain on `inputs` and return its `"text"` output."""
    chain_output = chain.invoke(inputs)
    return chain_output["text"]

In [15]:
# Retrieval-augmented chain: retrieved chunks are "stuffed" into one prompt
# together with the question before the LLM is called.
rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm, chain_type='stuff',
    retriever = retriever
)


async def responder_rag(inputs: str):
    """Answer `inputs` with the retrieval-augmented chain.

    Args:
        inputs: The user's message, passed to the chain as its query.

    Returns:
        The value stored under the chain output's ``"result"`` key.
    """
    # Use `.invoke`, like the prompt-only chain earlier in this notebook, rather
    # than calling the chain object directly.
    return rag_pipeline.invoke(inputs)["result"]

In [16]:
# Build the rails from the Colang and YAML text defined in this notebook.
config = RailsConfig.from_content(
    colang_content=COLANG_CONFIG,
    yaml_content=YAML_CONFIG,
)
rails = LLMRails(config)

# Make the Python coroutines available to flows under these action names.
for action_name, action_fn in (
    ("response", pruebaresponse),
    ("rag_response", responder_rag),
):
    rails.register_action(action=action_fn, name=action_name)

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

In [17]:
res = await rails.generate_async(prompt="Ask me a question about medicine") 
print(res)

  warn_deprecated(


 Sure! Here's an example question for the "Medicine" category: What are the major differences between viral and bacterial infections, and how are they treated differently?
