In [None]:
import torch
# Report whether PyTorch can see a CUDA-capable device; as the cell's last
# expression, the boolean is shown as the cell output.
torch.cuda.is_available()

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

# Hub id shared by the model weights, the tokenizer and the generation config.
model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"

# NOTE(review): trust_remote_code=True lets code shipped with the checkpoint
# run during loading -- confirm the repository is trusted.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Start from the generation settings published with the checkpoint, then
# override the ones this notebook cares about.
gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.do_sample = True
# Near-zero temperature (the original note reads "0.0"); presumably meant to
# approximate greedy decoding while sampling stays enabled -- TODO confirm.
gen_cfg.temperature = 1e-7
gen_cfg.repetition_penalty = 1.11
gen_cfg.max_new_tokens = 512
# NOTE(review): set on the generation config rather than passed to pipeline();
# verify that the pipeline actually honours it from here.
gen_cfg.return_full_text = True

# Text-generation pipeline built from the objects above.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg,
)

# LangChain wrapper so the pipeline can be used as an LLM by the chains below.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from textwrap import fill
from langchain.prompts import PromptTemplate

# Llama-2 chat prompt layout: the system message is delimited by <<SYS>> and
# <</SYS>> inside the [INST] ... [/INST] instruction block. The delimiters had
# been reduced to a bare "<>", which is not part of that format.
template = """
[INST] <<SYS>>
You are an AI assistant. You are truthful, unbiased and honest in your response.

If you are unsure about an answer, truthfully say "I don't know"
<</SYS>>

{text} [/INST]
"""

# {text} is the only placeholder; it receives the user's question.
prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

# Smoke test of the bare LLM (no retrieval): format the prompt, run it, and
# print the reply wrapped at 100 columns.
text = "Explain artificial intelligence in a few lines"
result = llm.invoke(prompt.format(text=text))
print(fill(result.strip(), width=100))

In [None]:
import locale
# Force the reported preferred encoding to UTF-8 for the rest of the session.
# The replacement accepts the same optional `do_setlocale` argument as the
# stdlib function, so callers using locale.getpreferredencoding(False) keep
# working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
from langchain.vectorstores import Chroma

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Embedding function handed to the Chroma vector store. No model name is
# passed, so the library's default embedding model is used.
# NOTE(review): this presumably has to match the model that built the
# persisted index -- confirm.
embeddings = HuggingFaceEmbeddings()

In [None]:
# Directory of the persisted Chroma collection, relative to the working directory.
persist_directory = "db/"

# Open the vector store on that directory, embedding queries with `embeddings`.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

In [None]:
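# Alternative store (disabled): uncomment both lines to use the collection persisted under 'db2/' instead of 'db/'.
#persist_directory = 'db2/'
#vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)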

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Retrieval prompt in the Llama-2 chat layout: the system instructions are
# delimited by <<SYS>> and <</SYS>> inside the [INST] ... [/INST] block. The
# delimiters had been reduced to a bare "<>", which is not part of that format.
# {context} receives the retrieved text and {question} the user's query.
prompt_template = """
[INST] <<SYS>>
Use the following context to answer the question at the end. Do not use any other information. 
If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
Keep your answer short but descriptive. 
<</SYS>>

{context}

Question: {question} [/INST]
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Question-answering chain over the Chroma store, using the "stuff" chain type
# with the custom prompt above.
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Retriever with default settings (similarity search; k, the number of
    # documents returned, defaults to 4). Alternatives noted for reference:
    #   - search_type="mmr" switches to MMR retrieval.
    #   - search_type="similarity_score_threshold" with
    #     search_kwargs={'k': 5, 'score_threshold': 0.8} sets a minimum
    #     relevance for the documents returned.
    #   - return_source_documents=True (a from_chain_type argument) also
    #     returns the source documents used to answer the question.
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)


In [None]:
# Ask the "stuff" chain and print its answer wrapped at 100 columns.
query = "Explain the concept of heat equity"
result = Chain_pdf.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)

In [None]:
# Ask the "stuff" chain and print its answer wrapped at 100 columns.
query = "Who are some vulnerable populations in Arizona "
result = Chain_pdf.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)

In [None]:
# Ask the "stuff" chain and print its answer wrapped at 100 columns.
query = "What is the need for a Chief Heat Officer"
result = Chain_pdf.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)

In [None]:
# Per-document prompt in the Llama-2 chat layout: the system instructions are
# delimited by <<SYS>> and <</SYS>> inside the [INST] ... [/INST] block. The
# delimiters had been reduced to a bare "<>", which is not part of that format.
# {context} receives the retrieved text and {question} the user's query.
prompt_template = """
[INST] <<SYS>>
Use the following context to answer the question at the end. Do not use any other information. 
If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
Keep your answer short but descriptive. 

<</SYS>>

{context}

Question: {question} [/INST]
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# The map_reduce QA chain takes its per-document prompt under the
# "question_prompt" key (not "prompt", which the "stuff" chain uses). No
# "combine_prompt" is supplied, so the library default is used for the final
# combine step.
chain_type_kwargs = {"question_prompt": prompt}

# NOTE(review): the variable is called `refine` but the chain type is
# "map_reduce"; the name is kept because the query cells below reference it.
# Confirm which chain type was intended.
refine = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
    retriever=vectordb.as_retriever(),
    verbose=True,
)

In [None]:
# Ask the `refine` chain and print its answer wrapped at 100 columns.
query = "Explain the concept of heat equity"
result = refine.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)

In [None]:
# Ask the `refine` chain and print its answer wrapped at 100 columns.
query = "Who are some vulnerable populations in Arizona "
result = refine.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)

In [None]:
# Ask the `refine` chain and print its answer wrapped at 100 columns.
query = "What is the need for a Chief Heat Officer"
result = refine.invoke(query)
print(
    fill(
        result["result"].strip(),
        width=100,
    )
)