# Using Llama-2 

### Install dependencies

In [None]:
!pip -q install langchain==0.0.264 pypdf==3.15.1 torch accelerate==0.21.0 transformers==4.31.0 sentence_transformers==2.2.2 ctransformers==0.2.22 faiss-gpu

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m271.0/271.0 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m6.6 MB/s[0m eta 

### Import modules

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import JSONLoader, DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA

In [None]:
# Directory that the DirectoryLoader below scans for input files.
DATA_PATH = './documents'
# Location where the FAISS index is saved with save_local() and later
# read back with FAISS.load_local().
DB_FAISS_PATH = 'vectorstore/db_faiss'

### Load documents

In [None]:
# Build a loader over the PDFs matched by "./*.pdf" under DATA_PATH, with
# PyPDFLoader as the per-file loader class.
# To ingest JSON files instead, swap in:
#loader = DirectoryLoader(DATA_PATH, glob="./*.json", loader_cls=JSONLoader, loader_kwargs = {'jq_schema':'.pages[]'}, show_progress=True, use_multithreading=True)
try:
    loader = DirectoryLoader(
        DATA_PATH,
        glob="./*.pdf",
        loader_cls=PyPDFLoader,
        show_progress=True,
        use_multithreading=True,
    )
except Exception as e:
    # Report the cause, then re-raise: if construction fails, `loader` is
    # never bound and the later create_vector_db() call would only surface
    # an unrelated NameError far from the real problem.
    print(e)
    raise

### Create embeddings and store them in a FAISS vector DB

In [None]:
 #Create vector database
def create_vector_db():
    """Index the loaded documents in FAISS and persist the index to disk.

    Reads documents from the module-level ``loader``, splits them into
    500-character chunks with a 50-character overlap, embeds the chunks with
    the ``all-MiniLM-L6-v2`` sentence-transformers model on the ``cuda``
    device, and saves the resulting FAISS store under ``DB_FAISS_PATH``.

    Prints the number of documents returned by the loader. Returns nothing.
    """
    raw_docs = loader.load()
    print(f"Loaded {len(raw_docs)} documents")

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(raw_docs)

    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cuda'},
    )

    # Build the store from the chunks and write it straight to disk.
    FAISS.from_documents(chunks, embedder).save_local(DB_FAISS_PATH)

# Build the index now; this writes the FAISS store to DB_FAISS_PATH so the
# cells below can load it from disk.
create_vector_db()

100%|██████████| 91/91 [00:52<00:00,  1.74it/s]


Loaded 767 documents


### Define the prompt template

In [None]:
# Prompt text with two placeholders, {context} and {question}, which are
# declared as the PromptTemplate input variables below. The wording tells the
# model to admit when it does not know instead of inventing an answer.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

### Create the prompt

In [None]:
# Wrap the template text and declare the two placeholders it expects.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=custom_prompt_template,
)

In [None]:
# Embedding model handed to FAISS.load_local() below; it is the same model
# name and device that create_vector_db() used to build the index.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cuda'},
)

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
# Load the FAISS store previously saved at DB_FAISS_PATH, pairing it with the
# embedding model defined above.
db = FAISS.load_local(DB_FAISS_PATH, embeddings)

In [None]:
# Expose the store as a retriever for the QA chain. k=2 is passed through
# search_kwargs (presumably the number of chunks fetched per query -- confirm
# against the LangChain version in use).
retriever = db.as_retriever(search_kwargs={'k': 2})

### Load LLM

In [None]:
# Loading model from HuggingFace
def load_llm_from_hf():
    """Create the Llama-2 chat model wrapper used by the QA chain.

    Returns:
        A ``CTransformers`` LLM configured for the
        ``TheBloke/Llama-2-7B-Chat-GGML`` repository, using the
        ``llama-2-7b-chat.ggmlv3.q8_0.bin`` model file, with
        ``max_new_tokens=512`` and ``temperature=0.5``.
    """
    generation_settings = dict(max_new_tokens=512, temperature=0.5)
    return CTransformers(
        model='TheBloke/Llama-2-7B-Chat-GGML',
        model_file='llama-2-7b-chat.ggmlv3.q8_0.bin',
        config=generation_settings,
    )

# Instantiate the model once; the QA chain below reuses this object.
llm = load_llm_from_hf()

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)ca8f5daf/config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)chat.ggmlv3.q8_0.bin:   0%|          | 0.00/7.16G [00:00<?, ?B/s]

In [None]:
# Assemble the question-answering chain: the LLM, the FAISS retriever and the
# custom prompt. return_source_documents=True asks the chain to include the
# retrieved documents in its response alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt': prompt},
)

In [None]:
## Cite sources
def final_result(llm_response, query, *, show_sources=False):
    """Print a query and the chain's answer with ANSI-colored labels.

    Args:
        llm_response: Mapping returned by the QA chain. Its ``'result'``
            entry is printed as the answer; ``'source_documents'`` is read
            only when ``show_sources`` is true.
        query: The question text to echo back.
        show_sources: When true, also print a ``Sources:`` section listing
            ``metadata['source']`` of every source document. Defaults to
            False, which prints only the query and the answer.

    Raises:
        KeyError: If ``llm_response`` has no ``'result'`` entry, or a listed
            source document has no ``'source'`` metadata key.
    """
    # \x1b[34m / \x1b[32m switch the label to blue / green; \x1b[0m resets.
    print(f"\x1b[34mQuery:\x1b[0m {query}")
    print(f"\x1b[32mAnswer:\x1b[0m {llm_response['result']}")
    if show_sources:
        print('\nSources:')
        # A response without source documents simply lists nothing.
        for source in llm_response.get("source_documents", []):
            print(f"{source.metadata['source']}")

In [None]:
# First test question, about tuberculosis drugs.
query = "What oral TB drug that has entered clinical trials have a half-life of > 100?"

In [None]:
# Run the chain on the question; final_result() reads response['result'].
response = qa_chain(query)

In [None]:
# Show the question and the model's answer.
final_result(response, query)

[34mQuery:[0m What oral TB drug that has entered clinical trials have a half-life of > 100?
[32mAnswer:[0m Bedaquiline (BDQ) has an estimated half-life of approximately 120 hours (5 days). [17]


In [None]:
# Second test question, about antivirals; the chain is run in the same cell.
query1 = "What are the antivirals with longest half lives in humans and a dose less that 20mg per day?"
response1 = qa_chain(query1)

In [None]:
# Show the second question and its answer.
final_result(response1, query1)

[34mQuery:[0m What are the antivirals with longest half lives in humans and a dose less that 20mg per day?
[32mAnswer:[0m 
The antiviral with the longest half-life in humans is nelfinavir, which has a half-life of approximately 14 hours. It is achievable with oral dosing and has synergistic properties when combined with other antiviral medications, such as mefloquine. The dose for nelfinavir is typically less than 20mg per day.


In [None]:
# Third test question, about host factors in influenza replication.
query2 = "What host factors are known to affect influenza replication in human cells and mice vivo?"
response2 = qa_chain(query2)

In [None]:
# Show the third question and its answer.
final_result(response2, query2)

[34mQuery:[0m What host factors are known to affect influenza replication in human cells and mice vivo?
[32mAnswer:[0m There are several host factors that are known to affect influenza replication in human cells and mice in vivo, including:

1. Interferons (IFNs): IFNs can inhibit influenza virus replication by reducing the amount of viral RNA available for translation and by inducing cellular processes that limit virus growth.
2. Toll-like receptors (TLRs): TLRs can recognize pathogen-associated molecular patterns (PAMPs) on the surface of influenza virus particles and trigger an immune response that inhibits viral replication.
3. Nucleukinhibit may also known host cellular protein kinase1. RIGFibrinhibit cannab2. Retinoxygen radical oligand 5. RIGFibrinhibit-5. Nucleukine-5. RIGFibrinhibitor (dendosmicroRNAIFIRNAIFIMHCa. Nucleukinhibits: Nucleukinhibitaminogout of the nucleopsonic3. Nuclein receptors (MXBothers: Nucleukinhibitors): Nucleuk4. Retinoctnucleukinhibitrin (dendosmicro

In [None]:
# Same question text as query2, run a second time. The recorded answers for
# the two runs differ (presumably because generation samples with
# temperature 0.5 -- confirm).
query3 = "What host factors are known to affect influenza replication in human cells and mice vivo?"
response3 = qa_chain(query3)

In [None]:
# Show the repeated question and the answer from this second run.
final_result(response3, query3)

[34mQuery:[0m What host factors are known to affect influenza replication in human cells and mice vivo?
[32mAnswer:[0m There is no information available on the effect of VeroE6/TMPRSS2 on influenza replication in human cells or mice.
