<a href="https://colab.research.google.com/github/RuthNjeri6/LLM-llama-2-demo/blob/main/reacto_w3_indexing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Indexing using LLama-2 on colab

In [None]:
!pip -q install langchain==0.0.264 pypdf==3.15.1 torch accelerate==0.21.0 transformers==4.31.0 sentence_transformers==2.2.2 ctransformers==0.2.22 faiss-gpu

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m271.0/271.0 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m11.9 MB/s[0m 

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import JSONLoader, DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
DATA_PATH = './first_80_papers'
DB_FAISS_PATH = 'vectorstore/db_faiss'

In [4]:
try:
    loader = DirectoryLoader(DATA_PATH, glob="./*.pdf", loader_cls=PyPDFLoader, show_progress=True, use_multithreading=True)
    #loader = DirectoryLoader(DATA_PATH, glob="./*.json", loader_cls=JSONLoader, loader_kwargs = {'jq_schema':'.pages[]'}, show_progress=True, use_multithreading=True)
except Exception as e:
    print(e)

In [5]:
# Create vector database
def create_vector_db():
    documents = loader.load()
    print(f"Loaded {len(documents)} documents")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cuda'})

    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)

create_vector_db()

100%|██████████| 86/86 [00:44<00:00,  1.92it/s]


Loaded 727 documents


Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [6]:
from langchain import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA

In [7]:
DB_FAISS_PATH = 'vectorstore/db_faiss'

custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [8]:
# define embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cuda'})

In [9]:
# load embeddings from disk
db = FAISS.load_local(DB_FAISS_PATH, embeddings)

In [10]:
retriever = db.as_retriever(search_kwargs={"k": 2})

In [11]:
# Loading model from HuggingFace
def load_llm_from_hf():
  config = {
      'max_new_tokens': 512,
      'temperature': 0.5
  }
  llm = CTransformers(
      model = 'TheBloke/Llama-2-7B-Chat-GGML',
      model_file = 'llama-2-7b-chat.ggmlv3.q8_0.bin',
      config=config
  )
  return llm

llm = load_llm_from_hf()

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)ca8f5daf/config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)chat.ggmlv3.q8_0.bin:   0%|          | 0.00/7.16G [00:00<?, ?B/s]

In [12]:
# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [13]:
## Cite sources
def final_result(llm_response):
  print(f"Query: {llm_response['query']}")
  print(f"Answer: {llm_response['answer']}")
  print('\nSources:')
  for source in llm_response["source_documents"]:
        print(f"{source.metadata['source']}")
  print("\n")

In [14]:
test_questions = [
    "What oral TB drug that has entered clinical trials have a half-life of > 100?",
    "What host factors are known to affect influenza replication in human cells and mice vivo?",
    "What are the antivirals with longest half lives in humans and a dose less that 20mg per day?",
    "What were other study drug related adverse events occurring in ≥15% of patients",
    "What are the known safety liabilities of teixobactin and are there other lipin II compounds with bettter safety?"
    "What ReFrame compounds or mechanism of action can reduce the dose of approved TB therapies by lowering the dose by >5x of the approved TB drug"
]
responses = []
for question in test_questions:
  response = qa_chain(question)
  obj = {
      'query': question,
      'answer': response['result'],
      'source_documents': response['source_documents']
  }
  responses.append(obj)

In [15]:
for res in responses:
  final_result(res)

Query: What oral TB drug that has entered clinical trials have a half-life of > 100?
Answer:  Bedaquiline (BDQ) has a half-life of approximately 60 hours, which is longer than 100. [2]

Sources:
first_80_papers/A Review of the Evidence for Using Bedaquiline (TMC207) to Treat Multi-Drug Resistant Tuberculosis_30273.pdf
first_80_papers/A Review of the Evidence for Using Bedaquiline (TMC207) to Treat Multi-Drug Resistant Tuberculosis_30273.pdf


Query: What host factors are known to affect influenza replication in human cells and mice vivo?
Answer:  Based on the provided context, it appears that cysteinyl cathepsin inhibitors have been shown to have anti-inflammatory effects in various disease models. However, I cannot answer the question regarding host factors affecting influenza replication as there is no direct mention of this topic in the provided text.

Sources:
first_80_papers/Fatigue during treatment for hepatitis C virus: results of self-reported fatigue severity in two Phase IIb 