### Necessary imports

In [None]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m810.5/810.5 kB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.5/156.5 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m46.2 

### Dependencies

In [None]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

### Load quantized Mistral 7B

In [None]:
# Load the Mistral-7B-Instruct tokenizer, build a 4-bit bitsandbytes
# quantization config, and load the causal-LM weights with that config.

#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.1'

# NOTE(review): trust_remote_code=True permits repository-supplied code to run
# while loading; presumably not needed for this checkpoint — confirm and drop.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
# (kept as a string; resolved to the matching torch attribute further down)
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
# Turn the dtype name above into the torch dtype object (here torch.float16).
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
# Informational only: when float16 is in use and the GPU's major compute
# capability is >= 8, print a hint that bfloat16 is available. Nothing is
# reconfigured as a result.
# NOTE(review): presumably this call fails when no CUDA device is present — confirm.
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
# Load the causal-LM weights for `model_name`, applying the quantization
# config built above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


### Count number of trainable parameters

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Despite the name, nothing is printed here: the report is returned as a
    string so the caller decides how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        str: Three lines giving the trainable parameter count, the total
        parameter count, and the trainable share as a percentage with two
        decimals. A model without parameters reports ``0.00%``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the division: a model with no parameters would otherwise raise
    # ZeroDivisionError instead of producing a report.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# The helper returns its report as a string; print it for the loaded model.
print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipeline

In [None]:
# Wrap the quantized model and its tokenizer in a text-generation pipeline.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    # `temperature` only influences sampling-based decoding; without
    # do_sample=True generation is greedy and the temperature is ignored.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    # The returned text includes the prompt followed by the completion.
    return_full_text=True,
    max_new_tokens=1000,
)

In [None]:
# Expose the transformers pipeline through LangChain's LLM wrapper so it can be used inside chains.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
import json

# Define a simple document class
class SimpleDocument:
    """Minimal document holder exposing ``page_content`` and ``metadata``.

    Args:
        text: The document body; stored unchanged as ``page_content``.
        metadata: Optional mapping of extra information about the document.
            It is copied, so every instance owns its own dictionary. When
            omitted, ``metadata`` is an empty dict.
    """

    def __init__(self, text, metadata=None):
        self.page_content = text
        # Copy rather than alias so later edits to one document's metadata
        # cannot leak into the caller's dict or into other documents.
        self.metadata = dict(metadata) if metadata is not None else {}

# Function to load and split JSON data
def load_and_split_json(file_path, key="grants"):
    """Load a JSON file and return one pretty-printed JSON string per record.

    Args:
        file_path: Path of the JSON file to read.
        key: Top-level key whose value is the iterable of records to split
            on. Defaults to ``"grants"``.

    Returns:
        list[str]: Each record under ``key`` serialised with
        ``json.dumps(record, indent=2)``, in file order.

    Raises:
        KeyError: If ``key`` is missing from the top-level JSON object.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # One chunk per record: each entry becomes its own JSON string.
    return [json.dumps(record, indent=2) for record in data[key]]

# Read the grants file and get back one JSON-formatted string per entry.
file_path = '/content/grant.json'
chunks = load_and_split_json(file_path)

# Optional further splitting, left disabled: only needed when the individual
# chunks are too long to index as they are.
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
# texts = text_splitter.split_documents(chunks)

# Wrap every chunk string in a SimpleDocument.
documents = list(map(SimpleDocument, chunks))

In [None]:
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# # Hugging Face model for embeddings
# embeddings = HuggingFaceEmbeddings(
#     model_name="BAAI/bge-base-en", model_kwargs={"device": 'cuda'}, encode_kwargs={'normalize_embeddings': True}
# )



config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
!pip install -qqq chromadb


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/525.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m286.7/525.5 kB[0m [31m8.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

In [None]:
from langchain.vectorstores import Chroma

In [None]:
# Embedding model used to vectorise the documents: BAAI/bge-base-en, placed on
# the 'cuda' device, with normalised embeddings requested at encode time.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en",
    model_kwargs={"device": 'cuda'},
    encode_kwargs={'normalize_embeddings': True},
)



modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Store the embeddings in the Chroma database
# The store is persisted under the relative directory "db".
# NOTE(review): the recorded run of this cell ended with
# "OperationalError: attempt to write a readonly database"; presumably the
# persist directory was not writable or was left over from an earlier
# session — confirm before relying on `db` in later cells.
db = Chroma.from_documents(documents, embeddings, persist_directory="db")


OperationalError: attempt to write a readonly database

### Load and chunk documents. Load chunked documents into FAISS index

In [None]:
# !playwright install
# !playwright install-deps

Downloading Chromium 123.0.6312.4 (playwright build v1105)[2m from https://playwright.azureedge.net/builds/chromium/1105/chromium-linux.zip[22m
[1G154.7 MiB [] 0% 0.0s[0K[1G154.7 MiB [] 0% 28.2s[0K[1G154.7 MiB [] 0% 16.3s[0K[1G154.7 MiB [] 0% 8.9s[0K[1G154.7 MiB [] 1% 5.4s[0K[1G154.7 MiB [] 2% 3.3s[0K[1G154.7 MiB [] 3% 2.8s[0K[1G154.7 MiB [] 4% 2.4s[0K[1G154.7 MiB [] 5% 2.1s[0K[1G154.7 MiB [] 7% 1.8s[0K[1G154.7 MiB [] 8% 1.6s[0K[1G154.7 MiB [] 9% 1.6s[0K[1G154.7 MiB [] 11% 1.5s[0K[1G154.7 MiB [] 12% 1.5s[0K[1G154.7 MiB [] 13% 1.4s[0K[1G154.7 MiB [] 15% 1.3s[0K[1G154.7 MiB [] 17% 1.2s[0K[1G154.7 MiB [] 18% 1.2s[0K[1G154.7 MiB [] 20% 1.1s[0K[1G154.7 MiB [] 21% 1.1s[0K[1G154.7 MiB [] 22% 1.1s[0K[1G154.7 MiB [] 24% 1.1s[0K[1G154.7 MiB [] 25% 1.0s[0K[1G154.7 MiB [] 26% 1.0s[0K[1G154.7 MiB [] 27% 1.0s[0K[1G154.7 MiB [] 29% 1.0s[0K[1G154.7 MiB [] 30% 1.0s[0K[1G154.7 MiB [] 31% 0.9s[0K[1G154.7 MiB [] 32% 1.0s[0K[1G154.7 MiB [] 33% 0.

In [None]:
# import nest_asyncio
# nest_asyncio.apply()

# # Articles to index
# articles = ["https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html"]

# # Scrapes the blogs above
# loader = AsyncChromiumLoader(articles)
# docs = loader.load()

In [None]:
# # Converts HTML to plain text
# html2text = Html2TextTransformer()
# docs_transformed = html2text.transform_documents(docs)

# # Chunk text
# text_splitter = CharacterTextSplitter(chunk_size=100,
#                                       chunk_overlap=0)
# chunked_documents = text_splitter.split_documents(docs_transformed)

# # Load chunked documents into the FAISS index
# db = FAISS.from_documents(chunked_documents,
#                           HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

# retriever = db.as_retriever()



modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
## Default LLaMA-2 prompt style
# Markers that wrap the whole instruction and the embedded system section.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System text used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    """Assemble a complete prompt from a system text and an instruction.

    The result is, in order: the opening instruction marker, the system text
    wrapped in its own markers, the instruction, and the closing instruction
    marker.

    Args:
        instruction: Text placed after the system section.
        new_system_prompt: System text; defaults to DEFAULT_SYSTEM_PROMPT.

    Returns:
        str: The concatenated prompt.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)

In [None]:
# System text for retrieval-augmented answering: answer from the supplied
# context, answer once, and do not invent information.
sys_prompt = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. """

# User-turn template with {context} and {question} placeholders. Real newlines
# separate the sections; the literal characters "/n" (a typo for a newline
# escape) would otherwise be sent to the model verbatim.
instruction = """CONTEXT:

{context}

Question: {question}"""
# Last expression of the cell: displays the assembled template for inspection.
get_prompt(instruction, sys_prompt)

"[INST]<<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. \n<</SYS>>\n\nCONTEXT:/n/n {context}/n\n\nQuestion: {question}[/INST]"

In [None]:
# Second LangChain wrapper around the same pipeline object, used by the RetrievalQA chain.
# NOTE(review): `model_kwargs` is presumably not forwarded to an already-built
# pipeline, so the temperature of 0.1 may have no effect — confirm against the
# installed LangChain version.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline, model_kwargs={"temperature": 0.1})

In [None]:
from langchain.prompts import PromptTemplate
# Build the full template text, then declare its two placeholders.
prompt_template = get_prompt(instruction, sys_prompt)

llama_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
from langchain.chains import RetrievalQA
# Retrieval QA chain of type "stuff": the retriever is asked for 2 documents
# per query, the custom prompt is used, and the source documents are returned
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    chain_type="stuff",
    chain_type_kwargs={"prompt": llama_prompt},
    return_source_documents=True,
)

In [None]:
# Run the chain through .invoke(): calling the chain object directly is
# deprecated (the recorded run of this cell emitted a deprecation warning),
# and the other chains in this notebook already use .invoke().
# RetrievalQA reads the question from the "query" key.
result = qa_chain.invoke({"query": "What is the funding opportunity purpose of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?"})

  warn_deprecated(


### Create PromptTemplate and LLMChain

In [None]:
# Instruction-style template with {context} and {question} placeholders.
prompt_template = """
### [INST] Instruction: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Prompt object declaring the two template variables.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Chain that fills the prompt and sends it to the Mistral LLM wrapper.
llm_chain = LLMChain(prompt=prompt, llm=mistral_llm)

In [None]:
# Baseline run: ask the question with an empty context, i.e. without retrieval.
llm_chain.invoke(
    {
        "context": "",
        "question": "What is the funding opportunity purpose of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?",
    }
)



{'context': '',
 'question': 'What is the funding opportunity purpose of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?',
 'text': '\n### [INST] Instruction: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\'t know the answer to a question, please don\'t share false information. Here is context to help:\n\n\n\n### QUESTION:\nWhat is the funding opportunity purpose of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)? [/INST]\n \nThe funding opportunity purpose of the grant title "The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)" is to supp

### Build RAG Chain

In [None]:
# `retriever` is otherwise only assigned inside a commented-out cell, so on a
# fresh kernel this cell would fail with a NameError. Derive it from the vector
# store, using the same k=2 setting as the RetrievalQA chain.
retriever = db.as_retriever(search_kwargs={"k": 2})

# The retriever fills "context"; the raw question string is passed through
# unchanged as "question". Both are then handed to the LLM chain.
# NOTE(review): the retrieved Document list is inserted into the prompt as-is
# (the recorded output shows its repr inside the prompt text); consider joining
# the documents' page_content instead.
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What is the funding opportunity purpose of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?")



In [None]:
# Show what the retriever supplied as context for the last question.
result['context']

[Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}),
 Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)\n\nActivity Code', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}),
 Document(page_content='The objective of the NCI Transition Career Development Award (K22) is to\nprovide support to mentored, non-independent investigators in transitioning to\ntheir first independent tenure-track faculty cancer research positions, or\ntheir equivalent, with an enhanced probability of success for obtaining\nindependent NIH or other research project grant support. Candidates in\nmentored, non-independent cancer research positions who have had at least 2\nyears of postdoctoral cancer research training, but, no more than a total of 8\nyears of mentored, non-indepe

In [None]:
# Print the chain's text output; in the recorded run it starts with the filled-in prompt.
print(result['text'])


### [INST] Instruction: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is context to help:

[Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}), Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)\n\nActivity Code', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}), Document(page_content='The objective of the NCI Transition Career Development Award (K22) is to\nprovide s

In [None]:
# Reuse the `rag_chain` built earlier in the notebook: rebuilding an identical
# chain for every question is redundant copy-paste.
result = rag_chain.invoke("What is the expiration date of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?")



In [None]:
# Print the chain's text output for the expiration-date question.
print(result['text'])


### [INST] Instruction: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is context to help:

[Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}), Document(page_content='The NCI Transition Career Development Award (K22 Independent Clinical Trial\nOptional)\n\nActivity Code', metadata={'source': 'https://grants.nih.gov/grants/guide/pa-files/PAR-24-117.html'}), Document(page_content='* Applications that are merely cancer-related or not highly related to the mission 

In [None]:
# Reuse the `rag_chain` built earlier in the notebook: rebuilding an identical
# chain for every question is redundant copy-paste.
result = rag_chain.invoke("What are the Eligibility Information of the grant title The NCI Transition Career Development Award (K22 Independent Clinical Trial Optional)?")



In [None]:
# Print the chain's text output for the eligibility question.
print(result['text'])


### [INST] Instruction: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is context to help:

[Document(page_content='* Applications that are merely cancer-related or not highly related to the mission of the NCI.\n  * Applications in which the candidate has more than a total of 8 years of mentored, non-independent research training experience after the terminal research doctorate or clinical degree at the time of application submission or resubmission unless the candidate received prior approval from the Program Director before application submission; or\n  * Applications in which the candidate currently ho