### Install necessary packages

In [None]:
!pip install -q -U torch datasets transformers
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.2/670.2 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m38.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m731.7/731.7 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [None]:
!pip install --upgrade langchain

Collecting langchain
  Downloading langchain-0.0.350-py3-none-any.whl (809 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.1/809.1 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.2 (from langchain)
  Downloading langchain_community-0.0.3-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1 (from langchain)
  Downloading langchain_core-0.1.0-py3-none-any.whl (189 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.1/189.1 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langsmith<0.1.0,>=0.0.63 (from langchain)
  Downloading langsmith-0.

### Dependencies

In [None]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

### Load quantized Mistral 7B

In [None]:
#################################################################
# Tokenizer
#################################################################

# Hub identifier used for both the tokenizer and the model checkpoint.
model_name = 'mistralai/Mistral-7B-Instruct-v0.1'

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Pad on the right-hand side, reusing the end-of-sequence token as the pad token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

#################################################################
# bitsandbytes parameters
#################################################################

use_4bit = True                     # load the base model in 4-bit precision
bnb_4bit_compute_dtype = "float16"  # name of the torch dtype used for compute with 4-bit base models
bnb_4bit_quant_type = "nf4"         # quantization type: "fp4" or "nf4"
use_nested_quant = False            # nested (double) quantization for 4-bit base models

#################################################################
# Set up quantization config
#################################################################

# Turn the dtype name above into the matching attribute of the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Bundle the settings into the config object handed to the model loader.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_use_double_quant=use_nested_quant,
    load_in_4bit=use_4bit,
)

# Check GPU compatibility with bfloat16.
# Only runs when float16 compute was requested together with 4-bit loading.
# torch.cuda.get_device_capability() raises when no CUDA device is available,
# so the check is skipped in that case instead of aborting the cell here.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    # Only the major compute-capability number is needed for the check.
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained model
#################################################################

# Load the checkpoint named by `model_name`, applying the quantization settings in `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)



tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

### Count number of trainable parameters

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Despite its name, this function prints nothing: it returns the report as a
    string and leaves displaying it to the caller.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count and the trainable percentage (two decimals). A model
        without any parameters is reported as 0.00%.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Avoid a ZeroDivisionError when the model has no parameters at all.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# Build the parameter report for the loaded model, then display it.
# NOTE(review): the model was loaded with 4-bit quantization; the totals come from
# param.numel() and may not match the checkpoint's nominal parameter count - confirm.
parameter_report = print_number_of_trainable_model_parameters(model)
print(parameter_report)

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipeline

In [None]:
# Wrap the quantized model and its tokenizer in a Hugging Face text-generation pipeline.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    # `temperature` only applies in sampling mode; without do_sample=True the
    # pipeline decodes greedily and the temperature below is ignored.
    # Sampling makes outputs non-deterministic unless a seed is set.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can be used as the chain's LLM.
mistral_llm = HuggingFacePipeline(
    pipeline=text_generation_pipeline,
)

### Load and chunk documents. Load chunked documents into FAISS index

In [None]:
!pip install playwright

Collecting playwright
  Downloading playwright-1.40.0-py3-none-manylinux1_x86_64.whl (37.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.2/37.2 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyee==11.0.1 (from playwright)
  Downloading pyee-11.0.1-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.40.0 pyee-11.0.1


In [None]:
!pip show playwright

Name: playwright
Version: 1.40.0
Summary: A high-level API to automate web browsers
Home-page: https://github.com/Microsoft/playwright-python
Author: Microsoft Corporation
Author-email: 
License: Apache-2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: greenlet, pyee
Required-by: 


In [None]:
# Download the browser binaries that playwright drives (the log below shows Chromium being fetched).
# NOTE(review): no browser is named, so this presumably fetches every default browser;
# only the Chromium-based loader is used in this notebook - consider `playwright install chromium`.
!playwright install

Downloading Chromium 120.0.6099.28 (playwright build v1091)[2m from https://playwright.azureedge.net/builds/chromium/1091/chromium-linux.zip[22m
[1G153.1 Mb [] 0% 10.1s[0K[1G153.1 Mb [] 0% 24.7s[0K[1G153.1 Mb [] 0% 21.4s[0K[1G153.1 Mb [] 0% 16.0s[0K[1G153.1 Mb [] 0% 11.3s[0K[1G153.1 Mb [] 0% 9.1s[0K[1G153.1 Mb [] 1% 8.4s[0K[1G153.1 Mb [] 1% 8.2s[0K[1G153.1 Mb [] 1% 7.5s[0K[1G153.1 Mb [] 1% 7.6s[0K[1G153.1 Mb [] 2% 7.3s[0K[1G153.1 Mb [] 2% 7.4s[0K[1G153.1 Mb [] 2% 7.6s[0K[1G153.1 Mb [] 2% 8.1s[0K[1G153.1 Mb [] 3% 8.1s[0K[1G153.1 Mb [] 3% 7.8s[0K[1G153.1 Mb [] 3% 7.4s[0K[1G153.1 Mb [] 4% 6.9s[0K[1G153.1 Mb [] 4% 7.1s[0K[1G153.1 Mb [] 5% 6.9s[0K[1G153.1 Mb [] 5% 6.7s[0K[1G153.1 Mb [] 5% 6.4s[0K[1G153.1 Mb [] 6% 6.3s[0K[1G153.1 Mb [] 7% 6.0s[0K[1G153.1 Mb [] 7% 5.9s[0K[1G153.1 Mb [] 8% 5.8s[0K[1G153.1 Mb [] 8% 5.9s[0K[1G153.1 Mb [] 9% 5.7s[0K[1G153.1 Mb [] 9% 5.6s[0K[1G153.1 Mb [] 10% 5.5s[0K[1G153.1 Mb [] 11% 5.4s[0K[1G153.1

In [None]:
import nest_asyncio

# Patch asyncio so event loops can be nested
# (presumably required because the notebook already runs one - confirm).
nest_asyncio.apply()

# Articles to index
articles = [
    "https://health.gov/sites/default/files/2019-10/DGA_Healthy-Eating-Pattern.pdf",
    "https://www.hsph.harvard.edu/nutritionsource/2021/01/19/healthy-living-guide-2020-2021/",
    "https://www.cdc.gov/healthyweight/healthy_eating/index.html",
    "https://www.heart.org/en/healthy-living/healthy-eating/eat-smart/nutrition-basics/food-diary-how-to-keep-track-of-what-you-eat",
    "https://www.healthline.com/nutrition/food-journaling-instructions-and-tips",
]
# NOTE(review): the first URL points at a PDF rather than an HTML page - confirm the
# Chromium-based loader returns usable text for it.

# Scrape the pages listed above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [None]:
!pip install html2text

Collecting html2text
  Downloading html2text-2020.1.16-py3-none-any.whl (32 kB)
Installing collected packages: html2text
Successfully installed html2text-2020.1.16


In [None]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentencepiece (from sentence-transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting torch>=1.6.0 (from sentence-transformers)
  Downloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.2/670.2 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: sentence-transformers
  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone
  Created wheel for sentence-transformers: filename=sentence_transformer

In [None]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [None]:
# Turn the scraped HTML documents into plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the plain text into chunks
# NOTE(review): chunk_size=100 looks small - the retrieved snippets in the recorded
# output further down are one-line fragments; confirm this is intended.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Embed every chunk and store the vectors in a FAISS index
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
db = FAISS.from_documents(chunked_documents, embedding_model)

# Retriever view over the index, consumed by the RAG chain
retriever = db.as_retriever()



### Create PromptTemplate and LLMChain

In [None]:
# Prompt layout: instruction, retrieved context, then the user's question.
# `{context}` and `{question}` are placeholders filled in when the chain runs.
prompt_template = """
### [INST] Instruction: Answer the question based on your health care knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Build the PromptTemplate, declaring the two placeholders used in the template
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Chain that combines the prompt with the Mistral LLM
llm_chain = LLMChain(prompt=prompt, llm=mistral_llm)

### Build RAG Chain

In [None]:
# Map the incoming query to the two prompt inputs - the retriever supplies "context",
# the query itself is passed through unchanged as "question" - then pipe both into the LLM chain.
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

# Last expression of the cell, so the chain's result is displayed as the cell output.
rag_chain.invoke("How many sugar should we intake everyday for age 65 or higher?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='According to the _Dietary Guidelines for Americans 2020–2025_ [PDF-30.6MB], a\nhealthy eating plan:', metadata={'source': 'https://www.cdc.gov/healthyweight/healthy_eating/index.html'}),
  Document(page_content='**Rethink Your Drink**  \nTips for cutting added sugars by changing your beverages.', metadata={'source': 'https://www.cdc.gov/healthyweight/healthy_eating/index.html'}),
  Document(page_content='* Fats \n\n  * Sodium \n\n  * Sugar \n\n  * Healthy For Good: Spanish Infographics', metadata={'source': 'https://www.heart.org/en/healthy-living/healthy-eating/eat-smart/nutrition-basics/food-diary-how-to-keep-track-of-what-you-eat'}),
  Document(page_content='Source: Division of Nutrition, Physical Activity, and Obesity, National Center\nfor Chronic Disease Prevention and Health Promotion', metadata={'source': 'https://www.cdc.gov/healthyweight/healthy_eating/index.html'})],
 'question': 'How many sugar should we intake everyday for age 65 or highe