In [1]:
import os
import requests

In [None]:
# If the pdf is not installed locally, downloading it
pdf_path = "human-nutrition.pdf"

if not os.path.exists(pdf_path):
    print("[INFO] PDF was not found. Downloading...")

    url = "https://pressbooks.oer.hawaii.edu/humannutrition2/open/download?type=pdf"

    filename = pdf_path

    response = requests.get(url)

    if response.status_code == 200:
        print(f"[INFO] File download successful, saving {filename}")

        with open(filename, "wb") as file:
            file.write(response.content)
    else:
        print(f"[INFO] Failed to download file. Status Code: {response.status_code}")
else:
    print(f"[INFO] File already exists")

In [3]:
import fitz #PyMyPDF library
from tqdm.auto import tqdm # shows progress and time taken for operations


def text_formatter(text: str) -> str:
    # Reading from pdf can return noisy text so we preprocess it
    cleaned_text = text.replace("\n", " ").strip()

    return cleaned_text


def open_and_read_pdf(pdf_path: str) -> str:
    doc = fitz.open(pdf_path)

    # Storing Text data from the pdf and its metadata
    pages_and_text = []
    for page_number, page in tqdm(enumerate(doc)):
        text = page.get_text()
        text = text_formatter(text)

        pages_and_text.append({
            "page_number": page_number - 41, # Subtract 41 because the main content starts from page 41 (not required to do)
            "page_char_count": len(text),
            "page_word_count": len(text.split(" ")),
            "page_sentence_count_raw": len(text.split(". ")),
            "page_token_count": len(text) // 4, # A token is generally 4 characters long
            "text": text
        })

    return pages_and_text


pages_and_text = open_and_read_pdf(pdf_path)

  from .autonotebook import tqdm as notebook_tqdm
1208it [00:02, 588.07it/s]


In [4]:
from spacy.lang.en import English

sentencizer = English()

# Creating an instance of sentencizer - to perform operations on sentences/text
sentencizer.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x2995480c950>

In [5]:
# using the sentencizer to split text data into individual sentences
for item in tqdm(pages_and_text):
    item["sentences"] = list(sentencizer(item["text"]).sents)

    item["sentences"] = [str(sentence) for sentence in item["sentences"]]

    item["page_sentence_count_spacy"] = len(item["sentences"])

  0%|          | 0/1208 [00:00<?, ?it/s]

100%|██████████| 1208/1208 [00:02<00:00, 434.94it/s]


In [6]:
num_sentences_per_chunk = 10

# To split the sentence per page into fixed size chunks 
# This is required so the embedding model only gets the amount of tokens it can process
def split_list(input_list: list[str], slice_size: int) -> list[list[str]]:
    return [input_list[i: slice_size + i] for i in range(0, len(input_list), slice_size)]

In [7]:
for item in tqdm(pages_and_text):
    item["sentence_chunks"] = split_list(item["sentences"], slice_size = num_sentences_per_chunk)

    item["num_chunks"] = len(item["sentence_chunks"])

100%|██████████| 1208/1208 [00:00<00:00, 297395.04it/s]


In [8]:
import re

pages_and_chunks = []

# Creating chunks and storing them with some additional information
for item in tqdm(pages_and_text):
    for sentence_chunk in item["sentence_chunks"]:
        chunk_dict = {}

        chunk_dict["page_number"] = item["page_number"]

        joined_sentence_chunk = "".join(sentence_chunk).replace("  ", " ").strip()
        joined_sentence_chunk = re.sub(r'\.([A-Z])', r'. \1', joined_sentence_chunk)

        chunk_dict["sentence_chunks"] = joined_sentence_chunk

        chunk_dict["chunk_char_count"] = len(joined_sentence_chunk)
        chunk_dict["chunk_word_count"] = len(joined_sentence_chunk.split(" "))
        chunk_dict["chunk_token_count"] = len(joined_sentence_chunk) // 4

        pages_and_chunks.append(chunk_dict)

len(pages_and_chunks)

100%|██████████| 1208/1208 [00:00<00:00, 38207.09it/s]


1843

In [9]:
import pandas as pd

df = pd.DataFrame(pages_and_chunks)
df.describe().round(2)

Unnamed: 0,page_number,chunk_char_count,chunk_word_count,chunk_token_count
count,1843.0,1843.0,1843.0,1843.0
mean,583.38,734.83,112.72,183.34
std,347.79,447.43,71.07,111.86
min,-41.0,12.0,3.0,3.0
25%,280.5,315.0,45.0,78.0
50%,586.0,746.0,114.0,186.0
75%,890.0,1118.5,173.0,279.0
max,1166.0,1831.0,297.0,457.0


In [10]:
min_token_length = 30

for row in df[df["chunk_token_count"] <= min_token_length].sample(5).iterrows():
    print(f"Chunk token count: {row[1]['chunk_token_count']} | Text: {row[1]["sentence_chunks"]}")

Chunk token count: 4 | Text: 516 | Introduction
Chunk token count: 19 | Text: PART XVIII CHAPTER 18. NUTRITIONAL ISSUES Chapter 18. Nutritional Issues | 1041
Chunk token count: 18 | Text: Published February 6, 2018. Accessed April 15, 2018. Comparing Diets | 1055
Chunk token count: 16 | Text: Complementary foods include baby meats, vegetables, Infancy | 837
Chunk token count: 30 | Text: Kwashiorkor affects millions of children worldwide. When it was first described in 1935, Diseases Involving Proteins | 395


In [11]:
# Filtering the chunks - saving only the ones that have more than 30 tokens
pages_and_chunks_over_min_token_length = df[df["chunk_token_count"] > min_token_length].to_dict(orient="records")


In [30]:
from sentence_transformers  import util, SentenceTransformer

# all-mpnet-base-v2 model requires less compute and gives good efficiency, can choose other model from huggingface
# https://huggingface.co/sentence-transformers/all-mpnet-base-v2
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2", device="cpu")



In [13]:
%%time

text_chunks = [item["sentence_chunks"] for item in pages_and_chunks_over_min_token_length]

CPU times: total: 0 ns
Wall time: 1.71 ms


In [16]:
%%time
import torch 

# requires a GPU and pytorch compiled with gpu
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model.to(device) 

# Creating the embeddings on gpu
for item in tqdm(pages_and_chunks_over_min_token_length):
    item["embedding"] = embedding_model.encode(item["sentence_chunks"])

100%|██████████| 1676/1676 [00:35<00:00, 47.55it/s]

CPU times: total: 2.38 s
Wall time: 36.7 s





In [17]:
%%time

# Adjust batch size based on the compute availible
text_chunk_embeddings = embedding_model.encode(text_chunks, batch_size=16, convert_to_tensor=True)

CPU times: total: 2.36 s
Wall time: 26.2 s


In [18]:
# Saving the compute in a csv file to later load directly, can also be saved in a vector database
text_chunks_and_embeddings = pd.DataFrame(pages_and_chunks_over_min_token_length)

embeddings_save_path = "text_chunks_and_embeddings.csv"
text_chunks_and_embeddings.to_csv(embeddings_save_path, index=False)

In [19]:
import pandas as pd
import numpy as np

# Loading the saved embeddings
text_chunks_and_embeddings_df = pd.read_csv("text_chunks_and_embeddings.csv")

# Loading from csv files converts the vector embeddings to string to converting them back to vectors
text_chunks_and_embeddings_df["embedding"] = text_chunks_and_embeddings_df["embedding"].apply(lambda x: np.fromstring(x.strip("[]"), sep=" "))

# Convert embeddings to torch tensor
embeddings = torch.tensor(np.stack(text_chunks_and_embeddings_df["embedding"].tolist(), axis=0), dtype=torch.float32).to(device)

pages_and_chunks = text_chunks_and_embeddings_df.to_dict(orient="records")

In [64]:
# query = "protien rich foods" # manual query for testing
# print(f"Query: {query}")

# query_embedding = embedding_model.encode(query, convert_to_tensor=True).to(device)
# query_embedding = query_embedding.to(torch.float32)

# from time import perf_counter as timer

# start_time = timer()
# dot_scores = util.dot_score(query_embedding, embeddings)[0]
# end_time = timer()

# print(f"[INFO] Time taken to get dot scores on {len(embeddings)} is {end_time - start_time:.5f}")

# top_results_dot_product = torch.topk(dot_scores, k=5)

Query: protien rich foods
[INFO] Time taken to get dot scores on 1676 is 0.00161


In [20]:
import textwrap

# helper function to display text in proper format
def wrapped_text(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    return wrapped_text


In [None]:
# import fitz
# import matplotlib.pyplot as plt

# pdf_path = "human-nutrition.pdf"
# doc = fitz.open(pdf_path)

# page = doc.load_page(411 + 41)

# img = page.get_pixmap(dpi=300)
# doc.close()

# img_array = np.frombuffer(img.samples_mv, dtype=np.uint8).reshape((img.h, img.w, img.n))

# plt.figure(figsize = (13, 10))
# plt.axis("off")
# plt.title("Most relevant query")

# plt.imshow(img_array)
# plt.show()

In [33]:
from time import perf_counter as timer

# Converts the query into embedding and returns the top n similar results
def retrieve_relevant_resources(query: str, embeddings: torch.tensor, model: SentenceTransformer=embedding_model, top_n_results: int=5, print_time: bool=True):

    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding.to(device).to(torch.float32)

    start_time = timer()
    dot_scores = util.dot_score(query_embedding, embeddings)[0]
    end_time = timer()

    if print_time:
        print(f"[INFO] Time taken to get cosine similarity on {len(embeddings)} embeddings: {end_time - start_time:.5f}")

    scores, indices = torch.topk(dot_scores, top_n_results)

    return scores, indices

# Prints the top n results in a formatted manner
def print_and_retrieve_top_resources(query: str, embeddings: torch.tensor, pages_and_chunks: list[dict]=pages_and_chunks,top_n_results: int=5):
    scores, indices = retrieve_relevant_resources(query=query, embeddings=embeddings, top_n_results=top_n_results, print_time = False)

    for score, idx in zip(scores, indices):
        print(f"Score: {score:.4f}")
        print(f"Text: ")
        print(wrapped_text(pages_and_chunks[idx]["sentence_chunks"]))
        print(f"Page number: {pages_and_chunks[idx]["page_number"]}")
        print()

In [None]:
query = "Foods high in fiber" # Example query

retrieve_relevant_resources(query, embeddings)
print_and_retrieve_top_resources(query, embeddings)

In [35]:
# Checking local machine capability to select proper llm to run locally

gpu_memory_bytes = torch.cuda.get_device_properties(0).total_memory
gpu_memory_gb = round(gpu_memory_bytes / (2**30))
print(f"Available GPU memory: {gpu_memory_gb} GB")

Available GPU memory: 6 GB


In [36]:
# Helper code snippet to find the most suitable version of the gemma llm

if gpu_memory_gb < 5.1:
    print(f"Your available GPU memory is {gpu_memory_gb}GB, you may not have enough memory to run a Gemma LLM locally without quantization.")
elif gpu_memory_gb < 8.1:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in 4-bit precision.")
    use_quantization_config = True 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb < 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.")
    use_quantization_config = False 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb > 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommend model: Gemma 7B in 4-bit or float16 precision.")
    use_quantization_config = False 
    model_id = "google/gemma-7b-it"

print(f"use_quantization_config set to: {use_quantization_config}")
print(f"model_id set to: {model_id}")

GPU memory: 6 | Recommended model: Gemma 2B in 4-bit precision.
use_quantization_config set to: True
model_id set to: google/gemma-2b-it


In [37]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available 
from transformers import BitsAndBytesConfig

# Loading the model in 4bit 
# Original model can be quite big so we load the quanitized version
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)

# install flash attention 2, works for linux only 
# https://pypi.org/project/flash-attn/
# SDPA - Scaled Dot Product Attention 
if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
  attn_implementation = "flash_attention_2"
else:
  attn_implementation = "sdpa"
print(f"[INFO] Using attention implementation: {attn_implementation}")

print(f"[INFO] Using model_id: {model_id}")

# Instantiate tokenizer to convert text to vectors which can be fed to the model
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# loading model in float 16 dtype
llm_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_id,
    torch_dtype=torch.float16,
    quantization_config=quantization_config if use_quantization_config else None,
    device_map="auto",
    low_cpu_mem_usage=True,
    attn_implementation=attn_implementation
) 

# If we have enough GPU RAM, we can also use the model on only CUDA
if not use_quantization_config: 
    llm_model.to("cuda")

[INFO] Using attention implementation: sdpa
[INFO] Using model_id: google/gemma-2b-it


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.14s/it]


In [39]:
# Helper functions to get more knowledges about the LLM

def get_model_num_params(model: torch.nn.Module):
    return sum([param.numel() for param in model.parameters()])

def get_model_mem_size(model: torch.nn.Module):

    # Get the model parameters and buffer sizes
    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_buffers = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])

    # Calculate various model sizes
    model_mem_bytes = mem_params + mem_buffers 
    model_mem_mb = model_mem_bytes / (1024**2) 
    model_mem_gb = model_mem_bytes / (1024**3) 

    return {"model_mem_bytes": model_mem_bytes,
            "model_mem_mb": round(model_mem_mb, 2),
            "model_mem_gb": round(model_mem_gb, 2)}


print(get_model_mem_size(llm_model))
print(get_model_num_params(llm_model))

{'model_mem_bytes': 2039641088, 'model_mem_mb': 1945.15, 'model_mem_gb': 1.9}
1515268096


In [40]:
input_text = "What are the macronutrients, and what roles do they play in the human body?" # sample query for testings
print(f"Input text:\n{input_text}")

# Create prompt template for instruction-tuned model
dialogue_template = [
    {"role": "user",
     "content": input_text}
]

# Apply the chat template
prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                       tokenize=False, 
                                       add_generation_prompt=True)
print(f"\nPrompt (formatted):\n{prompt}")

Input text:
What are the macronutrients, and what roles do they play in the human body?

Prompt (formatted):
<bos><start_of_turn>user
What are the macronutrients, and what roles do they play in the human body?<end_of_turn>
<start_of_turn>model



In [41]:
%%time

# tokenize the input text
input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
print(f"Model input (tokenized):\n{input_ids}\n")

# outputs will contain the LLM output as vectors 
outputs = llm_model.generate(**input_ids,
                             max_new_tokens=256) 
print(f"Model output (tokens):\n{outputs[0]}\n")

Model input (tokenized):
{'input_ids': tensor([[     2,      2,    106,   1645,    108,   1841,    708,    573, 186809,
         184592, 235269,    578,   1212,  16065,    749,    984,   1554,    575,
            573,   3515,   2971, 235336,    107,    108,    106,   2516,    108]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1]], device='cuda:0')}



  attn_output = torch.nn.functional.scaled_dot_product_attention(


Model output (tokens):
tensor([     2,      2,    106,   1645,    108,   1841,    708,    573, 186809,
        184592, 235269,    578,   1212,  16065,    749,    984,   1554,    575,
           573,   3515,   2971, 235336,    107,    108,    106,   2516,    108,
         21404, 235269,   1517, 235303, 235256,    476,  25497,    576,    573,
        186809, 184592,    578,   1024,  16065,    575,    573,   3515,   2971,
        235292,    109,    688,  12298,   1695, 184592,  66058,    109, 235287,
          5231, 156615,  56227,  66058,   5626,   2971,   7177,  72780,    604,
          4134, 235265,   2365,    708,    573,   7920,   9719,    604,   1167,
          5999,    578,  29703, 235265,    108, 235287,   5231,  49471,  66058,
         33849,    603,   8727,    604,   4547,    578,  68808,  29703, 235269,
          3547,  44760, 235269,    578,  17839,  53186, 235265,    108, 235287,
          5231,  33690,  66058,  22904,   6572,   4134, 235269,   7154,  33398,
         48765, 2

In [42]:
# Decode the output tokens(vectors) to text
outputs_decoded = tokenizer.decode(outputs[0])
print(f"Model output (decoded):\n{outputs_decoded}\n")

Model output (decoded):
<bos><bos><start_of_turn>user
What are the macronutrients, and what roles do they play in the human body?<end_of_turn>
<start_of_turn>model
Sure, here's a breakdown of the macronutrients and their roles in the human body:

**Macronutrients:**

* **Carbohydrates:** Our body uses carbohydrates for energy. They are the primary fuel for our cells and tissues.
* **Protein:** Protein is essential for building and repairing tissues, making enzymes, and producing hormones.
* **Fat:** Fat provides energy, helps absorb vitamins, and helps to insulate the body.

**Macronutrient Ratios:**

* **Carbohydrates:** Our bodies need around 45-65% of our total calories from carbohydrates.
* **Protein:** We need around 1.6-2.2 grams of protein per kilogram of body weight per day.
* **Fat:** We need around 20-30% of our total calories from fat.

**How Macronutrients Work Together:**

Macronutrients work together to provide the body with the energy and building blocks it needs to func

In [43]:
print(f"Input text: {input_text}\n")
print(f"Output text:\n{outputs_decoded.replace(prompt, '').replace('<bos>', '').replace('<eos>', '')}")

Input text: What are the macronutrients, and what roles do they play in the human body?

Output text:
Sure, here's a breakdown of the macronutrients and their roles in the human body:

**Macronutrients:**

* **Carbohydrates:** Our body uses carbohydrates for energy. They are the primary fuel for our cells and tissues.
* **Protein:** Protein is essential for building and repairing tissues, making enzymes, and producing hormones.
* **Fat:** Fat provides energy, helps absorb vitamins, and helps to insulate the body.

**Macronutrient Ratios:**

* **Carbohydrates:** Our bodies need around 45-65% of our total calories from carbohydrates.
* **Protein:** We need around 1.6-2.2 grams of protein per kilogram of body weight per day.
* **Fat:** We need around 20-30% of our total calories from fat.

**How Macronutrients Work Together:**

Macronutrients work together to provide the body with the energy and building blocks it needs to function properly. For example:

* Carbohydrates are broken down i

In [44]:

# A list of questions to ask the llm
query_list = [
    "What are the macronutrients, and what roles do they play in the human body?",
    "How do vitamins and minerals differ in their roles and importance for health?",
    "Describe the process of digestion and absorption of nutrients in the human body.",
    "What role does fibre play in digestion? Name five fibre containing foods.",
    "Explain the concept of energy balance and its importance in weight management.",
    "How often should infants be breastfed?",
    "What are symptoms of pellagra?",
    "How does saliva help with digestion?",
    "What is the RDI for protein per day?",
    "water soluble vitamins"
]

In [45]:
import random
query = random.choice(query_list)

print(f"Query: {query}")

# Get the scores and indices of top related results
scores, indices = retrieve_relevant_resources(query=query,
                                              embeddings=embeddings)
scores, indices

Query: How do vitamins and minerals differ in their roles and importance for health?
[INFO] Time taken to get cosine similarity on 1676 embeddings: 0.01150


(tensor([0.6322, 0.6220, 0.6187, 0.6178, 0.6128], device='cuda:0'),
 tensor([ 51,  47, 873,  41, 926], device='cuda:0'))

In [46]:
def prompt_formatter(query: str, 
                     context_items: list[dict]) -> str:

    # Join contextual items in a single paragraph
    context = "- " + "\n- ".join([item["sentence_chunks"] for item in context_items])

    # The context is injected in the base prompt
    # Base prompt is formatted in a way to get the best results from the model
    base_prompt = """Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as explanatory as possible.
Use the following examples as reference for the ideal answer style.
\nExample 1:
Query: What are the fat-soluble vitamins?
Answer: The fat-soluble vitamins include Vitamin A, Vitamin D, Vitamin E, and Vitamin K. These vitamins are absorbed along with fats in the diet and can be stored in the body's fatty tissue and liver for later use. Vitamin A is important for vision, immune function, and skin health. Vitamin D plays a critical role in calcium absorption and bone health. Vitamin E acts as an antioxidant, protecting cells from damage. Vitamin K is essential for blood clotting and bone metabolism.
\nExample 2:
Query: What are the causes of type 2 diabetes?
Answer: Type 2 diabetes is often associated with overnutrition, particularly the overconsumption of calories leading to obesity. Factors include a diet high in refined sugars and saturated fats, which can lead to insulin resistance, a condition where the body's cells do not respond effectively to insulin. Over time, the pancreas cannot produce enough insulin to manage blood sugar levels, resulting in type 2 diabetes. Additionally, excessive caloric intake without sufficient physical activity exacerbates the risk by promoting weight gain and fat accumulation, particularly around the abdomen, further contributing to insulin resistance.
\nExample 3:
Query: What is the importance of hydration for physical performance?
Answer: Hydration is crucial for physical performance because water plays key roles in maintaining blood volume, regulating body temperature, and ensuring the transport of nutrients and oxygen to cells. Adequate hydration is essential for optimal muscle function, endurance, and recovery. Dehydration can lead to decreased performance, fatigue, and increased risk of heat-related illnesses, such as heat stroke. Drinking sufficient water before, during, and after exercise helps ensure peak physical performance and recovery.
\nNow use the following context items to answer the user query:
{context}
\nRelevant passages: <extract relevant passages from the context here>
User query: {query}
Answer:"""
 
    base_prompt = base_prompt.format(context=context, query=query)

    dialogue_template = [
        {"role": "user",
        "content": base_prompt}
    ]

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                          tokenize=False,
                                          add_generation_prompt=True)
    return prompt

In [53]:
query = random.choice(query_list)
print(f"Query: {query}")

# Get relevant resources
scores, indices = retrieve_relevant_resources(query=query,
                                              embeddings=embeddings)
    
# Create a list of context items
context_items = [pages_and_chunks[i] for i in indices]

# Format prompt with context items
prompt = prompt_formatter(query=query,
                          context_items=context_items)
print(prompt)

Query: water soluble vitamins
[INFO] Time taken to get cosine similarity on 1676 embeddings: 0.00024
<bos><start_of_turn>user
Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as explanatory as possible.
Use the following examples as reference for the ideal answer style.

Example 1:
Query: What are the fat-soluble vitamins?
Answer: The fat-soluble vitamins include Vitamin A, Vitamin D, Vitamin E, and Vitamin K. These vitamins are absorbed along with fats in the diet and can be stored in the body's fatty tissue and liver for later use. Vitamin A is important for vision, immune function, and skin health. Vitamin D plays a critical role in calcium absorption and bone health. Vitamin E acts as an antioxidant, protecting cells from damage. Vitamin K is essential for blood clotting and bone metaboli

In [54]:
%%time

input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate an output
# Higher temperature - more creative outputs
# Lower temperature - output strictly from provided context
outputs = llm_model.generate(**input_ids,
                             temperature=0.7, 
                             do_sample=True,
                             max_new_tokens=256) 

# Turn the output tokens into text
output_text = tokenizer.decode(outputs[0])

print(f"Query: {query}")
print(f"RAG answer:\n{output_text.replace(prompt, '')}")

Query: water soluble vitamins
RAG answer:
<bos>Sure, here's the answer to the user's query:

Water-soluble vitamins are absorbed in the small intestine and transported to the liver through blood vessels. They are not directly absorbed in the body and are instead packaged and transported to the liver. The body can synthesize some vitamins, but others must be obtained from the diet.<eos>
CPU times: total: 578 ms
Wall time: 5.27 s


In [55]:
# takes a query and returns relevant output along with the context items
def ask(query, 
        temperature=0.7,
        max_new_tokens=512,
        format_answer_text=True, 
        return_answer_only=True):

    scores, indices = retrieve_relevant_resources(query=query,
                                                  embeddings=embeddings)
    
    # Create a list of context items
    context_items = [pages_and_chunks[i] for i in indices]

    # Add score to context item
    for i, item in enumerate(context_items):
        item["score"] = scores[i].cpu() # return score back to CPU 

    prompt = prompt_formatter(query=query,
                              context_items=context_items)
    
    input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Generate an output of tokens
    outputs = llm_model.generate(**input_ids,
                                 temperature=temperature,
                                 do_sample=True,
                                 max_new_tokens=max_new_tokens)
    
    # Turn the output tokens into text
    output_text = tokenizer.decode(outputs[0])

    if format_answer_text:
        # Replace the unnecessary tokens given by the model
        output_text = output_text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "").replace("Sure, here is the answer to the user query:\n\n", "")

    if return_answer_only:
        return output_text
    
    return output_text, context_items


In [56]:
query = random.choice(query_list)
print(f"Query: {query}")

# returns the answer and context items
answer, context_items = ask(query=query, 
                            temperature=0.7,
                            max_new_tokens=512,
                            return_answer_only=False)

print(f"Answer:\n")
print(wrapped_text(answer))
print(f"Context items:")
context_items

Query: What are the macronutrients, and what roles do they play in the human body?
[INFO] Time taken to get cosine similarity on 1676 embeddings: 0.00057
Answer:

**Macronutrients**  The context provides a comprehensive overview of
macronutrients, including carbohydrates, lipids, and proteins.
**Carbohydrates**  - Provide energy and serve as the body's primary source of
fuel. - Found in foods such as fruits, vegetables, and grains. - Contribute to
the formation of cell structures and the production of energy.  **Lipids**  -
Are a source of energy, providing around 9 kilocalories per gram. - Found in
foods such as fats, oils, and some plant-based sources. - Play a role in cell
membrane formation and hormone production.  **Proteins**  - Provide structure
and support for cells. - Involved in various biochemical processes, including
enzyme production and tissue repair. - Provide energy through the process of
protein metabolism. - Found in foods such as meats, dairy products, and legumes.
C

[{'page_number': 5,
  'sentence_chunks': 'Macronutrients Nutrients that are needed in large amounts are called macronutrients. There are three classes of macronutrients: carbohydrates, lipids, and proteins. These can be metabolically processed into cellular energy. The energy from macronutrients comes from their chemical bonds. This chemical energy is converted into cellular energy that is then utilized to perform work, allowing our bodies to conduct their basic functions. A unit of measurement of food energy is the calorie. On nutrition food labels the amount given for “calories” is actually equivalent to each calorie multiplied by one thousand. A kilocalorie (one thousand calories, denoted with a small “c”) is synonymous with the “Calorie” (with a capital “C”) on nutrition food labels. Water is also a macronutrient in the sense that you require a large amount of it, but unlike the other macronutrients, it does not yield calories. Carbohydrates Carbohydrates are molecules composed of 