In [None]:
%pip install -q einops
%pip install -q chromadb
%pip install -q langchain
%pip install -q accelerate
# %pip install -q bitsandbytes
%pip install -q transformers

In [1]:
import os
import torch
import accelerate
import transformers
from time import time
from torch import cuda, bfloat16
from dotenv import load_dotenv, find_dotenv
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.llms import LlamaCpp
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Setting up environment variables

load_dotenv(find_dotenv())
HF_KEY = os.environ['HUGGINGFACE_API_KEY']

In [3]:
device = f'cuda: {cuda.current_device()}' if cuda.is_available() else 'cpu'
device

'cpu'

## 1. Using LlamaCpp Approach

In [None]:
# Loading model

from langchain.llms import LlamaCpp

model = LlamaCpp(
    streaming=True,
    model_path = 'mistral-7b-instruct-v0.1.Q3_K_S.gguf',
    temperature = 0.75,
    top_p = 1, 
    n_ctx = 4096
)
model

### Querying Model directly

In [13]:
query = "What are some of the most popular Yoga asanas?"
prompt = f"""
<|system|>
You are a Yoga GPT that gives advices and suggestions related to Yoga. Please give correct and detailed stepwise instructions.
</s>
<|user|>
{query}
</s>
<|Yoga GPT|>
"""

t1 = time()
response = model.invoke(prompt)
t2 = time()
print(f"AI: {response} /n Time taken: {t2-t1} sec")

Llama.generate: prefix-match hit

llama_print_timings:        load time =    5117.94 ms
llama_print_timings:      sample time =      53.47 ms /   256 runs   (    0.21 ms per token,  4788.09 tokens per second)
llama_print_timings: prompt eval time =  136417.56 ms /     2 tokens (68208.78 ms per token,     0.01 tokens per second)
llama_print_timings:        eval time =   62558.84 ms /   255 runs   (  245.33 ms per token,     4.08 tokens per second)
llama_print_timings:       total time =   66900.32 ms /   257 tokens


AI: There are many popular yoga poses or "asanas" in Yoga practice, but here are some of the most commonly practiced and well-known ones:

1. Downward Facing Dog (Adho Mukha Svanasana) - This is a great pose to start with, as it stretches your entire back, arms, and legs.
2. Mountain Pose (Uttasana) - This pose strengthens the spine, calves, thighs, and ankles while also stretching the groins and chest.
3. Warrior II (Virabhadrasana II) - This pose is great for building strength in your legs and core, as well as increasing balance and focus.
4. Tree Pose (Vrksasana) - This challenging pose helps improve balance, stability, and strength in your legs.
5. Child's Pose (Balasana) - This restorative pose is great for stretching the hips, thighs, and ankles while also providing a gentle spine stretch.
6. Cobra Pose (Bakasana) - This pose helps to stretch the neck, chest, and abdomen while strengthening the /n Time taken: 66.95218062400818 sec


### RAG Implementation

In [14]:
import PyPDF2

In [15]:
# Reading PDF and extracting ToC
def extract_ToC(pdf_path, start_page, end_page):

  with open(pdf_path, 'rb') as file:
    pdf_reader = PyPDF2.PdfReader(file)

    toc_entries = []

    for page in range(start_page, end_page+1):
      page = pdf_reader.pages[page]
      text = page.extract_text()
      text = text.replace("vii", "").replace("viii", "").replace("i17", "17")

      toc_lines = text.splitlines()

      for i in toc_lines:
        toc_entries.append(i)
    return toc_entries

pdf_path = "Yoga Education for Children Vol 1.pdf"
toc = extract_ToC(pdf_path, 7, 8)
toc

['Contents',
 'Introduction  1',
 'Yoga and Education  ',
 ' 1. The Need for a Y oga-Based Education System  13',
 ' 2. Yoga and Children’s Problems  22',
 ' 3. Yoga with Pre-School Children  25',
 ' 4. Yoga Lessons Begin at Age Eight  31',
 ' 5. Student Unr est and Its Remedy  34',
 ' 6. Yoga and the Youth Problem  39',
 ' 7. Better Ways of Educatio n 45',
 ' 8. Yoga at School  50',
 ' 9. Yoga and Education  57',
 '10. Questions and Answers  65',
 'Yoga as Therapy  ',
 '11. Yoga for Emotional Disturbances  77',
 '12. Yoga for the Disabled  83',
 '13. Yoga Benefits Juvenile Diabetes  87',
 'Practices  ',
 '14. Yoga Techniques for Pre-School Children  93',
 '15. Yoga Techniques for 7–14 Y ear-Olds  101',
 '16. Yoga Techniques for the Classroom  110',
 '17. Introduction to Asana  133',
 '18. Pawanmuktasana Series  139',
 'Pawanmuktasana 1: Anti-Rheumatic Asanas  141',
 'Pawanmuktasana 2: Anti-Gastric Asanas  156',
 'Pawanmuktasana 3: Energizing Asanas  165',
 '19.  Eye Exercises  171',
 

In [16]:
# Topics to extract text from
topics = []
for topic in range(18, 29):
  topics.append(toc[topic])
topics

['14. Yoga Techniques for Pre-School Children  93',
 '15. Yoga Techniques for 7–14 Y ear-Olds  101',
 '16. Yoga Techniques for the Classroom  110',
 '17. Introduction to Asana  133',
 '18. Pawanmuktasana Series  139',
 'Pawanmuktasana 1: Anti-Rheumatic Asanas  141',
 'Pawanmuktasana 2: Anti-Gastric Asanas  156',
 'Pawanmuktasana 3: Energizing Asanas  165',
 '19.  Eye Exercises  171',
 '20. Surya Namaskara: Salutations to the Sun  176',
 '21. Chandra Namaskara: Salutations to the Moon  182']

In [17]:
# Separating topics and their pages
topics_page = []
for i in topics:
  # Splitting string into words
  parts = i.split()
  topic = ' '.join(parts[:-1])
  page_num = parts[-1]
  topics_page.append((topic, page_num))
topics

['14. Yoga Techniques for Pre-School Children  93',
 '15. Yoga Techniques for 7–14 Y ear-Olds  101',
 '16. Yoga Techniques for the Classroom  110',
 '17. Introduction to Asana  133',
 '18. Pawanmuktasana Series  139',
 'Pawanmuktasana 1: Anti-Rheumatic Asanas  141',
 'Pawanmuktasana 2: Anti-Gastric Asanas  156',
 'Pawanmuktasana 3: Energizing Asanas  165',
 '19.  Eye Exercises  171',
 '20. Surya Namaskara: Salutations to the Sun  176',
 '21. Chandra Namaskara: Salutations to the Moon  182']

In [18]:
# Function to extract text from specified topics
def extract_text_by_topic(topic, start_page, end_page):
  with open(pdf_path, 'rb') as file:
    pdf_reader = PyPDF2.PdfReader(file)
    text = ''
    for page_num in range(start_page, end_page):
      page = pdf_reader.pages[page_num]
      text += page.extract_text()
  return text

In [28]:
all_splits = []
documents = {}
for i in range(len(topics_page)-1):
  topic, start_page = topics_page[i]
  if i+1 < len(topics_page):
    _, end_page = topics_page[i + 1]
    text = extract_text_by_topic(topic, int(start_page) + 8, int(end_page) + 8)
    documents['metadata'] = topic
    documents['page_content'] = text
    all_splits.extend(documents)

In [31]:
all_splits

['9314\nYoga Techniques \nfor Pre-School Children\nRishi Arundhati\nThe setting and the sessions are two important aspects to  \nconsider when presenting yoga, especially if it is being \nincorporated into regular playgroup activities. Let us first \nconsider the setting for these activities.\n Place of practice : A space that is free from furniture or \nobjects with sharp edges is necessary. It should be carpeted if possible but this is not absolutely necessary; small carpet pieces or a small towel, large enough for the child to sit on, will be enough.\n Time of practice : The practice should not immediately \nfollow a meal. Usually two hours are allowed between eating and starting yoga activities. \n Length of practice : Children at this pre-school age \nnor mally have a fairly short concentration span; ther efore, \nwe need to time the exercises according to the children involved, always watching their involvement. The constant \nmove  ment of arms and legs and not being able to sit

In [40]:
# Loading documents
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("Yoga Education for Children Vol 1.pdf")
pages = loader.load()

In [None]:
# %matplotlib inline
# from lib.common import plot_chunk_histogram, get_token_length

# page_content = [page.page_content for page in pages]

# plot_chunk_histogram(
#     chunks=page_content,
#     length_fn=len,
#     title="Distribution of page character lengths",
#     xlabel="Page character length",
#     ylabel="Page count")
# plot_chunk_histogram(
#     chunks=page_content,
#     length_fn=get_token_length,
#     title="Distribution of page token lengths",
#     xlabel="Page token length",
#     ylabel="Page count")

In [41]:
# Embedding text into vector database - ChromaDB
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                   model_kwargs={"device": "cpu"})
vector_db = Chroma.from_documents(documents=pages, 
                                  embedding=embeddings, 
                                  persist_directory="chroma_db")

In [43]:
# Initializing chain
retriever = vector_db.as_retriever()

qa = RetrievalQA.from_chain_type(
    llm = model,
    chain_type = "stuff",
    retriever = retriever,
    verbose = True
)

In [44]:
query = "What are some of the most popular Pawanmuktasana?"
prompt = f"""
<|system|>
You are a Yoga GPT that gives advices and suggestions related to Yoga. Please give correct and detailed stepwise instructions.
</s>
<|user|>
{query}
</s>
<|Yoga GPT|>
"""

t1 = time()
response = qa.invoke(prompt)
t2 = time()
print(f"AI: {response} /n Time taken: {round(t2-t1, 3)} sec")



[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    5117.94 ms
llama_print_timings:      sample time =      61.89 ms /   231 runs   (    0.27 ms per token,  3732.43 tokens per second)
llama_print_timings: prompt eval time =  340348.73 ms /  1256 tokens (  270.98 ms per token,     3.69 tokens per second)
llama_print_timings:        eval time =   88607.05 ms /   230 runs   (  385.25 ms per token,     2.60 tokens per second)
llama_print_timings:       total time =  431332.36 ms /  1486 tokens



[1m> Finished chain.[0m
AI: {'query': '\n<|system|>\nYou are a Yoga GPT that gives advices and suggestions related to Yoga. Please give correct and detailed stepwise instructions.\n</s>\n<|user|>\nWhat are some of the most popular Pawanmuktasana?\n</s>\n<|Yoga GPT|>\n', 'result': ' \n\nThe most common Pawanmuktasanas are:\n\n1. Ankle Bending (5 times): Bend your ankles towards each other, one at a time.\n2. Knee Bending (5 times each leg): Sit on the ground and bend your knees. One at a time, reach for your toes and straighten your legs.\n3. Squat and Rise Pose: Stand with feet shoulder-width apart and lower yourself into a squatting position. Hold for 2-3 seconds before rising back up to standing.\n4. Hand Clenching (5 times): Make fists with both hands and hold them tight. Release and repeat.\n5. Wrist Bending (5 times): Flex your wrists towards each other, one at a time.\n6. Shoulder Rotations (5 times clockwise/anti-clockwise): Hold arms straight in front of you, rotate shoulder

In [45]:
import json

In [46]:
# Checking doc sources
docs = vector_db.similarity_search(query)
print(f"Query: {query}")
print(f"Retrieved docs: {len(docs)}")

for doc in docs:
    doc_details = doc.to_json()['kwargs']
    print("Source: ", doc_details['metadata']['source'])
    print("Text: ", doc_details['page_content'], "\n")

Query: What are some of the most popular Pawanmuktasana?
Retrieved docs: 4
Source:  Yoga Education for Children Vol 1.pdf
Text:  13918
Pawanmuktasana Series
Pawanmuktasana is a group of exercises which release  
wind and gases from the body. Pawan means ‘wind’; 
mukta means ‘release’, asana  means ‘posture’. The pawan -
muktasana series is very simple, yet it is most effective in 
regulating what are referred to in India as the humours: phlegm or kapha , wind or vata and acid/bile, pitta.
 According to the ancient medical science known as 
ayurveda, these three humours control all the functions of the body. If any irregularity arises in their functions, negative reactions take place in the metabolism of the body and disease results.
Physical yoga 
Pawanmuktasana are simple exercises for the development 
of body awareness. During the practice of each exercise the children should be encouraged to feel the part of the body being used. Since one covers all the body parts with these exercis

In [None]:
def generate_embedding(text):
  encoded_input = tokenizer(text, return_tensors="pt")

  with torch.no_grad():
    output = embedding_model(**encoded_input)

  # Extracting sentence embeddings
  return output.last_hidden_state[:, 0].tolist()

In [None]:
def get_sentence_embedding(text):
  sentences = text.split(". ") # Splitting by sentences
  embeddings = []
  for sentence in sentences:
    embedding = generate_embedding(sentence)
    embeddings.append(embedding)

  # Returning average of sentence embeddings
  return np.mean(embeddings, axis=0)

In [None]:
def createStore_doc(topic, text_content):
  # Generating vector embedding for the content
  embedding = get_sentence_embedding(text_content)

  # Creating class obj
  obj = {
      'topic': topic,
      'content': text_content
  }
  uuid = client.data_object.create(
      class_name = "TopicContent",
      data_object = obj,
      # uuid = generate_uuid5(data_object)
  )

In [None]:
for i in range(len(topics_page)-1):
  topic, start_page = topics_page[i]
  if i+1 < len(topics_page):
    _, end_page = topics_page[i + 1]
    text = extract_text_by_topic(topic, int(start_page) + 8, int(end_page) + 8)
    createStore_doc(topic, text)

print("Text data stored successfully in ChromaDB!")

### 2. Using Model API Inference

In [None]:
# Setting quantization config to load LLM with less GPU memory
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_8bit=False,
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=bfloat16,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_use_double_quant=True
# )
# bnb_config

In [None]:
model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
pipeline = HuggingFacePipeline.

In [9]:
# Loading tokenizer

tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')
tokenizer

LlamaTokenizerFast(name_or_path='mistralai/Mistral-7B-Instruct-v0.2', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [10]:
# Defining query pipeline
t1 = time()

query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map='auto'
)

t2 = time()
print(f"Preparing pipeline: {round(t2-t1, 3)} sec.")

TypeError: Could not infer framework from class <class 'langchain_community.llms.llamacpp.LlamaCpp'>.

In [None]:
# model = transformers.AutoModelForCausalLM.from_pretrained(
#     model_id,
#     trust_remote_code = True,
#     config = model_cfg,
#     # quantization_config = bnb_config,
#     device_map = 'auto'
# )
# model