## Installations


In [1]:
!pip install -qU transformers accelerate einops langchain xformers bitsandbytes faiss-gpu sentence_transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ..

In [2]:
# Now you can try installing the package with the -q (quiet) flag
!pip install -q unstructured


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.4 MB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.4 MB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h

## SetUp

In [3]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

Model loaded on cuda:0


In [None]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

In [5]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [6]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [7]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [8]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

## Normal Testing

In [23]:
res = generate_text("who is Patriji and what is meditation")
print(res[0]["generated_text"])

who is Patriji and what is meditation?
 Unterscheidung between a Guru and a teacher.

In this chapter, we will explore the concept of a Guru and how it differs from a teacher. We will also delve into the role of a Guru in spiritual growth and development, and how they can help individuals achieve their full potential.
A Guru is a spiritual teacher who has achieved a high level of spiritual realization and understanding. They have transcended the limitations of the material world and have gained insight into the deeper truths of existence. A Guru's primary function is to guide and mentor their students on their spiritual journey, helping them to overcome obstacles and achieve their goals.
A teacher, on the other hand, is someone who imparts knowledge or skills to others. While teachers can be very effective in helping students learn new things, they are not necessarily spiritual guides. Teachers may focus solely on academic subjects or practical skills, without delving into the deeper a

In [26]:
res = generate_text("what is meant by Anapanasati")
print(res[0]["generated_text"])

what is meant by Anapanasati, and how does it relate to the concept of mindfulness?
 everybody has their own unique way of practicing mindfulness.
Anapanasati is a Pali term that translates to "mindfulness of breathing" or "breath-awareness." It refers to the practice of paying attention to the sensations of the breath as it moves in and out of the body. This simple yet powerful practice is a fundamental element of many meditation traditions, including Buddhism, Hinduism, and yoga.
The practice of anapanasati involves focusing one's attention on the breath, observing its movements without judgment or distraction. One might notice the sensation of the breath moving in and out of the nostrils, the rise and fall of the chest or belly, or the subtle changes in temperature or pressure within the body. The goal is not to control the breath but rather to cultivate awareness of its natural flow and rhythm.
In this way, anapanasati serves as a tool for developing mindfulness, which is the quali

## Providing Context by Vector Store

In [10]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
#llm(prompt="python code for random forest classification")

In [11]:
from langchain.document_loaders import WebBaseLoader

web_links = ["https://www.pssmovement.org/meditation/","https://www.pssmovement.org/about-pssm/","https://www.pssmovement.org/about-patriji/", "https://www.pssmovement.org/patrijis-concepts/", "https://quotes.pssmovement.org/Quotes"]

loader = WebBaseLoader(web_links)
documents = loader.load()

In [None]:
print(documents)

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(documents)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

In [19]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

In [24]:
chat_history = []

query = "Who is Patriji and what is meditation"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Patriji is a spiritual master who teaches meditation and other spiritual practices. He has been traveling extensively throughout India and conducting workshops on various topics such as anapanasati meditation, vegetarianism, and new age spiritual science. According to Patriji, meditation is a science and there is only one right way to practice it, which is anapanasati meditation.


In [25]:
query = "what is meant by Anapanasati"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Anapanasati is a meditation technique that involves focusing one's attention on the normal, natural breath. It is a Pali term that translates to "in-breath" or "out-breath" and means being with the breath. This meditation practice aims to cultivate effortless joyful oneness with the breath.
