In [1]:
from langchain.chains import LLMChain, RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, TokenTextSplitter
from langchain.retrievers import ParentDocumentRetriever
from langchain.vectorstores import Chroma, Vectara
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.storage import InMemoryStore
from langchain.document_loaders import TextLoader
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers import ContextualCompressionRetriever

from langchain.chat_models.openai import ChatOpenAI
from langchain_community.llms import HuggingFaceHub
from langchain.document_loaders.unstructured import UnstructuredFileLoader

import urllib.request

In [2]:
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()
HF_TOKEN = os.getenv("Huggingface_api")

In [4]:
url = 'https://www.buddhanet.net/pdf_file/chanmed1.pdf'
file_name = 'meditation.pdf'
urllib.request.urlretrieve(url, file_name)

('meditation.pdf', <http.client.HTTPMessage at 0x1eb5bcefcd0>)

In [13]:
def get_answer_character(doc_text, chunk_size: int, chunk_overlap: int, query: str):
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=" ")
    docs = text_splitter.split_documents(doc_text)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vs = Chroma.from_documents(docs, embeddings)
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vs.as_retriever())
    return qa.run(query)

def get_answer_token(doc_text, chunk_size: int, chunk_overlap: int, query: str):
    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = text_splitter.split_documents(doc_text)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vs = Chroma.from_documents(docs, embeddings)
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vs.as_retriever())
    return qa.run(query)

def get_answer_recursive(doc_text, chunk_size: int, chunk_overlap: int, query: str):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = text_splitter.split_documents(doc_text)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vs = Chroma.from_documents(docs, embeddings)
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vs.as_retriever())
    return qa.run(query)

def get_answer_parent(doc_text, query: str):
    from langchain.retrievers.document_compressors import EmbeddingsFilter
    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        huggingfacehub_api_token = HF_TOKEN,
        model_kwargs={
            "max_new_tokens": 512,
            "top_k": 30,
            "temperature": 0.1,
        },
    )

    # This text splitter is used to create the parent documents - The big chunks
    parent_splitter = RecursiveCharacterTextSplitter(chunk_size=4000)
    # This text splitter is used to create the child documents - The small chunks
    # It should create documents smaller than the parent
    child_splitter = RecursiveCharacterTextSplitter(chunk_size=200)

    #docs = text_splitter.split_documents(doc_text)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    vs = Chroma(collection_name="split_parents", embedding_function=embeddings)
    # The storage layer for the parent documents
    store = InMemoryStore()

    big_chunks_retriever = ParentDocumentRetriever(
        vectorstore=vs,
        docstore=store,
        child_splitter=child_splitter,
        parent_splitter=parent_splitter,
    )

    big_chunks_retriever.add_documents(doc_text)
    # compressor = LLMChainExtractor.from_llm(relevant_filter)
    # compression_retriever = ContextualCompressionRetriever(
    #     base_compressor=compressor, base_retriever=big_chunks_retriever,
    # )

    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=big_chunks_retriever)
    return qa.invoke(query)
    # return vs.similarity_search(query)

In [14]:
llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        huggingfacehub_api_token = HF_TOKEN,
        model_kwargs={
            "max_new_tokens": 512,
            "top_k": 30,
            "temperature": 0.1,
        },
    )

In [7]:
loader = UnstructuredFileLoader(file_name, mode="single", strategy="fast")
doc_text = loader.load()

The PDF <_io.BufferedReader name='meditation.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


In [8]:
query1 = "what is realization of the state of purity?"
query2 = "What are the siz profound dharma gates?"

In [15]:
query = query1

for chunk_size in [500, 1000, 2000]:
    for chunk_overlap in [0, 100]:
        response = get_answer_character(doc_text, chunk_size, chunk_overlap, query)
        print(f"chunk={chunk_size}, overlap={chunk_overlap}, response={response}\n\n")



chunk=500, overlap=0, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following the b



chunk=500, overlap=100, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following the



chunk=1000, overlap=0, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following the 



chunk=1000, overlap=100, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following th



chunk=2000, overlap=0, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following the 



chunk=2000, overlap=100, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

These Six Profound Dharma Gates may be seen as consisting of a preliminary set of methods, involving counting and following th

In [16]:
query = query1

for chunk_size in [500, 1000]:
    for chunk_overlap in [0, 100]:
        response = get_answer_token(doc_text, chunk_size, chunk_overlap, query)
        print(f"chunk={chunk_size}, overlap={chunk_overlap}, response={response.split(':')[-1]}\n\n")



chunk=500, overlap=0, response= The Realization of Purity is a state of mind that manifests when the mind is still like calm water and there is an absence of false thinking. It is the Real Mind, which does not exist apart from false thinking, and is called The Realization of Purity. It is a water-without-waves sort of Mind. This realization is a part of the Six Profound Dharma Gates practice, which also includes practices of stopping (Chih), contemplation (Kuan), returning, and purifying. The goal of these practices is to realize Great Dhyana and Great Prajna, which can only be achieved with a relaxed and regulated mind.






chunk=500, overlap=100, response= The Realization of Purity is a state of mind that manifests when the mind is still like calm water and there is an absence of false thinking. It is the Real Mind, which does not exist apart from false thinking, and is called The Realization of Purity. It is a water-without-waves sort of Mind. This realization is a part of the Six Profound Dharma Gates practice, which also includes practices of stopping (Chih), contemplation (Kuan), returning, and purifying. The goal of these practices is to realize Great Dhyana and Great Prajna, which can only be achieved with a relaxed and regulated mind.






chunk=1000, overlap=0, response= The Realization of Purity is a state of mind that manifests when the mind is still like calm water and there is an absence of false thinking. It is the Real Mind, which does not exist apart from false thinking, and is called The Realization of Purity. It is a water-without-waves sort of Mind. This realization is a part of the Six Profound Dharma Gates practice, which also includes practices of stopping (Chih), contemplation (Kuan), returning, and purifying. The goal of these practices is to realize Great Dhyana and Great Prajna, which can only be achieved with a relaxed and regulated mind.






chunk=1000, overlap=100, response= The Realization of Purity is a state of mind that manifests when the mind is still like calm water and there is an absence of false thinking. It is the Real Mind, which does not exist apart from false thinking, and is called The Realization of Purity. It is a water-without-waves sort of Mind. This realization is a part of the Six Profound Dharma Gates practice, which also includes practices of stopping (Chih), contemplation (Kuan), returning, and purifying. The goal of these practices is to realize Great Dhyana and Great Prajna, which can only be achieved with a relaxed and regulated mind.




In [17]:
query = query1

for chunk_size in [500, 1000]:
    for chunk_overlap in [0, 100]:
        response = get_answer_recursive(doc_text, chunk_size, chunk_overlap, query)
        print(f"chunk={chunk_size}, overlap={chunk_overlap}, response={response}\n\n")



chunk=500, overlap=0, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Purity

The practice of purification consists of contemplation on discriminating views. When the mind



chunk=500, overlap=100, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discr



chunk=1000, overlap=0, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discri



chunk=1000, overlap=100, response=Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on discriminating views. When the mind is still like calm water and there is an absence of false thinking, the Real Mind, which does not exist apart from false thinking, manifests. This water-without-waves sort of Mind is called The Realization of Purity.

69

Realization of the State of Purity

The practice of purification consists of contemplation on disc

In [19]:
query = query2

for chunk_size in [500, 1000]:
    for chunk_overlap in [0, 100]:
        response_1 = get_answer_character(doc_text, chunk_size, chunk_overlap, query)
        response_2 = get_answer_token(doc_text, chunk_size, chunk_overlap, query)
        response_3 = get_answer_recursive(doc_text, chunk_size, chunk_overlap, query)
        print(f"chunk={chunk_size}, overlap={chunk_overlap}\n, character_response={response_1.split(':')[-1]}\n, token_response = {response_2.split(':')[-1]}\n, recursive_response = {response_3.split(':')[-1]}\n\n\n")



chunk=500, overlap=0
, character_response= The Six Profound Dharma Gates are a set of meditation practices consisting of counting, following the breath, stopping thoughts, contemplating, returning to the present moment, and purifying the mind. These practices help to regulate the mind and enable it to relax, allowing for deeper meditation and the realization of Great Dhyana and Great Prajna.
, token_response =  The Six Profound Dharma Gates are a set of meditation practices consisting of counting, following the breath, stopping thoughts, contemplating, returning to the present moment, and purifying the mind. These practices help to regulate the mind and enable it to relax, allowing for deeper meditation and the realization of Great Dhyana and Great Prajna.
, recursive_response =  The Six Profound Dharma Gates are a series of meditation practices designed to help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisd



chunk=500, overlap=100
, character_response= The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions and no longer being at that level, which relates to the more subtle level of perceptions.
, token_response =  The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions a



chunk=1000, overlap=0
, character_response= The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions and no longer being at that level, which relates to the more subtle level of perceptions.
, token_response =  The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions an



chunk=1000, overlap=100
, character_response= The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions and no longer being at that level, which relates to the more subtle level of perceptions.
, token_response =  The Six Profound Dharma Gates refer to a set of practices in Mahayana Buddhism that help regulate the mind and enable it to relax, leading to the realization of Great Dhyana (deep meditation) and Great Prajna (wisdom). The practices include counting and following the breath, the two main practices of Chih (calm abiding) and Kuan (insight), and the concluding practices of seeing distinctions at the level of conceptions 