In [27]:
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader, JSONLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.vectorstores.utils import filter_complex_metadata
import pickle
import json
from tqdm.notebook import tqdm

In [41]:
# Chat model served by a local Ollama instance.
model = ChatOllama(model="llama2")

# Chroma vector-store object created with library defaults.
db = Chroma()

In [42]:
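# Alternative ingestion path (currently disabled): load every file under
# ../data/geeta_v4 with DirectoryLoader instead of a single text file.
# docs = DirectoryLoader("../data/geeta_v4", use_multithreading=True, show_progress=True).load()
# chunks = RecursiveCharacterTextSplitter().split_documents(docs)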

In [43]:
# Read the source text file into LangChain documents.
loader = TextLoader("../data/geeta_txt/geeta.txt")
docs = loader.load()

# Split the documents into chunks; no arguments are passed, so the splitter
# uses its library-default chunk size and overlap.
splitter = RecursiveCharacterTextSplitter()
chunks = splitter.split_documents(docs)

In [33]:
# Run the chunks through LangChain's metadata filter before they are stored.
# NOTE(review): this rebinds `chunks`, so the unfiltered list is no longer
# reachable after this cell runs.
chunks = filter_complex_metadata(documents=chunks)

In [34]:
# Cache the chunk list on disk. The context manager guarantees the file is
# flushed and closed before any later cell reads it back (the bare `open()`
# call left the handle to be closed whenever the garbage collector got to it).
with open("../data/chunks/chunks.pkl", "wb") as chunk_file:
    pickle.dump(chunks, chunk_file)

In [35]:
# Reload the cached chunk list. The context manager closes the file handle
# deterministically instead of leaking it.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# a file that this notebook (or another trusted source) produced.
with open("../data/chunks/chunks.pkl", "rb") as chunk_file:
    c = pickle.load(chunk_file)

In [44]:
# Embed the chunks and index them in a new Chroma store.
# `from_documents` is a classmethod, so it is called on the class itself
# rather than through a throwaway `Chroma()` instance.
# NOTE(review): `c` is the list reloaded from the pickle cache, not the
# in-memory `chunks`; re-run the dump/load cells after re-chunking, otherwise
# a stale cache gets indexed.
embedding = FastEmbedEmbeddings(cache_dir="../cache")
vector_store = Chroma.from_documents(documents=c, embedding=embedding)

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

In [45]:
# Instruction prompt with two placeholders, {question} and {context}.
# The template is spelled out piece by piece so that its leading indentation
# and trailing spaces, which are all part of the text given to the model,
# are visible in the source.
RAG_TEMPLATE = (
    "\n"
    "            <s> [INST] You are assistant for question-answering tasks. Use the following context to answer the question. \n"
    "            If you don't know the answer, just say you don't know and do not make up any answer. Use three sentences\n"
    "             and be concise in your answer. [/INST] </s> \n"
    "            [INST] Question: {question} \n"
    "            Context: {context} \n"
    "            Answer: [/INST]\n"
    "            "
)
prompt = PromptTemplate.from_template(RAG_TEMPLATE)
# Retrieval settings: return at most 3 chunks, and only those whose relevance
# score reaches the 0.5 threshold.
retriever_search_kwargs = {
    "k": 3,
    "score_threshold": 0.5,
}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_search_kwargs,
)

In [46]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as its Python repr (metadata, quotes, escaped newlines),
    which pollutes the context the model sees.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the input question is sent to the retriever (whose hits are
# flattened to plain text) and also passed through unchanged; both fill the
# prompt, which is sent to the model, and the reply is parsed to a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [48]:
# Smoke-test the pipeline with a sample question; the bare last expression
# makes the notebook display the returned answer string.
question = "who is arjuna?"
chain.invoke(question)

'The passage you provided is a dialogue between Lord Krishna and Arjuna in the Bhagavad Gita, where they are discussing the nature of the mind and how it can be controlled through practice and detachment. Here is a more detailed analysis of the passage:\n\n* The passage begins with Lord Krishna addressing Arjuna, saying that the mind is untamable and restless, but it can be brought under control through practice and dispassion.\n* Lord Krishna explains that the mind is difficult to control and restless, but with practice and renunciation, it can be restrained.\n* The Bhagavat adds that even though the mind is unsteady and hard to control, it can be controlled through practice and an attitude of desirelessness.\n* Lord Krishna responds by saying that Yoga is difficult to attain for one with an uncontrolled mind, but it can be attained through the right means by one who strives and has a subdued mind.\n* My conviction is that Yoga is hard to be attained by one with an uncontrolled self, 