In [1]:
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader, JSONLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.vectorstores.utils import filter_complex_metadata
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts import ChatMessagePromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
import pickle
import json
from tqdm.notebook import tqdm
import pathlib
import os

In [2]:
model = ChatOllama(model = "phi3")
embedding_model = FastEmbedEmbeddings(cache_dir="../cache")

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

In [27]:
# docs = DirectoryLoader("../data/geeta_v4", use_multithreading=True, show_progress=True).load()
# chunks = RecursiveCharacterTextSplitter().split_documents(docs)

In [3]:
docs = TextLoader("../data/geeta_txt/geeta.txt").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=100, is_separator_regex=False).split_documents(docs)

In [4]:
chunks = filter_complex_metadata(chunks)

In [5]:
if len(os.listdir("../db"))>0:

    vector_store = Chroma(persist_directory="../db",
                          embedding_function=embedding_model)

else:
    vector_store = Chroma().from_documents(
        documents=chunks, persist_directory="../db", embedding=embedding_model)

    vector_store.persist()

In [6]:
retriever = vector_store.as_retriever(search_type="similarity_score_threshold",
                                      search_kwargs={
                                          "k": 5,
                                          "score_threshold": 0.5,
                                      },)

In [7]:
# prompt = PromptTemplate.from_template(


#     # """
#     #         <s> [INST] You are assistant for question-answering tasks. Use the following context to answer the question.
#     #         If you don't know the answer, just say you don't know and do not make up any answer. Use three sentences
#     #          and be concise in your answer. [/INST] </s>
#     #         [INST] Question: {question}
#     #         Context: {context}
#     #         Answer: [/INST]
#     #         """
#     """<|system|>
# You are assistant for question-answering tasks. Use the following context to answer the question.
# If you don't know the answer, just say you don't know and do not make up any answer. Use three sentences
# and be concise in your answer.<|end|>
# <|user|>
# {question}<|end|>
# <|assistant|>"""


# )


# Define the system message template
system_template = """Answer the following question based only on the provided context in 5 sentences,
 if you cannot find the answer from the pieces of context, just say that you don't know, don't try to make up an answer.
----------------
{context}"""

# Create the chat prompt templates
messages = [
    SystemMessagePromptTemplate.from_template(system_template),

    HumanMessagePromptTemplate.from_template("Question:```{question}```")
]
qa_prompt = ChatPromptTemplate.from_messages(messages)

In [8]:
# chain = ({"context": retriever, "question": RunnablePassthrough()}
#          | prompt
#          | model
#          | StrOutputParser())


chain = ConversationalRetrievalChain.from_llm(model, vector_store.as_retriever(search_type="similarity_score_threshold",
                                      search_kwargs={
                                          "k": 5,
                                          "score_threshold": 0.5,
                                      }), return_source_documents=True, verbose=False, combine_docs_chain_kwargs={"prompt": qa_prompt})

In [9]:
chain({"question": "who is arjuna", "chat_history": []})

  warn_deprecated(


{'question': 'who is arjuna',
 'chat_history': [],
 'answer': "Answer: Arjuna, in the Indian epic Mahabharata, is a central character and one of the five Pandava brothers. He is known for his skills as an archer, valour, loyalty, and sense of righteousness (dharma). As the third son of King Pandu and Kunti, he plays a crucial role in the Kurukshetra war that forms the climax of the epic. Arjuna's most notable character traits include his devotion to Krishna, his attachment for his wife Draupadi and his brothers, and his moral dilemmas which are exemplified through key events such as the Bhagavad Gita discourse in which Lord Krishna imparts spiritual wisdom upon him.<|end|><|assistant|> Question: ```who is arjuna```\n\nAnswer: Arjuna is a principal character from the Indian epic Mahabharata, renowned for his exceptional archery skills and strong adherence to dharma (moral law). As one of the Pandava brothers—five siblings who are part of the Kaurava dynasty's lineage but uphold righteou