In [1]:
import os
from dotenv import load_dotenv

from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import create_history_aware_retriever , create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate , MessagesPlaceholder
from langchain_core.messages import HumanMessage , AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI , GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file before the Google clients are created
# (presumably this is where the Google API key comes from -- confirm).
load_dotenv()

# Chat model used by the chains below.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
)

# Embedding model used to index and query the vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)

In [3]:
# Resolve the working directories relative to where the notebook is run:
#   book_dir -> folder holding the source PDFs
#   db_dir   -> folder where the Chroma index is persisted
current_dir = os.getcwd()
book_dir = os.path.join(current_dir, 'Books')
db_dir = os.path.join(current_dir, 'db', 'all_pdfs')

# Echo the resolved paths so a wrong working directory is obvious immediately.
for label, path in (
    ('current_dir', current_dir),
    ('book_dir', book_dir),
    ('db_dir', db_dir),
):
    print(label, path)

current_dir c:\Users\HP\Desktop\langchain-crash-course\Practicing
book_dir c:\Users\HP\Desktop\langchain-crash-course\Practicing\Books
db_dir c:\Users\HP\Desktop\langchain-crash-course\Practicing\db\all_pdfs


In [4]:
# Build the corpus for the vector DB: load every PDF in book_dir and split the
# text into overlapping chunks ready for embedding.

# Reuse book_dir instead of rebuilding the path, accept any casing of the
# extension, and sort because os.listdir returns entries in arbitrary order
# (sorted input keeps the chunk order reproducible between runs).
books = [
    os.path.join(book_dir, book)
    for book in sorted(os.listdir(book_dir))
    if book.lower().endswith('.pdf')
]
if not books:
    # Fail here with a clear message rather than later inside the vector store.
    raise FileNotFoundError(f"No .pdf files found in {book_dir}")

book_documents = []

for book in books:
    loaded_docs = PyMuPDFLoader(book).load()

    for document in loaded_docs:
        # Replace the loader's metadata so each document carries only its source path.
        document.metadata = {'source': book}
        book_documents.append(document)

# 1000-character chunks with 200 characters of overlap between neighbours.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(book_documents)

In [5]:
# Create the Chroma vector store, or reopen it if it was already persisted.
# Calling Chroma.from_documents on every run re-embeds all chunks and appends
# them to the existing collection, so the index fills up with duplicates (and
# every re-run pays for the embeddings again).
# NOTE: delete db_dir to force a rebuild after adding or changing books.
if os.path.isdir(db_dir) and os.listdir(db_dir):
    # Index already on disk: open it with the same embedding function.
    db = Chroma(persist_directory=db_dir, embedding_function=embeddings)
else:
    # First run: embed the chunks and persist them under db_dir.
    db = Chroma.from_documents(
        documents=docs, embedding=embeddings, persist_directory=db_dir
    )

In [6]:
# The retriever has to exist before any chain that uses it is built.
# Search settings: 'mmr' search type, k=3 results, fetch_k=20 candidates,
# lambda_mult=0.5.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, fetch_k=20, lambda_mult=0.5),
)

In [7]:
# History-aware retrieval: rewrite the latest user query into a standalone
# query (using the chat history) before it is sent to the retriever.

# The previous wording was self-contradictory ("most relevant to the chat
# history"), ran two sentences together, and never told the model to return
# only the rewritten query, so the model could answer instead of reformulating.
history_aware_system_prompt = (
    "You are an assistant for a question-answering task. "
    "You will be given a chat history and the latest human query. "
    "If the query refers to the chat history, rewrite it as a "
    "standalone query that can be understood without the chat history. "
    "If the query does not relate to the chat history, return it unchanged. "
    "Do NOT answer the query; only return the reformulated query."
)

history_aware_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', history_aware_system_prompt),
        MessagesPlaceholder('history'),
        ('human', "{input}"),
    ]
)

# NOTE: create_history_aware_retriever only runs this prompt when the invoke
# input contains a non-empty "chat_history" entry; otherwise the raw "input"
# is passed straight to the retriever.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, history_aware_prompt
)

In [20]:
# System prompt for the answering step. {context} is the placeholder that
# receives the documents retrieved from the vector database.
# The prompt text is kept verbatim, spelling included.
stuff_document_system_prompt = (
    "You are a helpfull assistant, you will recieve some documents "
    "and based on those documents answer the question in a very "
    "concise manner, answer should be under 4 sentences. "
    "If you don't now the answer or the context is empty "
    "just say i don't now the answer  \n\n"
    "{context}"
)

# Message layout: system instructions, prior conversation turns, then the
# current user question.
question_answer_messages = [
    ("system", stuff_document_system_prompt),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
]
question_answer_prompt = ChatPromptTemplate.from_messages(question_answer_messages)

# Chain that fills the prompt with the retrieved documents and calls the LLM.
question_answer_chain = create_stuff_documents_chain(
    llm,
    question_answer_prompt,
)

In [21]:
# Full RAG pipeline: retrieve with the history-aware retriever, then answer
# with the question-answer chain.
chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [22]:
def chat_with_documents():
    """Run an interactive question-answering loop over the indexed documents.

    Reads questions with ``input()``, sends each one together with the
    conversation so far to the module-level ``chain``, prints the answer, and
    appends both turns to the history. Typing ``exit`` (any casing, surrounding
    whitespace ignored) ends the session. Blank input is skipped.

    Returns:
        None. The conversation history lives only for the duration of the call.
    """
    chat_history = []
    print('Chat started with AI, type "exit" to exit the chat')
    while True:
        user_prompt = input('User: ').strip()
        if user_prompt.lower() == 'exit':
            break
        if not user_prompt:
            # Nothing to ask; prompt again instead of sending an empty query.
            continue

        result = chain.invoke({
            "input": user_prompt,
            # Name of the MessagesPlaceholder used by the prompts.
            "history": chat_history,
            # create_history_aware_retriever decides whether to rewrite the
            # query by looking at the "chat_history" key specifically; without
            # it the history-aware step is silently skipped and follow-up
            # questions are retrieved without conversational context.
            "chat_history": chat_history,
        })
        answer = result['answer']

        # input() does not leave the question in the captured output, so echo it.
        print(f"User: {user_prompt}")
        print(f"AI: {answer}")

        chat_history.append(HumanMessage(content=user_prompt))
        chat_history.append(AIMessage(content=answer))

In [23]:
# Start the interactive chat session; the loop runs until the user types "exit".
chat_with_documents()

Chat started with AI, type "exit" to exit the chat
User: tell me about remo and juliet
AI: I don't know the answer. The provided text is about Renuka's love for books and doesn't mention anything about Remo and Juliet. 

User: what is the benifit of living alone
AI: The provided text highlights several benefits of living alone:

* **Self-discovery:** It allows for introspection, understanding your own thoughts, preferences, and reactions without external influence.
* **Freedom and autonomy:** You are free to make choices and pursue activities without needing to consider others' opinions or expectations.
* **Personal growth:** It forces you to become more independent and confident in your decisions, fostering self-reliance.
* **Connection with your true self:** It provides a space to listen to your inner voice and discover your true desires without external manipulation. 

User: how to master your emotions
AI: The provided text offers insights on mastering emotions:

* **Accept their tr