In [1]:
import glob
import os
import warnings

import gradio as gr
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFDirectoryLoader, PyMuPDFLoader
from langchain_community.vectorstores import Chroma, FAISS
from langchain_core.callbacks import StdOutCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

In [2]:
# Load settings from a local .env file; override=True is passed so that values from
# .env take precedence over variables already present in the process environment.
load_dotenv(override=True)

# Do not write a placeholder key into the environment: a dummy value only postpones
# the failure to the first API call and encourages pasting a real secret into the
# notebook. Warn instead, so the missing configuration is visible right here.
if not os.getenv('OPENAI_API_KEY'):
    warnings.warn(
        "OPENAI_API_KEY is not set; add it to your .env file or environment "
        "before using ChatOpenAI.",
        stacklevel=2,
    )

In [3]:
# Collect every PDF under ./Bookshelf (the "**/*.pdf" pattern also matches
# sub-folders) and parse each file with PyPDFLoader.
pdf_loader_options = {
    "path": "./Bookshelf",
    "glob": "**/*.pdf",
    "loader_cls": PyPDFLoader,
}
loader = DirectoryLoader(**pdf_loader_options)
documents = loader.load()

Ignoring wrong pointing object 28 0 (offset 0)


In [5]:
# Split the loaded PDFs into overlapping chunks (chunk_size=1000, chunk_overlap=200)
# so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# Use `documents` from the loader cell: the previous `all_documents` name is not
# defined anywhere in this notebook and raised NameError on a fresh kernel.
chunks = text_splitter.split_documents(documents)

In [17]:
# Sanity check: number of chunks produced by the splitter.
len(chunks)

21584

In [6]:
# Embedding model used to turn each chunk into a vector for similarity search.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [7]:
# Embed every chunk and index the vectors in a FAISS vector store.
vectordb = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [8]:
# Chat model that answers the questions.
# Alternative via Ollama: llm = ChatOllama(model="deepseek-r1", temperature=0.0)
llm = ChatOpenAI(
    temperature=0,
    model='gpt-4o-mini',
)

In [9]:
# Conversation memory handed to the chain; prior turns are kept under 'chat_history'.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='chat_history',
)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [10]:
# Retriever over the FAISS store, created with the library's default search settings
# (no search_kwargs are passed here).
retriever = vectordb.as_retriever()

In [11]:
# Wire the chat model, the retriever and the conversation memory into one
# conversational retrieval chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [14]:
# Smoke-test the chain. `Chain.run` is deprecated, so call `invoke` with the same
# {"question": ...} input used by the other cells and display only the answer text.
conversation_chain.invoke({"question": 'What is the main topic of the documents ? '})["answer"]

  conversation_chain.run('What is the main topic of the documents ? ')


'The main topic of the documents appears to be knowledge representation languages and related concepts, as indicated by the mention of sections discussing representation languages and the structure of the book intended for educational purposes in the field of anthropology, psychology, and neuroscience.'

In [15]:
def chat(message, history):
    """Answer one chat message using the module-level ``conversation_chain``.

    Args:
        message: The user's question; sent to the chain under the "question" key.
        history: Accepted to match the callback signature but not used here; the
            chain is built with its own memory object.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    return conversation_chain.invoke({"question": message})["answer"]

In [16]:
# Serve `chat` through Gradio's chat interface. gradio is imported as `gr` in the
# import cell at the top of the notebook rather than mid-notebook.
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




In [34]:
# Debugging why the LLM cannot answer even though the book was loaded into the vector store.
# StdOutCallbackHandler prints each chain step and the fully formatted prompt, which
# shows exactly which chunks the retriever handed to the model.
# NOTE(review): this cell rebinds llm, memory, retriever and conversation_chain, so any
# cell run afterwards sees these objects (including temperature=0.7) instead of the
# ones built earlier in the notebook.
llm = ChatOpenAI(temperature=0.7, model='gpt-4o-mini')
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
retriever = vectordb.as_retriever()
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()],
)

query = 'Who wrote the book AI engineering ?'
result = conversation_chain.invoke({"question": query})
answer = result['answer']
print("\nAnswer:", answer)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
But now I have a story to tell. It’s an important story—one
that every engineer and software developer should hear. I’m
not entirely satisfied with the way others have told it, so I
wrote the book that I wish I had had when I was learning the
craft. It starts with the basics and leads you on a journey
to the heights of ML and AI. By the end, you’ll understand

a constant source of inspiration. Their commitment to AI advancements made my experience of reviewing 
this book insightful and enriching. Special thanks to my family for their ongoing encouragement throughout 
this journey.

engineer.
• Anyone wanting t

* The LLM didn't know the answer because the retriever wasn't returning enough chunks, so the relevant passage never reached the prompt.
* The retriever is an abstraction over the VectorStore that is used during RAG; `k` is the number of chunks it returns for each query.
* Putting it all together again: every time we invoke the `conversation_chain`, it will now retrieve **500** chunks from the VectorStore.

In [28]:
# Request many more chunks per query than the default retriever, then rebuild the
# chain around it, reusing the existing llm and memory objects.
# NOTE(review): 500 chunks of up to 1000 characters each makes a very large prompt;
# confirm it fits the model's context window and budget.
wide_search = {"k": 500}
retriever = vectordb.as_retriever(search_kwargs=wide_search)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

def chat(question, history):
    """Answer one chat message using the module-level ``conversation_chain``.

    Args:
        question: The user's question; sent to the chain under the "question" key.
        history: Accepted to match the callback signature but not used here; the
            chain is built with its own memory object.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    response = conversation_chain.invoke({"question": question})
    reply = response["answer"]
    return reply

In [29]:
# Launch a second chat UI so the rebuilt chain (k=500 retriever) can be tried out.
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.




* Problem solved