In [38]:
import pymupdf
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.schema import Document
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory

from dotenv import load_dotenv

load_dotenv()

True

# Extract text from book

In [39]:
# Open the book and read its table of contents; each TOC entry unpacks
# into (level, title, page number).
doc = pymupdf.open("zero.pdf")
toc = doc.get_toc()

# chapters_info maps a running entry index to that entry's details;
# chapter_pages holds the matching start pages in the same order.
chapters_info = {}
chapter_pages = []

print("PDF Table of Contents:")
for level, title, page_num in toc:
    print(f"Level {level}: '{title}' - Page {page_num}")

    # Only level 1 and level 2 entries are recorded.
    if level > 2:
        continue

    # Stored one lower than the TOC page number (presumably a 0-based
    # page index for `doc` — confirm against PyMuPDF's get_toc docs).
    start_index = page_num - 1
    chapters_info[len(chapter_pages)] = {
        'title': title,
        'page': start_index,
        'level': level,
    }
    chapter_pages.append(start_index)

PDF Table of Contents:
Level 1: 'Preface: Zero to One' - Page 5
Level 2: '1€€€The Challenge of the Future' - Page 7
Level 2: '2   Party Like It’s 1999' - Page 13
Level 2: '3€€€All Happy Companies Are Different' - Page 21
Level 2: '4€€€The Ideology of Competition' - Page 30
Level 2: '5€€€Last Mover Advantage' - Page 36
Level 2: '6€€€You Are Not a Lottery Ticket' - Page 46
Level 2: '7€€€Follow the Money' - Page 61
Level 2: '8€€€Secrets' - Page 69
Level 2: '9€€€Foundations' - Page 78
Level 2: '10€€€The Mechanics of Mafia' - Page 87
Level 2: '11€€€If You Build It, Will They Come?' - Page 94
Level 2: '12€€€Man and Machine' - Page 104
Level 2: '13€€€Seeing Green' - Page 112
Level 2: '14   The Founder’s Paradox' - Page 127
Level 1: 'Conclusion: Stagnation or Singularity?' - Page 142
Level 1: 'Acknowledgments' - Page 146
Level 1: 'Illustration Credits' - Page 147
Level 1: 'Index' - Page 148
Level 1: 'About the Authors' - Page 160


### First three chapters of the book

In [40]:
# (start, end) page pairs for TOC entries 1, 2 and 3: each range runs from the
# entry's own start page up to, but not including, the next entry's start page.
chapter_ranges = [(chapter_pages[k], chapter_pages[k + 1]) for k in (1, 2, 3)]

first_three_chapters = []

for start_page, end_page in chapter_ranges:
    # Never read past the last page of the PDF.
    stop_page = min(end_page, doc.page_count)
    txt = "".join(doc[page_num].get_text() for page_num in range(start_page, stop_page))
    first_three_chapters.append(txt)

# The PDF handle is released here; re-open the file before re-running this cell.
doc.close()

# Initialize LLM and Embeddings

In [41]:
# Chat model used to generate answers: low temperature, replies capped at 1000 tokens.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    max_tokens=1000,
    temperature=0.1,
)

# Sentence-transformers model used to embed chunks and queries; the encoder
# is asked to normalize the embeddings it produces.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs={"normalize_embeddings": True},
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Create Document Chunks and Vector Store

In [42]:
# Preview the cleaned title of entry 1. The raw TOC titles start with the
# chapter number followed by a run of '€' characters or spaces. lstrip() takes a
# *set* of characters, so this removes any leading digits, '€' and whitespace
# while leaving the end of the title (e.g. a trailing year) untouched.
print(chapters_info[1]['title'].lstrip('0123456789€ \t\u00a0').strip())

The Challenge of the Future


In [43]:
# Splitter producing chunks of at most 800 characters (measured with len) with
# a 100-character overlap; separators are listed from coarsest to finest.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
    length_function=len,
    chunk_size=800,
    chunk_overlap=100,
)

# One list of text chunks per extracted chapter.
chapters = [text_splitter.split_text(text) for text in first_three_chapters]

documents = []

for i, chunks in enumerate(chapters):
    # chapters_info[0] is the preface, so the i-th extracted chapter is entry i+1.
    # The raw TOC title starts with the chapter number followed by '€' characters
    # or spaces; lstrip() with a character set removes exactly that prefix and,
    # unlike strip('123€'), handles every digit and leaves no leading spaces.
    chapter_title = chapters_info[i + 1]['title'].lstrip('0123456789€ \t\u00a0').strip()

    for txt in chunks:
        # Appending directly (instead of binding a global named `doc`) avoids
        # clobbering the PDF handle variable used by the extraction cells.
        documents.append(
            Document(
                page_content=txt,
                metadata={
                    # Running index across all chapters, so every chunk gets a unique id.
                    "chunk id": len(documents),
                    "source": "Zero to One - Chapters 1-3",
                    "chunk_size": len(txt),
                    "chapter": i + 1,
                    "chapter_title": chapter_title,
                },
            )
        )

### Vector Store

In [44]:
# Embed every chunk with `embeddings` and build the FAISS index from the results.
vector_store = FAISS.from_documents(documents, embeddings)

# RAG Pipeline

In [45]:
# Similarity-search retriever over the vector store; each query returns the
# 5 closest chunks.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

# Prompt for the RAG chain. It has three slots: the prior conversation
# (chat_history), the retrieved book passages (context) and the user's
# current question (question).
prompt = PromptTemplate(
    input_variables=['chat_history', 'context', 'question'],
    template="""You are a helpful assistant for questions about 'Zero to One' by Peter Thiel (Chapters 1-3).

INSTRUCTIONS:
- Use the provided context from the book to answer questions
- Reference the conversation history to maintain context and provide coherent responses
- If referring to previous answers, make connections clear
- If the context is insufficient, say you don't know

CONVERSATION HISTORY:
{chat_history}

RELEVANT CONTEXT FROM BOOK:
{context}

CURRENT QUESTION: {question}

ANSWER:""",
)

In [46]:
# Manual end-to-end check with an off-topic question and an empty history:
# retrieve, fill the prompt, call the model, print the reply.
question = "what is the capital of india"
retrieved_docs = retriever.invoke(question)
context_text = "\n\n".join([hit.page_content for hit in retrieved_docs])
final_prompt = prompt.invoke({
    "chat_history": "",
    "question": question,
    "context": context_text,
})
answer = llm.invoke(final_prompt)
print(answer.content)

I don't know. The conversation history doesn't mention the capital of India, and it seems unrelated to the context of the book "Zero to One" by Peter Thiel. If you'd like to discuss something related to the book, I'd be happy to help.


In [47]:
def format_docs(retrieved_docs):
    """Merge retrieved documents into a single context string.

    Takes each document's ``page_content`` in order and joins the pieces
    with a blank line between them. An empty input yields ``""``.
    """
    passages = [item.page_content for item in retrieved_docs]
    return "\n\n".join(passages)

def extract_question(inputs):
    """Return the ``'question'`` entry of ``inputs``, or ``''`` when it is missing."""
    fallback = ''
    question_text = inputs.get('question', fallback)
    return question_text

def extract_chat_history(inputs):
    """Return the ``'chat_history'`` entry of ``inputs``, or ``''`` when it is missing."""
    fallback = ''
    history_text = inputs.get('chat_history', fallback)
    return history_text

# Fan the chain input out into the three prompt variables:
#   context      - question -> retriever -> joined passage text
#   question     - passed through from the input
#   chat_history - passed through from the input
parallel_chain = RunnableParallel(
    context=RunnableLambda(extract_question) | retriever | RunnableLambda(format_docs),
    question=RunnableLambda(extract_question),
    chat_history=RunnableLambda(extract_chat_history),
)

In [48]:
# Full RAG pipeline: build the prompt inputs, fill the prompt, call the model,
# then reduce the model reply to a plain string.
parser = StrOutputParser()
main_chain = parallel_chain.pipe(prompt, llm, parser)

In [57]:
# Conversation buffer for the chat session. No storage backend is configured
# here, so the history lives only in this kernel's memory.
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    memory_key="chat_history",
    return_messages=False,  # expose the history as text rather than message objects
)

def display_conversation_history():
    """Print the stored conversation, or a placeholder line when it is empty."""
    history_text = memory.load_memory_variables({})["chat_history"]

    # Treat a missing or whitespace-only history as "nothing to show".
    if not history_text or not history_text.strip():
        print("No conversation history yet.")
        return

    print("=== CONVERSATION HISTORY ===")
    print(history_text)
    print("=== END HISTORY ===")

def clear_conversation_history() -> None:
    """Erase everything held in the module-level ``memory`` and print a confirmation."""
    memory.clear()
    print("Conversation history cleared.")

def chat_qa_with_memory(question):
    """Answer ``question`` with the RAG chain and record the exchange in memory.

    The current conversation history is read from ``memory``, passed to
    ``main_chain`` together with the question, and the resulting
    question/answer pair is then saved back so later calls can see it.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer: the ``"answer"`` entry when the chain returns a
        dict containing one, otherwise the chain result itself.
    """
    chat_history = memory.load_memory_variables({})["chat_history"]

    # The prompt expects plain text, so flatten a list of entries and map a
    # missing history to the empty string.
    if isinstance(chat_history, list):
        chat_history = "\n".join(str(x) for x in chat_history)
    elif chat_history is None:
        chat_history = ""

    result = main_chain.invoke({
        "question": question,
        "chat_history": chat_history
    })

    # Tolerate chains that return a dict instead of a bare string.
    if isinstance(result, dict) and "answer" in result:
        answer = result["answer"]
    else:
        answer = result

    # Append this turn to the stored history.
    memory.save_context({"question": question}, {"answer": answer})

    return answer

def interactive_chat():
    """Run a read-answer loop on stdin until the user asks to stop.

    Recognised commands (case-insensitive, surrounding whitespace ignored):
      - ``quit``    leave the loop
      - ``history`` print the stored conversation
      - ``clear``   erase the stored conversation

    Any other non-blank input is sent to ``chat_qa_with_memory`` and the
    reply is printed. Blank input is ignored. The loop also ends cleanly
    when ``input()`` raises ``EOFError`` or ``KeyboardInterrupt``.
    Errors raised while answering are printed and the loop continues.
    """
    print("=== Zero to One QA Chatbot ===")
    print("Type 'quit' to exit, 'history' to see conversation history, 'clear' to clear history")

    while True:
        try:
            user_question = input("\nYou: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt while waiting for input ends the session.
            break

        # Echo the raw input so it appears in the captured cell output.
        print(f"Human: {user_question}")

        # Normalise once so "Quit", " quit " etc. are all recognised.
        command = user_question.strip().lower()
        if not command:
            # Nothing to ask; don't spend a retrieval + LLM call on blank input.
            continue
        if command == 'quit':
            break
        elif command == 'history':
            display_conversation_history()
            continue
        elif command == 'clear':
            clear_conversation_history()
            continue

        try:
            response = chat_qa_with_memory(user_question)
            print(f"Bot: {response}\n")
        except Exception as e:
            # Best effort: report the failure and keep the session alive.
            print(f"Error: {e}")

# Single question mode
# user_question = input("Ask a question about Zero to One: ")
# response = chat_qa_with_memory(user_question)
# print("Bot:", response)

# print("\n--- Conversation History ---")
# display_conversation_history()


In [None]:
# Start the interactive chat loop. The cell keeps prompting for input until
# 'quit' is entered; use it to check that earlier turns are remembered.
interactive_chat()


=== Zero to One QA Chatbot ===
Type 'quit' to exit, 'history' to see conversation history, 'clear' to clear history
Human: clear
Conversation history cleared.
Human: clear
Conversation history cleared.
Human: Hello I am Harshil
Human: Hello I am Harshil
Bot: Hello Harshil, it's nice to meet you. I'm here to help with your questions about "Zero to One" by Peter Thiel. What would you like to discuss?

Bot: Hello Harshil, it's nice to meet you. I'm here to help with your questions about "Zero to One" by Peter Thiel. What would you like to discuss?

Human: What is my  name? 
Human: What is my  name? 
Bot: Your name is Harshil.

Bot: Your name is Harshil.

Human: Summarise the book
Human: Summarise the book
Bot: Based on the provided context from the book, "Zero to One" by Peter Thiel, the book is about the principles and strategies required to succeed in the business of creating new things. It emphasizes the importance of thinking differently, questioning received ideas, and rethinking bus