***ChatBot_pdf_EXP 01***

Submitted By - Prabhat Singh

In [None]:
# Install dependencies
!pip install openai faiss-cpu pydantic PyPDF2 tiktoken

import os
import openai
import faiss
import numpy as np
import PyPDF2
import tiktoken
from typing import List, Tuple, Dict
from pydantic import BaseModel
from dataclasses import dataclass
from IPython.display import display, Markdown

# === STEP 1: Setup API Key ===
# Use the OPENAI_API_KEY environment variable when it is set; otherwise prompt
# with getpass so the key is not echoed and therefore never ends up stored in
# the notebook's saved cell output.
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

# === STEP 2: PDF Loading & Chunking ===
def load_pdf_chunks(pdf_path: str, max_tokens: int = 300) -> List[Dict]:
    """Split the text of a PDF into token-bounded chunks, page by page.

    Each page is tokenized with the ``cl100k_base`` encoding and cut into
    consecutive, non-overlapping windows of at most ``max_tokens`` tokens.
    Chunks never span two pages, so every chunk carries exactly one page
    number.

    Args:
        pdf_path: Path of the PDF file to read.
        max_tokens: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of ``{"text": str, "page": int}`` dicts in reading order,
        with 1-based page numbers. Pages without extractable text
        contribute no chunks.

    Raises:
        ValueError: If ``max_tokens`` is not a positive integer.
    """
    # A non-positive window would never advance through the token list.
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens}")

    reader = PyPDF2.PdfReader(pdf_path)
    tokenizer = tiktoken.get_encoding("cl100k_base")
    chunks: List[Dict] = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        # Skip pages with no text or only whitespace: they carry nothing
        # worth embedding.
        if not text or not text.strip():
            continue
        tokens = tokenizer.encode(text)
        for start in range(0, len(tokens), max_tokens):
            chunk_text = tokenizer.decode(tokens[start:start + max_tokens])
            chunks.append({"text": chunk_text, "page": page_number})
    return chunks

# === STEP 3: Embedding Generator ===
class Embedder:
    """Turns text into embedding vectors through the OpenAI embeddings endpoint."""

    def __init__(self, model: str = "text-embedding-3-small"):
        # Name of the embedding model sent with every request.
        self.model = model

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for a single piece of text."""
        # The request carries a one-element batch; its only result is read back.
        result = openai.embeddings.create(model=self.model, input=[text])
        first_item = result.data[0]
        return first_item.embedding

# === STEP 4: FAISS Vector Store ===
@dataclass
class DocumentChunk:
    """One piece of document text together with its page number and embedding."""

    # The chunk's text.
    text: str
    # Page number the chunk was taken from.
    page: int
    # Embedding vector for `text`, as a list of floats.
    embedding: List[float]

class LocalVectorStore:
    """In-memory vector store backed by a flat L2 FAISS index.

    ``self.chunks`` is kept in insertion order, so the position FAISS reports
    for a vector is also the position of its chunk in that list.
    """

    def __init__(self, dim: int):
        """Create an empty store for embeddings of dimensionality ``dim``."""
        self.index = faiss.IndexFlatL2(dim)
        self.chunks: List["DocumentChunk"] = []

    def add(self, chunk: "DocumentChunk"):
        """Index ``chunk.embedding`` and remember the chunk for later lookup."""
        self.index.add(np.array([chunk.embedding], dtype=np.float32))
        self.chunks.append(chunk)

    def search(self, query_embedding: List[float], k: int = 5) -> List[Tuple[str, int]]:
        """Return up to ``k`` stored chunks closest to ``query_embedding``.

        Args:
            query_embedding: Embedding vector of the query.
            k: Maximum number of results wanted.

        Returns:
            ``(text, page)`` tuples in the order reported by the index. The
            list is shorter than ``k`` when fewer chunks are stored, and empty
            when the store is empty or ``k`` is not positive.
        """
        # Never ask for more neighbours than there are stored chunks.
        k = min(k, len(self.chunks))
        if k <= 0:
            return []
        query = np.array([query_embedding], dtype=np.float32)
        _distances, indices = self.index.search(query, k)
        # FAISS pads missing results with -1; used as a list index that would
        # silently select the last chunk, so such entries are dropped.
        return [
            (self.chunks[i].text, self.chunks[i].page)
            for i in indices[0]
            if i >= 0
        ]

# === STEP 5: Chatbot with Memory ===
class Message(BaseModel):
    """A single turn of the conversation."""

    # Label of the speaker for this turn.
    role: str
    # Text of the turn.
    content: str

class RAGChatbot:
    """Retrieval-augmented chatbot that keeps the conversation in memory.

    For every question the most similar document chunks are retrieved from
    the vector store and placed, together with the conversation so far, into
    a single system prompt for the chat model.
    """

    def __init__(
        self,
        embedder: "Embedder",
        store: "LocalVectorStore",
        model: str = "gpt-4o",
        top_k: int = 5,
        temperature: float = 0.2,
        book_title: str = "'The Hard Thing About Hard Things' by Ben Horowitz",
    ):
        """Configure the chatbot.

        Args:
            embedder: Object whose ``get_embedding(text)`` embeds the question.
            store: Object whose ``search(embedding, k=...)`` returns
                ``(text, page)`` tuples.
            model: Chat model used to generate answers.
            top_k: Number of chunks to retrieve per question.
            temperature: Sampling temperature passed to the chat model.
            book_title: Description of the book inserted into the prompt.
        """
        self.embedder = embedder
        self.store = store
        self.model = model
        self.top_k = top_k
        self.temperature = temperature
        self.book_title = book_title
        self.messages: List[Message] = []

    def ask(self, user_input: str) -> str:
        """Answer ``user_input`` and record the exchange in ``self.messages``.

        The user and assistant messages are stored only after the answer has
        been produced, so a failed embedding or completion call leaves the
        conversation memory unchanged.

        Args:
            user_input: The user's question.

        Returns:
            The assistant's answer text.
        """
        # Query embedding & retrieval
        query_embedding = self.embedder.get_embedding(user_input)
        retrieved = self.store.search(query_embedding, k=self.top_k)

        # Format context from retrieved chunks
        context = "\n\n".join(f"(Page {page}) {text}" for text, page in retrieved)

        # The pending question is part of the prompt but is not committed to
        # memory until the model has answered.
        user_message = Message(role="user", content=user_input)
        history = [*self.messages, user_message]

        # Construct system prompt with memory
        conversation = "\n".join(f"{msg.role}: {msg.content}" for msg in history)
        prompt = f"""You are a helpful assistant discussing the book {self.book_title}.
Use the context below to answer the user's question. Always cite relevant quotes and page numbers.

Context:
{context}

Conversation so far:
{conversation}

assistant:"""

        response = openai.chat.completions.create(
            model=self.model,
            messages=[{"role": "system", "content": prompt}],
            temperature=self.temperature,
        )
        answer = response.choices[0].message.content
        assistant_message = Message(role="assistant", content=answer)

        # Commit both turns together so memory never holds an unanswered question.
        self.messages.extend([user_message, assistant_message])
        return answer

# === STEP 6: Run Everything ===
pdf_path = "/content/the_hard_thing_about_hard_things.pdf"
print("📄 Loading and chunking PDF...")
raw_chunks = load_pdf_chunks(pdf_path)
if not raw_chunks:
    # Without any chunk there is nothing to index or retrieve, so stop here
    # with a clear message instead of building an empty index.
    raise ValueError(f"No extractable text found in {pdf_path!r}; cannot build the index.")

print("🔎 Embedding and indexing chunks...")
embedder = Embedder()
# Embed a throwaway string once to learn the vector size the index must use.
dim = len(embedder.get_embedding("test"))
store = LocalVectorStore(dim=dim)

# One embedding request per chunk; each chunk is indexed with its page number.
for chunk in raw_chunks:
    embedding = embedder.get_embedding(chunk["text"])
    store.add(DocumentChunk(text=chunk["text"], page=chunk["page"], embedding=embedding))

bot = RAGChatbot(embedder, store)

# === STEP 7: Chat Loop ===
print("\n✅ Ready! Ask questions about the book (type 'exit' to stop):")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt ends the session without a traceback.
        break
    if not user_input:
        # Nothing was typed: prompt again rather than sending an empty question.
        continue
    # "q" is accepted as a short form of the exit commands.
    if user_input.lower() in {"exit", "quit", "q"}:
        break
    answer = bot.ask(user_input)
    display(Markdown(f"**Assistant:** {answer}"))


Enter your OpenAI API key: [REDACTED: a live secret key was recorded here in the saved output; revoke it and issue a new one]
📄 Loading and chunking PDF...
🔎 Embedding and indexing chunks...

✅ Ready! Ask questions about the book (type 'exit' to stop):
You: what is the pdf about


**Assistant:** The book "The Hard Thing About Hard Things" by Ben Horowitz is about the challenges and complexities of building and running a startup. It provides insights and advice based on Horowitz's experiences as a CEO and entrepreneur. The book covers topics such as managing employees, making tough decisions, and dealing with the unpredictable nature of business. Horowitz shares personal anecdotes and lessons learned from his time at companies like Loudcloud and Opsware, offering practical guidance for leaders facing difficult situations.

You: what are the key points in chapter 1


**Assistant:** Chapter 1 of "The Hard Thing About Hard Things" is titled "From Communist to Venture Capitalist." This chapter likely discusses Ben Horowitz's personal journey and transformation from his early life experiences to becoming a venture capitalist. While the specific details of the chapter are not provided in the context, it can be inferred that Horowitz shares insights into his background and the foundational experiences that shaped his career in the tech industry and venture capital. If you have access to the book, you can find more detailed information in Chapter 1.

You: tell me more about it


**Assistant:** In Chapter 1 of "The Hard Thing About Hard Things," Ben Horowitz shares his personal journey and the experiences that shaped his career. He discusses his early life, including his upbringing and the influences that led him to the tech industry. The chapter likely covers his transition from working at Lotus to discovering the potential of the Internet through Mosaic, a graphical interface to the Internet developed by students at the University of Illinois (Page 14). This discovery led him to pursue a job at Netscape, a company co-founded by Marc Andreessen, the inventor of Mosaic. Horowitz's decision to join Netscape was driven by his belief in the transformative power of the Internet and his desire to be part of a company that would change the world (Page 16).

The chapter sets the stage for Horowitz's later experiences as a CEO and entrepreneur, highlighting the pivotal moments and decisions that influenced his career path. It provides a foundation for understanding his perspective on the challenges of building and running a startup, which he explores in more detail throughout the book.

You: give all the chapters name list wise 


**Assistant:** Here is the list of chapter names from "The Hard Thing About Hard Things" by Ben Horowitz:

1. From Communist to Venture Capitalist
2. “I Will Survive”
3. This Time with Feeling
4. When Things Fall Apart
   - The Struggle
   - CEOs Should Tell It Like It Is
   - The Right Way to Lay People Off
   - Preparing to Fire an Executive
   - Demoting a Loyal Friend
   - Lies That Losers Tell
   - Lead Bullets
   - Nobody Cares
5. Take Care of the People, the Products, and the Profits—in That Order
   - A Good Place to Work
   - Why Startups Should Train Their People
   - Is It Okay to Hire People from Your Friend’s Company?
   - Why It’s Hard to Bring Big Company Execs into Little Companies
   - Hiring Executives: If You’ve Never Done the Job, How Do You Hire Somebody Good?
   - When Employees Misinterpret Managers
   - Management Debt
   - Management Quality Assurance
6. Concerning the Going Concern
   - How to Minimize Politics in
7. How to Lead Even When You Don’t Know Where You Are Going
   - The Most Difficult CEO Skill
   - The Fine Line Between Fear and Courage
   - Ones and Twos
   - Follow the Leader
   - Peacetime CEO/Wartime CEO
   - Making Yourself a CEO
   - How to Evaluate CEOs
8. First Rule of Entrepreneurship: There Are No Rules
   - Solving the Accountability vs. Creativity Paradox
   - The Freaky Friday Management Technique
   - Staying Great
   - Should You Sell Your Company?
9. The End of the Beginning

Additionally, the book includes an appendix titled "Questions for Head of Enterprise Sales Force," as well as sections for acknowledgments, about the author, credits, copyright, and about the publisher.

You: Ones and Twos


**Assistant:** In the book "The Hard Thing About Hard Things" by Ben Horowitz, the concept of "Ones and Twos" is discussed in the context of CEO characteristics and management styles. 

- **Ones** are described as managers who are more comfortable setting the direction of the company. They enjoy gathering information from various sources, making decisions, and engaging in strategic thinking. Ones are characterized by their ability to make decisions even with limited information and their enjoyment of strategic challenges. However, they may get bored with the execution details required to run a company, such as process design and performance management (Page 177).

- **Twos**, on the other hand, enjoy the process of making the company run well. They prefer clear goals and are hesitant to change direction unless necessary. Twos may struggle with strategic thinking and feel anxious about big decisions, often overcomplicating the decision-making process to feel thorough. Despite their love of action, Twos can sometimes halt decision-making in a company due to their cautious nature (Page 178).

Horowitz emphasizes that to be a good CEO, one needs both characteristics. While people tend to be either Ones or Twos, with discipline and hard work, they can become competent in the tasks of the other type. Ignoring the dimension of management one doesn't like can lead to failure, as Ones may end up in chaos and Twos may fail to pivot when necessary (Page 178).

You: q


**Assistant:** Could you please clarify your question or provide more details so I can assist you better?

You: quit
