In [64]:
from dotenv import load_dotenv
import os
import re
from pinecone import Pinecone , ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from gtts import gTTS
import io
import pygame

In [2]:
load_dotenv()
filenames = [
    "021.txt",
    "2477the-hard-thing-about-hard-things.txt",
    "Atomic habits ( PDFDrive ).txt",
    "angela-duckworth-grit.txt",
    "Hooked-How-to-Build-Habit-Forming-Products-_Nir-Eyal_.txt",
    "Measure-What-Matters-John-Doerr.txt",
    "Rich-Dad-Poor-Dad-eBook.txt",
    "Super Founders PDF.txt",
    "The Lean Startup - Erick Ries.txt",
    "The-100-Startup-Chris-Guillebeau.txt",
    "The-100-Startup-Chris-Guillebeau.txt",
    "Venture-deals.txt"
]

In [3]:
def clean_text(text: str) -> str:
    text = text.encode("ascii", "ignore").decode()
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\w\s.,:;!?\'"-]', '', text)
    return text.strip()

In [4]:
combined_data = ""
for file in filenames:
    with open(file, "r", encoding="utf-8") as f:
        raw_text = f.read()
        cleaned_text = clean_text(raw_text)
        combined_data += cleaned_text + "\n"

In [5]:
combined_data[:500]

'Copyright 2014 by Peter Thiel All rights reserved. Published in the United States by Crown Business, an imprint of the Crown Publishing Group, a division of Random House LLC, a Penguin Random House Company, New York. www.crownpublishing.com CROWN BUSINESS is a trademark and CROWN and the Rising Sun colophon are registered trademarks of Random House LLC. Crown Business books are available at special discounts for bulk purchases for sales promotions or corporate use. Special editions, including pe'

In [6]:
chunks = [combined_data[i:i+1000] for i in range(0 , len(combined_data) , 1000)]

In [7]:
chunks[100]

'saving, at least they could expect to have money to spend later. And if American companies were investing, they could expect to reap the rewards of new wealth in the future. But U.S. households are saving almost nothing. And U.S. companies are letting cash pile up on their balance sheets without investing in new projects because they dont have any concrete plans for the future. The other three views of the future can work. Definite optimism works when you build the future you envision. Definite pessimism works by building what can be copied without expecting anything ne w. Indefinite pessimism works because its self-fulfilling: if youre a slacker with low expectations, theyll probably be met. But indefinite optimism seems inherently unsustainable: how can the future get better if no one plans for it? Actually, most everybody in the modern world has already heard an answer to this question: progress without planning is what we call evolution. Darwin himself wrote that life tends to pro

In [41]:
pine_cone = os.getenv('pine_cone')
groq = os.getenv('groq')
hugging_face = os.getenv("hugging_face")
eleven_lab = os.getenv("eleven_lab")

## Embedding

In [9]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

In [10]:
embedding = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5",
                                    model_kwargs={"token" : hugging_face})

  embedding = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5",


## pinecone

In [11]:
pc = Pinecone(api_key = pine_cone)

In [12]:
index = "teacher"
if index not in pc.list_indexes().names():
    pc.create_index(name=index,
    dimension=768,
    metric= 'cosine',
    spec = ServerlessSpec(region = "us-east-1" , cloud = "AWS")
    )

In [13]:
index = pc.Index(index ,host=os.getenv('host_bg'))

In [14]:
docs = [Document(page_content = chunk) for chunk in chunks]

In [15]:
docs[1]

Document(metadata={}, page_content='Title. HD62.5.T525 2014 685.11dc23 2014006653 Hardcover ISBN: 978-0-8041-3929-8 eBook ISBN: 978-0-8041-3930-4 Book design by Ralph Fowler  rlfdesign Graphics by Rodrigo Corral Design Illustrations by Matt Buck Cover design by Michael Nagin Additional credits appear on this page, which constitutes a continuation of this copyright page. v3.1 Contents Preface: Zero to One 1 The Challenge of the Future 2 Party Like Its 1999 3 All Happy Companies Are Different 4 The Ideology of Competition 5 Last Mover Advantage 6 You Are Not a Lottery Ticket 7 Follow the Money 8 Secrets 9 Foundations 10 The Mechanics of Mafia 11 If You Build It, Will They Come? 12 Man and Machine 13 Seeing Green 14 The Founders Paradox Conclusion: Stagnation or Singularity? Acknowledgments Illustration Credits Index About the Authors Preface ZERO TO ONE EVERY MOMENT IN BUSINESS happens only once. The next Bill Gates will not build an operating system. The next Larry Page or Sergey Brin w

In [16]:
vector = PineconeVectorStore(index=index,
    embedding=embedding,
    text_key="page_content",)

In [17]:
from tqdm import tqdm
batch_size = 32
for i in tqdm(range(0, len(docs), batch_size)):
    batch = docs[i:i+batch_size]
    try:
        vector.add_documents(batch)
    except Exception as e:
        print(f"Error in batch {i}-{i+batch_size}: {e}")

100%|█████████████████████| 154/154 [07:21<00:00,  2.87s/it]


In [18]:
from langchain_groq import ChatGroq
from langchain.chains import create_history_aware_retriever  , create_retrieval_chain
from langchain_core.prompts import MessagesPlaceholder , ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

In [20]:
model = ChatGroq(api_key = groq , temperature=0.5 , model ="llama-3.3-70b-versatile")

In [21]:
model.invoke("hii")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 37, 'total_tokens': 47, 'completion_time': 0.099461733, 'prompt_time': 0.001846167, 'queue_time': 0.055530711999999996, 'total_time': 0.1013079}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_6507bcfb6f', 'finish_reason': 'stop', 'logprobs': None}, id='run--45068fc3-b0ba-45cb-b7c5-d1292201af4c-0', usage_metadata={'input_tokens': 37, 'output_tokens': 10, 'total_tokens': 47})

In [22]:
retriver_prompt = ("Given a chat history and the latest user question which might reference context in the chat history,"
    "formulate a standalone question which can be understood without the chat history."
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is.")

In [23]:
retrive = vector.as_retriever(search_kwargs={"k":3})

In [24]:
contextual_qa = ChatPromptTemplate.from_messages(
    [
        ("system",retriver_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human",'{input}')
    ]
)

In [25]:
contextual_qa

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[l

In [26]:
history = create_history_aware_retriever(model , retrive , contextual_qa)

In [72]:
BOT = """You are an AI Startup Coach, specifically designed to guide entrepreneurs during tough startup situations using wisdom from top entrepreneurship books.

Whenever a user shares a startup problem (e.g., failure, funding stress, team issues, growth confusion, burnout), follow this structure strictly:

1. Carefully understand the user’s problem.
2. Identify if it is truly **startup/business-related**.
   - If NOT, politely decline:  
     "I'm designed to help with startup challenges. Please ask a question related to your startup journey."
     if nessesory then refer the book and author name otherwise don't take book name and author name only genrate relevent query

Make sure:
- You **do NOT hallucinate** fake answers.
- If no book supports the answer, say:  
  “This isn’t directly covered in my book knowledge, but based on similar principles, here’s a possible direction…”
- Keep answers short, focused, and strategic. No storytelling unless highly relevant.

CONTEXT:
{context}

QUESTION: {input}

YOUR ANSWER:

"""

In [73]:
qa_bot = ChatPromptTemplate.from_messages(
    [
        ("system",BOT),
        MessagesPlaceholder(variable_name = "chat_history"),
        ("human","{input}")
    ]
)

In [74]:
question_answer_chain = create_stuff_documents_chain(model,qa_bot)

In [75]:
chain = create_retrieval_chain(history , question_answer_chain)

In [76]:
store = {}
chat_history = []

In [77]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

In [78]:
def get_session_history(session_id: str)-> BaseChatMessageHistory:
  if session_id not in store:
    store[session_id]= ChatMessageHistory()
  return store[session_id]

In [79]:
from langchain_core.runnables import RunnableWithMessageHistory

chat_with_memory = RunnableWithMessageHistory(
    chain, 
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


In [80]:
def speak_text(text: str,speed: float = 1.5):
    # Convert text to speech and write to memory
    tts = gTTS(text)
    fp = io.BytesIO()
    tts.write_to_fp(fp)
    fp.seek(0)
    
    # Initialize pygame mixer (only once)
    if not pygame.mixer.get_init():
        pygame.mixer.init()

    # Load and play audio
    pygame.mixer.music.load(fp, 'mp3')
    pygame.mixer.music.play()

    # Wait until playback is finished
    while pygame.mixer.music.get_busy():
        continue

In [81]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit", "stop"]:
        break

    response = chat_with_memory.invoke(
        {"input": user_input},
        config={
            "configurable": {"session_id": "143"}
        },
    )

    print("AI:", response["answer"])
    speak_text(response["answer"])

You:  hii mai self ankush


AI: I'm designed to help with startup challenges. Please ask a question related to your startup journey.


You:  my self jai


AI: I'm designed to help with startup challenges. Please ask a question related to your startup journey.


You:  what is mvp


AI: A Minimum Viable Product (MVP) is a product with just enough features to satisfy early customers and provide feedback for future development, as described in "The Lean Startup" by Eric Ries. It's the fastest way to get through the Build-Measure-Learn feedback loop with minimal effort.


You:  actually my startup is for gamming how i grow this fast as soon as possible


AI: To grow your gaming startup quickly, focus on understanding your target audience and delivering a high-quality user experience. As mentioned in "The Lean Startup" by Eric Ries, build a Minimum Viable Product (MVP) to test your assumptions and gather feedback. Additionally, consider the advice from "Play Bigger" by Christopher Lochhead, which emphasizes the importance of creating a unique category and owning it. Identify your niche in the gaming industry and develop a go-to-market strategy that resonates with your target audience.


You:  how i target audience


AI: To target your audience for your gaming startup, consider the advice from "Traction" by Gabriel Weinberg and Justin Mares. Identify your ideal customer persona by examining demographics, preferences, and behaviors of gamers. Ask yourself: What type of games do they play? What platforms do they use? What are their pain points? Create a survey or gather feedback from potential customers to validate your assumptions. You can also analyze online communities, social media, and gaming forums to understand your target audience's interests and needs. As suggested in "Blue Ocean Strategy" by W. Chan Kim and Renée Mauborgne, focus on a specific niche to differentiate yourself from competitors and attract a dedicated audience.


KeyboardInterrupt: Interrupted by user