This notebook sets up the Rizzbot agentic question-answering system.
The simplified flow of the system is as follows:
The user submits a question as a prompt -> the question is preprocessed via prompt tuning -> a semantic search is run over the summary database -> one of two branches is taken:

(branch 1): The answer is found in one summary (or spread over several summaries) -> use the summary/summaries to generate the answer -> output the answer to the user

(branch 2): The answer is not found in any summary -> run a semantic search over the entire dataset -> retrieve the relevant chunks -> generate an answer summarizing the relevant results -> output the answer to the user

In [1]:
# rizzbot_agentic.py

import os
from typing import List, Dict, Optional
from langchain.schema.runnable import RunnableLambda, RunnableBranch
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.agents import AgentExecutor, Tool, initialize_agent, AgentType
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langsmith import Client
from langchain.retrievers.multi_query import MultiQueryRetriever


class Rizzbot:
    """Retrieval-augmented question answering over two Pinecone indexes.

    A question is first matched against the summary index. If at least one
    summary scores at or above ``similarity_threshold`` the answer is
    generated from those summaries; otherwise the full dataset is searched
    with a multi-query retriever and the answer is generated from the
    retrieved chunks. If neither search returns content,
    ``no_answer_response`` is returned instead of calling the LLM.
    """

    def __init__(self):
        """Load environment variables and build models, indexes and chains."""
        self._load_env()
        # Minimum match score for a summary hit to be used as context.
        self.similarity_threshold = 0.85
        # Number of results requested from both the summary index and the
        # full-dataset retriever.
        self.top_k = 3

        # LangSmith logging
        os.environ["LANGCHAIN_TRACING_V2"] = "true"
        os.environ["LANGCHAIN_PROJECT"] = "rizzbot"
        self.client = Client()

        # Models
        self.main_llm = ChatOpenAI(model="gpt-4o", temperature=0.25)
        self.expand_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.6)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

        # Pinecone indexes
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.summary_index = self.pc.Index("rizzbot-summaries-full-text")
        self.full_vectorstore = PineconeVectorStore(
            index_name="rizzbot", embedding=self.embeddings, text_key="full_text"
        )

        # Prompt template
        self.base_prompt_template = ChatPromptTemplate.from_template("""
        You are a charisma and personal development expert.

        Based on the following content, answer the user's question clearly and helpfully.

        User Question: {question}

        Relevant Content:
        {content}

        Answer:
        """)
        self.no_answer_response = "Sorry bro, I couldn't find enough info to answer that confidently."

        self._build_agent_chain()

    def _load_env(self):
        """Load variables from the nearest ``.env`` file.

        Returns:
            The value returned by ``load_dotenv``.
        """
        from dotenv import load_dotenv, find_dotenv
        return load_dotenv(find_dotenv())

    def _embed_question(self, question: str) -> List[float]:
        """Return the embedding vector for ``question``."""
        return self.embeddings.embed_query(question)

    @staticmethod
    def _format_content(chunks: List[str]) -> str:
        """Join retrieved text chunks into one prompt-ready string.

        Without this the prompt would receive the ``repr`` of a Python list
        (brackets, quotes and escaped newlines) instead of readable text.
        Empty chunks are skipped.
        """
        return "\n\n---\n\n".join(chunk for chunk in chunks if chunk)

    def _multi_query_search(self, question: str) -> List[str]:
        """Search the full dataset using LLM-generated query variations.

        Returns:
            The ``page_content`` of each retrieved document, or an empty
            list if retrieval raises.
        """
        try:
            retriever = MultiQueryRetriever.from_llm(
                retriever=self.full_vectorstore.as_retriever(
                    search_kwargs={"k": self.top_k}
                ),
                llm=self.expand_llm,
            )
            # ``invoke`` is the Runnable entry point that supersedes the
            # deprecated ``get_relevant_documents``.
            docs = retriever.invoke(question)
            return [doc.page_content for doc in docs]
        except Exception as e:
            print(f"Multi-query retrieval failed: {e}")
            return []

    def _search_summaries(self, question: str) -> List[str]:
        """Return summary texts whose match score meets the threshold.

        Returns:
            The ``full_text`` metadata of each qualifying match, in the
            order returned by the index, or an empty list if the search
            raises.
        """
        try:
            vec = self._embed_question(question)
            res = self.summary_index.query(
                vector=vec, top_k=self.top_k, include_metadata=True
            )
            matches = []
            for match in res.get("matches", []):
                # Treat a missing/None score or metadata as "no hit" so one
                # malformed match cannot discard the valid ones via the
                # except clause below.
                score = match.get("score") or 0.0
                if score < self.similarity_threshold:
                    continue
                txt = (match.get("metadata") or {}).get("full_text")
                if txt:
                    matches.append(txt)
            return matches
        except Exception as e:
            print(f"Summary search failed: {e}")
            return []

    def _build_agent_chain(self):
        """Build ``agent_chain``, ``tools`` and ``executor``.

        ``agent_chain`` accepts the question string. The summary search runs
        exactly once per question; its result is carried through the chain
        in a small state dict so the routing decision and the prompt share
        the same retrieval instead of querying twice.
        """
        # Shared generation tail: prompt -> LLM -> plain string.
        generate = self.base_prompt_template | self.main_llm | StrOutputParser()

        def with_summaries(question: str) -> Dict:
            return {
                "question": question,
                "chunks": self._search_summaries(question),
            }

        def with_full_dataset(state: Dict) -> Dict:
            return {
                "question": state["question"],
                "chunks": self._multi_query_search(state["question"]),
            }

        def to_prompt_inputs(state: Dict) -> Dict:
            return {
                "question": state["question"],
                "content": self._format_content(state["chunks"]),
            }

        def has_chunks(state: Dict) -> bool:
            return bool(state["chunks"])

        answer = RunnableLambda(to_prompt_inputs) | generate
        # Nothing retrieved anywhere: do not ask the LLM to answer from
        # empty context.
        no_answer = RunnableLambda(lambda _state: self.no_answer_response)

        # Full dataset fallback
        answer_from_full = RunnableLambda(with_full_dataset) | RunnableBranch(
            (has_chunks, answer),
            no_answer,
        )

        # Agent decision logic: summaries first, full dataset otherwise.
        self.agent_chain = RunnableLambda(with_summaries) | RunnableBranch(
            (has_chunks, answer),
            answer_from_full,
        )

        # AgentExecutor tool wrapping
        self.tools = [
            Tool(
                name="AnswerQuestion",
                func=self.answer_question,
                description="Answer user questions with the Rizzbot Agentic pipeline."
            )
        ]

        # initialize_agent already returns an AgentExecutor; wrapping that
        # result in AgentExecutor.from_agent_and_tools would nest one
        # executor inside another.
        self.executor = initialize_agent(
            tools=self.tools,
            llm=self.main_llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    def answer_question(self, question: str) -> str:
        """Answer ``question`` through ``agent_chain``.

        Returns:
            The generated answer, or ``no_answer_response`` if the chain
            raises or no content was retrieved.
        """
        try:
            return self.agent_chain.invoke(question)
        except Exception as e:
            print(f"Agentic pipeline failed: {e}")
            return self.no_answer_response

    def ask_with_agent_executor(self, question: str) -> str:
        """Answer ``question`` through the ReAct ``AgentExecutor``.

        Returns:
            The executor's ``"output"`` value, or ``no_answer_response`` if
            the executor raises.
        """
        try:
            return self.executor.invoke({"input": question})["output"]
        except Exception as e:
            print(f"AgentExecutor failed: {e}")
            return self.no_answer_response


In [6]:
from rizzbot_agentic import Rizzbot

bot = Rizzbot()

# Agentic chain route: both questions are sent through answer_question()
# directly rather than through ask_with_agent_executor().
popularity_question = "What makes David Dobrik popular?"
archetypes_question = "What are the masculine archetypes, and how do these relate to charisma?"

response = bot.answer_question(popularity_question)
print("Chain Output:\n", response)

response2 = bot.answer_question(archetypes_question)
print("Chain Output:\n", response2)


Chain Output:
 David Dobrik's popularity can be attributed to several key factors that resonate with his audience and make him stand out in the digital content landscape:

1. **Charismatic Personality**: David Dobrik is known for his charismatic and engaging personality. He has a natural ability to make people feel good and entertained, which draws viewers to his content. His energy and enthusiasm are infectious, making his videos enjoyable to watch.

2. **Innovative Content**: Dobrik's content is often creative and unique, featuring a mix of humor, surprise, and excitement. His vlogs are fast-paced and filled with unexpected moments, which keeps viewers engaged and coming back for more.

3. **Relatable and Authentic**: Despite his fame, Dobrik maintains a relatable and authentic persona. He often shares personal stories and experiences that his audience can connect with, making them feel like they are part of his life.

4. **Collaborative Spirit**: David frequently collaborates with o

In [10]:
from rizzbot_agentic_v2 import Rizzbot


def test_rizzbot():
    """Smoke-test the v2 Rizzbot: print its stats, then answer two questions.

    Everything is written to stdout; nothing is asserted or returned.
    """
    banner = "=" * 80

    print("Starting Rizzbot test...")

    # Constructing the bot emits its initialization logs.
    bot = Rizzbot()

    # Report system stats before running any question.
    print("\n System Stats:")
    stats = bot.get_stats()
    for name in stats:
        print(f"  {name}: {stats[name]}")

    # (section title, question, label printed before the answer)
    cases = (
        (
            "TEST 1: Game of Thrones Characters",
            "Who are the most charismatic characters in Game of Thrones and why?",
            "\nChain Output:",
        ),
        (
            "TEST 2: Masculine Archetypes",
            "What are the masculine archetypes, and how do these relate to charisma?",
            "\n Chain Output:",
        ),
    )

    for title, question, output_label in cases:
        print("\n" + banner)
        print(title)
        print(banner)

        answer = bot.answer_question(question)
        print(output_label)
        print(answer)

    print("\n Test completed!")


if __name__ == "__main__":
    test_rizzbot()


2025-07-02 16:56:57,307 - INFO - Initializing Rizzbot...
2025-07-02 16:56:57,309 - INFO - Loading environment variables...
2025-07-02 16:56:57,314 - INFO - Setting up LLM models...


Starting Rizzbot test...


2025-07-02 16:56:58,037 - INFO - Connecting to Pinecone vector stores...
2025-07-02 16:56:58,040 - INFO - Setting up prompt templates...
2025-07-02 16:56:58,041 - INFO - Building agent chain...
2025-07-02 16:56:58,041 - INFO - Building agent execution chain...


TypeError: 'function' object is not a mapping