This notebook sets up the Agentic system.
The simplified flow of the system is as follows:
User gives input question as a prompt -> Preprocess the question via prompt tuning -> do a semantic search query over the summary database  -> 

(branch 1): is the answer in a summary (or spread over multiple)? -> use the summary/summaries to generate summary answer -> output answer to user

(branch 2): is the answer not in a summary? -> semantic search query the entire dataset for the answer -> retrieve the relevant chunks -> generate summary of the relevant results -> output answer to user

In [6]:
# This code implements the RAG system for the Agentic chatbot, called Rizzbot.
import os
import json
import boto3
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass
from dotenv import load_dotenv, find_dotenv
from pydantic import SecretStr

# LangChain imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document

# LangSmith imports
from langsmith import Client
import langsmith

# Pinecone
import pinecone
from pinecone import Pinecone


# Initialize LangSmith
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "rizzbot"

# Set up API keys
_ = load_dotenv(find_dotenv())
openai_api_key = os.getenv("OPENAI_API_KEY")
langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# Initialize Pinecone
pc = Pinecone(api_key=pinecone_api_key)


@dataclass
class SearchResult:
    """Container for search results"""
    content: str
    score: float
    source: str
    metadata: Dict = None

class Rizzbot:
    """
    Agentic chatbot system for charisma and personal development questions.
    Uses RAG with Pinecone vector stores and AWS S3 for summaries.
    """
    
    def __init__(self):
        """Initialize the RAG system with all required components"""
        self.similarity_threshold = 0.85
        self.top_k = 3
        
        # Initialize LangSmith client
        self.langsmith_client = Client(api_key=langsmith_api_key)
        
        # Initialize OpenAI components
        self.llm = ChatOpenAI(
            model="gpt-4o",
            temperature=0.25,
            api_key=openai_api_key,
            tags=["rizzbot_v1"]
        )
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        
        # Initialize Pinecone
        self.pc = Pinecone(api_key=pinecone_api_key)
        
        # Initialize vector stores
        self._init_vector_stores()
        
        # Initialize AWS S3 client
        self.s3_client = boto3.client("s3")
        
        # Initialize prompts
        self._init_prompts()
    
    def _init_vector_stores(self):
        """Initialize Pinecone vector stores"""
        # Main dataset index
        self.main_vectorstore = PineconeVectorStore(
            index_name="rizzbot",
            embedding=self.embeddings
        )
        
        # Clusters index
        self.clusters_vectorstore = PineconeVectorStore(
            index_name="rizzbot-clusters", 
            embedding=self.embeddings
        )
        
        # Summaries index
        self.summaries_vectorstore = PineconeVectorStore(
            index_name="rizzbot-summaries",
            embedding=self.embeddings
        )
    
    def _init_prompts(self):
        """Initialize prompt templates"""
        self.summary_answer_prompt = ChatPromptTemplate.from_template("""
        You are a charisma and personal development expert. Based on the following summary/summaries, 
        provide a comprehensive and helpful answer to the user's question.
        
        User Question: {question}
        
        Relevant Summary/Summaries:
        {summaries}
        
        Instructions:
        - Provide a clear, actionable answer based on the summaries
        - Be conversational and engaging
        - If the summaries don't fully address the question, mention this limitation
        - Keep your response focused and practical
        
        Answer:
        """)
        
        self.full_search_answer_prompt = ChatPromptTemplate.from_template("""
        You are a charisma and personal development expert. Based on the following relevant content 
        from the database, provide a comprehensive and helpful answer to the user's question.
        
        User Question: {question}
        
        Relevant Content:
        {content}
        
        Instructions:
        - Synthesize the information to provide a clear, actionable answer
        - Be conversational and engaging
        - Focus on practical advice and insights
        - If the content doesn't fully address the question, mention this limitation
        
        Answer:
        """)
        
        self.no_answer_response = """My apologies, but I don't have a good answer for you based on the information in my database. My database is limited up to the past 5 years. For further research, we recommend you visit the source of my knowledge: https://www.youtube.com/user/Charismaoncommand/"""
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def search_summaries(self, question: str) -> List[SearchResult]:
        """Search in the summaries index"""
        try:
            results = self.summaries_vectorstore.similarity_search_with_score(
                question, 
                k=self.top_k,
                filter=None  # Could add topic_id filtering here if needed
            )
            
            search_results = []
            for doc, score in results:
                if score >= self.similarity_threshold:
                    search_results.append(SearchResult(
                        content=doc.page_content,
                        score=score,
                        source="summary",
                        metadata=doc.metadata
                    ))
            
            return search_results
        except Exception as e:
            print(f"Error searching summaries: {e}")
            return []
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def search_full_dataset(self, question: str) -> List[SearchResult]:
        """Search in the full dataset index"""
        try:
            results = self.main_vectorstore.similarity_search_with_score(
                question, 
                k=self.top_k
            )
            
            search_results = []
            for doc, score in results:
                search_results.append(SearchResult(
                    content=doc.page_content,
                    score=score,
                    source="full_dataset",
                    metadata=doc.metadata
                ))
            
            return search_results
        except Exception as e:
            print(f"Error searching full dataset: {e}")
            return []
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def get_s3_summaries(self, topic_ids: List[int]) -> List[str]:
        """Retrieve full summaries from S3 based on topic IDs"""
        summaries = []
        bucket_name = "rizzbot-temp-storage"
        base_path = "rizzbot/Summaries/run_2_1000_words/"
        
        for topic_id in topic_ids:
            try:
                key = f"{base_path}topic_{topic_id}.json"
                response = self.s3_client.get_object(Bucket=bucket_name, Key=key)
                content = response['Body'].read().decode('utf-8')
                # Since JSON files contain just text content
                summary_text = json.loads(content) if content.startswith('{') else content
                summaries.append(str(summary_text))
            except Exception as e:
                print(f"Could not retrieve topic_{topic_id}.json: {e}")
                continue
        
        return summaries
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def generate_summary_answer(self, question: str, summaries: List[str]) -> str:
        """Generate answer based on summaries"""
        combined_summaries = "\n\n".join(summaries)
        
        chain = self.summary_answer_prompt | self.llm | StrOutputParser()
        
        response = chain.invoke({
            "question": question,
            "summaries": combined_summaries
        })
        
        return response
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def generate_full_search_answer(self, question: str, search_results: List[SearchResult]) -> str:
        """Generate answer based on full dataset search results"""
        combined_content = "\n\n".join([result.content for result in search_results])
        
        chain = self.full_search_answer_prompt | self.llm | StrOutputParser()
        
        response = chain.invoke({
            "question": question,
            "content": combined_content
        })
        
        return response
    
    @langsmith.traceable(tags=["rizzbot_v1"])
    def answer_question(self, question: str) -> Dict[str, any]:
        """
        Main method to answer user questions following the RAG flow
        
        Returns:
            Dict containing answer, source, and metadata
        """
        try:
            # Step 1: Search summaries
            print("Searching summaries...")
            summary_results = self.search_summaries(question)
            
            if summary_results:
                print(f"Found {len(summary_results)} relevant summaries")
                
                # Extract topic IDs if available in metadata
                topic_ids = []
                for result in summary_results:
                    if result.metadata and 'topic_id' in result.metadata:
                        topic_ids.append(result.metadata['topic_id'])
                
                # Get full summaries from S3 if topic_ids available
                if topic_ids:
                    full_summaries = self.get_s3_summaries(topic_ids)
                    if full_summaries:
                        answer = self.generate_summary_answer(question, full_summaries)
                        return {
                            "answer": answer,
                            "source": "summaries",
                            "num_sources": len(full_summaries),
                            "confidence_scores": [r.score for r in summary_results]
                        }
                
                # Fallback: use summary search results directly
                summaries = [result.content for result in summary_results]
                answer = self.generate_summary_answer(question, summaries)
                return {
                    "answer": answer,
                    "source": "summaries",
                    "num_sources": len(summaries),
                    "confidence_scores": [r.score for r in summary_results]
                }
            
            # Step 2: Search full dataset
            print("Searching full dataset...")
            full_results = self.search_full_dataset(question)
            
            if full_results:
                print(f"Found {len(full_results)} relevant chunks")
                answer = self.generate_full_search_answer(question, full_results)
                return {
                    "answer": answer,
                    "source": "full_dataset", 
                    "num_sources": len(full_results),
                    "confidence_scores": [r.score for r in full_results]
                }
            
            # Step 3: No relevant results found
            print("No relevant results found")
            return {
                "answer": self.no_answer_response,
                "source": "none",
                "num_sources": 0,
                "confidence_scores": []
            }
            
        except Exception as e:
            print(f"Error in answer_question: {e}")
            return {
                "answer": self.no_answer_response,
                "source": "error",
                "num_sources": 0,
                "confidence_scores": [],
                "error": str(e)
            }

# Convenience function for easy usage
def create_rizzbot():
    """Factory function to create and return a Rizzbot instance"""
    return Rizzbot()

# Example usage function
@langsmith.traceable(tags=["rizzbot_v1"])
def ask_charisma_question(bot: Rizzbot, question: str, verbose: bool = True):
    """
    Ask a question to the charisma bot and get a formatted response
    
    Args:
        bot: Rizzbot instance
        question: User's question
        verbose: Whether to print detailed information
    
    Returns:
        Dict with answer and metadata
    """
    result = bot.answer_question(question)
    
    if verbose:
        print(f"\n{'='*50}")
        print(f"Question: {question}")
        print(f"{'='*50}")
        print(f"Answer: {result['answer']}")
        print(f"\n Metadata:")
        print(f"  Source: {result['source']}")
        print(f"  Number of sources: {result['num_sources']}")
        if result['confidence_scores']:
            print(f"  Confidence scores: {[f'{score:.3f}' for score in result['confidence_scores']]}")
        print(f"{'='*50}\n")
    
    return result

In [3]:
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())
import os
print(os.getenv("OPENAI_TEST_KEY_KdR"))
print(os.getenv("LANGSMITH_API_KEY"))
print(os.getenv("PINECONE_API_KEY"))

print(find_dotenv())




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
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
pcsk_oy1iv_Nwmd5f6D2u2DP6frne8f2p4G3CdzRdNsjMcoijRB3Wqn7LGBaaqfteRLhH4z11w
c:\Users\karel\Ironhack-Bootcamp-Assignments\.env


In [22]:
import os
print(os.getcwd())

c:\Users\karel\Ironhack-Bootcamp-Assignments\Rizzbot


In [7]:
# Demo cell 
# Demo: How to use the Rizzbot System
# Run this in a Jupyter notebook cell after the main system code

# Initialize the bot (this will take a moment as it connects to all services)
print("Initializing Rizzbot System...")
bot = create_rizzbot()
print("System ready!")

# Test questions
test_questions = [
    "How can I improve my body language during conversations?",
    "What are the best techniques for building rapport with strangers?", 
    "How do I become more confident in public speaking?",
    "What should I do if someone is being rude to me in a conversation?"
]

# Test the system with different questions
for question in test_questions:
    result = ask_charisma_question(bot, question, verbose=True)
    
    # You can also access individual components:
    # print(f"Just the answer: {result['answer']}")
    # print(f"Source type: {result['source']}")

# You can also use the bot directly for more control:
# custom_result = bot.answer_question("Your custom question here")
# print(custom_result)

Initializing Rizzbot System...
System ready!
Searching summaries...
Error searching summaries: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Tue, 01 Jul 2025 13:20:48 GMT', 'Content-Type': 'application/json', 'Content-Length': '103', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '162', 'x-pinecone-request-id': '8753381339742639365', 'x-envoy-upstream-service-time': '48', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 384","details":[]}

Searching full dataset...
Error searching full dataset: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Tue, 01 Jul 2025 13:20:50 GMT', 'Content-Type': 'application/json', 'Content-Length': '104', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '172', 'x-pinecone-request-id': '3260924727421911001', 'x-envoy-upstream-service-time': '60', 'server': 'envoy'})
HTTP response body: {"code":3,"message":