**CREW AI IMPLEMENTATION WITH RAG**

In [None]:
# --- Install Required Packages ---
!pip install -q langchain langchain-core langchain-experimental
!pip install -q -U langchain-community pypdf
!pip install -q langchain-community langchain-groq
!pip install -q crewai spacy textblob gradio chromadb sentence-transformers pydantic

# --- Imports ---
import os
import shutil
import spacy
import tempfile
import time
from textblob import TextBlob
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import subprocess

from crewai import Agent, Task, Crew, Process, LLM
from crewai.tools import BaseTool
from typing import Type
from pydantic import BaseModel, Field

# --- Global Variables ---
# Module-level singletons shared by the helper functions, tools and UIs below.
VECTOR_DB = None  # Chroma vector store; assigned by create_vector_db_from_data()
NLP_MODEL = None  # spaCy pipeline; loaded on first use by initialize_nlp()
CHATBOT_INSTANCE = None  # Store single chatbot instance

# --- Utility: Clean ChromaDB Directory ---
def clean_chromadb_dir():
    """Delete stale ChromaDB directories and return a fresh path for a new one.

    Every directory in the system temp dir whose name starts with
    ``chroma_db`` is removed on a best-effort basis. The returned path is
    only computed here; it is not created.

    Returns:
        str: ``<tempdir>/chroma_db_<unix-seconds>``.
    """
    temp_dir = tempfile.gettempdir()

    # Use a unique directory name with a timestamp to avoid conflicts
    timestamp = str(int(time.time()))
    chroma_db_path = os.path.join(temp_dir, f'chroma_db_{timestamp}')

    # Also clean any old chroma_db directories
    for item in os.listdir(temp_dir):
        if not item.startswith('chroma_db'):
            continue
        old_path = os.path.join(temp_dir, item)
        if not os.path.isdir(old_path):
            continue
        try:
            shutil.rmtree(old_path)
        except OSError as exc:
            # Best effort: the directory may still be in use. Only filesystem
            # errors are tolerated so that Ctrl-C and real bugs still surface.
            print(f'Could not delete old ChromaDB directory {old_path}: {exc}')
        else:
            print(f'Deleted old ChromaDB directory: {old_path}')

    print(f'Using ChromaDB directory: {chroma_db_path}')
    return chroma_db_path

# --- NLP Initialization ---
def initialize_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The loaded ``en_core_web_sm`` pipeline is stored in the module-level
    ``NLP_MODEL`` so later calls return it without reloading. If the model is
    not installed, it is downloaded once and the load is retried.

    Returns:
        The loaded spaCy pipeline object.

    Raises:
        OSError: if the model still cannot be loaded after the download
            attempt.
    """
    global NLP_MODEL
    if NLP_MODEL is not None:
        return NLP_MODEL

    try:
        NLP_MODEL = spacy.load("en_core_web_sm")
    except OSError:
        import sys

        # Download with the interpreter that is running this code. A bare
        # "python" on PATH can be a different environment (common in
        # notebooks), in which case the retry below would still fail.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=False,
        )
        NLP_MODEL = spacy.load("en_core_web_sm")
    return NLP_MODEL

# --- Vector DB Loader/Creator ---
def fetch_pdfs_from_repo():
    """Make sure ./data/ holds the PDFs shipped in the project's GitHub repo.

    If ./data/ already contains anything, nothing is done. Otherwise the repo
    is cloned (unless a clone is already present), its ``data/*.pdf`` files
    are copied into ./data/, and the clone is removed again.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` fails.
    """
    repo_url = "https://github.com/DeepakAC3/CAMIT-Intern-MentalHealthChatbot.git"
    repo_folder = "CAMIT-Intern-MentalHealthChatbot"
    data_folder = "./data/"

    # Guard: a populated data folder means there is nothing to fetch
    if os.path.exists(data_folder) and os.listdir(data_folder):
        print("PDF files already exist in ./data/, skipping download.")
        return

    # Reuse an existing clone instead of cloning again
    if os.path.exists(repo_folder):
        print(f"Repo folder '{repo_folder}' already exists. Skipping clone.")
    else:
        print(f"Cloning repo {repo_url}...")
        subprocess.run(["git", "clone", repo_url], check=True)

    # Copy only the PDF files from the clone's data folder
    os.makedirs(data_folder, exist_ok=True)
    src_data_folder = os.path.join(repo_folder, "data")
    if not os.path.exists(src_data_folder):
        print(f"No data folder found in repo at {src_data_folder}")
    else:
        print(f"Copying PDFs from {src_data_folder} to {data_folder}...")
        pdf_names = [name for name in os.listdir(src_data_folder) if name.endswith(".pdf")]
        for pdf_name in pdf_names:
            shutil.copy2(
                os.path.join(src_data_folder, pdf_name),
                os.path.join(data_folder, pdf_name),
            )
        print("PDF files copied successfully.")

    # The clone is only a transport vehicle; drop it to save space
    if os.path.exists(repo_folder):
        print(f"Removing cloned repo folder '{repo_folder}'...")
        shutil.rmtree(repo_folder)

def load_existing_vector_db():
    """Build a vector store in a freshly chosen temp directory and return it.

    Despite the name, no existing store is reopened: clean_chromadb_dir()
    picks a new path and create_vector_db_from_data() builds the store there.
    """
    global VECTOR_DB
    fresh_db_path = clean_chromadb_dir()  # new, writable location
    print("Creating new ChromaDB in writable location...")
    vector_db = create_vector_db_from_data(fresh_db_path)
    return vector_db

def create_vector_db_from_data(db_path):
    """Index the PDFs in ./data/ into a Chroma store persisted at ``db_path``.

    The PDFs are loaded, split into 500-character chunks with a 50-character
    overlap, embedded, and stored. When ./data/ holds no PDFs an empty store
    is created instead. The result is also assigned to the module-level
    ``VECTOR_DB``.

    Args:
        db_path: directory the Chroma store is persisted in; created if
            missing.

    Returns:
        The Chroma vector store.
    """
    global VECTOR_DB

    # Both the PDF folder and the ChromaDB folder must exist before use
    data_dir = './data/'
    for required_dir in (data_dir, db_path):
        os.makedirs(required_dir, exist_ok=True)

    pages = DirectoryLoader(data_dir, glob='*.pdf', loader_cls=PyPDFLoader).load()
    embedder = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

    if not pages:
        print("No PDF documents found in ./data/ directory. Creating empty vector store...")
        VECTOR_DB = Chroma(persist_directory=db_path, embedding_function=embedder)
        return VECTOR_DB

    print(f"Found {len(pages)} PDF documents. Processing...")
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    print(f"Split into {len(chunks)} text chunks")

    # A time-based collection name keeps this run's collection distinct
    collection = f"mental_health_docs_{int(time.time())}"
    VECTOR_DB = Chroma.from_documents(
        chunks,
        embedder,
        persist_directory=db_path,
        collection_name=collection
    )

    # Stores without a persist() method raise AttributeError; report and go on
    try:
        VECTOR_DB.persist()
    except AttributeError:
        print("Auto-persistence enabled")
    else:
        print("Database persisted successfully")

    print(f"ChromaDB created with {len(chunks)} document chunks and saved to {db_path}")
    return VECTOR_DB

# --- CrewAI Tools ---
# Argument schema for KnowledgeRetrievalTool: one free-text search query.
class KnowledgeRetrievalInput(BaseModel):
    query: str = Field(description="The query to search for in the knowledge base")

class KnowledgeRetrievalTool(BaseTool):
    # CrewAI tool that looks up passages in the module-level VECTOR_DB.
    name: str = "knowledge_retrieval"
    description: str = "Retrieves relevant mental health information from the knowledge base"
    args_schema: Type[BaseModel] = KnowledgeRetrievalInput

    def _run(self, query: str) -> str:
        """Return the text of the top 3 matching chunks, or a status message.

        Never raises: any failure is reported in the returned string.
        """
        global VECTOR_DB
        try:
            if VECTOR_DB is None:
                return "Knowledge base not initialized"
            hits = VECTOR_DB.as_retriever(search_kwargs={"k": 3}).get_relevant_documents(query)
            context = "\n".join(hit.page_content for hit in hits)
            if not context:
                return "No relevant context found"
            return f"Retrieved context: {context}"
        except Exception as e:
            return f"Error retrieving knowledge: {str(e)}"

# Argument schema for EmotionAnalysisTool: the raw text to analyze.
class EmotionAnalysisInput(BaseModel):
    text: str = Field(description="The text to analyze for emotional content")

class EmotionAnalysisTool(BaseTool):
    # CrewAI tool combining TextBlob sentiment with a small emotion word list.
    name: str = "emotion_analysis"
    description: str = "Analyzes emotional state and sentiment from user input"
    args_schema: Type[BaseModel] = EmotionAnalysisInput

    def _run(self, text: str) -> str:
        """Return a string summary of sentiment, matched emotions and intensity.

        Never raises: any failure is reported in the returned string.
        """
        try:
            doc = initialize_nlp()(text)
            sentiment = TextBlob(text).sentiment

            # Content-word lemmas: no stop words, no punctuation, 3+ characters
            lemmas = [
                token.lemma_.lower()
                for token in doc
                if not (token.is_stop or token.is_punct) and len(token.text) > 2
            ]

            emotion_lexicon = {
                'anxiety': ['anxious', 'worried', 'nervous', 'stressed', 'panic'],
                'depression': ['sad', 'depressed', 'hopeless', 'empty', 'worthless'],
                'anger': ['angry', 'frustrated', 'irritated', 'mad', 'furious'],
                'joy': ['happy', 'excited', 'joyful', 'content', 'pleased']
            }
            # One entry per (emotion, matching lemma) pair; de-duplicated below
            matches = [
                emotion
                for emotion, words in emotion_lexicon.items()
                for lemma in lemmas
                if lemma in words
            ]

            # Intensity is bucketed on the absolute polarity
            magnitude = abs(sentiment.polarity)
            if magnitude > 0.5:
                intensity = 'high'
            elif magnitude > 0.2:
                intensity = 'moderate'
            else:
                intensity = 'low'

            analysis = {
                'sentiment_polarity': sentiment.polarity,
                'sentiment_subjectivity': sentiment.subjectivity,
                'detected_emotions': list(set(matches)),
                'emotional_intensity': intensity
            }
            return f"Emotional analysis: {analysis}"
        except Exception as e:
            return f"Error analyzing emotion: {str(e)}"

# Argument schema for SafetyMonitorTool: the raw text to screen.
class SafetyMonitorInput(BaseModel):
    text: str = Field(description="The text to monitor for safety concerns")

class SafetyMonitorTool(BaseTool):
    # CrewAI tool that screens a message for crisis and high-distress phrases.
    name: str = "safety_monitor"
    description: str = "Monitors for crisis indicators and safety concerns"
    args_schema: Type[BaseModel] = SafetyMonitorInput

    def _run(self, text: str) -> str:
        """Classify ``text`` as CRISIS_ALERT, URGENT_CONCERN or SAFE.

        Matching is by substring on a normalised copy of the text. Crisis
        phrases take precedence over urgent ones.

        Args:
            text: the user message to screen.

        Returns:
            One of the three fixed status strings.
        """
        crisis_keywords = [
            'suicide', 'kill myself', 'end it all', 'hurt myself', 'want to die',
            'self harm', 'cut myself', 'overdose', 'jump off', 'hang myself'
        ]
        urgent_keywords = [
            'emergency', 'crisis', 'help me', 'cant take it', 'breaking point'
        ]

        # Normalise spelling variants so they still hit the keyword lists:
        # "can't take it" / "can’t take it" -> "cant take it",
        # "self-harm" -> "self harm", and runs of whitespace -> single space.
        normalized = text.lower().replace("\u2019", "'").replace("'", "")
        normalized = normalized.replace("-", " ")
        normalized = " ".join(normalized.split())

        if any(keyword in normalized for keyword in crisis_keywords):
            return "CRISIS_ALERT: Immediate suicide risk detected. Emergency intervention required."
        if any(keyword in normalized for keyword in urgent_keywords):
            return "URGENT_CONCERN: High distress level detected. Immediate support recommended."
        return "SAFE: No immediate safety concerns detected."

# --- CrewAI Chatbot Class ---
class CrewAIMentalHealthChatbot:
    """Multi-agent chatbot: emotion, knowledge and safety agents feed a therapist agent.

    Constructing an instance is expensive: it fetches the PDFs, rebuilds the
    vector database and loads the spaCy model (see the setup_* methods).
    """

    def __init__(self):
        """Run the four setup steps in order: LLM, vector DB, tools, agents."""
        self.setup_llm()
        self.setup_vector_db()
        self.setup_tools()
        self.setup_agents()

    def setup_llm(self):
        """Create the Groq-hosted LLM shared by all agents."""
        self.llm = LLM(
            model="groq/llama-3.3-70b-versatile",
            # Key comes from the GROQ_API_KEY environment variable; the fallback
            # is only a placeholder string, not a working key.
            api_key=os.environ.get("GROQ_API_KEY", "YOUR_GROK_API_KEY"),
            temperature=0.3
        )

    def setup_vector_db(self):
        """Fetch the PDFs, build the vector store and report how many chunks it holds."""
        global VECTOR_DB
        fetch_pdfs_from_repo()
        VECTOR_DB = load_existing_vector_db()
        if VECTOR_DB is not None:
            try:
                # Sanity check: read the store back and count its documents
                test_docs = VECTOR_DB.get()
                doc_count = len(test_docs['documents'])
                print(f"Vector database ready with {doc_count} documents")
                if doc_count == 0:
                    print("Note: Vector database is empty. Add PDF files to ./data/ folder and restart.")
            except Exception as e:
                # A failed check is reported but does not stop construction
                print(f"Warning: Vector database may not be functioning properly: {str(e)}")

    def setup_tools(self):
        """Load the NLP model and instantiate the three CrewAI tools."""
        initialize_nlp()
        self.knowledge_tool = KnowledgeRetrievalTool()
        self.emotion_tool = EmotionAnalysisTool()
        self.safety_tool = SafetyMonitorTool()

    def setup_agents(self):
        """Create the four agents; each uses the shared LLM and cannot delegate."""
        self.emotion_agent = Agent(
            role='Emotional Intelligence Specialist',
            # NOTE(review): the goal text ends with a dangling "within" and looks
            # truncated; confirm the intended wording.
            goal='Analyze user emotions and psychological state accurately, within',
            backstory="""You are an expert in emotional intelligence and psychological assessment.
            Your role is to carefully analyze user messages for emotional content, sentiment,
            and psychological indicators. You provide detailed emotional assessments that help
            other agents understand the user's mental state.""",
            tools=[self.emotion_tool],
            llm=self.llm,
            verbose=True,
            allow_delegation=False
        )
        self.knowledge_agent = Agent(
            role='Mental Health Knowledge Specialist',
            goal='Retrieve and synthesize relevant mental health information',
            backstory="""You are a mental health knowledge specialist with access to
            comprehensive mental health resources. Your role is to find and synthesize
            relevant information from the knowledge base to support therapeutic conversations.""",
            tools=[self.knowledge_tool],
            llm=self.llm,
            verbose=True,
            allow_delegation=False
        )
        self.safety_agent = Agent(
            role='Crisis Intervention Specialist',
            goal='Monitor for safety risks and crisis situations',
            backstory="""You are a crisis intervention specialist responsible for identifying
            immediate safety risks and crisis situations. Your primary concern is user safety,
            and you must flag any concerning content that requires immediate attention.""",
            tools=[self.safety_tool],
            llm=self.llm,
            verbose=True,
            allow_delegation=False
        )
        # The therapist has no tools of its own; it works from the other agents' output
        self.therapist_agent = Agent(
            role='Compassionate Mental Health Therapist',
            goal='Provide empathetic, therapeutic responses based on analysis from other agents in no more than 2 or 3 paragraphs unless specified otherwise',
            backstory="""You are a compassionate mental health therapist with expertise in
            various therapeutic approaches. You synthesize information from emotional analysis,
            knowledge retrieval, and safety assessments to provide thoughtful, empathetic,
            and therapeutically appropriate responses to users.""",
            tools=[],
            llm=self.llm,
            verbose=True,
            allow_delegation=False
        )

    def chat(self, user_input: str) -> str:
        """Run one user message through the four-task crew and return the reply.

        Args:
            user_input: the user's message; it is embedded verbatim in every
                task description.

        Returns:
            The crew result's ``raw`` attribute when present, otherwise
            ``str(result)``. On any exception an apology string containing the
            error text is returned instead of raising.
        """
        try:
            # New Task objects are built per message because the descriptions
            # embed the message text
            emotion_task = Task(
                description=f"Use the emotion_analysis tool to analyze this message: '{user_input}'. Provide detailed emotional insights.",
                agent=self.emotion_agent,
                expected_output="Detailed emotional analysis including sentiment, detected emotions, and intensity level"
            )
            knowledge_task = Task(
                description=f"Use the knowledge_retrieval tool to find relevant mental health information for: '{user_input}'. Focus on therapeutic guidance.",
                agent=self.knowledge_agent,
                expected_output="Relevant mental health information and therapeutic guidance from knowledge base"
            )
            safety_task = Task(
                description=f"Use the safety_monitor tool to assess this message for crisis indicators: '{user_input}'. Report any safety concerns.",
                agent=self.safety_agent,
                expected_output="Safety assessment with risk level and any crisis alerts"
            )
            # The final task receives the three analysis tasks as its context
            therapy_task = Task(
                description=f"""Based on the previous analyses, provide a compassionate therapeutic response to: '{user_input}'.
                Consider the emotional state, relevant knowledge, and safety concerns to craft an appropriate response.
                Be warm, empathetic, and provide helpful guidance while validating the user's feelings.""",
                agent=self.therapist_agent,
                expected_output="Compassionate therapeutic response addressing the user's needs",
                context=[emotion_task, knowledge_task, safety_task]
            )
            task_crew = Crew(
                agents=[self.emotion_agent, self.knowledge_agent, self.safety_agent, self.therapist_agent],
                tasks=[emotion_task, knowledge_task, safety_task, therapy_task],
                process=Process.sequential,
                verbose=False
            )
            result = task_crew.kickoff()
            if hasattr(result, 'raw'):
                return result.raw
            else:
                return str(result)
        except Exception as e:
            return f"I apologize, but I encountered an issue processing your message. Please try again. Error: {str(e)}"

# --- Gradio Interface ---
def create_gradio_interface():
    """Build the Gradio chat UI backed by the shared chatbot instance.

    The module-level ``CHATBOT_INSTANCE`` is created here only if it does not
    exist yet.

    Returns:
        The configured ``gr.ChatInterface`` (not yet launched).
    """
    global CHATBOT_INSTANCE

    # Build the chatbot once; later calls share the same instance
    if CHATBOT_INSTANCE is not None:
        print("Using existing chatbot instance for Gradio...")
    else:
        print("Initializing CrewAI Mental Health Chatbot for Gradio...")
        CHATBOT_INSTANCE = CrewAIMentalHealthChatbot()

    def respond(user_message, chat_history):
        # chat_history is passed in by the chat UI but is not used here
        return CHATBOT_INSTANCE.chat(user_message)

    example_prompts = [
        "I'm feeling overwhelmed with work stress",
        "I've been having trouble sleeping and feeling anxious",
        "Can you help me understand depression better?",
        "I'm going through a difficult breakup"
    ]

    return gr.ChatInterface(
        fn=respond,
        title="🧠 CrewAI Multi-Agent Mental Health Chatbot",
        description="""
        An advanced mental health chatbot powered by CrewAI multi-agent workflow:
        • **Emotion Agent**: Analyzes your emotional state
        • **Knowledge Agent**: Retrieves relevant mental health information
        • **Safety Agent**: Monitors for crisis situations
        • **Therapist Agent**: Provides compassionate therapeutic responses
        """,
        examples=example_prompts,
        theme="soft"
    )

# --- Command Line Interface ---
def main():
    """Run the interactive command-line chat loop.

    Typing ``exit`` ends the session; typing ``gradio`` launches the web
    interface and then ends the loop. Any other input is sent to the chatbot.
    """
    global CHATBOT_INSTANCE

    print("Initializing CrewAI Multi-Agent Mental Health Chatbot...")
    CHATBOT_INSTANCE = CrewAIMentalHealthChatbot()

    print("\n🧠 CrewAI Mental Health Chatbot Ready!")
    print("Type 'exit' to quit, 'gradio' to launch web interface\n")

    while True:
        user_input = input("You: ")
        command = user_input.lower()  # commands are case-insensitive

        if command == 'exit':
            print("Chatbot: Take care! Remember, professional help is always available if you need it.")
            return

        if command == 'gradio':
            print("Launching Gradio interface...")
            create_gradio_interface().launch()
            return

        print("🤖 Processing with multi-agent workflow...")
        reply = CHATBOT_INSTANCE.chat(user_input)
        print(f"Chatbot: {reply}\n")

if __name__ == "__main__":
    # Supply the Groq API key through the GROQ_API_KEY environment variable
    # (or put it in the default below). setdefault keeps a key that is
    # already exported instead of overwriting it with an empty string.
    os.environ.setdefault("GROQ_API_KEY", "")
    # Load the spaCy model; initialize_nlp() downloads it only when it is
    # missing, rather than re-running the download on every start.
    initialize_nlp()
    main()


**TELEGRAM BOT**

In [None]:
!pip install nest-asyncio python-telegram-bot
import nest_asyncio
nest_asyncio.apply()

from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, ContextTypes, filters



async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Answer the /start command with a fixed greeting."""
    greeting = "Hello! I'm your mental health assistant. How can I help you today?"
    await update.message.reply_text(greeting)

async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Send an incoming text message through the chatbot and reply with the result.

    The chatbot is built once and kept in the module-level
    ``CHATBOT_INSTANCE``. Constructing ``CrewAIMentalHealthChatbot`` fetches
    the PDFs, rebuilds the vector database and deletes earlier ChromaDB
    directories, so doing that per message is slow and can remove a store
    that is still in use.
    """
    global CHATBOT_INSTANCE
    user_message = update.message.text

    # globals().get() tolerates the variable not having been defined yet
    if globals().get("CHATBOT_INSTANCE") is None:
        CHATBOT_INSTANCE = CrewAIMentalHealthChatbot()

    response = CHATBOT_INSTANCE.chat(user_message)
    await update.message.reply_text(response)

def main():
    """Build the Telegram application, register both handlers and poll for updates."""
    builder = ApplicationBuilder().token("YOUR_TELEGRAM_BOT_TOKEN")
    app = builder.build()

    # /start gets the greeting; every other non-command text goes to the chatbot
    handlers = (
        CommandHandler("start", start),
        MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message),
    )
    for handler in handlers:
        app.add_handler(handler)

    app.run_polling()

# Start the Telegram bot when this code runs as the main module.
if __name__ == "__main__":
    main()

**METRICS**

In [None]:
import time
import psutil
import threading
import asyncio
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import spacy
from textblob import TextBlob
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import os
import gc
import json
import tempfile

class PerformanceMetricsCalculator:
    def __init__(self):
        self.setup_components()
        self.metrics = {}
        self.test_queries = [
            "I'm feeling anxious about my exam",
            "I've been having trouble sleeping",
            "Can you help me with stress management?",
            "I feel overwhelmed with work",
            "I'm experiencing panic attacks"
        ]

    def setup_components(self):
        """Initialize all system components

        Creates the LLM client, embeddings, vector store, spaCy pipeline,
        conversation memory and sentence-similarity model, and stores each on
        ``self``.

        Raises:
            OSError: if the spaCy model cannot be loaded even after a
                download attempt.
        """
        # LLM setup. Read the key from GROQ_API_KEY like the chatbot does; an
        # unset or empty variable falls back to the placeholder string.
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.environ.get("GROQ_API_KEY") or "YOUR_GROK_API_KEY",
            model_name="llama-3.3-70b-versatile"
        )

        # Embeddings and vector store
        self.embeddings = HuggingFaceBgeEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        # Chroma is constructed the same way whether or not the directory
        # already exists, so no existence check is needed.
        # NOTE(review): this path is hard-coded; if that directory is absent
        # the store starts empty.
        db_path = '/tmp/chroma_db_1752075061'
        self.vector_db = Chroma(persist_directory=db_path, embedding_function=self.embeddings)

        # NLP components
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            import subprocess
            import sys

            # Download with the interpreter running this code; a bare
            # "python" on PATH may be a different environment.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                check=False,
            )
            self.nlp = spacy.load("en_core_web_sm")

        # Memory
        self.memory = ConversationBufferMemory()

        # Sentence transformer for similarity calculations
        self.sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def convert_to_serializable(self, obj):
        """Convert numpy types to Python native types for JSON serialization"""
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, dict):
            return {key: self.convert_to_serializable(value) for key, value in obj.items()}
        elif isinstance(obj, list):
            return [self.convert_to_serializable(item) for item in obj]
        else:
            return obj

    def calculate_cosine_similarity_metrics(self):
        """Summarise pairwise cosine similarity between the test queries.

        Returns:
            dict with the average, max and min similarity over all distinct
            query pairs plus the full similarity matrix as nested lists. The
            same dict is stored under ``self.metrics['cosine_similarity']``.
        """
        print("Calculating Cosine Similarity Metrics...")

        # Embed every test query, then compare all of them with each other
        query_vectors = self.sentence_model.encode(self.test_queries)
        similarity_matrix = cosine_similarity(query_vectors)

        # Entries above the diagonal: each pair once, no self-comparison
        pair_scores = similarity_matrix[np.triu_indices_from(similarity_matrix, k=1)]

        summary = {
            'average_similarity': float(np.mean(pair_scores)),
            'max_similarity': float(np.max(pair_scores)),
            'min_similarity': float(np.min(pair_scores)),
            'similarity_matrix': similarity_matrix.tolist()
        }
        self.metrics['cosine_similarity'] = summary
        return summary

    def calculate_task_completion_metrics(self):
        """Calculate task completion time and success rate"""
        print("Calculating Task Completion Metrics...")

        completion_times = []
        success_count = 0
        total_tasks = len(self.test_queries)

        for query in self.test_queries:
            start_time = time.time()
            try:
                # Simulate full pipeline
                response = self.process_query_pipeline(query)
                end_time = time.time()

                completion_time = (end_time - start_time) * 1000  # Convert to milliseconds
                completion_times.append(completion_time)

                if response and len(response) > 10:  # Basic success criteria
                    success_count += 1

            except Exception as e:
                print(f"Task failed for query '{query}': {e}")
                completion_times.append(float('inf'))

        success_rate = (success_count / total_tasks) * 100
        avg_completion_time = np.mean([t for t in completion_times if t != float('inf')])

        self.metrics['task_completion'] = {
            'success_rate': float(success_rate),
            'average_completion_time_ms': float(avg_completion_time),
            'completion_times': [float(t) if t != float('inf') else None for t in completion_times],
            'total_tasks': int(total_tasks),
            'successful_tasks': int(success_count)
        }

        return self.metrics['task_completion']

    def calculate_inter_agent_latency(self):
        """Calculate latency between different agent components"""
        print("Calculating Inter-Agent Latency...")

        latencies = {
            'retrieval_latency': [],
            'emotion_analysis_latency': [],
            'safety_check_latency': [],
            'response_generation_latency': [],
            'total_pipeline_latency': []
        }

        for query in self.test_queries:
            # Measure retrieval agent latency
            start_time = time.time()
            try:
                retriever = self.vector_db.as_retriever(search_kwargs={"k": 3})
                docs = retriever.get_relevant_documents(query)
                retrieval_time = (time.time() - start_time) * 1000
                latencies['retrieval_latency'].append(retrieval_time)
            except:
                latencies['retrieval_latency'].append(0)

            # Measure emotion analysis latency
            start_time = time.time()
            try:
                doc = self.nlp(query)
                blob = TextBlob(query)
                sentiment = blob.sentiment
                emotion_time = (time.time() - start_time) * 1000
                latencies['emotion_analysis_latency'].append(emotion_time)
            except:
                latencies['emotion_analysis_latency'].append(0)

            # Measure safety check latency
            start_time = time.time()
            try:
                crisis_keywords = ['suicide', 'kill myself', 'hurt myself']
                safety_check = any(keyword in query.lower() for keyword in crisis_keywords)
                safety_time = (time.time() - start_time) * 1000
                latencies['safety_check_latency'].append(safety_time)
            except:
                latencies['safety_check_latency'].append(0)

            # Measure response generation latency
            start_time = time.time()
            try:
                response = self.llm.invoke(f"Respond to: {query}")
                response_time = (time.time() - start_time) * 1000
                latencies['response_generation_latency'].append(response_time)
            except:
                latencies['response_generation_latency'].append(0)

            # Calculate total pipeline latency
            total_latency = sum([
                latencies['retrieval_latency'][-1],
                latencies['emotion_analysis_latency'][-1],
                latencies['safety_check_latency'][-1],
                latencies['response_generation_latency'][-1]
            ])
            latencies['total_pipeline_latency'].append(total_latency)

        # Calculate averages and convert to serializable format
        avg_latencies = {}
        for key, values in latencies.items():
            avg_latencies[f'avg_{key}'] = float(np.mean(values))
            avg_latencies[f'max_{key}'] = float(np.max(values))
            avg_latencies[f'min_{key}'] = float(np.min(values))

        self.metrics['inter_agent_latency'] = {
            'detailed_latencies': {k: [float(v) for v in values] for k, values in latencies.items()},
            'average_latencies': avg_latencies
        }

        return self.metrics['inter_agent_latency']

    def calculate_memory_usage(self):
        """Calculate memory usage and computational efficiency

        Samples this process's resident set size (in MB) before and after
        running each test query through ``self.process_query_pipeline``. A
        query that raises is reported and still contributes a reading.

        Returns:
            dict with ``baseline_memory_mb``, ``average_memory_mb``,
            ``peak_memory_mb``, ``average_memory_increase_mb`` and
            ``detailed_readings``. With no test queries the average and peak
            equal the baseline and the increase is 0.0. Also stored in
            ``self.metrics['memory_usage']``.
        """
        print("Calculating Memory Usage...")

        process = psutil.Process()

        def rss_mb():
            # Resident set size of this process, bytes -> MB
            return process.memory_info().rss / 1024 / 1024

        # Baseline memory
        baseline_memory = rss_mb()

        # Memory during operations
        memory_readings = []

        for query in self.test_queries:
            mem_before = rss_mb()

            try:
                self.process_query_pipeline(query)
            except Exception as e:
                # Keep measuring, but do not hide the failure
                print(f"Pipeline failed for query '{query}': {e}")

            mem_after = rss_mb()
            memory_readings.append({
                'before': float(mem_before),
                'after': float(mem_after),
                'difference': float(mem_after - mem_before)
            })

        # Calculate memory statistics (np.max raises on an empty list)
        if memory_readings:
            after_values = [r['after'] for r in memory_readings]
            avg_memory_usage = float(np.mean(after_values))
            peak_memory_usage = float(np.max(after_values))
            avg_memory_increase = float(np.mean([r['difference'] for r in memory_readings]))
        else:
            avg_memory_usage = float(baseline_memory)
            peak_memory_usage = float(baseline_memory)
            avg_memory_increase = 0.0

        self.metrics['memory_usage'] = {
            'baseline_memory_mb': float(baseline_memory),
            'average_memory_mb': avg_memory_usage,
            'peak_memory_mb': peak_memory_usage,
            'average_memory_increase_mb': avg_memory_increase,
            'detailed_readings': memory_readings
        }

        return self.metrics['memory_usage']

    def calculate_parallelism_efficiency(self):
        """Calculate parallelism efficiency vs sequential processing"""
        print("Calculating Parallelism Efficiency...")

        # Sequential processing time
        start_time = time.time()
        sequential_results = []
        for query in self.test_queries:
            try:
                result = self.process_query_pipeline(query)
                sequential_results.append(result)
            except:
                sequential_results.append(None)
        sequential_time = time.time() - start_time

        # Parallel processing time
        start_time = time.time()
        with ThreadPoolExecutor(max_workers=3) as executor:
            parallel_results = list(executor.map(self.process_query_pipeline, self.test_queries))
        parallel_time = time.time() - start_time

        # Calculate efficiency
        speedup = sequential_time / parallel_time if parallel_time > 0 else 0
        efficiency = speedup / 3  # 3 workers

        self.metrics['parallelism_efficiency'] = {
            'sequential_time_seconds': float(sequential_time),
            'parallel_time_seconds': float(parallel_time),
            'speedup_factor': float(speedup),
            'efficiency_percentage': float(efficiency * 100),
            'theoretical_max_speedup': 3
        }

        return self.metrics['parallelism_efficiency']

    def calculate_throughput(self):
        """Calculate system throughput"""
        print("Calculating Throughput...")

        # Test different batch sizes
        batch_sizes = [1, 3, 5, 10]
        throughput_results = {}

        for batch_size in batch_sizes:
            test_batch = self.test_queries[:min(batch_size, len(self.test_queries))]

            start_time = time.time()
            processed_count = 0

            for query in test_batch:
                try:
                    self.process_query_pipeline(query)
                    processed_count += 1
                except:
                    pass

            end_time = time.time()
            duration = end_time - start_time

            throughput = processed_count / duration if duration > 0 else 0

            throughput_results[f'batch_size_{batch_size}'] = {
                'queries_per_second': float(throughput),
                'total_queries': int(len(test_batch)),
                'successful_queries': int(processed_count),
                'duration_seconds': float(duration)
            }

        self.metrics['throughput'] = throughput_results
        return self.metrics['throughput']

    def calculate_accuracy_quality_score(self):
        """Score pipeline responses on four heuristic quality dimensions.

        Each query in ``self.test_queries`` is run through
        ``self.process_query_pipeline`` and the response is scored on:

        * ``context_relevance``: cosine similarity between the
          ``self.sentence_model`` embeddings of the query and the response.
        * ``response_coherence``: response word count divided by 50, capped
          at 1.0.
        * ``emotional_appropriateness``:
          ``1 - abs(query_polarity - response_polarity) / 2`` using TextBlob
          sentiment polarity.
        * ``safety_compliance``: 1.0 when the lowercased response contains
          none of the flagged keywords, otherwise 0.5.

        The per-query quality score is the mean of those four values. A query
        whose processing fails (an exception, or the pipeline's
        ``"Error processing query: ..."`` failure message) scores 0.0 on every
        dimension instead of having the error text graded as a response.
        With no test queries, every average is reported as 0.0 rather than
        NaN.

        Returns:
            dict: The value stored in ``self.metrics['accuracy_quality']``,
            with keys ``overall_quality_score``,
            ``individual_quality_scores``, ``accuracy_breakdown`` and
            ``detailed_accuracy_metrics``.
        """
        print("Calculating Accuracy and Quality Score...")

        # Loop-invariant constants.
        harmful_keywords = ('harmful', 'dangerous', 'suicide', 'self-harm')
        error_prefix = "Error processing query:"

        quality_scores = []
        accuracy_metrics = {
            'context_relevance': [],
            'response_coherence': [],
            'emotional_appropriateness': [],
            'safety_compliance': []
        }

        for query in self.test_queries:
            try:
                response = self.process_query_pipeline(query)
                if isinstance(response, str) and response.startswith(error_prefix):
                    # The pipeline signals failure through its return value;
                    # route it to the failure branch below.
                    raise RuntimeError(response)

                # Context relevance (cosine similarity between query and response)
                query_embedding = self.sentence_model.encode([query])
                response_embedding = self.sentence_model.encode([response])
                relevance_score = float(cosine_similarity(query_embedding, response_embedding)[0][0])

                # Response coherence (word count normalised by an expected length of 50)
                coherence_score = float(min(len(response.split()) / 50, 1.0))

                # Emotional appropriateness (sentiment alignment)
                query_sentiment = TextBlob(query).sentiment.polarity
                response_sentiment = TextBlob(response).sentiment.polarity
                emotion_score = float(1 - abs(query_sentiment - response_sentiment) / 2)

                # Safety compliance (no flagged keyword in the response)
                lowered_response = response.lower()
                flagged = any(word in lowered_response for word in harmful_keywords)
                safety_score = 0.5 if flagged else 1.0

                overall_quality = float(np.mean([
                    relevance_score,
                    coherence_score,
                    emotion_score,
                    safety_score
                ]))

            except Exception as e:
                print(f"Error calculating quality for query '{query}': {e}")
                quality_scores.append(0.0)
                for key in accuracy_metrics:
                    accuracy_metrics[key].append(0.0)
            else:
                # Append only after every score was computed so the per-metric
                # lists always stay the same length as quality_scores.
                accuracy_metrics['context_relevance'].append(relevance_score)
                accuracy_metrics['response_coherence'].append(coherence_score)
                accuracy_metrics['emotional_appropriateness'].append(emotion_score)
                accuracy_metrics['safety_compliance'].append(safety_score)
                quality_scores.append(overall_quality)

        # np.mean of an empty list is NaN (with a RuntimeWarning); report 0.0 instead.
        avg_quality_score = float(np.mean(quality_scores)) if quality_scores else 0.0
        avg_accuracy_metrics = {
            key: float(np.mean(values)) if values else 0.0
            for key, values in accuracy_metrics.items()
        }

        self.metrics['accuracy_quality'] = {
            'overall_quality_score': avg_quality_score,
            'individual_quality_scores': quality_scores,
            'accuracy_breakdown': avg_accuracy_metrics,
            'detailed_accuracy_metrics': {k: [float(v) for v in values] for k, values in accuracy_metrics.items()}
        }

        return self.metrics['accuracy_quality']

    def calculate_false_positive_rate(self):
        """Evaluate ``self.detect_crisis`` against a small labelled sample.

        Six hard-coded (text, is_crisis) pairs are classified with
        ``self.detect_crisis`` and tallied into a confusion matrix, from which
        the false positive rate, precision, recall and F1 score are derived.
        Any ratio whose denominator is zero is reported as 0.

        Returns:
            dict: The value stored in ``self.metrics['false_positive_rate']``,
            with keys ``false_positive_rate``, ``precision``, ``recall``,
            ``f1_score`` and ``confusion_matrix``.
        """
        print("Calculating False Positive Rate...")

        # (text, expected crisis label)
        labelled_cases = (
            ("I want to hurt myself", True),
            ("I'm feeling sad today", False),
            ("I can't take it anymore, I want to end it all", True),
            ("I'm stressed about work", False),
            ("I'm having suicidal thoughts", True),
            ("I feel overwhelmed but I'll manage", False),
        )

        # Outcome counts keyed by (expected label, predicted label).
        tally = {
            (True, True): 0,
            (True, False): 0,
            (False, True): 0,
            (False, False): 0,
        }
        for text, expected in labelled_cases:
            predicted = bool(self.detect_crisis(text))
            tally[(expected, predicted)] += 1

        tp = tally[(True, True)]
        fn = tally[(True, False)]
        fp = tally[(False, True)]
        tn = tally[(False, False)]

        actual_negatives = tn + fp
        predicted_positives = tp + fp
        actual_positives = tp + fn

        fpr = fp / actual_negatives if actual_negatives > 0 else 0
        precision = tp / predicted_positives if predicted_positives > 0 else 0
        recall = tp / actual_positives if actual_positives > 0 else 0

        pr_sum = precision + recall
        f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0

        summary = {
            'false_positive_rate': float(fpr),
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1),
            'confusion_matrix': {
                'true_positives': tp,
                'false_positives': fp,
                'true_negatives': tn,
                'false_negatives': fn,
            },
        }
        self.metrics['false_positive_rate'] = summary

        return self.metrics['false_positive_rate']

    def detect_crisis(self, text):
        """Return True when ``text`` contains any crisis phrase.

        Matching is a case-insensitive substring test against a fixed list
        of phrases.
        """
        lowered = text.lower()
        for phrase in ('suicide', 'kill myself', 'end it all',
                       'hurt myself', 'want to die', 'suicidal'):
            if phrase in lowered:
                return True
        return False

    def process_query_pipeline(self, query):
        """Run one query through retrieval, analysis, safety check and LLM.

        Stages, in order:
          1. fetch up to 3 documents from ``self.vector_db`` and join their
             ``page_content`` into a context string;
          2. run ``self.nlp`` and TextBlob sentiment on the query;
          3. return a fixed crisis message if ``self.detect_crisis`` fires;
          4. otherwise prompt ``self.llm`` with the context and the query.

        Returns:
            str: The LLM reply (its ``content`` attribute when present,
            otherwise ``str(reply)``), the fixed crisis message, or
            ``"Error processing query: <message>"`` when any stage raises.
        """
        try:
            # Stage 1: retrieval
            fetched = self.vector_db.as_retriever(
                search_kwargs={"k": 3}
            ).get_relevant_documents(query)
            if fetched:
                context = "\n".join(item.page_content for item in fetched)
            else:
                context = "No context available"

            # Stage 2: emotion analysis. The results are not used below;
            # presumably the calls are kept so the simulated pipeline does
            # the same work as the real one.
            self.nlp(query)
            _ = TextBlob(query).sentiment

            # Stage 3: safety check short-circuits before the LLM is called
            if self.detect_crisis(query):
                return "I understand you're going through a difficult time. Please reach out to a mental health professional or crisis hotline immediately."

            # Stage 4: response generation
            reply = self.llm.invoke(
                f"Context: {context}\nUser query: {query}\nProvide a compassionate response:"
            )
            if hasattr(reply, 'content'):
                return reply.content
            return str(reply)

        except Exception as err:
            return f"Error processing query: {str(err)}"

    def run_all_calculations(self):
        """Execute every metric calculation and return the collected metrics.

        The calculations run in a fixed order. Afterwards ``self.metrics`` is
        replaced by the result of ``self.convert_to_serializable`` and that
        value is returned.
        """
        print("Starting comprehensive performance analysis...")
        print("=" * 60)

        # Ordered list of metric steps.
        steps = (
            self.calculate_cosine_similarity_metrics,
            self.calculate_task_completion_metrics,
            self.calculate_inter_agent_latency,
            self.calculate_memory_usage,
            self.calculate_parallelism_efficiency,
            self.calculate_throughput,
            self.calculate_accuracy_quality_score,
            self.calculate_false_positive_rate,
        )
        for step in steps:
            step()

        # Normalise the collected values so they can be serialised later.
        self.metrics = self.convert_to_serializable(self.metrics)

        return self.metrics

    def generate_report(self):
        """Generate a comprehensive performance report"""
        if not self.metrics:
            self.run_all_calculations()

        report = """
MENTAL HEALTH CHATBOT PERFORMANCE ANALYSIS REPORT
================================================

1. COSINE SIMILARITY METRICS
----------------------------
Average Similarity: {:.4f}
Max Similarity: {:.4f}
Min Similarity: {:.4f}

2. TASK COMPLETION METRICS
--------------------------
Success Rate: {:.2f}%
Average Completion Time: {:.2f} ms
Total Tasks: {}
Successful Tasks: {}

3. INTER-AGENT LATENCY
----------------------
Average Retrieval Latency: {:.2f} ms
Average Emotion Analysis Latency: {:.2f} ms
Average Safety Check Latency: {:.2f} ms
Average Response Generation Latency: {:.2f} ms
Average Total Pipeline Latency: {:.2f} ms

4. MEMORY USAGE
---------------
Baseline Memory: {:.2f} MB
Average Memory Usage: {:.2f} MB
Peak Memory Usage: {:.2f} MB
Average Memory Increase: {:.2f} MB

5. PARALLELISM EFFICIENCY
-------------------------
Sequential Time: {:.2f} seconds
Parallel Time: {:.2f} seconds
Speedup Factor: {:.2f}x
Efficiency: {:.2f}%

6. THROUGHPUT ANALYSIS
----------------------
Batch Size 1: {:.2f} queries/second
Batch Size 3: {:.2f} queries/second
Batch Size 5: {:.2f} queries/second

7. ACCURACY & QUALITY SCORE
----------------------------
Overall Quality Score: {:.4f}
Context Relevance: {:.4f}
Response Coherence: {:.4f}
Emotional Appropriateness: {:.4f}
Safety Compliance: {:.4f}

8. FALSE POSITIVE RATE ANALYSIS
-------------------------------
False Positive Rate: {:.4f}
Precision: {:.4f}
Recall: {:.4f}
F1 Score: {:.4f}

9. ORCHESTRATION EFFECTIVENESS SUMMARY
--------------------------------------
Task Success Rate: {:.2f}%
Average Response Time: {:.2f} ms
System Reliability: {:.2f}%
Resource Efficiency: {:.2f}%
        """.format(
            self.metrics['cosine_similarity']['average_similarity'],
            self.metrics['cosine_similarity']['max_similarity'],
            self.metrics['cosine_similarity']['min_similarity'],

            self.metrics['task_completion']['success_rate'],
            self.metrics['task_completion']['average_completion_time_ms'],
            self.metrics['task_completion']['total_tasks'],
            self.metrics['task_completion']['successful_tasks'],

            self.metrics['inter_agent_latency']['average_latencies']['avg_retrieval_latency'],
            self.metrics['inter_agent_latency']['average_latencies']['avg_emotion_analysis_latency'],
            self.metrics['inter_agent_latency']['average_latencies']['avg_safety_check_latency'],
            self.metrics['inter_agent_latency']['average_latencies']['avg_response_generation_latency'],
            self.metrics['inter_agent_latency']['average_latencies']['avg_total_pipeline_latency'],

            self.metrics['memory_usage']['baseline_memory_mb'],
            self.metrics['memory_usage']['average_memory_mb'],
            self.metrics['memory_usage']['peak_memory_mb'],
            self.metrics['memory_usage']['average_memory_increase_mb'],

            self.metrics['parallelism_efficiency']['sequential_time_seconds'],
            self.metrics['parallelism_efficiency']['parallel_time_seconds'],
            self.metrics['parallelism_efficiency']['speedup_factor'],
            self.metrics['parallelism_efficiency']['efficiency_percentage'],

            self.metrics['throughput']['batch_size_1']['queries_per_second'],
            self.metrics['throughput']['batch_size_3']['queries_per_second'],
            self.metrics['throughput']['batch_size_5']['queries_per_second'],

            self.metrics['accuracy_quality']['overall_quality_score'],
            self.metrics['accuracy_quality']['accuracy_breakdown']['context_relevance'],
            self.metrics['accuracy_quality']['accuracy_breakdown']['response_coherence'],
            self.metrics['accuracy_quality']['accuracy_breakdown']['emotional_appropriateness'],
            self.metrics['accuracy_quality']['accuracy_breakdown']['safety_compliance'],

            self.metrics['false_positive_rate']['false_positive_rate'],
            self.metrics['false_positive_rate']['precision'],
            self.metrics['false_positive_rate']['recall'],
            self.metrics['false_positive_rate']['f1_score'],

            self.metrics['task_completion']['success_rate'],
            self.metrics['inter_agent_latency']['average_latencies']['avg_total_pipeline_latency'],
            (1 - self.metrics['false_positive_rate']['false_positive_rate']) * 100,
            self.metrics['parallelism_efficiency']['efficiency_percentage']
        )

        return report

# Usage example: run the full analysis, print the report, save raw metrics.
if __name__ == "__main__":
    # Build the calculator, then compute every metric up front.
    calculator = PerformanceMetricsCalculator()
    print("Initializing performance analysis...")
    metrics = calculator.run_all_calculations()

    # Completion banner: blank line, rule, title, rule.
    print("", "=" * 60, "PERFORMANCE ANALYSIS COMPLETE", "=" * 60, sep="\n")

    # Human-readable summary of the metrics computed above.
    report = calculator.generate_report()
    print(report)

    # Persist the detailed metrics as JSON; a failure here is reported
    # but does not abort the script.
    try:
        with open('performance_metrics.json', 'w') as f:
            json.dump(metrics, f, indent=2)
        print("\nDetailed metrics saved to 'performance_metrics.json'")
    except Exception as exc:
        print(f"\nError saving metrics: {exc}")

    print("Analysis complete!")