In [None]:
!pip install langchain langchain-community langchain-groq groq sentence-transformers faiss-cpu gradio pandas scikit-learn nltk -q

In [None]:
import os
import random
import pandas as pd
import gradio as gr
import pandas as pd
import numpy as np
import nltk

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainFilter

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize



nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
from google.colab import userdata

# Read the Groq key stored under the Colab secret name 'groqAPI' and export it
# as GROQ_API_KEY so later cells can pick it up from the environment.
groq_api_key = userdata.get('groqAPI')
os.environ.update({'GROQ_API_KEY': groq_api_key})

In [None]:
class HealthcareRAGChatbot:
    """
    Retrieval-augmented chatbot over a CSV FAQ knowledge base.

    The CSV is expected to expose 'Questions' and 'Answers' columns; each row is
    embedded into a FAISS index and answered through a Groq-hosted LLM via a
    LangChain RetrievalQA chain. Greetings and sensitive topics are short-circuited
    before the chain is invoked.
    """

    def __init__(self, knowledge_base_path, groq_api_key):
        """
        Initialize the Healthcare RAG Chatbot

        :param knowledge_base_path: Path to the CSV knowledge base
        :param groq_api_key: API key for Groq
        """
        # Setup Environment and API Key (ChatGroq is constructed without an
        # explicit key below, so it is exported through the environment here)
        os.environ["GROQ_API_KEY"] = groq_api_key

        # Embeddings Initialization
        self.embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        )

        # Load Knowledge Base
        self.knowledge_base = pd.read_csv(knowledge_base_path)

        # Guardrails Configuration
        self.setup_guardrails()

        # Most recent user query; initialised to None and not updated in this class
        self.last_query = None

        # Conversational Elements
        self.setup_conversational_elements()

        # Vector Database Setup
        self.setup_vector_database()

        # LLM and Retrieval Chain Setup
        self.setup_retrieval_qa_chain()

        # Rolling window of {'query', 'response'} dicts, oldest first
        self.conversation_memory = []

        # Maximum number of turns kept in conversation_memory
        self.max_memory_length = 5

    def setup_guardrails(self):
        """
        Configure safety and ethical guardrails

        Sets the list of sensitive-topic phrases that block a query and the
        text appended to every generated answer.
        """
        self.sensitive_topics = [
            'suicide', 'self-harm', 'sexual abuse',
            'domestic violence', 'emergency medical conditions'
        ]

        # Despite the attribute name, this is a follow-up prompt appended to
        # each answer rather than a disclaimer text.
        self.medical_disclaimer = (
            "Do you have any more questions?"
        )

    def setup_conversational_elements(self):
        """
        Setup greeting patterns and responses
        """
        # Lower-case phrases that mark a message as small talk
        self.greeting_patterns = [
            'hi', 'hello', 'hey', 'good morning', 'good afternoon',
            'good evening', 'how are you', 'what\'s up', 'greetings', 'howdy'
        ]

        # One of these is picked at random as the reply to a greeting
        self.greeting_responses = [
            "Hello! I'm a healthcare assistant. How can I help you today?",
            "Hi there! I'm here to assist you with health-related questions.",
            "Greetings! I'm ready to provide helpful health information.",
            "Good day! What health concerns can I help you with today?"
        ]

    def setup_vector_database(self):
        """
        Prepare and create vector database from knowledge base

        Each CSV row becomes one "Question: ... Answer: ..." document; the
        documents are embedded whole (no chunking is applied) and stored in
        a FAISS index on ``self.vectorstore``.
        """
        # Prepare Documents
        documents = [
            f"Question: {row['Questions']} Answer: {row['Answers']}"
            for _, row in self.knowledge_base.iterrows()
        ]

        # Create Vector Store
        self.vectorstore = FAISS.from_texts(
            documents,
            self.embeddings
        )

    def setup_retrieval_qa_chain(self):
        """
        Setup Retrieval QA Chain with advanced configurations

        Creates ``self.llm`` (Groq chat model) and ``self.qa_chain`` (a "stuff"
        RetrievalQA chain over the top-3 retrieved documents that also returns
        its source documents).
        """
        # Initialize Groq LLM
        self.llm = ChatGroq(
            model_name="llama3-8b-8192",
            temperature=0.1,
            max_tokens=200
        )


        qa_template = """
        You are an advanced healthcare AI assistant focused on providing safe, accurate, and compassionate health information.

        Context: {context}

        Question: {question}

        Guidelines:
        1. Provide clear, evidence-based information.
        2. Explain medical terms in simple language.
        3. NEVER provide direct medical diagnosis.
        4. If the query involves sensitive topics, respond with care and redirect.
        5. Always recommend professional medical consultation.
        6. Include potential next steps or recommendations.
        7. If no specific medical advice can be given, explain why.
        8. Maintain a helpful, empathetic tone.
        9. Ensure the response is informative and clear.
        10. If context is insufficient or not directly related: Use your general medical knowledge to provide a helpful response.
        11. Implement a Memory Mechanism: Store previous conversation turns and relevant context.
        12. Leverage Keywords and Topics: Identify key topics and keywords from past interactions to guide future responses.
        13. Use Contextual Prompts: Incorporate past conversation details into subsequent prompts to maintain coherence.
        14. Ensure the response maintains conciseness, relevance, coherence, and correctness, while avoiding harmfulness, maliciousness, controversy, misogyny, criminality, insensitivity.
        15. Do not break the response without meaning; continue the sentence with a few more words to maintain clarity and flow, continue till where '.' ends.
        16. Dynamically adapt responses to current context.
        17. Expand on user's request within the same topic.
        18. Gracefully handle topic transitions while preserving context.
        19. Maintain empathetic and clear communication tone.

        Response:
        """

        # Create Prompt Template
        PROMPT = PromptTemplate(
            template=qa_template,
            input_variables=["context", "question"]
        )

        # Create QA Chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

    def update_conversation_memory(self, query, response):
        """
        Update conversation memory with context

        :param query: User message of the turn being recorded
        :param response: Reply that was returned for that message
        """
        self.conversation_memory.append({
            'query': query,
            'response': response
        })

        # Limit memory length: at most one entry is added per call, so dropping
        # the single oldest entry is enough to stay within the window
        if len(self.conversation_memory) > self.max_memory_length:
            self.conversation_memory.pop(0)

    def generate_context_prompt(self, query):
        """
        Generate a context-aware prompt using conversation memory

        :param query: Current user query
        :return: Prompt text containing the remembered turns, the current
                 query and continuity guidelines
        """
        # Build context from recent conversation history
        context_history = "\n".join([
            f"Previous Query: {mem['query']}\nPrevious Response: {mem['response']}"
            for mem in self.conversation_memory
        ])

        return f"""
        Conversation Context:
        {context_history}

        Current Query: {query}

        Guidelines:
        1. Maintain conversational continuity
        2. Refer to previous context if relevant
        3. Provide coherent and progressive information
        4. If no direct connection exists, respond naturally
        """

    def check_guardrails(self, query):
        """
        Check query against sensitive topics and potential risks

        :param query: User input query
        :return: Boolean indicating if query passes guardrails
        """
        query_lower = query.lower()

        # Check for sensitive topics (case-insensitive substring match)
        if any(topic in query_lower for topic in self.sensitive_topics):
            return False

        # Reject overly long inputs
        if len(query) > 500:
            return False

        return True

    def is_query_related(self, previous_query, current_query):
        """
        Check if the current query is related to the previous query

        :param previous_query: Earlier query text, or None if there is none
        :param current_query: Current query text
        :return: True when the two queries share at least one
                 whitespace-separated, lower-cased word
        """
        # No earlier (or current) query means nothing to relate to; this also
        # covers the initial ``last_query = None`` state.
        if not previous_query or not current_query:
            return False

        # Simple implementation of query relatedness
        similar_words = set(previous_query.lower().split()) & set(current_query.lower().split())
        return len(similar_words) > 0

    def process_query(self, query):
        """
        Process query with enhanced context awareness

        Order of handling: greeting shortcut, guardrail refusal, then the
        retrieval QA chain. Greeting, refusal and successful chain replies are
        stored in conversation memory; error replies are not.

        :param query: User input query
        :return: Reply text for the user
        """
        # Check for general conversation
        if self.is_general_conversation(query):
            response = random.choice(self.greeting_responses)
            self.update_conversation_memory(query, response)
            return response

        # Guardrails check
        if not self.check_guardrails(query):
            response = (
                "I apologize, but I cannot provide information on this topic. "
                "For sensitive issues, please consult a healthcare professional directly."
            )
            self.update_conversation_memory(query, response)
            return response

        try:
            # Generate context-aware prompt
            context_prompt = self.generate_context_prompt(query)

            # Process query with context
            # NOTE(review): RetrievalQA's declared input is 'query'; the extra
            # 'context' key is presumably ignored by the chain (the prompt's
            # {context} slot is normally filled from the retrieved documents),
            # so the conversation history may never reach the LLM - confirm
            # against the LangChain version in use.
            result = self.qa_chain({
                'query': query,
                'context': context_prompt
            })

            response = result.get('result', 'I could not find a suitable response.')
            response += f"\n\n{self.medical_disclaimer}"

            # Update conversation memory
            self.update_conversation_memory(query, response)

            return response

        except Exception as e:
            # Any failure in the chain is reported to the user as text
            return f"I encountered an error processing your query: {str(e)}"

    def is_general_conversation(self, query):
        """
        Check if query is a general conversation

        A greeting pattern only counts when it appears as a whole word or
        phrase, so "this", "they" or "schizophrenia" no longer match the
        patterns 'hi' / 'hey'.

        :param query: User input query
        :return: Boolean indicating if it's a general conversation
        """
        # Local import: `re` is not among the notebook's top-level imports
        import re

        query_lower = query.lower().strip()
        return any(
            re.search(r"\b" + re.escape(pattern) + r"\b", query_lower) is not None
            for pattern in self.greeting_patterns
        )

In [None]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
class AnswerEvaluator:
    """
    Compare generated answers against ground-truth answers with several
    lexical-overlap metrics (cosine similarity of word counts, word-set
    overlap, BLEU, and word-set precision/recall/F1).
    """

    def __init__(self, ground_truths, generated_answers):
        """
        Initialize evaluator with ground truth and generated answers

        :param ground_truths: List of ground truth answers
        :param generated_answers: List of generated answers
        """
        self.ground_truths = ground_truths
        self.generated_answers = generated_answers

    def cosine_similarity_score(self, text1, text2):
        """
        Calculate cosine similarity between two texts

        :param text1: First text
        :param text2: Second text
        :return: Cosine similarity score of the two word-count vectors;
                 0.0 when no vocabulary can be built from the texts
        """
        try:
            counts = CountVectorizer().fit_transform([text1, text2])
        except ValueError:
            # CountVectorizer raises ValueError on an empty vocabulary
            # (e.g. both texts empty); treat that as "no similarity".
            return 0.0
        vectors = counts.toarray()
        # cosine_similarity returns the 2x2 pairwise matrix; [0][1] is text1 vs text2
        return cosine_similarity(vectors)[0][1]

    def rough_similarity(self, text1, text2):
        """
        Calculate rough similarity based on common words

        :param text1: First text
        :param text2: Second text
        :return: Number of shared distinct tokens divided by the size of the
                 larger token set; 0.0 when both texts have no tokens
        """
        words1 = set(word_tokenize(text1.lower()))
        words2 = set(word_tokenize(text2.lower()))

        largest = max(len(words1), len(words2))
        if largest == 0:
            # Both texts are empty: avoid dividing by zero
            return 0.0

        common_words = words1.intersection(words2)
        return len(common_words) / largest

    def bleu_score(self, reference, candidate):
        """
        Calculate BLEU score

        :param reference: Ground truth text
        :param candidate: Generated text
        :return: BLEU score, or 0 if the score cannot be computed
        """
        reference_tokens = [word_tokenize(reference.lower())]
        candidate_tokens = word_tokenize(candidate.lower())

        try:
            return sentence_bleu(reference_tokens, candidate_tokens)
        except Exception:
            # Best-effort metric: a scoring failure counts as 0, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return 0

    def precision_recall_f1(self, reference, candidate):
        """
        Calculate precision, recall, and F1 score

        All three are computed over the sets of distinct lower-cased tokens.

        :param reference: Ground truth text
        :param candidate: Generated text
        :return: Precision, Recall, F1 Score
        """
        ref_words = set(word_tokenize(reference.lower()))
        cand_words = set(word_tokenize(candidate.lower()))

        # Precision: shared tokens / candidate tokens
        true_positives = len(ref_words.intersection(cand_words))
        precision = true_positives / len(cand_words) if len(cand_words) > 0 else 0

        # Recall: shared tokens / reference tokens
        recall = true_positives / len(ref_words) if len(ref_words) > 0 else 0

        # F1 Score: harmonic mean, defined as 0 when both parts are 0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        return precision, recall, f1

    def evaluate_answers(self):
        """
        Evaluate all answers and compile metrics

        Pairs are formed positionally with ``zip``, so extra items in the
        longer list are ignored.

        :return: Tuple of (DataFrame with one row of metrics per answer pair,
                 dict of per-metric averages). With no answer pairs the
                 DataFrame is empty and every average is NaN.
        """
        metric_columns = [
            'Cosine Similarity', 'Rough Similarity', 'BLEU Score',
            'Precision', 'Recall', 'F1 Score'
        ]
        results = []

        for ground_truth, generated in zip(self.ground_truths, self.generated_answers):
            # Calculate metrics
            cosine_sim = self.cosine_similarity_score(ground_truth, generated)
            rough_sim = self.rough_similarity(ground_truth, generated)
            bleu = self.bleu_score(ground_truth, generated)
            precision, recall, f1 = self.precision_recall_f1(ground_truth, generated)

            results.append({
                'Cosine Similarity': cosine_sim,
                'Rough Similarity': rough_sim,
                'BLEU Score': bleu,
                'Precision': precision,
                'Recall': recall,
                'F1 Score': f1
            })

        # Convert results to DataFrame; naming the columns explicitly keeps the
        # frame well-formed (and the averages computable) when there are no rows
        metrics_df = pd.DataFrame(results, columns=metric_columns).astype(float)

        # Calculate overall metrics
        overall_metrics = {
            f'Average {column}': metrics_df[column].mean()
            for column in metric_columns
        }

        return metrics_df, overall_metrics

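# Ground-truth answers and chatbot-generated answers, extracted from the provided document and hard-coded here for evaluation.
# NOTE: several ground-truth strings contain mis-encoded characters (e.g. "â€™" / "Ã¢â‚¬â„¢" where an apostrophe was intended); these garbled tokens take part in the overlap metrics as-is.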
ground_truths = [
    "Mental illnesses are health conditions that disrupt a personÃ¢â‚¬â„¢s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a personÃ¢â‚¬â„¢s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condition due to the brainÃ¢â‚¬â„¢s biology.\nSimilarly to how one would treat diabetes with medication and insulin, mental illness is treatable with a combination of medication and social support. These treatments are highly effective, with 70-90 percent of individuals receiving treatment experiencing a reduction in symptoms and an improved quality of life. With the proper treatment, it is very possible for a person with mental illness to be independent and successful.",
    "It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background. Although mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children. Additionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health condition. Parents and caretakers should be aware of this fact, and take notice of changes in their childâ€™s mood, personality, personal habits, and social withdrawal. When these occur in children under 18, they are referred to as serious emotional disturbances (SEDs).",
    "Cyclothymic disorder is a subtype of bipolar disorder. Much like bipolar disorder, the symptoms of cyclothymia include three or more symptoms of hypomania, and five or more symptoms of depression. Like bipolar disorder, people may experience wellness between episodes of hypomania and depression.\n Symptoms of hypomania include: \n Symptoms of depression include: \n Talk to a doctor or mental health professional if you think you might have cyclothymic disorder.",
    "Stress and anxiety are often used interchangeably, and there is overlap between stress and anxiety. Stress is related to the same â€˜fight, flight, or freezeâ€™ response as anxiety, and the physical sensations of anxiety and stress may be very similar. \n The cause of stress and anxiety are usually different, however. Stress focuses on mainly external pressures on us that weâ€™re finding hard to cope with. When we are stressed, we usually know what weâ€™re stressed about, and the symptoms of stress typically disappear after the stressful situation is over. \n Anxiety, on the other hand, isnâ€™t always as easy to figure out. Anxiety focuses on worries or fears about things that could threaten us, as well as anxiety about the anxiety itself. Stress and anxiety are both part of being human, but both can be problems if they last for a long time or have an impact on our well-being or daily life.",
    "Sadness is a normal reaction to a loss, disappointment, problems, or other difficult situations. Feeling sad from time to time is just another part of being human. In these cases, feelings of sadness go away quickly and you can go about your daily life. \n Other ways to talk about sadness might be â€˜feeling low,â€™ â€˜feeling down,â€™ or â€˜feeling blue.â€™ A person may say they are feeling â€˜depressed,â€™ but if it goes away on its own and doesnâ€™t impact life in a big way, it probably isnâ€™t the illness of depression. \n Depression is a mental illness that affects your mood, the way you understand yourself, and the way you understand and relate to things around you. It can also go by different names, such as clinical depression, major depressive disorder, or major depression. Depression can come up for no reason, and it lasts for a long time. Itâ€™s much more than sadness or low mood. People who experience depression may feel worthless or hopeless. They may feel unreasonable guilty. Some people may experience depression as anger or irritability. It may be hard to concentrate or make decisions. Most people lose interest in things that they used to enjoy and may isolate themselves from others. There are also physical signs of depression, such as problems with sleep, appetite and energy and unexplainable aches or pains. Some may experience difficult thoughts about death or ending their life (suicide). Depression lasts longer than two weeks, doesnâ€™t usually go away on its own, and impacts your life. Itâ€™s a real illness, and it is very treatable. Itâ€™s important to seek help if youâ€™re concerned about depression."
]

generated_answers = [
    "A mental illness is a health condition that affects a person's thoughts, emotions, relationships, and daily functioning. It's a medical condition that can cause distress and make it difficult for someone to engage in their usual daily activities.\nMental illnesses can vary in severity, ranging from mild to severe. Some people may experience mild symptoms that only affect certain areas of their life, while others may have more severe symptoms that significantly impact their daily life.\nIt's important to know that mental illnesses are not a result of a person's character, intelligence, or willpower. They are medical conditions that can be caused by a combination of genetic, environmental, and psychological factors.\nTreatment for mental illnesses is available and effective. A combination of medication and social support can help reduce symptoms and improve a person's quality of life. In fact, studies have shown that 70-90% of people who receive treatment for mental illness experience a reduction",
    "Mental illness is a multifaceted condition that can arise from a combination of psychological, biological, and social factors. It's essential to understand that mental illness is not caused by personal weakness, lack of character, or poor upbringing. Rather, it's a treatable medical condition that can affect anyone, regardless of their background, age, or circumstances.\nResearch suggests that mental illness can be triggered by a variety of factors, including:\no\tGenetic predisposition: Some people may be more prone to mental illness due to their genetic makeup.\no\tBrain chemistry: Imbalances in neurotransmitters, such as serotonin and dopamine, can contribute to mental health conditions.\no\tLife events: Traumatic experiences, such as the loss of a loved one, can trigger mental health issues.\no\tEnvironmental factors: Social isolation, poverty, and lack of access",
    "Cyclothymic disorder is a type of mood disorder that is characterized by a pattern of mood swings, with periods of hypomania (mild mania) and depression. It's a subtype of bipolar disorder, and people with cyclothymic disorder may experience a mix of symptoms from both hypomania and depression.\nHypomania symptoms can include:\no\tFeeling extremely happy or energetic\no\tIncreased creativity or productivity\no\tReduced need for sleep\no\tIncreased talkativeness or racing thoughts\no\tImpulsivity or reckless behavior\nDepression symptoms can include:\no\tFeeling sad, empty, or hopeless\no\tLoss of interest in activities\no\tChanges in appetite or sleep patterns\no\tFatigue or loss of energy\no\tDifficulty concentrating or making decisions\nIt's important to note that people with cyclothymic disorder may not experience full-blown episodes of mania or depression, but rather",
    "Stress typically refers to the body's response to external pressures or demands that we're finding hard to cope with. When we're stressed, we usually know what we're stressed about, and the symptoms of stress typically disappear once the stressful situation is over.\nAnxiety, on the other hand, is a more complex and nuanced experience. It's often characterized by worries or fears about things that could threaten us, as well as anxiety about the anxiety itself. Anxiety can be triggered by internal thoughts and feelings, rather than just external circumstances.\nWhile both stress and anxiety are normal human experiences, they can become problems if they last for a long time or have a significant impact on our well-being or daily life.\nIt's important to note that anxiety is not just a feeling, but a physical response that can manifest in different ways, such as rapid heartbeat",
    "Sadness is a normal reaction to a difficult situation, loss, or disappointment. It's a common human emotion that can be triggered by various factors. Feeling sad from time to time is a natural part of life, and it usually goes away on its own without impacting daily life.\nOn the other hand, depression is a mental illness that affects a person's mood, thoughts, and behaviors. It's a serious condition that can last for a long time and impact daily life significantly. Depression can cause feelings of hopelessness, worthlessness, and guilt, as well as physical symptoms like changes in sleep, appetite, and energy levels.\nWhile sadness is a normal response to a specific situation, depression is a persistent and debilitating condition that requires professional help. If you're experiencing symptoms of depression, it's essential to seek help from a mental health professional.\nHere are some potential next steps:\no\tIf you're concerned about your mental health,"
]

# Build the evaluator from the hard-coded answer lists
evaluator = AnswerEvaluator(
    ground_truths=ground_truths,
    generated_answers=generated_answers,
)

# Per-answer metrics table plus the dictionary of averages
detailed_metrics, overall_metrics = evaluator.evaluate_answers()

# Report both
print("Detailed Metrics:\n", detailed_metrics)
print("\nOverall Metrics:\n", overall_metrics)

Detailed Metrics:
    Cosine Similarity  Rough Similarity  BLEU Score  Precision    Recall  \
0           0.667377          0.440000    0.187099   0.585106  0.440000   
1           0.611746          0.257812    0.066699   0.379310  0.257812   
2           0.697995          0.262500    0.089980   0.262500  0.525000   
3           0.793776          0.647059    0.425677   0.647059  0.702128   
4           0.665909          0.398693    0.110013   0.592233  0.398693   

   F1 Score  
0  0.502283  
1  0.306977  
2  0.350000  
3  0.673469  
4  0.476562  

Overall Metrics:
 {'Average Cosine Similarity': 0.6873605135029379, 'Average Rough Similarity': 0.4012128267973856, 'Average BLEU Score': 0.17589363717656237, 'Average Precision': 0.4932417122088918, 'Average Recall': 0.4647265940063969, 'Average F1 Score': 0.4618583473927959}


In [None]:
def create_gradio_interface(chatbot):
    """
    Create Gradio interface for the Healthcare RAG Chatbot

    :param chatbot: HealthcareRAGChatbot instance
    :return: Gradio interface
    """
    def chat_interface(message, history):
        """
        Gradio chat callback: answer ``message`` through the chatbot.

        ``history`` is accepted because the chat callback receives it, but it
        is intentionally not inspected - the previous code indexed each entry
        with ``h[0]`` only to build an unused list, which fails when entries
        are role/content dicts. The chatbot keeps its own conversation memory.
        """
        return chatbot.process_query(message)

    # Create Gradio interface
    interface = gr.ChatInterface(
        fn=chat_interface,
        title="MENTAL HEALTH ASSISTANT",
        description=(
            "Get reliable health information and answers to your medical questions.\n\n"
            "IMPORTANT: This information is for educational purposes only. "
            "It is not a substitute for professional medical advice, diagnosis, or treatment. "
            "Always consult a healthcare professional for personalized medical guidance."
        ),
        theme="soft"
    )

    return interface

def main(
    knowledge_base_path="/content/drive/MyDrive/bayes labs/Mental_Health_FAQ.csv",
    share=True,
):
    """
    Build the chatbot and launch its Gradio interface.

    :param knowledge_base_path: Path to the FAQ CSV; defaults to the Colab
                                Drive location used by this notebook
    :param share: Passed to ``interface.launch``; True requests a public link
    :raises RuntimeError: If the GROQ_API_KEY environment variable is unset
                          or empty
    """
    # Get Groq API Key from the environment. Fail fast with a clear message
    # instead of continuing with a placeholder key that would only surface
    # as an error on the first query.
    groq_api_key = os.getenv('GROQ_API_KEY')
    if not groq_api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set; export it before launching the chatbot."
        )

    # Initialize Chatbot
    chatbot = HealthcareRAGChatbot(knowledge_base_path, groq_api_key)

    # Create Gradio Interface
    interface = create_gradio_interface(chatbot)

    # Launch the interface
    interface.launch(share=share)

# Run the app when this code is executed directly (as a script or notebook
# cell, where __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://89228529b5397344a6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
