In [50]:
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Download necessary NLTK resources (no success messages)
import nltk
# Fetch each NLTK resource this script asks for; quiet=True keeps the
# downloader from printing its progress messages.
# NOTE(review): recent NLTK releases appear to look up 'punkt_tab' rather than
# 'punkt' for word_tokenize - confirm against the installed NLTK version.
for resource_name in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(resource_name, quiet=True)

# Load CSV data
def load_faq_data(file_path):
    """
    Load FAQ question/answer pairs from a CSV file.

    The CSV must contain 'Question' and 'Answer' columns. Rows in which either
    of those cells is empty are skipped, and the remaining values are
    converted to strings, so every returned entry is a usable text pair.

    Args:
        file_path: Location of the CSV file, passed straight to
            pandas.read_csv.

    Returns:
        A tuple (faq_questions, faq_answers) of two equal-length lists of
        str. If the file cannot be read or the required columns are missing,
        the error is printed and ([], []) is returned instead of raising.
    """
    required_columns = ["Question", "Answer"]
    try:
        df = pd.read_csv(file_path)
        if not set(required_columns).issubset(df.columns):
            raise ValueError("CSV file must contain 'Question' and 'Answer' columns.")

        # Empty cells are read as NaN (a float). Passing those on would break
        # the text processing done on questions and answers later, so keep
        # only rows where both cells are filled in.
        complete_rows = df[required_columns].dropna()

        # Purely numeric cells are parsed as numbers; normalise to text.
        faq_questions = complete_rows["Question"].astype(str).tolist()
        faq_answers = complete_rows["Answer"].astype(str).tolist()
        return faq_questions, faq_answers
    except Exception as e:
        # Best-effort loader: report the problem and let the caller decide
        # what to do with the empty result.
        print(f"❌ Error loading data: {e}")
        return [], []

# Preprocess sentences to extract keywords
def preprocess(sentence):
    """
    Turn a sentence into a list of lowercase, lemmatized keywords.

    The sentence is tokenized with word_tokenize; English stopwords and
    tokens made up solely of punctuation characters are discarded; the
    remaining tokens are lowercased and lemmatized.

    Args:
        sentence: The text to process.

    Returns:
        A list of keyword strings in their original order (duplicates kept).
    """
    # Build the lookup sets once per call instead of re-reading the stopword
    # list for every single token.
    stop_words = set(stopwords.words('english'))
    punctuation_chars = set(string.punctuation)
    lemmatizer = WordNetLemmatizer()

    keywords = []
    for token in word_tokenize(sentence):
        lowered = token.lower()
        if lowered in stop_words:
            continue
        # A substring test against string.punctuation would let
        # multi-character tokens such as "..." or "''" through, so check
        # every character of the token instead.
        if all(char in punctuation_chars for char in token):
            continue
        keywords.append(lemmatizer.lemmatize(lowered))
    return keywords

# Keyword-based matching
def find_best_match(query, faq_questions):
    """
    Locate the FAQ question sharing the most keywords with the query.

    Both the query and every FAQ question are reduced to keyword sets via
    preprocess, and the size of the set intersection is used as the score.
    When several questions tie, the earliest one wins.

    Returns:
        A tuple (index, score). If no question shares a keyword with the
        query, the result is (-1, 0).
    """
    wanted = set(preprocess(query))
    top_index, top_score = -1, 0

    for position, candidate in enumerate(faq_questions):
        shared = wanted.intersection(preprocess(candidate))
        overlap = len(shared)
        # Strictly greater: an equal score never displaces an earlier match.
        if overlap > top_score:
            top_index, top_score = position, overlap

    return top_index, top_score

# Generate a response
def _limit_sentences(text, max_sentences=3):
    """Return text cut down to at most max_sentences sentences."""
    # Split on whitespace that follows '.', '!' or '?', so each piece keeps
    # its own terminator and numbers such as "3.5" are left intact.
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    if len(sentences) <= max_sentences:
        # Nothing to trim: hand the answer back exactly as stored.
        return text
    # Drop the last kept sentence's period so the ellipsis reads "...".
    return ' '.join(sentences[:max_sentences]).rstrip('.') + '...'


def chatbot(question, faq_questions, faq_answers):
    """
    Answer a user question from the FAQ data.

    The best-matching FAQ entry is chosen with find_best_match. Its answer is
    shortened to at most three sentences (with a trailing '...' when text was
    cut), and up to three other FAQ questions are offered as suggestions.

    Args:
        question: The user's question.
        faq_questions: List of FAQ questions.
        faq_answers: List of answers, index-aligned with faq_questions.

    Returns:
        A tuple (response, related_questions). When no FAQ question shares a
        keyword with the user's question, the response is a fallback message
        and related_questions is an empty list.
    """
    best_match_idx, best_match_score = find_best_match(question, faq_questions)

    if best_match_score <= 0:  # No keyword in common with any FAQ question
        return "🤔 I'm sorry, I couldn't find a suitable answer. Could you please clarify?", []

    response = _limit_sentences(faq_answers[best_match_idx])

    # NOTE(review): suggestions are simply the first three other FAQ entries
    # in file order; they are not ranked by similarity to the question.
    related_questions = [
        candidate
        for idx, candidate in enumerate(faq_questions)
        if idx != best_match_idx
    ][:3]
    return response, related_questions

# Interactive testing using widgets
def widget_mode(faq_questions, faq_answers):
    """
    Run the chatbot interactively with ipywidgets.

    Prints a greeting, then displays an output area followed by a text box.
    Each time the user submits the text box, the question and the chatbot's
    reply (plus any suggested questions) are appended to the output area and
    the text box is cleared.

    Args:
        faq_questions: List of FAQ questions.
        faq_answers: List of answers, index-aligned with faq_questions.
    """
    print("👋 Hi, my name is Fikira! 🤖 How can I assist you today? 😊")

    def on_submit(sender):
        # The callback receives the submitting widget; the text is read from
        # input_box directly, so the argument is unused.
        user_question = input_box.value
        if not user_question.strip():
            # Ignore blank submissions instead of sending them to the matcher.
            return
        response, suggestions = chatbot(user_question, faq_questions, faq_answers)
        output_box.append_stdout(f"🟡 You: {user_question}\n")
        output_box.append_stdout(f"🔵 Fikira: {response}\n")
        if suggestions:
            output_box.append_stdout("💡 You might also ask:\n")
            for suggestion in suggestions:
                output_box.append_stdout(f"➖ {suggestion}\n")
        output_box.append_stdout("\n")
        input_box.value = ""

    # Only width is passed to Layout: the font_size/font_family keywords used
    # previously are not Layout properties and were discarded (the widget's
    # repr showed Layout(width='80%') only).
    input_box = widgets.Text(
        placeholder='Type your question here... 📝',
        layout=widgets.Layout(width='80%'),
    )
    output_box = widgets.Output()

    # NOTE(review): ipywidgets 8 marks Text.on_submit as deprecated in favour
    # of observing `value` with continuous_update=False - confirm the
    # installed version before migrating.
    input_box.on_submit(on_submit)

    # Styling the output to make it more readable with Tahoma font and size adjustments
    with output_box:
        display(widgets.HTML(value="<style>body {font-family: Tahoma, sans-serif; font-size: 16px; line-height: 1.6;}</style>"))

    # Show the conversation first so the input field sits below it
    display(output_box)
    display(input_box)

# Main Function
# Load the FAQ pairs and start the interactive session; if either list came
# back empty, report the failure instead of launching the widgets.
faq_questions, faq_answers = load_faq_data("Mental_Health_FAQ.csv")
if not faq_questions or not faq_answers:
    print("⚠️ Failed to load FAQ data.")
else:
    widget_mode(faq_questions, faq_answers)


👋 Hi, my name is Fikira! 🤖 How can I assist you today? 😊


Output()

Text(value='', layout=Layout(width='80%'), placeholder='Type your question here... 📝')