# In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Heart disease FAQ knowledge base.
# Entries are written as (question, answer) pairs so that every question
# sits directly next to the answer it maps to.
_FAQ_ENTRIES = [
    (
        "What are the symptoms of heart disease?",
        "Common symptoms include chest pain, shortness of breath, and fatigue.",
    ),
    (
        "What causes heart disease?",
        "Heart disease can be caused by high blood pressure, smoking, diabetes, and lack of exercise.",
    ),
    (
        "How can heart disease be prevented?",
        "You can prevent heart disease by maintaining a healthy diet, exercising regularly, and avoiding smoking.",
    ),
    (
        "What are the treatment options for heart disease?",
        "Treatment options include medications, lifestyle changes, and in severe cases, surgery like angioplasty.",
    ),
    (
        "What are the risk factors for heart disease?",
        "Risk factors include high cholesterol, obesity, stress, and a family history of heart disease.",
    ),
]

# Two-column table: row i holds one question and its corresponding answer.
faq_data = pd.DataFrame(_FAQ_ENTRIES, columns=["Question", "Answer"])

# Learn the TF-IDF vocabulary and term weights from the stored questions,
# then encode those same questions as the matrix that user queries are
# compared against.
vectorizer = TfidfVectorizer().fit(faq_data["Question"])
faq_vectors = vectorizer.transform(faq_data["Question"])

# Reply used whenever no stored question is a confident match for the query.
_FALLBACK_ANSWER = (
    "I'm sorry, I don't have an answer for that. "
    "Please consult a medical professional."
)


def get_answer(user_query: str, threshold: float = 0.2) -> str:
    """
    Return the stored answer whose question best matches the user's query.

    The query is converted to a TF-IDF vector with the module-level
    ``vectorizer`` and compared against ``faq_vectors`` by cosine similarity.

    Parameters:
    user_query (str): The question asked by the user. ``None`` and
        empty/whitespace-only strings are answered with the fallback reply.
    threshold (float): Minimum similarity (exclusive) the best match must
        exceed for its answer to be returned. Defaults to 0.2.

    Returns:
    str: The best-matching answer from ``faq_data`` or the fallback response.
    """
    # Nothing to match against: answer immediately instead of vectorizing.
    # (None would otherwise fail inside the vectorizer.)
    if user_query is None or not user_query.strip():
        return _FALLBACK_ANSWER

    # Transform the user query into the same TF-IDF space as the FAQ questions
    query_vector = vectorizer.transform([user_query])

    # One similarity score per stored question
    similarities = cosine_similarity(query_vector, faq_vectors).flatten()

    # Position of the closest stored question
    best_match = int(np.argmax(similarities))

    # Only trust the match when it clears the confidence threshold
    if similarities[best_match] > threshold:
        return faq_data["Answer"].iloc[best_match]
    return _FALLBACK_ANSWER

# Start the chatbot loop for real-time interaction.
# The loop runs until the user types 'exit' (case-insensitive, surrounding
# whitespace ignored) or the input stream ends / is interrupted.
print("Heart Disease Chatbot is ready! Type 'exit' to stop.")
while True:
    try:
        # Prompt the user for a query; strip so that "exit " still exits
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: finish the prompt line and leave politely
        # instead of ending the session with a traceback.
        print()
        user_input = "exit"

    # Exit condition for the chatbot
    if user_input.lower() == "exit":
        print("Chatbot: Stay healthy! Goodbye!")
        break

    # Retrieve and display the most relevant answer
    response = get_answer(user_input)
    print(f"Chatbot: {response}")


# Recorded cell output (the user's typed input is not shown):
# Heart Disease Chatbot is ready! Type 'exit' to stop.
# Chatbot: Heart disease can be caused by high blood pressure, smoking, diabetes, and lack of exercise.
# Chatbot: Stay healthy! Goodbye!
