In [1]:
pip install transformers sentence-transformers gradio faiss-cpu pandas numpy

Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp312-cp312-win_amd64.whl.metadata (4.5 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.3-cp312-none-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)


In [3]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import gradio as gr

In [4]:
import pandas as pd

# Load the dataset
file_path = "train.csv"  # Replace with the actual dataset file path
data = pd.read_csv(file_path)

# Display the first few rows
print(data.head())

             qtype                                           Question  \
0   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   
1         symptoms  What are the symptoms of Lymphocytic Choriomen...   
2   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   
3  exams and tests  How to diagnose Lymphocytic Choriomeningitis (...   
4        treatment  What are the treatments for Lymphocytic Chorio...   

                                              Answer  
0  LCMV infections can occur after exposure to fr...  
1  LCMV is most commonly recognized as causing ne...  
2  Individuals of all ages who come into contact ...  
3  During the first phase of the disease, the mos...  
4  Aseptic meningitis, encephalitis, or meningoen...  


In [5]:
# Preprocessing
data = data[['qtype', 'Question', 'Answer']].dropna()
data.columns = ['qtype', 'question', 'answer']  # Rename for simplicity

# Convert to lists
qtypes = data['qtype'].tolist()
questions = data['question'].tolist()
answers = data['answer'].tolist()

print(f"Dataset contains {len(questions)} questions across {data['qtype'].nunique()} categories.")

Dataset contains 16407 questions across 16 categories.


In [6]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')  # Optimized for semantic search

In [7]:
# Generate embeddings for all questions
import numpy as np

question_embeddings = model.encode(questions, show_progress_bar=True)

# Save embeddings for future use
np.save("question_embeddings.npy", question_embeddings)
print("Embeddings created and saved.")

Batches:   0%|          | 0/513 [00:00<?, ?it/s]

Embeddings created and saved.


In [8]:
import faiss

# Initialize FAISS index
dimension = question_embeddings.shape[1]  # Embedding vector dimension
index = faiss.IndexFlatL2(dimension)

# Add embeddings to the FAISS index
index.add(np.array(question_embeddings))
print(f"FAISS index contains {index.ntotal} items.")

FAISS index contains 16407 items.


In [9]:
def search_faq(query, top_k=3):
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
        results.append({
            'qtype': qtypes[idx],
            'question': questions[idx],
            'answer': answers[idx]
        })
    return results


In [10]:
query = "What are the symptoms of flu?"
results = search_faq(query)

for result in results:
    print(f"Category: {result['qtype']}")
    print(f"Question: {result['question']}")
    print(f"Answer: {result['answer']}\n")

Category: symptoms
Question: What are the symptoms of Q Fever ?
Answer: Q fever can cause acute or chronic illness in humans, who usually acquire infection after contact with infected animals or exposure to contaminated environments. The acute symptoms caused by infection with Coxiella burnetii usually develop within 2-3 weeks of exposure, although as many as half of humans infected withC. burnetii do not show symptoms. 
 
The following is a list of symptoms commonly seen with acute Q fever. However, it is important to note that the combination of symptoms varies greatly from person to person. 
  
  - high fevers (up to 104-105°F)  
  - severe headache  
  - general malaise  
  - myalgia  
  - chills and/or sweats  
  - non-productive cough  
  - nausea  
  - vomiting  
  - diarrhea  
  - abdominal pain  
  - chest pain  
  
 
Although most persons with acute Q fever infection recover, others may experience serious illness with complications that may include pneumonia, granulomatous he

In [11]:
def chatbot(query):
    results = search_faq(query)
    if results:
        top_result = results[0]
        response = (
            f"**Category:** {top_result['qtype']}\n"
            f"**Question:** {top_result['question']}\n"
            f"**Answer:** {top_result['answer']}"
        )
        return response
    else:
        return "Sorry, I couldn't find an answer to your question."


In [12]:
import gradio as gr

interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Enter your medical question..."),
    outputs="text",
    title="Healthcare Chatbot",
    description="Ask any medical question, and get instant answers with category-specific context!"
)


In [13]:
interface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [14]:
def chatbot_with_category(query, category=None): // DOnt use yet not working
    results = search_faq(query)
    if category:
        # Filter results by category
        results = [res for res in results if res['qtype'].lower() == category.lower()]

    if results:
        top_result = results[0]
        response = (
            f"**Category:** {top_result['qtype']}\n"
            f"**Question:** {top_result['question']}\n"
            f"**Answer:** {top_result['answer']}"
        )
        return response
    else:
        return f"Sorry, no results found in the '{category}' category."


SyntaxError: invalid syntax (2049942660.py, line 1)

In [None]:
interface = gr.Interface( // DOnt use yet not working
    fn=lambda query, category: chatbot_with_category(query, category),
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your medical question..."),
        gr.Dropdown(choices=['symptoms', 'treatment', 'prevention', 'other'], label="Category (optional)")
    ],
    outputs="text",
    title="Healthcare Chatbot",
    description="Ask any medical question, and filter by category if desired!"
)


In [None]:
interface.launch()