In [None]:
!pip install -U sentence-transformers --quiet

In [14]:
# Import Packages
import json
import os
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from google.colab import drive
from transformers import pipeline


# Mount Google Drive so files stored there are reachable under /content/drive
drive.mount("/content/drive")

# Location of the CSV on the mounted drive, then load it into a dataframe
path = "/content/drive/MyDrive/Colab Notebooks/Diseases_Symptoms.csv"
df = pd.read_csv(filepath_or_buffer=path, encoding="latin-1")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
# Initialize models
# Sentence encoder: used for the stored symptom texts here and for queries later on.
model = SentenceTransformer('all-mpnet-base-v2')
# Question-answering pipeline: called with a question and a context string.
qa_pipeline = pipeline('question-answering', model="deepset/roberta-base-squad2")

# Encode the symptoms as text embeddings
# A single batched encode() call over the whole column replaces one model call
# per row. list(...) splits the (n_rows, dim) result back into one vector per
# row, so every cell of 'Symptom_Embeddings' still holds a single embedding.
df['Symptom_Embeddings'] = list(model.encode(df['Symptoms'].tolist()))

def semantic_search_and_qa(query, question, top_k=5, context_column='Treatments'):
    """
    Rank the rows of ``df`` by similarity to ``query`` and answer ``question``
    from the best match.

    The query is embedded with ``model`` and compared (cosine similarity) with
    every vector stored in ``df['Symptom_Embeddings']``. The ``context_column``
    text of the highest-scoring row is then handed to ``qa_pipeline`` as context.

    Side effect: the scores are written to ``df['Similarity']`` on the
    module-level dataframe, replacing the scores of any previous call.

    Args:
        query: The search query (symptoms).
        question: The specific question to answer based on the context of the search results.
        top_k: The number of top results to return.
        context_column: The column to use as context for question-answering ('Symptoms' or 'Treatments').

    Returns:
        A dictionary with two keys:
            'top_results': dataframe with the 'Name', 'Symptoms', 'Treatments'
                and 'Similarity' columns of the ``top_k`` best rows, best first.
            'answer': the answer extracted from the top row's context, or
                "No relevant information found." when there is no result row
                or the top row has no usable context text.
    """
    fallback_answer = "No relevant information found."

    # Perform semantic search
    query_embedding = model.encode(query)
    if len(df) > 0:
        # Score every row with one cos_sim call: stack the per-row vectors into
        # an (n_rows, dim) matrix; row 0 of the result belongs to the query.
        embedding_matrix = np.stack(df['Symptom_Embeddings'].tolist())
        scores = util.cos_sim(query_embedding, embedding_matrix)[0].tolist()
    else:
        # np.stack rejects an empty sequence, so handle the empty frame directly.
        scores = []
    df['Similarity'] = scores
    results = df.sort_values(by='Similarity', ascending=False).head(top_k)

    # Extract context from the top result for question-answering
    answer = fallback_answer
    if not results.empty:
        context = results.iloc[0][context_column]
        # A missing cell (NaN, i.e. not a str) or a blank string cannot serve
        # as QA context; fall back instead of letting the pipeline raise.
        if isinstance(context, str) and context.strip():
            answer = qa_pipeline(question=question, context=context)['answer']

    return {
        'top_results': results[['Name', 'Symptoms', 'Treatments', 'Similarity']],
        'answer': answer
    }


Top Results:
                               Name                                Symptoms  \
214                 West Nile Virus       Fever, headache, body aches, rash   
241            Intracranial Abscess  Headache, fever, neurological deficits   
11   Headache after lumbar puncture                      throbbing headache   

                                            Treatments  Similarity  
214  Supportive care, symptom management, preventio...    0.712614  
241  Antibiotics, surgical drainage or removal of t...    0.703970  
11         Epidural blood patch, Conservative measures    0.660392  

Answer to the Question:
Supportive care, symptom management, prevention of mosquito bites


In [16]:
# Example usage: search with a free-text complaint and ask about treatment.
# The query text is kept exactly as it was run (including the "heacache" misspelling).
query = "I am vomiting and have a heacache"
question = "What are the treatments?"

result = semantic_search_and_qa(
    query=query,
    question=question,
    top_k=3,
    context_column='Treatments',
)

# Print results: heading, ranked matches, blank line, heading, extracted answer
print("Top Results:", result['top_results'], "\nAnswer to the Question:", result['answer'], sep="\n")

Top Results:
                              Name  \
192                  Hiatal Hernia   
394  Gastroenteritis (Stomach Flu)   
244    Gastrointestinal Hemorrhage   

                                              Symptoms  \
192       Heartburn, chest pain, difficulty swallowing   
394  Nausea, vomiting, diarrhea, abdominal pain or ...   
244  Abdominal pain, vomiting blood, bloody or blac...   

                                            Treatments  Similarity  
192  Lifestyle changes, medications (antacids, prot...    0.470631  
394  Rest, fluids (electrolyte solutions), bland di...    0.458809  
244  Blood transfusions, endoscopy, surgery (in sev...    0.424325  

Answer to the Question:
antacids, proton pump inhibitors
