In [None]:
!pip install -U sentence-transformers --quiet

In [None]:
# Import Packages
import json
import os
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from google.colab import drive
from transformers import pipeline
import joblib


# Mount Google Drive so files stored there are reachable from this Colab session
drive.mount("/content/drive")

# Location of the diseases/symptoms CSV on the mounted drive
path = "/content/drive/MyDrive/Colab Notebooks/Diseases_Symptoms.csv"

# Read the CSV into the working dataframe, decoding it as latin-1
df = pd.read_csv(filepath_or_buffer=path, encoding="latin-1")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Initialize models
# Sentence encoder used to embed both the dataset's symptom text and the queries
model = SentenceTransformer('all-mpnet-base-v2')
# Summarization pipeline; no checkpoint is named, so the library default is used
summarizer = pipeline("summarization")

# Encode the symptoms as text embeddings.
# All symptom strings are passed to a single encode() call so the model can
# process them in batches instead of running one forward pass per row; the
# returned matrix is split back into one embedding vector per dataframe row.
df['Symptom_Embeddings'] = list(model.encode(df['Symptoms'].tolist()))

def semantic_search_and_summary(query, top_k=5, context_column='Treatments'):
    """
    Performs a combined semantic search and summarization process.

    The query is embedded with the global ``model`` and compared (cosine
    similarity) against every vector stored in ``df['Symptom_Embeddings']``.
    The ``top_k`` most similar rows are returned, and the ``context_column``
    text of the best match is passed to the global ``summarizer``.

    The global ``df`` is only read: similarity scores are attached to a copy,
    so calling this function does not add or overwrite a 'Similarity' column
    on the shared dataframe.

    Args:
        query: The search query (symptoms).
        top_k: The number of top results to return.
        context_column: The column to use as context for summarization ('Symptoms' or 'Treatments').

    Returns:
        A dictionary containing:
            'top_results': DataFrame with the columns 'Name', 'Symptoms',
                'Treatments' and 'Similarity' for the top_k rows, ordered by
                descending similarity.
            'summary': Summary of the top result's context, or
                "No relevant information found." when there are no results or
                the top result has no usable text in ``context_column``.
    """
    no_info_message = "No relevant information found."

    # Perform semantic search
    query_embedding = model.encode(query)
    embeddings = df['Symptom_Embeddings'].tolist()
    if embeddings:
        # A single call on the stacked embeddings returns a (1, n_rows)
        # similarity matrix; row 0 holds the score of every dataframe row.
        scores = util.cos_sim(query_embedding, np.vstack(embeddings))[0].tolist()
    else:
        # Nothing to compare against (empty dataframe)
        scores = []

    # assign() works on a copy, leaving the shared dataframe untouched
    results = (
        df.assign(Similarity=scores)
        .sort_values(by='Similarity', ascending=False)
        .head(top_k)
    )

    # Initialize outputs
    summary = no_info_message
    if not results.empty:
        context = results.iloc[0][context_column]  # Use the specified context column (e.g., Treatments)

        # Missing values (NaN) or blank text cannot be summarized; keep the fallback
        if isinstance(context, str) and context.strip():
            # Generate a summary of the context
            summary = summarizer(context, max_length=50, min_length=25, do_sample=False)[0]['summary_text']

    return {
        'top_results': results[['Name', 'Symptoms', 'Treatments', 'Similarity']],
        'summary': summary
    }



Top Results:
                               Name                                Symptoms  \
214                 West Nile Virus       Fever, headache, body aches, rash   
241            Intracranial Abscess  Headache, fever, neurological deficits   
11   Headache after lumbar puncture                      throbbing headache   

                                            Treatments  Similarity  
214  Supportive care, symptom management, preventio...    0.712614  
241  Antibiotics, surgical drainage or removal of t...    0.703970  
11         Epidural blood patch, Conservative measures    0.660392  

Answer to the Question:
Supportive care, symptom management, prevention of mosquito bites


In [None]:
# Example usage: search the dataset with a free-text symptom description
query = "headache and fever"
result = semantic_search_and_summary(query, top_k=3, context_column='Treatments')

# Print results: the ranked matches first, then the summary of the best match
print("Top Results:", result['top_results'], sep="\n")
print("\nSummary of the Top Result:", result['summary'], sep="\n")


Top Results:
                              Name  \
192                  Hiatal Hernia   
394  Gastroenteritis (Stomach Flu)   
244    Gastrointestinal Hemorrhage   

                                              Symptoms  \
192       Heartburn, chest pain, difficulty swallowing   
394  Nausea, vomiting, diarrhea, abdominal pain or ...   
244  Abdominal pain, vomiting blood, bloody or blac...   

                                            Treatments  Similarity  
192  Lifestyle changes, medications (antacids, prot...    0.470631  
394  Rest, fluids (electrolyte solutions), bland di...    0.458809  
244  Blood transfusions, endoscopy, surgery (in sev...    0.424325  

Answer to the Question:
antacids, proton pump inhibitors


In [None]:
# # Save models using joblib
# def save_models(model, summarizer, path="models"):
#     """
#     Save the SentenceTransformer model and summarizer pipeline using joblib.

#     Args:
#         model: The SentenceTransformer model.
#         summarizer: The summarizer pipeline.
#         path: The directory path to save the models.
#     """
#     os.makedirs(path, exist_ok=True)
#     # Save the SentenceTransformer model
#     model_save_path = os.path.join(path, "sentence_transformer_model.joblib")
#     joblib.dump(model, model_save_path)
#     print(f"SentenceTransformer model saved to {model_save_path}")

#     # Save the summarizer pipeline
#     summarizer_save_path = os.path.join(path, "summarizer_pipeline.joblib")
#     joblib.dump(summarizer, summarizer_save_path)
#     print(f"Summarizer pipeline saved to {summarizer_save_path}")


In [None]:
# # Import required packages
# import os
# import joblib
# import pandas as pd
# import streamlit as st
# from sentence_transformers import util

# # Load the saved models using joblib
# @st.cache_resource  # Cache the models to avoid reloading every time
# def load_models():
#     # Load the SentenceTransformer model
#     model = joblib.load("models/sentence_transformer_model.joblib")
#     # Load the summarizer pipeline
#     summarizer = joblib.load("models/summarizer_pipeline.joblib")
#     return model, summarizer

# model, summarizer = load_models()

# # Load the dataset
# @st.cache_data  # Cache the data to avoid reloading every time
# def load_data(path="Diseases_Symptoms.csv"):
#     df = pd.read_csv(path, encoding="latin-1")
#     # Ensure symptom embeddings exist in the dataset
#     if 'Symptom_Embeddings' not in df.columns:
#         df['Symptom_Embeddings'] = df['Symptoms'].apply(lambda x: model.encode(x))
#     return df

# df = load_data()

# # Define the search and summarization function
# def semantic_search_and_summary(query, top_k=5, context_column='Treatments'):
#     """
#     Performs a combined semantic search and summarization process.

#     Args:
#         query: The search query (symptoms).
#         top_k: The number of top results to return.
#         context_column: The column to use as context for summarization ('Symptoms' or 'Treatments').

#     Returns:
#         A dictionary containing the top_k results and a summary of the top result.
#     """
#     # Perform semantic search
#     query_embedding = model.encode(query)
#     similarities = df['Symptom_Embeddings'].apply(lambda x: util.cos_sim(query_embedding, x).item())
#     df['Similarity'] = similarities
#     results = df.sort_values(by='Similarity', ascending=False).head(top_k)

#     # Initialize outputs
#     if not results.empty:
#         context = results.iloc[0][context_column]  # Use the specified context column (e.g., Treatments)

#         # Generate a summary of the context
#         summary = summarizer(context, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
#     else:
#         context = ""
#         summary = "No relevant information found."

#     return {
#         'top_results': results[['Name', 'Symptoms', 'Treatments', 'Similarity']],
#         'summary': summary
#     }

# # Streamlit App UI
# st.title("Disease Search Engine")
# st.markdown("This search engine gives suggestions of diseases based on your symptoms. At the bottom it will give a brief summary for treatments.")

# # User inputs
# query = st.text_input("Enter your symptoms (e.g., 'I have a headache and fever'):", "")
# top_k = st.slider("Select the number of top results to return:", 1, 10, 3)
# context_column = st.selectbox(
#     "Summarization subject:",
#     options=['Treatments'],
#     index=0
# )

# if st.button("Search"):
#     if query:
#         # Perform semantic search and summarization
#         result = semantic_search_and_summary(query, top_k=top_k, context_column=context_column)

#         # Display top results
#         st.subheader("Top Results:")
#         st.write(result['top_results'])

#         # Display summary of the top result
#         st.subheader("Summary of the Top Result:")
#         st.write(result['summary'])
#     else:
#         st.warning("Please enter a query to perform the search.")
