In [13]:
!pip install sentence-transformers faiss-cpu gradio

Collecting gradio
  Downloading gradio-4.40.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.2.0 (from gradio)
  Downloading gradio_client-1.2.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradi

In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so files under
# /content/drive can be read by the cells below.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [5]:
import pandas as pd

# Location of the Mayo Clinic disease/symptom spreadsheet on the mounted Drive
file_path = '/content/drive/MyDrive/MayoClinic_Diseases.xlsx'

# Read the workbook into a DataFrame
df = pd.read_excel(io=file_path)

# Preview the first five rows (rendered as the cell's output)
df.head(n=5)


Unnamed: 0,Disease Name,Symptoms,Specialist Doctor
0,Atrial fibrillation,You might feel like your heart is racing or po...,cardiologist
1,Atrial fibrillation,If your heart feels like it's beating really f...,cardiologist
2,Atrial fibrillation,You could experience a fast or pounding heartb...,cardiologist
3,Atrial fibrillation,Feeling like your heart is racing or thumping ...,cardiologist
4,Atrial fibrillation,If your heart's beating fast or feels like it'...,cardiologist


In [6]:
# Preprocessing the data
# Build the cleaned frame as a new object rather than mutating `df` in place.
df = (
    # Discard rows that lack either the symptom text or the specialist label.
    df.dropna(subset=['Symptoms', 'Specialist Doctor'])
      # Renumber rows 0..n-1 so index labels equal row positions after the drop.
      .reset_index(drop=True)
      # Lower-case the symptom text.
      .assign(Symptoms=lambda frame: frame['Symptoms'].str.lower())
)
df.head()


Unnamed: 0,Disease Name,Symptoms,Specialist Doctor
0,Atrial fibrillation,you might feel like your heart is racing or po...,cardiologist
1,Atrial fibrillation,if your heart feels like it's beating really f...,cardiologist
2,Atrial fibrillation,you could experience a fast or pounding heartb...,cardiologist
3,Atrial fibrillation,feeling like your heart is racing or thumping ...,cardiologist
4,Atrial fibrillation,if your heart's beating fast or feels like it'...,cardiologist


In [9]:
from sentence_transformers import SentenceTransformer

# Load the pre-trained sentence-embedding model
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

# Embed every symptom description in the dataframe (one input text per row),
# showing a progress bar while the batches are encoded
symptom_embeddings = model.encode(
    sentences=df['Symptoms'].to_list(),
    show_progress_bar=True,
)


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

In [16]:
import faiss

# Create a FAISS index
# Flat L2 index whose vector width is taken from the embedding matrix.
dimension = symptom_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(symptom_embeddings)

# Search results are used as row positions into `df`, so the two must describe
# the same rows. Fail fast if the embeddings were computed from a different
# version of the frame (e.g. cells were executed out of order).
assert index.ntotal == len(df), (
    f"FAISS index holds {index.ntotal} vectors but df has {len(df)} rows; "
    "re-run the embedding cell"
)

# Function to recommend specialists based on input symptoms
def recommend_specialist(symptoms, k=5):
    """Recommend specialist doctors for a free-text symptom description.

    The text is embedded with the module-level ``model`` and searched against
    the module-level FAISS ``index``; the ``'Specialist Doctor'`` values of
    the matching ``df`` rows are returned.

    Args:
        symptoms: Free-text description of the symptoms. ``None`` or a
            blank/whitespace-only string is treated as "no input".
        k: Maximum number of nearest dataset rows to consult (default 5).

    Returns:
        list: Distinct specialist names in the order the matching rows were
        returned by the search. Empty when there is no input text or nothing
        to search.
    """
    # Normalise the query the same way the dataset column was (lower-cased).
    query = (symptoms or "").strip().lower()
    if not query:
        # A blank query would still embed to *something* and yield arbitrary
        # recommendations, so answer with "nothing" instead.
        return []

    # Never ask for more neighbours than the index actually holds.
    top_k = min(int(k), index.ntotal)
    if top_k <= 0:
        return []

    input_embedding = model.encode([query])
    _, indices = index.search(input_embedding, k=top_k)

    # FAISS fills missing result slots with -1; iloc would silently interpret
    # that as "the last row", so drop those entries before the lookup.
    row_positions = [int(pos) for pos in indices[0] if pos >= 0]

    specialists = df.iloc[row_positions]['Specialist Doctor'].unique()
    return specialists.tolist()

In [17]:
import gradio as gr

# Define the Gradio interface
# A two-line free-text box collects the symptoms; a plain text box shows the result.
inputs = gr.Textbox(placeholder="Enter symptoms here...", lines=2)
outputs = gr.Textbox()

# Wire recommend_specialist to the two widgets and start the app.
gr.Interface(
    title="Specialist Recommendation System using RAG",
    description="Enter your symptoms to get a specialist recommendation based on the Mayo Clinic dataset using the RAG technique.",
    fn=recommend_specialist,
    inputs=inputs,
    outputs=outputs,
).launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f5550f654cb198412d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


