In [2]:
!pip install streamlit

Collecting streamlit
  Using cached streamlit-1.40.2-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.40.2-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m51.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m59.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m


In [4]:
!pip install scikit-learn



In [5]:
!pip install transformers



In [8]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [14]:
%%writefile app.py
import streamlit as st
from datasets import load_dataset
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load MedQuAD dataset from Hugging Face
@st.cache_data
def load_medquad_data():
    dataset = load_dataset("keivalya/MedQuad-MedicalQnADataset", split="train")
    questions = dataset["Question"]
    answers = dataset["Answer"]
    return questions, answers

questions, answers = load_medquad_data()

# TF-IDF Vectorization for questions
@st.cache_resource
def initialize_vectorizer():
    vectorizer = TfidfVectorizer().fit(questions)
    question_vectors = vectorizer.transform(questions)
    return vectorizer, question_vectors

vectorizer, question_vectors = initialize_vectorizer()

# Retrieval function
def retrieve_answer(user_question):
    user_vector = vectorizer.transform([user_question])
    similarities = cosine_similarity(user_vector, question_vectors).flatten()
    best_match_idx = np.argmax(similarities)
    return answers[best_match_idx], questions[best_match_idx]

# Load Hugging Face's NER pipeline with a model trained for medical entity recognition
@st.cache_resource
def load_ner_pipeline():
    # Use a medical NER model from Hugging Face (like `dmis-lab/biobert-v1.1` or `cogstack/biomedical-ner`)
    ner_pipeline = pipeline("ner", model="dmis-lab/biobert-v1.1", tokenizer="dmis-lab/biobert-v1.1")
    return ner_pipeline

ner_pipeline = load_ner_pipeline()

# Medical Entity Recognition using Hugging Face
def recognize_entities(text):
    entities = ner_pipeline(text)
    medical_entities = [{"text": entity["word"], "label": entity["entity"]} for entity in entities]
    return medical_entities

# Streamlit Interface
st.title("Medical Q&A Chatbot")
st.sidebar.title("About")
st.sidebar.info(
    "This chatbot uses the MedQuAD dataset from Hugging Face to provide answers to medical questions. "
    "It also recognizes medical entities in the input text using a transformer-based NER model."
)

# User input
user_question = st.text_input("Ask a medical question:")

if user_question:
    # Retrieve the best answer
    answer, matched_question = retrieve_answer(user_question)

    # Display results
    st.subheader("Answer:")
    st.write(answer)

    st.subheader("Matched Question:")
    st.write(matched_question)

    # Recognize entities
    st.subheader("Recognized Medical Entities:")
    entities = recognize_entities(user_question)
    if entities:
        for entity in entities:
            st.write(f"{entity['text']} - {entity['label']}")
    else:
        st.write("No entities recognized.")


Writing app.py


In [11]:
!pip install pyngrok

Collecting pyngrok
  Using cached pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


In [13]:
!ngrok config add-authtoken 2pQvixZCRRnA2pe5Ow9Xomj1GCl_7rgJxvHhn7jqt1kyaQYm6

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [16]:
from pyngrok import ngrok
import os
os.system('streamlit run app.py &')
public_url = ngrok.connect(8501)
print(f"Streamlit app is live at: {public_url}")

Streamlit app is live at: NgrokTunnel: "https://8dd2-35-192-107-5.ngrok-free.app" -> "http://localhost:8501"
