# Installing libraries

In [None]:
!pip install scispacy



In [None]:
pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz


Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz (33.1 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
# Sanity check: list the spaCy model packages that this kernel can see.
import spacy
spacy.util.get_installed_models()


['en_core_web_sm']

In [None]:
!pip install fuzzywuzzy



# Chatbot Code

In [None]:
#Importing Libraries

import pandas as pd
import re
import scispacy
import spacy
import csv
from nltk.corpus import wordnet
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import process


In [None]:
# Download the Symptom2Disease dataset with kagglehub

import kagglehub

# Fetch the latest version; `path` is the local directory the files were saved to
path = kagglehub.dataset_download("niyarrbarman/symptom2disease")

print("Path to Dataset files:", path)

# Precautions table, read from a fixed absolute path (not from `path`); malformed rows are skipped.
# NOTE(review): nothing shown in this notebook creates /disease_Precautions.csv — confirm where it comes from.
disease_data = pd.read_csv("/disease_Precautions.csv", on_bad_lines='skip')

Path to Dataset files: /root/.cache/kagglehub/datasets/niyarrbarman/symptom2disease/versions/1


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no cell shown in this notebook reads from /content/drive — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def load_spacy_model():
    """Load the first spaCy pipeline that is available, preferring the scientific models.

    Candidates are tried in order: ``en_core_sci_sm``, ``en_core_sci_md``,
    ``en_core_web_sm``. When a candidate is not installed, one download attempt
    is made with the interpreter running this kernel and, if that succeeds, the
    load is retried before moving on to the next candidate.

    Returns:
        The loaded spaCy pipeline.

    Raises:
        OSError: If none of the candidate models could be loaded.
    """
    # Imported locally because the notebook's import cell does not provide them.
    import importlib
    import subprocess
    import sys

    models = ["en_core_sci_sm", "en_core_sci_md", "en_core_web_sm"]
    for model in models:
        try:
            return spacy.load(model)
        except OSError:
            print(f"{model} not found. Trying next model...")

        # sys.executable guarantees the download targets the same environment
        # that `spacy` was imported from (a bare "python" may be a different one).
        download = subprocess.run(
            [sys.executable, "-m", "spacy", "download", model], check=False
        )
        if download.returncode != 0:
            continue

        # Make the freshly installed package visible to the import system,
        # then retry the load instead of silently skipping to the next model.
        importlib.invalidate_caches()
        try:
            return spacy.load(model)
        except OSError:
            continue
    raise OSError("No valid spaCy model found. Please install a medical NLP model manually.")

# Load the pipeline once at module level; extract_entities() reads this `nlp` global.
nlp = load_spacy_model()


In [None]:
# Load datasets
# Reuse the directory returned by kagglehub.dataset_download() (`path`, set in the
# download cell) rather than a hard-coded ".../versions/1" cache path, which stops
# being valid as soon as a newer dataset version is downloaded or the cache moves.
dataset_path = path
csv_file = f"{dataset_path}/Symptom2Disease.csv"

df = pd.read_csv(csv_file)

# Preprocessing function
def preprocess(text):
    """Normalise free text before it is vectorised.

    Lower-cases the text, removes punctuation, then collapses every run of
    whitespace into a single space and trims both ends.

    Args:
        text: Raw text. ``None`` is treated as an empty string.

    Returns:
        The normalised string.
    """
    if text is None:
        return ""
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    # Collapse whitespace *after* punctuation is gone: deleting a free-standing
    # symbol ("fever , cough !") would otherwise leave double or trailing spaces.
    return re.sub(r'\s+', ' ', text).strip()

# Fail fast when the expected text column is missing
if 'text' not in df.columns:
    raise ValueError("Error: Column 'text' not found in dataset.")

df['processed_text'] = df['text'].apply(preprocess)

# Fit the TF-IDF model on the cleaned corpus; the matrix is computed once and reused
vectorizer = TfidfVectorizer()
corpus_vec = vectorizer.fit_transform(df['processed_text'])


In [None]:
# Function to extract synonyms
def get_synonyms(word):
    """Return the distinct, lower-cased WordNet lemma names of every synset of ``word``.

    The order of the returned list is not defined (it is built from a set).
    """
    lemma_names = {
        lemma.name().lower()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }
    return list(lemma_names)

# Function to extract named entities
def extract_entities(user_input, score_cutoff=80, min_token_len=4):
    """Extract symptom mentions and other named entities from a user message.

    Two sources are combined:

    1. Entities produced by the module-level ``nlp`` pipeline. Entities labelled
       ``SYMPTOM`` or ``MEDICAL_CONDITION`` are stored (lower-cased) as symptoms,
       everything else as ``other_info``.
       NOTE(review): which labels the loaded model emits is not visible here —
       confirm that these two labels exist in its label set.
    2. Fuzzy matching of each whitespace-separated token against a fixed list of
       symptom keywords.

    Args:
        user_input: Raw text typed by the user.
        score_cutoff: A fuzzy match is accepted only when its score is strictly
            greater than this value.
        min_token_len: Tokens shorter than this (after removing punctuation) are
            not fuzzy-matched. Fuzzy scoring can rate very short words such as
            "a" or "in" highly against keywords that merely contain them, and no
            keyword in the list is shorter than four characters.

    Returns:
        dict with keys ``"symptoms"`` (de-duplicated, first-seen order) and
        ``"other_info"``.
    """
    doc = nlp(user_input)
    entities = {"symptoms": [], "other_info": []}

    symptom_keywords = ["fever", "cough", "cold", "headache", "nausea", "pain", "rash", "fatigue",
                        "vomiting", "chills", "sore throat", "diarrhea", "dizziness", "shortness of breath"]

    for ent in doc.ents:
        if ent.label_ in ["SYMPTOM", "MEDICAL_CONDITION"]:
            entities["symptoms"].append(ent.text.lower())
        else:
            entities["other_info"].append(ent.text)

    # Use fuzzy matching to detect symptoms
    for word in user_input.split():
        token = re.sub(r'[^\w]', '', word.lower())
        if len(token) < min_token_len:
            continue  # too short to be a meaningful symptom word
        best = process.extractOne(token, symptom_keywords)
        if best is None:
            continue
        match, score = best[0], best[1]
        if score > score_cutoff:
            entities["symptoms"].append(match)

    # The same symptom can be reported by the NER pass and by several tokens;
    # keep only the first occurrence of each.
    entities["symptoms"] = list(dict.fromkeys(entities["symptoms"]))
    return entities
# Function to calculate similarity
def get_similarity(user_input):
    """Return the cosine similarity of the query against every document of the fitted corpus.

    The text is cleaned with ``preprocess`` and projected with the already
    fitted ``vectorizer``; the first (only) row of the similarity matrix
    against ``corpus_vec`` is returned.
    """
    query_matrix = vectorizer.transform([preprocess(user_input)])
    scores = cosine_similarity(query_matrix, corpus_vec)
    return scores[0]

# Function to suggest diagnosis
def suggest_diagnosis(user_input, threshold=0.2):
    """Rank the diseases whose example texts resemble the user's description.

    Args:
        user_input: Free-text symptom description.
        threshold: Only similarities strictly above this value are reported.

    Returns:
        A list of ``(disease, percent)`` tuples ordered from best to worst match,
        one entry per disease using its highest similarity, or a fallback
        message string when nothing exceeds the threshold.
    """
    similarities = get_similarity(user_input)
    ranked = sorted(
        zip(df['label'], similarities),
        key=lambda pair: pair[1],
        reverse=True,
    )

    best_by_disease = {}
    for label, similarity in ranked:
        # Skip repeats (the first hit is the best one) and weak matches
        if label in best_by_disease or not similarity > threshold:
            continue
        best_by_disease[label] = round(similarity * 100)  # whole percent

    if not best_by_disease:
        return "No specific diagnosis found. Please provide more details."
    return list(best_by_disease.items())

# Function to collect symptoms
def collect_symptoms():
    """Prompt the user until a non-greeting message is entered and return its symptoms.

    Greetings are matched as whole words or phrases, so symptom words that
    merely contain a greeting ("chills" contains "hi", "they" contains "hey")
    are no longer mistaken for one. Re-prompting is done in a loop rather than
    by recursion, so repeated greetings cannot exhaust the call stack.

    Returns:
        The symptoms found by ``extract_entities`` joined by single spaces
        (an empty string when none were found).
    """
    greetings = ["hello", "hi", "hey", "good morning", "good evening"]
    greeting_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(greet) for greet in greetings) + r")\b"
    )

    while True:
        user_input = input("You: ")
        if greeting_pattern.search(user_input.lower()):
            print("Bot: Hello! I am DocBot, your virtual health assistant. I can help you identify possible conditions based on your symptoms and provide general precautions. How can I assist you today?")
            continue

        entities = extract_entities(user_input)
        return " ".join(entities["symptoms"])

# Function to ask for more details
def ask_for_more_info():
    """Ask the follow-up questions in a fixed order and return the raw answers.

    Returns:
        dict mapping each field name (``temperature``, ``appetite``, ``age``,
        ``gender``, ``duration``) to the unmodified string typed by the user.
    """
    questions = (
        ('temperature', "What is your temperature? (in Celsius): "),
        ('appetite', "How is your appetite? (good/poor): "),
        ('age', "What is your age? "),
        ('gender', "What is your gender? (male/female/other): "),
        ('duration', "How long have you had these symptoms? (days/weeks): "),
    )
    return {field: input(prompt) for field, prompt in questions}

# Function to fetch precautions
general_advice = ("Most mild infections, such as viral fevers and colds, improve with rest, hydration and a balanced diet. If symptoms worsen or persist beyond 5 days, consult a doctor.")

def get_precautions(diseases):
    """Look up the precautions for one or more diseases.

    Args:
        diseases: An iterable of disease names, or a single name as a string.
            A bare string is wrapped in a list; iterating it directly would
            look up each character separately. ``None`` is treated as no names.

    Returns:
        A list of strings: one ``"For <disease>: <precautions>"`` entry per
        disease found in ``disease_data`` (matched case-insensitively, ignoring
        surrounding whitespace), plus at most one ``"General advice: ..."``
        entry if any requested disease is unknown.

    Side effects:
        (Re)creates the ``Condition_normalized`` column on ``disease_data``.
    """
    if diseases is None:
        diseases = []
    elif isinstance(diseases, str):
        diseases = [diseases]

    precautions = []
    disease_data['Condition_normalized'] = disease_data['Condition'].str.lower().str.strip()
    general_advice_given = False

    for disease in diseases:
        disease_info = disease_data[disease_data['Condition_normalized'] == disease.lower().strip()]
        if not disease_info.empty:
            precautions.append(f"For {disease}: {disease_info['Precautions'].values[0]}")
        elif not general_advice_given:
            precautions.append(f"General advice: {general_advice}")
            general_advice_given = True  # Ensure it's only shown once

    return precautions

# Function to detect if user asks for precautions in a greeting
def detect_precaution_request(user_input):
    """Return True when the message contains any precaution-related keyword.

    Matching is a case-insensitive substring test, so "precautions" and
    "prevention" also count.
    """
    lowered = user_input.lower()
    keywords = ("precaution", "safety", "prevent", "avoid", "protection")
    return any(keyword in lowered for keyword in keywords)

# Function to extract disease name from input
def extract_disease(user_input):
    """Find a known disease name mentioned in the user's message.

    The known names are taken from ``disease_data['Condition']`` (lower-cased
    and stripped here, so the function does not depend on a helper column that
    another function may or may not have created yet). Names are matched as
    whole words or phrases after punctuation is removed, which also covers
    multi-word names and inputs such as "malaria?". When several names occur,
    the longest one wins.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The matching disease name in lower case, or ``None`` if none is mentioned.
    """
    def _normalize(text):
        # lower-case, turn punctuation into spaces, collapse whitespace
        return " ".join(re.sub(r'[^\w\s]', ' ', str(text).lower()).split())

    # Pad with spaces so that " name " only matches on word boundaries.
    haystack = f" {_normalize(user_input)} "
    known_diseases = (
        disease_data['Condition'].dropna().astype(str).str.lower().str.strip().tolist()
    )

    # Longest names first, so a specific name is preferred over a shorter one it contains.
    for name in sorted(set(known_diseases), key=lambda n: (-len(n), n)):
        needle = _normalize(name)
        if needle and f" {needle} " in haystack:
            return name
    return None


In [None]:
# Main chatbot loop
# Make sure the normalised lookup column exists before any helper reads it.
disease_data['Condition_normalized'] = disease_data['Condition'].str.lower().str.strip()

print("\nHello! I am DocBot, your virtual health assistant. I can provide health precautions or help identify conditions based on your symptoms.")
while True:
    print("\nHow can I assist you today?")
    user_input = input().strip().lower()

    if detect_precaution_request(user_input):
        disease_name = extract_disease(user_input)
        if not disease_name:
            disease_name = input("Enter the name of the disease you want precautions for: ").strip()
        # get_precautions() takes a list of names and returns a list of lines:
        # pass a one-element list and join the result instead of adding a list to a str.
        print("\n" + "\n".join(get_precautions([disease_name])))

    elif "symptom" in user_input:
        user_symptoms = input("Describe your symptoms (e.g., fever, headache, cough): ")
        diagnosis_response = suggest_diagnosis(user_symptoms)

        # suggest_diagnosis() returns a message string when nothing matched
        if isinstance(diagnosis_response, str):
            print(f"Bot: {diagnosis_response}")
        else:
            print("Bot: Possible conditions based on your symptoms:")
            for disease, confidence in diagnosis_response:
                print(f"- {disease} ({confidence}% confidence)")

    else:
        print("Invalid input. You can ask for precautions or symptom diagnosis.")

    # Tolerate stray whitespace and the short form "y"
    exit_input = input("Do you need anything else? (yes/no): ").strip().lower()
    if exit_input not in ('yes', 'y'):
        print("Bot: Take care! If symptoms persist, consider visiting a healthcare professional. Goodbye!")
        break


In [None]:
# Print the address returned by ipecho.net for this runtime.
# NOTE(review): the saved output below contains that address — consider clearing it before sharing.
!curl ipecho.net/plain

35.185.162.59

In [None]:
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from pyngrok import ngrok
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import kagglehub

import os

# Initialize Flask app
app = Flask(__name__)
CORS(app)

# Download dataset from KaggleHub
dataset_path = kagglehub.dataset_download("niyarrbarman/symptom2disease")
print("Dataset downloaded at:", dataset_path)

# Load datasets
df = pd.read_csv(f"{dataset_path}/Symptom2Disease.csv")
# Skip malformed rows, the same way this file is read earlier in the notebook
disease_data = pd.read_csv("/disease_Precautions.csv", on_bad_lines='skip')

# Read the ngrok token from the environment so that it is never saved in the notebook.
ngrok_token = os.environ.get("NGROK_AUTH_TOKEN", "").strip()
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)
else:
    print("NGROK_AUTH_TOKEN is not set; relying on any token already stored in the ngrok configuration.")

# Preprocessing function
def preprocess(text):
    """Normalise free text before it is vectorised.

    Lower-cases the text, removes punctuation, then collapses every run of
    whitespace into a single space and trims both ends.

    Args:
        text: Raw text. ``None`` is treated as an empty string.

    Returns:
        The normalised string.
    """
    if text is None:
        return ""
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    # Collapse whitespace *after* punctuation is gone: deleting a free-standing
    # symbol ("fever , cough !") would otherwise leave double or trailing spaces.
    return re.sub(r'\s+', ' ', text).strip()

# Fail fast with a clear message if the dataset layout is not what the code below
# expects ('text' is vectorised here, 'label' is read by suggest_diagnosis).
for required_column in ('text', 'label'):
    if required_column not in df.columns:
        raise ValueError(f"Error: Column '{required_column}' not found in dataset.")

df['processed_text'] = df['text'].apply(preprocess)

# Initialize TF-IDF vectorizer and precompute the corpus matrix once
vectorizer = TfidfVectorizer()
corpus_vec = vectorizer.fit_transform(df['processed_text'])

def get_similarity(user_input):
    """Return the cosine similarity of the query against every document of the fitted corpus.

    The text is cleaned with ``preprocess`` and projected with the already
    fitted ``vectorizer``; the first (only) row of the similarity matrix
    against ``corpus_vec`` is returned.
    """
    cleaned = preprocess(user_input)
    similarity_matrix = cosine_similarity(vectorizer.transform([cleaned]), corpus_vec)
    return similarity_matrix[0]

def suggest_diagnosis(user_input, threshold=0.2):
    """Rank the diseases whose example texts resemble the user's description.

    Args:
        user_input: Free-text symptom description.
        threshold: Only similarities strictly above this value are reported.

    Returns:
        A list of ``(disease, percent)`` pairs ordered from best to worst match,
        one entry per disease, or a fallback message string when nothing
        exceeds the threshold.
    """
    similarities = get_similarity(user_input)
    sorted_diseases = sorted(zip(df['label'], similarities), key=lambda x: x[1], reverse=True)

    unique_diseases = {}
    for disease, score in sorted_diseases:
        # The list is sorted best-first, so the first hit per disease is its
        # highest score; later (lower) scores must not overwrite it.
        if disease in unique_diseases or not score > threshold:
            continue
        # Plain int so that the value is always JSON-serialisable.
        unique_diseases[disease] = int(round(float(score) * 100))

    return list(unique_diseases.items()) if unique_diseases else "No specific diagnosis found. Please provide more details."

def get_precautions(diseases):
    """Look up the precautions for one or more diseases.

    Args:
        diseases: An iterable of disease names, or a single name as a string
            (wrapped in a list; iterating it directly would look up each
            character). ``None`` is treated as no names. Entries that are not
            strings (e.g. a JSON ``null``) are ignored instead of raising.

    Returns:
        A list with one ``"For <disease>: <precautions>"`` string per disease
        found in ``disease_data`` (matched case-insensitively, ignoring
        surrounding whitespace), or a single general-advice entry when none
        was found.

    Side effects:
        (Re)creates the ``Condition_normalized`` column on ``disease_data``.
    """
    if diseases is None:
        diseases = []
    elif isinstance(diseases, str):
        diseases = [diseases]

    disease_data['Condition_normalized'] = disease_data['Condition'].str.lower().str.strip()
    precautions = []
    for disease in diseases:
        if not isinstance(disease, str):
            continue
        disease_info = disease_data[disease_data['Condition_normalized'] == disease.lower().strip()]
        if not disease_info.empty:
            precautions.append(f"For {disease}: {disease_info['Precautions'].values[0]}")
    return precautions if precautions else ["General advice: Stay hydrated, rest well, and consult a doctor if symptoms persist."]

@app.route('/')
def index():
    """Handle ``/`` by rendering the ``index.html`` template."""
    return render_template('index.html')

@app.route('/disease', methods=['POST'])
def diagnose():
    """Handle ``POST /disease``: suggest diagnoses for the posted symptoms.

    Expects a JSON object with an optional string field ``symptoms`` (a missing
    or null value is treated as an empty description).

    Returns:
        The JSON-encoded result of ``suggest_diagnosis``, or a JSON error with
        status 400 when the body is not a JSON object or ``symptoms`` is not a
        string (previously these cases surfaced as unhandled exceptions).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    symptoms = data.get('symptoms') or ''
    if not isinstance(symptoms, str):
        return jsonify({"error": "'symptoms' must be a string."}), 400

    response = suggest_diagnosis(symptoms)
    return jsonify(response)

@app.route('/precautions', methods=['POST'])
def precautions():
    """Handle ``POST /precautions``: return the precautions for the posted disease.

    Expects a JSON object with an optional string field ``disease`` (a missing
    or null value is treated as an empty name).

    Returns:
        The JSON-encoded list produced by ``get_precautions``, or a JSON error
        with status 400 when the body is not a JSON object or ``disease`` is
        not a string (previously these cases surfaced as unhandled exceptions).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    disease = data.get('disease') or ''
    if not isinstance(disease, str):
        return jsonify({"error": "'disease' must be a string."}), 400

    response = get_precautions([disease])
    return jsonify(response)

# Start ngrok tunnel
# One constant keeps the tunnel and the Flask server on the same port.
PORT = 5000
public_url = ngrok.connect(PORT).public_url
print(f" * ngrok tunnel: {public_url}")

if __name__ == '__main__':
    # Debug mode stays off: the app is reachable from the internet through the
    # tunnel, and the interactive debugger must never be exposed publicly.
    # The reloader is disabled as well, because it re-executes this cell
    # (including the tunnel creation above) in a child process.
    app.run(port=PORT, debug=False, use_reloader=False)


Dataset downloaded at: /root/.cache/kagglehub/datasets/niyarrbarman/symptom2disease/versions/1
 * ngrok tunnel: https://fe87-35-185-162-59.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
