In [1]:
import os
# Set the environment variable to avoid OpenMP runtime errors
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import json
%matplotlib inline

In [3]:
# Load the dictionary from the json file.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving an open handle behind in the kernel.
with open("model_detail.json", "r") as model_detail_file:
    data = json.load(model_detail_file)

# Load the model from the path recorded in the json file
loaded_model = load_model(data["model_path"])

# Load the categories
diseases_classes = data["diseases_classes"]
symptoms_classes = data["symptoms"]

# Example usage
print("diseases_classes:", diseases_classes)
print("\nsymptoms:", symptoms_classes)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


diseases_classes: ['(vertigo) Paroymsal  Positional Vertigo', 'AIDS', 'Acne', 'Alcoholic hepatitis', 'Allergy', 'Arthritis', 'Bronchial Asthma', 'Cervical spondylosis', 'Chicken pox', 'Chronic cholestasis', 'Common Cold', 'Dengue', 'Diabetes', 'Dimorphic hemmorhoids(piles)', 'Drug Reaction', 'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack', 'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E', 'Hypertension', 'Hyperthyroidism', 'Hypoglycemia', 'Hypothyroidism', 'Impetigo', 'Jaundice', 'Malaria', 'Migraine', 'Osteoarthristis', 'Paralysis (brain hemorrhage)', 'Peptic ulcer diseae', 'Pneumonia', 'Psoriasis', 'Tuberculosis', 'Typhoid', 'Urinary tract infection', 'Varicose veins', 'hepatitis A']

symptoms: ['mild_fever', 'swelling_of_stomach', 'chills', 'patches_in_throat', 'stomach_pain', 'abdominal_pain', 'receiving_blood_transfusion', 'blister', 'swelled_lymph_nodes', 'history_of_alcohol_consumption', 'burning_micturition', 'skin_rash', 'nodal_skin_eruptions', 'congest

In [4]:
# Make the symptom names human-readable: underscores become spaces.
# Entries that are not strings are left out of the resulting list.
symptoms_classes = [
    name.replace("_", " ")
    for name in symptoms_classes
    if isinstance(name, str)
]
symptoms_classes

['mild fever',
 'swelling of stomach',
 'chills',
 'patches in throat',
 'stomach pain',
 'abdominal pain',
 'receiving blood transfusion',
 'blister',
 'swelled lymph nodes',
 'history of alcohol consumption',
 'burning micturition',
 'skin rash',
 'nodal skin eruptions',
 'congestion',
 'fatigue',
 'red spots over body',
 'depression',
 'mood swings',
 'dischromic  patches',
 'redness of eyes',
 'puffy face and eyes',
 'vomiting',
 'back pain',
 'bladder discomfort',
 'distention of abdomen',
 'swelling joints',
 'toxic look (typhos)',
 'dehydration',
 'dark urine',
 'acidity',
 'irritability',
 'phlegm',
 'obesity',
 'cough',
 'joint pain',
 'altered sensorium',
 'stomach bleeding',
 'hip joint pain',
 'red sore around nose',
 'knee pain',
 'headache',
 'weakness in limbs',
 'sweating',
 'throat irritation',
 'foul smell of urine',
 'swollen legs',
 'pain during bowel movements',
 'movement stiffness',
 'pain in anal region',
 'yellow crust ooze',
 'internal itching',
 'unsteadiness

### Prediction

In [None]:
def get_prediction_with_confidence(model, x_input):
    """Predict the disease for one sample and report the winning score.

    ``x_input`` is reshaped into a single-row batch and passed to
    ``model.predict``. The label is looked up in the module-level
    ``diseases_classes`` list.

    Returns:
        tuple: ``(label, score)`` where ``label`` is the entry of
        ``diseases_classes`` at the arg-max position and ``score`` is the
        largest value in the prediction row.
    """
    # One row, as many columns as there are features
    single_row = x_input.reshape(1, -1)
    scores = model.predict(single_row)

    # Map the arg-max position of each row to its class label
    class_labels = np.array(diseases_classes)
    top_labels = class_labels[np.argmax(scores, axis=1)]

    # Highest score of each row
    top_scores = np.max(scores, axis=1)

    return top_labels[0], top_scores[0]

"I have been experiencing chills , fatigue, my eyes are red and pain in chest and muscle with high fever and cough and running nose. i am feeling irritation in throat and headache"

### Using NLTK

In [None]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

# Ensure you have the necessary nltk resources
nltk.download('punkt')



# Function to find symptoms in a sentence using NLTK
def find_symptoms_nltk(sentence, symptoms):
    """Find symptom phrases in ``sentence`` by matching runs of tokens.

    The sentence is tokenized with ``word_tokenize`` and every run of
    consecutive tokens, up to the word count of the longest symptom, is
    compared case-insensitively against ``symptoms``. Whitespace inside the
    symptom strings is collapsed before comparison so that entries with
    irregular spacing can still match.

    Args:
        sentence: Free text to scan.
        symptoms: Iterable of symptom strings.

    Returns:
        list: The unique matched phrases as spelled in the sentence, in order
        of first appearance.
    """
    # Step 1: Tokenize the sentence into words
    tokens = word_tokenize(sentence)
    print(f"Tokenized sentence: {tokens}")

    # Step 2: Normalize the symptoms list for matching (lowercase, single spaces)
    normalized_symptoms = [" ".join(symptom.lower().split()) for symptom in symptoms]
    print(f"Normalized symptoms list: {normalized_symptoms}")
    symptom_lookup = set(normalized_symptoms)  # constant-time membership tests

    # A phrase longer than the longest symptom can never match, so bounding the
    # window avoids enumerating every possible token span of the sentence.
    max_words = max((len(symptom.split()) for symptom in normalized_symptoms), default=0)

    # Step 3: Create possible phrases from tokens (including multi-word tokens)
    possible_phrases = [
        " ".join(tokens[start:stop])
        for start in range(len(tokens))
        for stop in range(start + 1, min(start + max_words, len(tokens)) + 1)
    ]
    print(f"Possible phrases: {possible_phrases}")

    # Step 4 + 5: Check for matches; dict keys drop duplicates while keeping
    # first-seen order, so the result is deterministic.
    unique_matches = {}
    for phrase in possible_phrases:
        clean_phrase = " ".join(phrase.split())
        if clean_phrase.lower() in symptom_lookup:
            unique_matches.setdefault(clean_phrase)

    matched_symptoms = list(unique_matches)
    print(f"Unique matched symptoms: {matched_symptoms}")

    return matched_symptoms



### General (NLTK tokenization, stop-word removal and n-grams)

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string

nltk.download('stopwords')


def find_symptoms(sentence, symptoms):
    """Find known symptoms mentioned in ``sentence`` using n-gram matching.

    Both the sentence and every entry of ``symptoms`` are reduced to their
    lowercase, alphanumeric, non-stop-word tokens before comparison. Stop
    words used to be stripped from the sentence only, so a symptom that itself
    contains a stop word (for example "swelling of stomach") could never match.

    Args:
        sentence: Free text to scan.
        symptoms: Iterable of symptom strings.

    Returns:
        list: The matching entries of ``symptoms`` in their original spelling,
        one per matching n-gram, ordered by n-gram length and then position.
        Empty when ``symptoms`` is empty.
    """
    stop_words = set(stopwords.words('english'))

    def _content_words(text):
        # Same normalization as steps 1-4 below, without the debug output.
        lowered = (token.lower() for token in word_tokenize(text))
        return [token for token in lowered if token.isalnum() and token not in stop_words]

    # Step 1: Tokenize the sentence into individual words
    words = word_tokenize(sentence)
    print(f"Tokenized words: {words}")  # Debug: Print tokenized words

    # Step 2: Convert all words to lowercase to ensure case-insensitive matching
    words = [word.lower() for word in words]
    print(f"Lowercased words: {words}")  # Debug: Print lowercased words

    # Step 3: Remove punctuation by keeping only alphanumeric tokens
    words = [word for word in words if word.isalnum()]
    print(f"Alphanumeric words: {words}")  # Debug: Print words without punctuation

    # Step 4: Remove stopwords to focus on relevant words
    words = [word for word in words if word not in stop_words]
    print(f"Words after removing stopwords: {words}")  # Debug: Print words after removing stopwords

    # Step 5: Index the symptoms by their normalized form so an n-gram can be
    # mapped back to the original symptom string (first entry wins on ties).
    symptom_by_key = {}
    for symptom in symptoms:
        key = ' '.join(_content_words(symptom))
        if key:
            symptom_by_key.setdefault(key, symptom)

    # Step 6: Create n-grams up to the longest normalized symptom.
    # default=0 keeps an empty symptom list from raising in max().
    max_words = max((len(key.split()) for key in symptom_by_key), default=0)
    ngrams = [
        ' '.join(words[start:start + size])
        for size in range(1, max_words + 1)
        for start in range(len(words) - size + 1)
    ]
    print(f"N-grams: {ngrams}")  # Debug: Print generated n-grams

    # Step 7: Find and collect the symptoms whose normalized form is an n-gram
    matched_symptoms = [symptom_by_key[ngram] for ngram in ngrams if ngram in symptom_by_key]
    print(f"Matched symptoms: {matched_symptoms}")  # Debug: Print matched symptoms

    return matched_symptoms


### spaCy

In [1]:
import spacy
from spacy.matcher import PhraseMatcher

# Load the spaCy model
nlp = spacy.load('en_core_web_sm')



# Function to find symptoms in a sentence
def find_symptoms(sentence, symptoms):
    """Find known symptoms mentioned in ``sentence`` with a spaCy PhraseMatcher.

    Matching is done on the lowercase form of each token, so "Chills" in the
    sentence matches the symptom "chills". Each match is reported as the
    corresponding entry of ``symptoms`` (not the sentence text), which keeps
    the result directly comparable with the symptom list.

    Args:
        sentence: Free text to scan.
        symptoms: Iterable of symptom strings.

    Returns:
        list: One entry of ``symptoms`` per match found in the sentence.
    """
    # Step 1: Create a case-insensitive PhraseMatcher object
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

    # Step 2: Register every symptom under its own key so a match id can be
    # turned back into the symptom string. make_doc only tokenizes, which is
    # all the matcher needs for the LOWER attribute.
    for symptom in symptoms:
        pattern = nlp.make_doc(symptom)
        if len(pattern):  # skip entries that tokenize to nothing
            matcher.add(symptom, [pattern])

    # Step 3: Process the sentence with spaCy
    doc = nlp(sentence)

    # Step 4: Find matches in the processed sentence
    matches = matcher(doc)

    # Step 5: Translate match ids back to the registered symptom strings
    matched_symptoms = [nlp.vocab.strings[match_id] for match_id, _start, _end in matches]

    return matched_symptoms


In [None]:

# Example usage
sentence = "I have been experiencing chills , fatigue, my eyes are red and pain in chest and muscle with high fever and cough and running nose. i am feeling irritation in throat and headache"
matched_symptoms = find_symptoms(sentence, symptoms_classes)

print(f"Matched symptoms: {matched_symptoms}")


In [None]:
# pip install --upgrade setuptools wheel
# pip cache purge
# pip install spacy --no-build-isolation 

In [None]:

# Function to create a binary array for matched symptoms
def symptoms_to_binary(matched_symptoms, all_symptoms):
    """Encode matched symptoms as a 0/1 vector aligned with ``all_symptoms``.

    Comparison ignores case and collapses runs of whitespace, and
    ``matched_symptoms`` may be any iterable (it is consumed exactly once).

    Args:
        matched_symptoms: Iterable of symptom names that were found.
        all_symptoms: Iterable of all symptom names; fixes the output order.

    Returns:
        numpy.ndarray: Integer array with one element per entry of
        ``all_symptoms``; 1 where that symptom was matched, else 0.
    """
    def _key(symptom):
        # Canonical form used on both sides of the comparison.
        return " ".join(str(symptom).split()).casefold()

    # A set gives constant-time lookups and is safe for one-shot iterators.
    matched_keys = {_key(symptom) for symptom in matched_symptoms}
    flags = [1 if _key(symptom) in matched_keys else 0 for symptom in all_symptoms]
    # Explicit dtype keeps an empty result integer-typed as well.
    return np.array(flags, dtype=int)

In [None]:
inp=symptoms_to_binary(matched_symptoms,symptoms_classes)

In [None]:
# Example usage
predicted_disease, confidence_score = get_prediction_with_confidence(loaded_model, inp)

print(f"Predicted class: {predicted_disease}, Confidence score: {confidence_score:.4f}")

In [None]:
df_des = pd.read_csv("symptom_Description.csv")
df_des.head()

In [None]:
description = df_des[df_des['Disease'] == predicted_disease]["Description"].item()
description

In [None]:
df_recom = pd.read_csv("symptom_precaution.csv")
df_recom.head()

In [None]:
recommendations = df_recom[df_recom['Disease'] == predicted_disease]
recommendations = [recommendations[col].item() for col in recommendations.drop(columns='Disease').columns if  not pd.isna(recommendations[col].item())]
recommendations

In [5]:
# Read the test set; the context manager closes the file handle after parsing.
# NOTE: this rebinds `data`, which previously held the model details.
with open("test/test.json", "r") as test_file:
    data = json.load(test_file)
# Load the test inputs and their labels
testx = data["testx"]
testy = data["testy"]

In [6]:
diseases_classes.index("Typhoid")

37

In [7]:
# Look the label index up by name instead of hard-coding 37, so this cell
# stays correct if the order of `diseases_classes` ever changes.
typhoid_index = diseases_classes.index("Typhoid")
indices = [index for index, value in enumerate(testy) if value == typhoid_index]
indices

[38, 129, 136, 171, 189, 196, 203, 209, 214, 380, 407, 443]

In [8]:
[symptoms_classes[i] for i , sym in enumerate(testx[38])if sym==1]

['chills',
 'abdominal pain',
 'fatigue',
 'vomiting',
 'toxic look (typhos)',
 'headache',
 'diarrhoea',
 'belly pain',
 'nausea',
 'high fever',
 'constipation']

In [None]:
matched_symptoms

In [4]:
import spacy
from spacy.matcher import PhraseMatcher

# Load the spaCy model
nlp = spacy.load('en_core_web_sm')


In [7]:
# Function to find symptoms in a sentence
def find_symptoms(sentence, symptoms=symptoms_classes):
    """Return the known symptoms that occur in ``sentence``.

    Uses a spaCy PhraseMatcher on lowercase token text, so matching is
    case-insensitive. Every match is returned as the corresponding entry of
    ``symptoms`` rather than the text found in the sentence.

    Args:
        sentence: Free text to scan.
        symptoms: Iterable of symptom strings; defaults to the value
            ``symptoms_classes`` had when this function was defined.

    Returns:
        list: One entry of ``symptoms`` per match found in the sentence.
    """
    # Step 1: Create a case-insensitive PhraseMatcher object
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

    # Step 2: One key per symptom lets a match id be mapped back to its
    # symptom string. Tokenizing with make_doc is sufficient for LOWER.
    for symptom in symptoms:
        pattern = nlp.make_doc(symptom)
        if len(pattern):  # ignore entries without any token
            matcher.add(symptom, [pattern])

    # Step 3: Process the sentence with spaCy
    doc = nlp(sentence)

    # Step 4: Find matches in the processed sentence
    matches = matcher(doc)

    # Step 5: Resolve each match id to the symptom it was registered under
    matched_symptoms = [nlp.vocab.strings[match_id] for match_id, _start, _end in matches]

    return matched_symptoms

matched_symptoms=find_symptoms("I have been experiencing chills , fatigue, my eyes are red and pain in chest and muscle with high fever and cough and running nose. i am feeling irritation in throat and headache")
matched_symptoms

['chills', 'fatigue', 'high fever', 'cough', 'headache']

In [11]:

# Function to create a binary array for matched symptoms
def symptoms_to_binary(matched_symptoms, all_symptoms):
    """Build a 0/1 indicator vector over ``all_symptoms``.

    Names are compared case-insensitively with whitespace runs collapsed.
    ``matched_symptoms`` is read once, so one-shot iterators work too.

    Args:
        matched_symptoms: Iterable of symptom names that were found.
        all_symptoms: Iterable of all symptom names; fixes the output order.

    Returns:
        numpy.ndarray: Integer array, 1 at the positions of matched symptoms.
    """
    def _key(symptom):
        # Canonical form shared by both inputs.
        return " ".join(str(symptom).split()).casefold()

    # Set membership is O(1) and does not exhaust an iterator per lookup.
    matched_keys = {_key(symptom) for symptom in matched_symptoms}
    flags = [1 if _key(symptom) in matched_keys else 0 for symptom in all_symptoms]
    # dtype=int keeps the empty case integer-typed.
    return np.array(flags, dtype=int)


def get_prediction_with_confidence(model, x_input, diseases_classes=diseases_classes):
    """Predict the disease for one sample and return it with its score.

    Args:
        model: Object whose ``predict`` accepts a single-row 2-D array.
        x_input: Feature vector for one sample (array or sequence); it is
            reshaped to one row before prediction.
        diseases_classes: Class labels indexed by the model's output position.

    Returns:
        tuple: ``(label, score)``. ``label`` is the entry of
        ``diseases_classes`` at the arg-max position and ``score`` is the
        largest value of the prediction row, as a scalar.
    """
    # Predict probabilities; asarray lets plain lists be passed as well
    predictions = model.predict(np.asarray(x_input).reshape(1, -1))

    # Get the predicted class (index of the highest probability)
    predicted_class = np.array(diseases_classes)[np.argmax(predictions, axis=1)]

    # Get the confidence score (highest probability)
    confidence_score = np.max(predictions, axis=1)

    # Both results are per-row arrays; unwrap the single row of each.
    # The score used to be returned as a 1-element array.
    return predicted_class[0], confidence_score[0]

inp=symptoms_to_binary(matched_symptoms,symptoms_classes)
predicted_disease, confidence_score = get_prediction_with_confidence(loaded_model, inp)
predicted_disease



'Typhoid'

In [12]:
def give_description(predicted_disease):
    """Return the description stored for ``predicted_disease``.

    Reads ``symptom_Description.csv`` (columns ``Disease`` and
    ``Description``) on every call. Disease names are compared after
    stripping surrounding whitespace. If several rows match, the first one
    is used.

    Raises:
        ValueError: If no row matches ``predicted_disease``.
    """
    df_des = pd.read_csv("symptom_Description.csv")
    wanted = str(predicted_disease).strip()
    is_match = df_des['Disease'].astype(str).str.strip() == wanted
    descriptions = df_des.loc[is_match, "Description"]
    if descriptions.empty:
        # Explicit message instead of the generic size error from .item()
        raise ValueError(f"No description found for disease {wanted!r}")
    return descriptions.iloc[0]


def give_recommendation(predicted_disease):
    """Return the non-missing precautions listed for ``predicted_disease``.

    Reads ``symptom_precaution.csv`` on every call. Disease names are
    compared after stripping surrounding whitespace; if several rows match,
    the first one is used.

    Returns:
        list: Values of every column except ``Disease`` for the matching
        row, in column order, with missing values removed.

    Raises:
        ValueError: If no row matches ``predicted_disease``.
    """
    df_recom = pd.read_csv("symptom_precaution.csv")
    wanted = str(predicted_disease).strip()
    matching_rows = df_recom[df_recom['Disease'].astype(str).str.strip() == wanted]
    if matching_rows.empty:
        # Explicit message instead of the generic size error from .item()
        raise ValueError(f"No recommendations found for disease {wanted!r}")
    # Read each cell once from the selected row
    precautions = matching_rows.iloc[0].drop(labels='Disease')
    return [value for value in precautions if not pd.isna(value)]

In [13]:
description= give_description(predicted_disease)
recommendations= give_recommendation(predicted_disease)
recommendations

['eat high calorie vegitables',
 'antiboitic therapy',
 'consult doctor',
 'medication']

In [1]:
import os
# Set the environment variable to avoid OpenMP runtime errors
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'


import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import json


import spacy
from spacy.matcher import PhraseMatcher
# Load the spaCy model
nlp = spacy.load('en_core_web_sm')



# Load the dictionary from the json file; the context manager closes the
# file handle as soon as parsing is done.
with open("model_detail.json", "r") as model_detail_file:
    data = json.load(model_detail_file)
# Load the model
loaded_model = load_model(data["model_path"])
# Load the categories
diseases_classes = data["diseases_classes"]
symptoms_classes = data["symptoms"]
# Underscores become spaces so the names can be matched against free text.
# NOTE(review): non-string entries are dropped here, which would shift the
# positions of the remaining symptoms - confirm the list holds only strings.
symptoms_classes = [s.replace("_", " ") for s in symptoms_classes if isinstance(s, str)]


# Lookup tables read by give_description / give_recommendation
df_des = pd.read_csv("symptom_Description.csv")
df_recom = pd.read_csv("symptom_precaution.csv")




# Function to find symptoms in a sentence
def find_symptoms(sentence, symptoms=symptoms_classes):
    """Extract the known symptoms mentioned in ``sentence``.

    A spaCy PhraseMatcher configured for lowercase token text performs the
    search, so capitalisation in the sentence does not matter. Matches are
    returned as the entries of ``symptoms`` they correspond to, which is the
    spelling ``symptoms_to_binary`` compares against.

    Args:
        sentence: Free text to scan.
        symptoms: Iterable of symptom strings; defaults to the value
            ``symptoms_classes`` had when this function was defined.

    Returns:
        list: One entry of ``symptoms`` per match found in the sentence.
    """
    # Step 1: Create a case-insensitive PhraseMatcher object
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

    # Step 2: Register each symptom under its own key; the key is what a
    # match id resolves to. make_doc (tokenizer only) is enough for LOWER.
    for symptom in symptoms:
        pattern = nlp.make_doc(symptom)
        if len(pattern):  # nothing to match for token-less entries
            matcher.add(symptom, [pattern])

    # Step 3: Process the sentence with spaCy
    doc = nlp(sentence)

    # Step 4: Find matches in the processed sentence
    matches = matcher(doc)

    # Step 5: Map every match id back to its symptom string
    matched_symptoms = [nlp.vocab.strings[match_id] for match_id, _start, _end in matches]

    return matched_symptoms



# Function to create a binary array for matched symptoms
def symptoms_to_binary(matched_symptoms, all_symptoms):
    """Turn a collection of matched symptoms into a 0/1 feature vector.

    The vector follows the order of ``all_symptoms``. Names are compared
    without regard to case or repeated whitespace, and ``matched_symptoms``
    is iterated only once so generators are handled correctly.

    Args:
        matched_symptoms: Iterable of symptom names that were found.
        all_symptoms: Iterable of all symptom names; fixes the output order.

    Returns:
        numpy.ndarray: Integer array with 1 for matched symptoms, 0 otherwise.
    """
    def _key(symptom):
        # Canonical form applied to both inputs before comparing.
        return " ".join(str(symptom).split()).casefold()

    # Build the lookup once: O(1) membership, no repeated list scans.
    matched_keys = {_key(symptom) for symptom in matched_symptoms}
    flags = [1 if _key(symptom) in matched_keys else 0 for symptom in all_symptoms]
    # Explicit dtype so that an empty symptom list still yields integers.
    return np.array(flags, dtype=int)


def get_prediction_with_confidence(model, x_input, diseases_classes=diseases_classes):
    """Return the top-scoring class label and its score for one sample.

    Args:
        model: Object whose ``predict`` accepts a single-row 2-D array.
        x_input: Array holding the features of one sample; reshaped to one row.
        diseases_classes: Class labels indexed by the model's output position.

    Returns:
        tuple: ``(label, score)`` taken from the single prediction row.
    """
    # Run the model on a one-row batch
    single_row = x_input.reshape(1, -1)
    scores = model.predict(single_row)

    # Label at the position of the highest score in each row
    class_labels = np.array(diseases_classes)
    top_labels = class_labels[np.argmax(scores, axis=1)]

    # Highest score in each row
    top_scores = np.max(scores, axis=1)

    # Only one row was predicted, so unwrap element 0 of both arrays
    return top_labels[0], top_scores[0]



def give_description(predicted_disease):
    """Return the description stored in ``df_des`` for ``predicted_disease``.

    Disease names are compared after stripping surrounding whitespace. If
    several rows match, the first one is used.

    Raises:
        ValueError: If no row of ``df_des`` matches ``predicted_disease``.
    """
    wanted = str(predicted_disease).strip()
    is_match = df_des['Disease'].astype(str).str.strip() == wanted
    descriptions = df_des.loc[is_match, "Description"]
    if descriptions.empty:
        # Explicit message instead of the generic size error from .item()
        raise ValueError(f"No description found for disease {wanted!r}")
    return descriptions.iloc[0]


def give_recommendation(predicted_disease):
    """Return the non-missing precautions in ``df_recom`` for a disease.

    Disease names are compared after stripping surrounding whitespace; if
    several rows match, the first one is used.

    Returns:
        list: Values of every column except ``Disease`` for the matching
        row, in column order, with missing values removed.

    Raises:
        ValueError: If no row of ``df_recom`` matches ``predicted_disease``.
    """
    wanted = str(predicted_disease).strip()
    matching_rows = df_recom[df_recom['Disease'].astype(str).str.strip() == wanted]
    if matching_rows.empty:
        # Explicit message instead of the generic size error from .item()
        raise ValueError(f"No recommendations found for disease {wanted!r}")
    # Read each cell once from the selected row
    precautions = matching_rows.iloc[0].drop(labels='Disease')
    return [value for value in precautions if not pd.isna(value)]




def give_predicted_result(sentence):
    """Run the full pipeline on a free-text symptom description.

    Steps: extract symptoms with ``find_symptoms``, encode them with
    ``symptoms_to_binary``, classify with ``loaded_model`` and look up the
    description and recommendations of the predicted disease.

    Args:
        sentence: Free text describing the symptoms.

    Returns:
        dict: Keys ``predicted_disease`` (str), ``confidence_score`` (float),
        ``description`` and ``recommendations`` (list).
    """
    matched_symptoms = find_symptoms(sentence, symptoms=symptoms_classes)
    inp = symptoms_to_binary(matched_symptoms, symptoms_classes)
    predicted_disease, confidence_score = get_prediction_with_confidence(loaded_model, inp)

    # The prediction helper returns NumPy scalars; convert to plain Python
    # types so the result can be serialised (json.dumps rejects NumPy floats).
    predicted_disease = str(predicted_disease)
    confidence_score = float(confidence_score)

    description = give_description(predicted_disease)
    recommendations = give_recommendation(predicted_disease)

    return {"predicted_disease": predicted_disease,
            "confidence_score": confidence_score,
            "description": description,
            "recommendations": recommendations}

In [5]:
r=give_predicted_result("I have been experiencing chills , fatigue, my eyes are red and pain in chest and muscle with high fever and cough and running nose. i am feeling irritation in throat and headache")
print(r["recommendations"])

['eat high calorie vegitables', 'antiboitic therapy', 'consult doctor', 'medication']
