In [122]:
import pandas as pd
import numpy as np
import random
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import gensim.downloader as api
import gensim
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import speech_recognition
import pyttsx3

In [44]:
d1 = pd.read_csv("dataset.csv")

In [45]:
d1 = d1.fillna(0)

In [46]:
symptoms = []
disease = d1.Disease
starting = ["I am having ", "I'm facing ","I am going from "]

for j in range(0, 4920):
  sent = random.choice(starting)
  for i in range(1, 18):
    if d1[f"Symptom_{i}"][j] != 0:
      sent += d1[f"Symptom_{i}"][j]
      sent += ", "
  symptoms.append(sent)

In [47]:
dataset = pd.DataFrame({"symptoms":symptoms, "disease":disease})

In [49]:
nltk.download("punkt")
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [50]:
stopword = stopwords.words("english")

In [51]:
def process_text(text):
  text = re.sub(r",","", text)
  stemmer = PorterStemmer()

  clean_text = []
  for word in word_tokenize(text):
    if word not in stopword:
      clean_text.append(stemmer.stem(word))
      # clean_text += " "
  return clean_text

In [52]:
process_text(dataset.symptoms[0])

['i', 'itch', 'skin_rash', 'nodal_skin_erupt', 'dischrom', '_patch']

In [53]:
Sentences = []
for i in range(0, 4920):
  process = process_text(dataset.symptoms[i])
  Sentences.append(process)

In [54]:
labels = [i for i in range(0, 41)]
map = {}
for d, i in zip(dataset.disease.unique(), labels):
  map[d] = i

In [55]:
dataset = pd.DataFrame({"sentences":Sentences, "label":dataset.disease})

In [56]:
dataset.label = dataset.label.map(map)

In [57]:
dataset.sentences[0]

['i', 'itch', 'skin_rash', 'nodal_skin_erupt', 'dischrom', '_patch']

In [58]:
vectorizer = gensim.models.Word2Vec(sentences=dataset.sentences)

In [59]:
vectorizer.wv.index_to_key

['i',
 'fatigu',
 'vomit',
 'go',
 "'m",
 'face',
 'high_fev',
 'loss_of_appetit',
 'nausea',
 'headach',
 'abdominal_pain',
 'yellowish_skin',
 'yellowing_of_ey',
 'chill',
 'skin_rash',
 'malais',
 'chest_pain',
 'joint_pain',
 'sweat',
 'itch',
 'dark_urin',
 'diarrhoea',
 'cough',
 'muscle_pain',
 'irrit',
 'excessive_hung',
 'lethargi',
 'weight_loss',
 'breathless',
 'mild_fev',
 'phlegm',
 'swelled_lymph_nod',
 'loss_of_bal',
 'blurred_and_distorted_vis',
 'dizzi',
 'abnormal_menstru',
 'depress',
 'fast_heart_r',
 'red_spots_over_bodi',
 'muscle_weak',
 'constip',
 'stiff_neck',
 'obes',
 'back_pain',
 'neck_pain',
 'mood_sw',
 'restless',
 'swelling_joint',
 'painful_walk',
 'family_histori',
 'indigest',
 'continuous_sneez',
 'acid',
 'stomach_pain',
 'burning_micturit',
 'urin',
 'sinus_pressur',
 'redness_of_ey',
 'slurred_speech',
 'polyuria',
 'swollen_extremeti',
 'brittle_nail',
 'increased_appetit',
 'enlarged_thyroid',
 'palpit',
 'rusty_sputum',
 'loss_of_smel',
 'co

In [60]:
vectorizer.wv["itch"].shape

(100,)

In [61]:
vectors = []
vec_size = vectorizer.vector_size
for i in range(0, 4920):
  sent = dataset.sentences[i]
  vec = np.zeros(vec_size, )
  count = 0
  for word in sent:
    if word in vectorizer.wv.index_to_key:
      vec += vectorizer.wv[word]
      count += 1
  final_vec = vec / count
  vectors.append(final_vec)

In [62]:
dataset = pd.DataFrame({"vectors":vectors, "labels":dataset.label})

In [63]:
x = dataset.vectors.to_list()
y = dataset.labels.to_list()

In [65]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

In [66]:
classifier = RandomForestClassifier(n_estimators=50)
classifier.fit(x_train, y_train)

In [67]:
y_pred = classifier.predict(x_test)

In [68]:
print(accuracy_score(y_test, y_pred))

1.0


In [69]:
def main(input):
  input = process_text(input)
  vec = np.zeros(vec_size, )
  count = 0
  for word in input:
    if word in vectorizer.wv.index_to_key:
      vec += vectorizer.wv[word]
      count += 1
  final_vec = vec / count
  return [final_vec]

In [70]:
print(map)

{'Fungal infection': 0, 'Allergy': 1, 'GERD': 2, 'Chronic cholestasis': 3, 'Drug Reaction': 4, 'Peptic ulcer diseae': 5, 'AIDS': 6, 'Diabetes ': 7, 'Gastroenteritis': 8, 'Bronchial Asthma': 9, 'Hypertension ': 10, 'Migraine': 11, 'Cervical spondylosis': 12, 'Paralysis (brain hemorrhage)': 13, 'Jaundice': 14, 'Malaria': 15, 'Chicken pox': 16, 'Dengue': 17, 'Typhoid': 18, 'hepatitis A': 19, 'Hepatitis B': 20, 'Hepatitis C': 21, 'Hepatitis D': 22, 'Hepatitis E': 23, 'Alcoholic hepatitis': 24, 'Tuberculosis': 25, 'Common Cold': 26, 'Pneumonia': 27, 'Dimorphic hemmorhoids(piles)': 28, 'Heart attack': 29, 'Varicose veins': 30, 'Hypothyroidism': 31, 'Hyperthyroidism': 32, 'Hypoglycemia': 33, 'Osteoarthristis': 34, 'Arthritis': 35, '(vertigo) Paroymsal  Positional Vertigo': 36, 'Acne': 37, 'Urinary tract infection': 38, 'Psoriasis': 39, 'Impetigo': 40}


In [108]:
d2 = pd.read_csv("symptom_Description.csv")
d3 = pd.read_csv("symptom_precaution.csv")
d4 = pd.read_csv("Symptom-severity.csv")
d3.fillna(0, inplace=True)

In [None]:
recognizer = speech_recognition.Recognizer()
while True:
  try:
    with speech_recognition.Microphone() as mic:
      recognizer.adjust_for_ambient_noise(mic, duration=0.2)
      audio = recognizer.listen(mic)
      a = recognizer.recognize_google(audio)
      a = text.lower()
      # print(f"Recognized : {text}")
  except speech_recognition.exceptions.UnknownValueError():
    recognizer = speech_recognition.Recognizer()
    continue

In [116]:
processing = main(a)
result = classifier.predict(processing)
for k,v in map.items():
  if v == result[0]:
    fr = k
    try:
      r_description = d2[d2.Disease == fr].Description.values[0]
    except IndexError:
      pass
    precautions = []
    for i in range(1, 5):
      p = d3[d3.Disease == fr][f"Precaution_{i}"].values[0]
      precautions.append(p)
    weight_sum = 0
    print(f"You might have {k}")
    print(f"Description of {k} is {r_description}")
    print("I'll suggest you to take precautions : ")
    for p in precautions:
      print(p)
    print("But I'll suggest you visiting doctor")

Enter your symptoms : am going from fatigue, weight_loss, restlessness, lethargy, irregular_sugar_level, blurred_and_distorted_vision, obesity, increased_appetite, polyuria,
You might have Diabetes 
I'll suggest you to take precautions : 
have balanced diet
exercise
consult doctor
follow up
But I'll suggest you visiting doctor
