In [2]:
# Import Packages
import pandas as pd
import csv
import seaborn as sns
import numpy as np
import networkx as nx
import graphviz
from IPython.display import display
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MultiLabelBinarizer, StandardScaler
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree 
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from nltk.tokenize import word_tokenize

In [3]:
from joblib import dump, load
# Load the pre-trained classifier that was persisted with joblib
# (presumably a decision tree, going by the file name -- confirm).
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source.
dt = load('manual_DT.joblib') ## larger data model

### Testing a sample

In [4]:
# Hand-built smoke-test input: 132 binary features, only the first three set to 1.
sample_x = [int(position < 3) for position in range(132)]

In [5]:
# Turn the sample into a single-row 2-D array before it is passed to dt.predict.
# reshape(1, -1) lets NumPy infer the row length; the previous
# reshape(1, len(sample_x)) raised a ValueError when this cell was re-run,
# because len() of the already reshaped array is 1.
sample_x = np.array(sample_x).reshape(1, -1)

In [6]:
# Sanity check: classify the hand-built sample with the loaded model.
dt.predict(sample_x)

array(['Fungal infection'], dtype=object)

### Process input sentence for symptom detection

In [7]:
from sentence_transformers import SentenceTransformer

# Load the BERT sentence-embedding model used to encode symptoms and user queries.
# Other pretrained checkpoints are listed in the sentence-transformers docs:
#   - trained on Natural Language Inference (NLI): https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/nli-models.md
#   - trained on Semantic Textual Similarity (STS): https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/sts-models.md

model = SentenceTransformer('bert-base-nli-mean-tokens')

In [8]:
# Read the stop-word list, one entry per line.
# splitlines() replaces split('\n')[:-1], which silently dropped the last
# stop word whenever the file did not end with a trailing newline.
with open('stopwords.txt','r') as f:
    stopwords = f.read().splitlines()

In [9]:
# Derive the symptom vocabulary from the header of the test set: every column
# except the last one is treated as a symptom, with underscores shown as spaces.
df = pd.read_csv('Testing.csv')
symptoms = [column.replace("_", " ") for column in df.columns[:-1]]

# Map each readable symptom name to its column position, i.e. the index that is
# set to 1 in the feature vector handed to the classifier.
# enumerate() replaces the hand-maintained counter, which also left stray
# `i` / `s` variables behind in the notebook namespace.
symptoms_dict = {name: position for position, name in enumerate(symptoms)}

In [10]:
import scipy

# Embed every symptom name once up front; binary_semantic_symptoms_search
# compares each query embedding against these vectors.
sentence_embeddings = model.encode(symptoms)



In [11]:
def binary_semantic_symptoms_search(query, number_top_matches=5):
    """Return the known symptoms that are semantically closest to ``query``.

    The query is embedded with the notebook-level ``model`` and compared, by
    cosine distance, against ``sentence_embeddings`` (the embeddings of
    ``symptoms``).

    Args:
        query: Free-text description to match against the symptom names.
        number_top_matches: Maximum number of symptoms to return. Defaults to
            5, the value that was previously hard-coded.

    Returns:
        dict: whitespace-stripped symptom name -> cosine similarity
        (``1 - cosine distance``), inserted from best to worst match.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.spatial`` has been loaded on every SciPy version.
    from scipy.spatial.distance import cdist

    # encode() takes a batch; we send a single query and keep its only row.
    query_embedding = model.encode([query])[0]
    distances = cdist([query_embedding], sentence_embeddings, "cosine")[0]

    # Smallest distance first; sorted() is stable, so ties keep column order.
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1])

    return {
        symptoms[idx].strip(): 1 - distance
        for idx, distance in ranked[:number_top_matches]
    }

In [12]:
def symptom_detector_by_full_tokenizing(user):
    """Find the symptoms that best match a free-text user message.

    The message is lower-cased and tokenized with ``word_tokenize``; tokens
    found in ``stopwords`` are discarded and the remaining ones, joined by
    single spaces, are handed to ``binary_semantic_symptoms_search``.

    Args:
        user: Raw text typed by the user.

    Returns:
        Whatever ``binary_semantic_symptoms_search`` returns for the cleaned text.
    """
    kept_tokens = []
    for token in word_tokenize(user.lower()):
        if token not in stopwords:
            kept_tokens.append(token)
    cleaned_text = ' '.join(kept_tokens)
    return binary_semantic_symptoms_search(cleaned_text)

In [17]:
def one_prediction():
    """Run one interactive console session and print a possible disease.

    The user is prompted for symptoms one message at a time. For each message
    the best-matching known symptom is recorded and the remaining matches are
    suggested as related symptoms. Typing "Process it" (any letter case,
    surrounding whitespace ignored) ends the collection phase; the recorded
    symptoms are then encoded as a binary feature vector and classified with
    the notebook-level model ``dt``.

    Returns:
        None. All results are printed.

    NOTE(review): the best match is accepted unconditionally, whatever its
    similarity score -- consider asking the user to confirm it.
    """
    # The search returns whitespace-stripped symptom names, so index them by
    # stripped name too; looking them up in symptoms_dict directly would raise
    # a KeyError for any column name that carries surrounding whitespace.
    index_by_name = {name.strip(): idx for name, idx in symptoms_dict.items()}

    final_symptom_list = []
    print("Bot: Hello there! I am your health bot. Please tell me what symptoms are you noticing?")
    while True:
        user = input("User:")
        message = user.strip()

        # Tolerate case / whitespace variations of the stop phrase instead of
        # classifying e.g. "process it " as a symptom.
        if message.lower() == "process it":
            break
        # A blank line carries no symptom; just prompt again.
        if not message:
            continue

        sym_dict = symptom_detector_by_full_tokenizing(user)
        matches = list(sym_dict)  # best match first
        best_match, related = matches[0], matches[1:]
        final_symptom_list.append(best_match)
        print("Bot: I would classify it as " + best_match + ". Do you also feel some other related symptoms like " + ', '.join(related) + '. If there is no other symptom please type "Process it" to find out your possible disease.')

    # Without any symptom the feature vector would be all zeros and the
    # resulting "diagnosis" meaningless, so stop here instead of predicting.
    if not final_symptom_list:
        print("Bot: I did not catch any symptom, so I cannot suggest a possible disease.")
        return

    # Binary feature vector: one slot per known symptom, 1 where reported.
    sym_x = [0] * len(symptoms)
    for el in final_symptom_list:
        sym_x[index_by_name[el]] = 1

    # The classifier is given a 2-D array holding a single sample row.
    sym_x = np.array(sym_x).reshape(1, -1)
    possible_disease = dt.predict(sym_x)
    print("Bot: You possibly have " + possible_disease[0] )

In [20]:
# Start an interactive session; it blocks on input() until the user types "Process it".
one_prediction()

Bot: Hello there! I am your health bot. Please tell me what symptoms are you noticing?


User: I feel fatigue




Bot: I would classify it as fatigue. Do you also feel some other related symptoms like depression, headache, anxiety, muscle weakness. If there is no other symptom please type "Process it" to find out your possible disease.


User: I feel breatheless




Bot: I would classify it as lack of concentration. Do you also feel some other related symptoms like unsteadiness, sunken eyes, stiff neck, loss of smell. If there is no other symptom please type "Process it" to find out your possible disease.


User: I have breathlessness




Bot: I would classify it as breathlessness. Do you also feel some other related symptoms like sweating, cough, palpitations, fast heart rate. If there is no other symptom please type "Process it" to find out your possible disease.


User: I have high fever




Bot: I would classify it as high fever. Do you also feel some other related symptoms like sweating, diarrhoea, excessive hunger, chest pain. If there is no other symptom please type "Process it" to find out your possible disease.


User: I have cough




Bot: I would classify it as cough. Do you also feel some other related symptoms like vomiting, indigestion, constipation, dizziness. If there is no other symptom please type "Process it" to find out your possible disease.


User: Process it


Bot: You possibly have Hypertension 
