In [1]:
# Import Packages
import pandas as pd
import csv
import seaborn as sns
import numpy as np
import networkx as nx
import graphviz
from IPython.display import display
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MultiLabelBinarizer, StandardScaler
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree 
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from nltk.tokenize import word_tokenize

In [105]:
from joblib import dump, load
# Load the previously saved model that later cells call `predict` on.
# NOTE(review): joblib artifacts are pickle-based; only load files from a trusted source.
dt = load('manual_DT.joblib') ## larger data model

### Testing a sample

In [122]:
# Hand-built feature vector of length 132: positions 0, 1 and 2 are set, all others are zero.
sample_x = [int(position < 3) for position in range(132)]

In [123]:
# Lay the sample out as a single row (1, n_features) before it is passed to `predict`.
# `reshape(1, -1)` keeps this cell idempotent: re-running it on the already
# reshaped array still yields one row, whereas `reshape(1, len(sample_x))`
# would then ask for shape (1, 1) and raise.
sample_x = np.asarray(sample_x).reshape(1, -1)

In [124]:
# Sanity check: run the loaded model on the single-row sample vector.
dt.predict(sample_x)

array(['Fungal infection'], dtype=object)

### Process input sentence for symptom detection

In [9]:
from sentence_transformers import SentenceTransformer

# Load a pretrained BERT sentence-embedding model. Other pretrained models are listed at:
#   NLI (Natural Language Inference): https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/nli-models.md
#   STS (Semantic Textual Similarity): https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/sts-models.md

model = SentenceTransformer('bert-base-nli-mean-tokens')

In [125]:
# Read the stop-word list, one entry per line.
# `splitlines()` does not depend on a trailing newline, so the final word is
# kept even when the file does not end with "\n" (dropping the last element
# of `split('\n')` would silently lose it in that case).
with open('stopwords.txt','r') as f:
    stopwords = f.read().splitlines()

In [142]:
# Symptom names come from the CSV header: every column except the last one.
df = pd.read_csv('Testing.csv')

# Turn column identifiers into readable phrases. Surrounding whitespace is
# stripped so these names match the stripped keys that the semantic search
# returns and that are later looked up in `symptoms_dict`.
symptoms = [column.replace("_", " ").strip() for column in df.columns[:-1]]

# Map each symptom name to its position in the feature vector.
symptoms_dict = {name: position for position, name in enumerate(symptoms)}

In [143]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.spatial.distance` as an attribute. This still binds the name
# `scipy`, so code that spells out `scipy.spatial.distance.cdist` keeps working.
import scipy.spatial.distance

# Embed every symptom name once so each query only needs a distance computation.
sentence_embeddings = model.encode(symptoms)



In [144]:
def binary_semantic_symptoms_search(query, number_top_matches=5):
    """Return the known symptoms that are semantically closest to ``query``.

    The query is embedded with the module-level ``model`` and compared with
    the precomputed ``sentence_embeddings`` using cosine distance.

    Parameters
    ----------
    query : str
        Free-text description to match against the symptom names.
    number_top_matches : int, optional
        How many of the closest symptoms to return (default 5).

    Returns
    -------
    dict
        Maps each matched symptom name (whitespace-stripped) to its cosine
        similarity ``1 - distance``. Entries are inserted from the most to
        the least similar symptom.
    """
    # Imported locally so the function does not rely on `scipy.spatial`
    # having been loaded as a side effect of a bare `import scipy`.
    from scipy.spatial.distance import cdist

    query_embedding = model.encode([query])[0]
    distances = cdist([query_embedding], sentence_embeddings, "cosine")[0]

    # Stable ascending sort by distance: the closest symptoms come first.
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1])

    return {
        symptoms[idx].strip(): (1 - distance)
        for idx, distance in ranked[:number_top_matches]
    }

In [145]:
def symptom_detector_by_full_tokenizing(user):
    """Find the symptoms that best match a free-text user message.

    The message is lower-cased, tokenized, filtered against ``stopwords``,
    re-joined with single spaces and handed to
    ``binary_semantic_symptoms_search``; that function's result is returned
    unchanged.
    """
    kept_tokens = []
    for token in word_tokenize(user.lower()):
        if token in stopwords:
            continue
        kept_tokens.append(token)

    cleaned_text = ' '.join(kept_tokens)
    return binary_semantic_symptoms_search(cleaned_text)

In [157]:
def one_prediction():
    """Run an interactive symptom-collection loop and print a predicted disease.

    The user is prompted repeatedly; for every message the best-matching
    symptom is recorded. Typing ``quit`` (any case, surrounding whitespace
    ignored) ends the loop. The collected symptoms are then encoded as a
    single-row 0/1 vector and passed to the module-level classifier ``dt``.
    The outcome is printed; nothing is returned.
    """
    # Look symptoms up by stripped name, because the detector returns stripped keys.
    index_by_name = {name.strip(): idx for name, idx in symptoms_dict.items()}
    final_symptom_list = []

    while True:
        user = input("User:")
        if user.strip().lower() == "quit":
            break
        if not user.strip():
            # Nothing was typed; prompt again instead of matching an empty query.
            continue

        sym_dict = symptom_detector_by_full_tokenizing(user)
        print(sym_dict)
        # The dict is ordered best match first; keep only the top symptom.
        best_match = next(iter(sym_dict), None)
        if best_match in index_by_name:
            final_symptom_list.append(best_match)

    if not final_symptom_list:
        # An all-zero vector would still produce a prediction, but a meaningless one.
        print("Bot: I could not detect any symptoms, so I cannot suggest a diagnosis.")
        return

    sym_x = [0]*len(symptoms)
    for el in final_symptom_list:
        sym_x[index_by_name[el]] = 1

    # The classifier is called with a single row of shape (1, n_symptoms).
    sym_x = np.array(sym_x).reshape(1, -1)
    possible_disease = dt.predict(sym_x)
    print("Bot: You might possibly have " + possible_disease[0])

User: I have rahses on my skin




{'skin peeling': 0.7825336939136237, 'dischromic  patches': 0.7735862121040926, 'palpitations': 0.7480618497115591, 'polyuria': 0.7192131488064998, 'blister': 0.7113781941355212}


User: I have itching inside my stomach




{'itching': 0.875489223926689, 'stomach pain': 0.872770920245198, 'cramps': 0.8543408312017782, 'internal itching': 0.8531860947230887, 'belly pain': 0.8408240695525722}


User: I have stomach bloating




{'stomach pain': 0.9070720575772043, 'stomach bleeding': 0.894737361948623, 'belly pain': 0.8595098952669781, 'nausea': 0.8410577493790309, 'loss of appetite': 0.8334809503065255}


User: I am quite restless




{'restlessness': 0.978481525184779, 'anxiety': 0.8835983463242364, 'irritability': 0.858096998681838, 'fatigue': 0.8320044963492982, 'itching': 0.8293418967238948}


User: I have high heart rate




{'fast heart rate': 0.871004436865801, 'high fever': 0.738894567576441, 'increased appetite': 0.6889495320620109, 'sweating': 0.6863724587359993, 'breathlessness': 0.6560663872972644}


User: quit


Bot: You might possibly have Drug Reaction
