# ------------------------- Simple Healthcare Chatbot --------------------------

## Importing the required libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier,_tree
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.tree import export_graphviz

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Importing the datasets

In [2]:
training = pd.read_csv('Chatbot_Training.csv')
testing  = pd.read_csv('Chatbot_Testing.csv')

## Analyzing the data and performing required operations.

In [7]:
training.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [8]:
testing.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Allergy
2,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,GERD
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Chronic cholestasis
4,1,1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,Drug Reaction


In [11]:
training.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 5.0+ MB


In [12]:
testing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 42.7+ KB


In [13]:
training.shape

(4920, 133)

In [14]:
testing.shape

(41, 133)

In [15]:
cols = training.columns
cols = cols[:-1]

In [16]:
x = training[cols]
y = training['prognosis']

In [17]:
y1 = y

In [18]:
reduced_data = training.groupby(training['prognosis']).max()

### We need to map strings to numbers

In [19]:
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)

### Performing the train_test_split operation on the training dataset

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [21]:
testx = testing[cols]
testy = testing['prognosis']  
testy = le.transform(testy)

## Model Building

In [22]:
clf1  = DecisionTreeClassifier()
clf = clf1.fit(x_train,y_train)

In [23]:
importances = clf.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols

In [26]:
print("Please reply Yes or No for the following symptoms") 
def print_disease(node):
    node = node[0]
    val  = node.nonzero() 
    disease = le.inverse_transform(val[0])
    return disease
def tree_to_code(tree, feature_names):
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    symptoms_present = []
    def recurse(node, depth):
        indent = "  " * depth
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            print(name + " ?")
            ans = input()
            ans = ans.lower()
            if ans == 'yes':
                val = 1
            else:
                val = 0
            if  val <= threshold:
                recurse(tree_.children_left[node], depth + 1)
            else:
                symptoms_present.append(name)
                recurse(tree_.children_right[node], depth + 1)
        else:
            present_disease = print_disease(tree_.value[node])
            print( "You may have " +  present_disease )
            red_cols = reduced_data.columns 
            symptoms_given = red_cols[reduced_data.loc[present_disease].values[0].nonzero()]
            print("symptoms present  " + str(list(symptoms_present)))
            print("symptoms given "  +  str(list(symptoms_given)) )  
            confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)

    recurse(0, 1)

tree_to_code(clf,cols)

Please reply Yes or No for the following symptoms
loss_of_smell ?
no
internal_itching ?
no
hip_joint_pain ?
no
polyuria ?
no
silver_like_dusting ?
no
swollen_extremeties ?
no
blood_in_sputum ?
no
yellow_crust_ooze ?
no
nodal_skin_eruptions ?
no
unsteadiness ?
no
weight_loss ?
no
prominent_veins_on_calf ?
no
fluid_overload.1 ?
no
depression ?
yes
['You may have Migraine']
symptoms present  ['depression']
symptoms given ['acidity', 'indigestion', 'headache', 'blurred_and_distorted_vision', 'excessive_hunger', 'stiff_neck', 'depression', 'irritability', 'visual_disturbances']
