In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Importing the dataset
training_dataset = pd.read_csv('Training.csv')
test_dataset = pd.read_csv('Testing.csv')


In [3]:
X = training_dataset.iloc[:, 0:132].values
print(X)


[[1 1 1 ... 0 0 0]
 [0 1 1 ... 0 0 0]
 [1 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 1 1 1]]


In [4]:
X.shape

(4920, 132)

In [5]:
print("Shape of the array:", X.shape)
print("Number of dimensions:", X.ndim)
print("Total number of elements:", X.size)


Shape of the array: (4920, 132)
Number of dimensions: 2
Total number of elements: 649440


In [6]:
print("Data type of elements:", X.dtype)


Data type of elements: int64


In [8]:
print("Array summary:")
print(X)


Array summary:
[[1 1 1 ... 0 0 0]
 [0 1 1 ... 0 0 0]
 [1 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 1 1 1]]


In [9]:
y = training_dataset.iloc[:, -1].values
print(y)

['Fungal infection' 'Fungal infection' 'Fungal infection' ...
 'Urinary tract infection' 'Psoriasis' 'Impetigo']


In [10]:
# Dimensionality Reduction for removing redundancies
dimensionality_reduction = training_dataset.groupby(training_dataset['prognosis']).max()
print(dimensionality_reduction)

                                         itching  skin_rash  \
prognosis                                                     
(vertigo) Paroymsal  Positional Vertigo        0          0   
AIDS                                           0          0   
Acne                                           0          1   
Alcoholic hepatitis                            0          0   
Allergy                                        0          0   
Arthritis                                      0          0   
Bronchial Asthma                               0          0   
Cervical spondylosis                           0          0   
Chicken pox                                    1          1   
Chronic cholestasis                            1          0   
Common Cold                                    0          0   
Dengue                                         0          1   
Diabetes                                       0          0   
Dimorphic hemmorhoids(piles)                   0       

In [11]:
# Encoding String values to integer constants
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)
print(y)

[15 15 15 ... 38 35 27]


In [12]:
# Splitting the dataset into training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [13]:
# Implementing the Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

DecisionTreeClassifier()

In [14]:
accuracy = classifier.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

Accuracy: 1.0


In [15]:
# Saving the information of columns
cols     = training_dataset.columns
cols     = cols[:-1]

In [16]:
# Checking the Important features
importances = classifier.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols

In [17]:
from sklearn.tree import _tree


In [18]:
def execute_bot():

    print("Please reply with yes/Yes or no/No for the following symptoms") 
    def print_disease(node):
        #print(node)
        node = node[0]
        #print(len(node))
        val  = node.nonzero() 
        #print(val)
        disease = labelencoder.inverse_transform(val[0])
        return disease
    def tree_to_code(tree, feature_names):
        tree_ = tree.tree_
        #print(tree_)
        feature_name = [
            feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
            for i in tree_.feature
        ]
        #print("def tree({}):".format(", ".join(feature_names)))
        symptoms_present = []
        def recurse(node, depth):
            indent = "  " * depth
            if tree_.feature[node] != _tree.TREE_UNDEFINED:
                name = feature_name[node]
                threshold = tree_.threshold[node]
                print(name + " ?")
                ans = input()
                ans = ans.lower()
                if ans == 'yes':
                    val = 1
                else:
                    val = 0
                if  val <= threshold:
                    recurse(tree_.children_left[node], depth + 1)
                else:
                    symptoms_present.append(name)
                    recurse(tree_.children_right[node], depth + 1)
            else:
                present_disease = print_disease(tree_.value[node])
                print( "You may have " +  present_disease )
                print()
                red_cols = dimensionality_reduction.columns 
                symptoms_given = red_cols[dimensionality_reduction.loc[present_disease].values[0].nonzero()]
                print("symptoms present  " + str(list(symptoms_present)))
                print()
                print("symptoms given "  +  str(list(symptoms_given)) )  
                print()
                confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)
                print("confidence level is " + str(confidence_level))
                print()
                print('The model suggests:')
                print()
                row = doctors[doctors['disease'] == present_disease[0]]
                print('Consult ', str(row['name'].values))
                print()
                print('Visit ', str(row['link'].values))
                #print(present_disease[0])
                
    
        recurse(0, 1)
    
    tree_to_code(classifier,cols)


This code appears to be a part of a diagnostic system that helps users identify potential diseases based on their symptoms. It uses a decision tree classifier to navigate through symptoms and predict diseases. Here’s a detailed step-by-step explanation:

Step-by-Step Explanation
Function Definition:

execute_bot(): This is the main function that will be executed.
Print Prompt:

print("Please reply with yes/Yes or no/No for the following symptoms"): Informs the user to respond with "yes" or "no" for the upcoming symptom questions.
Disease Prediction Function:

def print_disease(node): This function takes a node (a numpy array) and returns the disease(s) associated with it.
node = node[0]: Extract the first element from the node array.
val = node.nonzero(): Find the indices of non-zero elements in the node.
disease = labelencoder.inverse_transform(val[0]): Decode the indices to the corresponding disease names using labelencoder.
return disease: Return the predicted disease(s).
Decision Tree to Code Function:

def tree_to_code(tree, feature_names): Converts the decision tree into a set of if-else conditions to query the user about symptoms.
tree_ = tree.tree_: Access the internal tree structure of the classifier.
feature_name = [...]: Create a list of feature names corresponding to the indices in the tree. If a node is undefined, it's labeled as "undefined!".
Recursive Function to Traverse the Tree:

def recurse(node, depth): Recursively traverses the decision tree based on user input.
indent = " " * depth: Creates indentation for better readability in the nested structure.
If the current node is not a leaf node (tree_.feature[node] != _tree.TREE_UNDEFINED):
name = feature_name[node]: Get the feature name for the current node.
threshold = tree_.threshold[node]: Get the threshold value for the current node.
print(name + " ?"): Ask the user if they have the symptom corresponding to the current feature.
ans = input(): Get the user's input.
ans = ans.lower(): Convert the input to lowercase.
val = 1 if ans == 'yes' else 0: Assign a value of 1 for "yes" and 0 for "no".
Based on the value and the threshold, decide which child node to visit next (recurse(tree_.children_left[node], depth + 1) or recurse(tree_.children_right[node], depth + 1)).
If the symptom is present (val > threshold), add the symptom name to symptoms_present.
If the current node is a leaf node (else):
present_disease = print_disease(tree_.value[node]): Get the predicted disease for the current node.
print("You may have " + present_disease): Inform the user of the potential disease.
red_cols = dimensionality_reduction.columns: Get the columns from the dimensionality reduction DataFrame.
symptoms_given = red_cols[dimensionality_reduction.loc[present_disease].values[0].nonzero()]: Get the symptoms associated with the predicted disease.
Print the symptoms present and the symptoms given by the model.
confidence_level = (1.0*len(symptoms_present))/len(symptoms_given): Calculate the confidence level of the prediction.
Print the confidence level.
Provide doctor recommendations based on the disease.
row = doctors[doctors['disease'] == present_disease[0]]: Get the row from the doctors DataFrame that matches the predicted disease.
print('Consult ', str(row['name'].values)): Print the doctor's name.
print('Visit ', str(row['link'].values)): Print the link to visit the doctor.
Execute the Recursive Function:

recurse(0, 1): Start the recursive traversal of the decision tree from the root node (0) with a depth of 1.
Execute the Decision Tree Function:

tree_to_code(classifier, cols): Execute the tree_to_code function with the decision tree classifier and the feature names.
Explanation of Key Variables
classifier: The decision tree classifier used for predicting diseases.
cols: The feature names (symptoms) used in the decision tree.
labelencoder: An encoder that transforms disease labels into indices and vice versa.
dimensionality_reduction: A DataFrame that maps diseases to their symptoms.
doctors: A DataFrame containing information about doctors and their specialties.
Assumptions
The code assumes that the necessary libraries (e.g., pandas, numpy, sklearn) and variables (classifier, cols, labelencoder, dimensionality_reduction, doctors) are defined and available in the scope where execute_bot() is called.


In [25]:
doc_dataset = pd.read_csv('doctors_dataset.csv', names=['Name', 'Description'])

diseases = dimensionality_reduction.index
doctors = pd.DataFrame({'disease': diseases, 'name': np.nan, 'link': np.nan})

for disease in diseases:
    record = doc_dataset[doc_dataset['Name'] == disease]
    if not record.empty:
        doctors.loc[doctors['disease'] == disease, ['name', 'link']] = record.values[0]

# Now you can access the information for a specific disease like 'AIDS'
record = doctors[doctors['disease'] == 'AIDS']
doctor_name = record['name'].values[0]
doctor_link = record['link'].values[0]
print("Doctor Name:", doctor_name)
print("Doctor Link:", doctor_link)

record = doctors[doctors['disease'] == 'Pneumonia']
doctor_name = record['name'].values[0]
doctor_link = record['link'].values[0]
print("Doctor Name:", doctor_name)
print("Doctor Link:", doctor_link)


Doctor Name: nan
Doctor Link: nan
Doctor Name: nan
Doctor Link: nan


In [19]:

# Execute the bot and see it in Action
execute_bot()


Please reply with yes/Yes or no/No for the following symptoms
palpitations ?
yes
['You may have Hypoglycemia']

symptoms present  ['palpitations']

symptoms given ['vomiting', 'fatigue', 'anxiety', 'sweating', 'headache', 'nausea', 'blurred_and_distorted_vision', 'excessive_hunger', 'drying_and_tingling_lips', 'slurred_speech', 'irritability', 'palpitations']

confidence level is 0.08333333333333333

The model suggests:



NameError: name 'doctors' is not defined