In [1]:
#All required packages imported

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [2]:
#Function to import dataset

def dataimport():
    balance_data = pd.read_csv('Disease_dataset.csv', 
    sep= ',', encoding ="ISO-8859-1")
    
    #to print shape of dataset
    print("Dataset length = ", len(balance_data))
    print("Dataset shape = ", balance_data.shape)
    
    #to print observations of dataset
    print("Dataset: \n", balance_data.head())
    return balance_data    

In [3]:
balance_data = dataimport()

Dataset length =  99
Dataset shape =  (99, 80)
Dataset: 
     DISEASES  abdominal_pain  loss_of_taste_and_smell  acidity  anxiety  \
0  Sinusitis               0                        0        0        0   
1  Sinusitis               0                        0        0        0   
2  Sinusitis               0                        0        0        0   
3  Sinusitis               0                        0        0        0   
4  Sinusitis               0                        1        0        0   

   back_pain  bladder_discomfort  blister  blood_in_sputum(phlegm)  \
0          0                   0        0                        0   
1          0                   0        0                        0   
2          0                   0        0                        0   
3          0                   0        0                        0   
4          0                   0        0                        0   

   bloody_stool            ...             swelling_of_stomach  \
0   

In [4]:
#Function to split dataset

def data_split(balance_data):
    
    #separating target variable
    X = balance_data.values[:, 1:]
    Y = balance_data.values[:, 0]
    
    
    #spliting data into train and test
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 100)
    
    return X, Y, X_train, X_test, Y_train, Y_test

In [5]:
data_split(balance_data)


(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=object),
 array(['Sinusitis', 'Sinusitis', 'Sinusitis', 'Sinusitis', 'Sinusitis',
        'Sinusitis', 'Sinusitis', 'Sinusitis', 'Sinusitis', 'Sinusitis',
        'Common cold', 'Common cold', 'Common cold', 'Common cold',
        'Common cold', 'Common cold', 'Common cold', 'Common cold',
        'Common cold', 'Common cold', 'Common cold', 'Chickenpox',
        'Chickenpox', 'Chickenpox', 'Chickenpox', 'Chickenpox',
        'Chickenpox', 'Chickenpox', 'Chickenpox', 'Chickenpox',
        'Chickenpox', 'Chickenpox', 'HIV/AIDS', 'HIV/AIDS', 'HIV/AIDS',
        'HIV/AIDS', 'HIV/AIDS', 'HIV/AIDS', 'HIV/AIDS', 'HIV/AIDS',
        'HIV/AIDS', 'HIV/AIDS', 'HIV/AIDS', 'Influenza (flu)',
        'Influenza (flu)', 'Influenza (flu)', 'Influenza (flu)',
        'Influenza (flu)', '

In [6]:
#to train the tree using gini index
def train_using_gini(X_train, X_test, Y_train):
    
    #Create classifier object
    clf_gini = DecisionTreeClassifier(criterion = "gini", random_state = 100, max_depth = 10, min_samples_leaf = 5)    
    
    #train the object
    clf_gini.fit(X_train, Y_train)
    return clf_gini

In [11]:
#function to calculate accuracy of tree
def cal_accuracy(Y_test, Y_pred):
    
    #print("Confusion matrix : ", confusion_matrix(Y_test, Y_pred))
    print("Accuracy : ", accuracy_score(Y_test, Y_pred)*100)
    print("Classification report : \n", classification_report(Y_test, Y_pred))

In [12]:
def prediction(X_test, clf_object):
    y_pred = clf_object.predict(X_test)
    print(X_test)
    print("Predicted values: ")
    print(y_pred)
    return y_pred

In [13]:
def main():
    
    #getting 
    data = dataimport()
    X, Y, X_train, X_test, Y_train, Y_test = data_split(data)
    clf_gini = train_using_gini(X_train, X_test, Y_train)
    
    symptoms_list=['abdominal_pain', 'loss_of_taste_and_smell', 'acidity', 'anxiety', 'back_pain', 'bladder_discomfort', 'blister', 'blood_in_sputum(phlegm)', 'bloody_stool', 'blurred_and_distorted_vision', 'breathlessness', 'brittle_nails', 'bruising', 'burning_urination', 'chest_pain', 'chills', 'cold', 'confusion', 'nasal_congestion', 'constipation', 'cough', 'cramps', 'dehydration', 'depression', 'diarrhoea', 'difficulty_swallowing', 'dizziness', 'dry_skin', 'facial_pain','earache','fast_heart_rate','fatigue','headache','heartburn','high_fever','increased_appetite','indigestion','irritability','itching','joint_pain','knee_pain','lack_of_concentration','loss_of_appetite','mild_fever','muscle_pain','muscle_weakness','nausea','neck_pain','numbness','pain_behind_the_eyes','painful_walking','phlegm','pinkish_color_urine','puffy_face_and_eyes','itchy_nose','red_spots_over_body','redness_of_eyes','ringing_in_ears','runny_nose','seizures','skin_discoloration','skin_peeling','skin_rash','sleeplessness','slurred_speech','wheezing','stomach_pain','swelled_lymph_nodes','swelling_joints','swelling_of_stomach','throat_irritation','tooth_ache','watering_from_eyes','weakness_in_limbs','weight_loss','sneezing','extreme_thirst','bleeding_nose','sexual_activity_recently']
    
    i = 0
    inp_list = []
    list_len = len(symptoms_list)
    for i in range (0, list_len):
        print(i+1, symptoms_list[i])
        inp_list.append(0)

    print(inp_list)
    
    choice = 'Y'
#pos = 0
    while choice == 'Y':
        pos = int(input("\n\nEnter the numbers corresponding to the symptoms you are suffering from: "))
        inp_list = inp_list[:pos-1] + [1] + inp_list[pos:]
        print("Do you have any more symptoms? (Y/N)")
        choice = input()
        
    inplist2 = [inp_list]
    inplist2
        
    y_pred = prediction(inplist2, clf_gini)
    #y_pred
    #cal_accuracy(Y_test, y_pred)
    

In [14]:
if __name__ == "__main__":
    main()

Dataset length =  99
Dataset shape =  (99, 80)
Dataset: 
     DISEASES  abdominal_pain  loss_of_taste_and_smell  acidity  anxiety  \
0  Sinusitis               0                        0        0        0   
1  Sinusitis               0                        0        0        0   
2  Sinusitis               0                        0        0        0   
3  Sinusitis               0                        0        0        0   
4  Sinusitis               0                        1        0        0   

   back_pain  bladder_discomfort  blister  blood_in_sputum(phlegm)  \
0          0                   0        0                        0   
1          0                   0        0                        0   
2          0                   0        0                        0   
3          0                   0        0                        0   
4          0                   0        0                        0   

   bloody_stool            ...             swelling_of_stomach  \
0   

In [None]:
symptoms_list=['abdominal_pain', 'loss_of_taste_and_smell', 'acidity', 'anxiety', 'back_pain', 'bladder_discomfort', 'blister', 'blood_in_sputum(phlegm)', 'bloody_stool', 'blurred_and_distorted_vision', 'breathlessness', 'brittle_nails', 'bruising', 'burning_urination', 'chest_pain', 'chills', 'cold', 'confusion', 'nasal_congestion', 'constipation', 'cough', 'cramps', 'dehydration', 'depression', 'diarrhoea', 'difficulty_swallowing', 'dizziness', 'dry_skin', 'facial_pain','earache','fast_heart_rate','fatigue','headache','heartburn','high_fever','increased_appetite','indigestion','irritability','itching','joint_pain','knee_pain','lack_of_concentration','loss_of_appetite','mild_fever','muscle_pain','muscle_weakness','nausea','neck_pain','numbness','pain_behind_the_eyes','painful_walking','phlegm','pinkish_color_urine','puffy_face_and_eyes','itchy_nose','red_spots_over_body','redness_of_eyes','ringing_in_ears','runny_nose','seizures','skin_discoloration','skin_peeling','skin_rash','sleeplessness','slurred_speech','wheezing','stomach_pain','swelled_lymph_nodes','swelling_joints','swelling_of_stomach','throat_irritation','tooth_ache','watering_from_eyes','weakness_in_limbs','weight_loss','sneezing','extreme_thirst','bleeding_nose','sexual_activity_recently']

In [None]:
i = 0
inp_list = []
list_len = len(symptoms_list)
for i in range (0, list_len):
    print(i+1, symptoms_list[i])
    inp_list.append(0)

print(inp_list)

print("\n\nEnter the numbers corresponding to the symptoms you are suffering from: ")

In [None]:
choice = 'Y'
#pos = 0
while choice == 'Y':
    pos = int(input("\n\nEnter the numbers corresponding to the symptoms you are suffering from: "))
    inp_list = inp_list[:pos-1] + [1] + inp_list[pos:]
    print("Do you have any more symptoms? (Y/N)")
    choice = input()



In [None]:
inp_list


In [None]:
pred = prediction(inp_list, clf_gini)
pred