Import necessary packages

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

Import the dataset

In [2]:
# Load the symptom/prognosis table. The path is relative, so it resolves
# against the directory the notebook kernel was started from.
DATASET_PATH = "../dataset/symptoSense.csv"
dataset = pd.read_csv(DATASET_PATH)

Exploring the dataset: input parameters (symptom columns) and target classes (diseases)

In [3]:
# Every column after the first one is treated as an input parameter;
# print the names one per line.
parameters = dataset.columns[1:]
for parameter in list(parameters):
    print(parameter)

abdominal distention
abnormal appearing skin
abnormal appearing tongue
abnormal breathing sounds
abnormal involuntary movements
abnormal movement of eyelid
abnormal size or shape of ear
absence of menstruation
abusing alcohol
ache all over
acne or pimples
allergic reaction
ankle stiffness or tightness
ankle weakness
antisocial behavior
anxiety and nervousness
apnea
arm cramps or spasms
arm lump or mass
arm pain
arm stiffness or tightness
arm swelling
arm weakness
back cramps or spasms
back mass or lump
back pain
back swelling
back weakness
bedwetting
bladder mass
bleeding from ear
bleeding from eye
bleeding gums
bleeding in mouth
bleeding or discharge from nipple
blindness
blood clots during menstrual periods
blood in stool
blood in urine
bones are painful
bowlegged or knock-kneed
breathing fast
bumps on penis
burning abdominal pain
burning chest pain
change in skin mole size or color
changes in stool appearance
chest tightness
chills
cloudy eye
congestion in chest
constipation
coryza


In [4]:
# Distinct labels found in the 'prognosis' column; print them one per line.
diseases = dataset["prognosis"].unique()
for disease in list(diseases):
    print(disease)

Anemia of chronic disease
Dysthymic disorder
Scarlet fever
Hypertensive heart disease
Polycystic ovarian syndrome (PCOS)
Encephalitis
Cyst of the eyelid
Balanitis
Foreign body in the throat
Alcohol withdrawal
Premature atrial contractions (PACs)
Hemiplegia
Joint effusion
Meningioma
Brain cancer
Placental abruption
Seasonal allergies (hay fever)
Eye alignment disorder
Headache after lumbar puncture
Pyloric stenosis
Salivary gland disorder
Osteochondrosis
Metabolic disorder
Vaginitis
Tinnitus of unknown cause
Glaucoma
Eating disorder
Transient ischemic attack
Pyelonephritis
Rotator cuff injury
Chronic pain disorder
Problem during pregnancy
Liver cancer
Atelectasis
Injury to the hand
Choledocholithiasis
Thoracic aortic aneurysm
Subdural hemorrhage
Congenital rubella
Diabetic retinopathy
Fibromyalgia
Ischemia of the bowel
Fetal alcohol syndrome
Peritonitis
Injury to the abdomen
Acute pancreatitis
Thrombophlebitis
Asthma
Foreign body in the vagina
Restless leg syndrome
Emphysema
Cysticercos

In [5]:
# Summarise the size of the problem: number of input columns and number of
# distinct target labels.
parameter_count = len(parameters)
disease_count = len(diseases)
print("Number of parameters:- ", parameter_count)
print("Number of diseases:- ", disease_count)

Number of parameters:-  367
Number of diseases:-  679


Separating the dependent variable (target) from the independent variables (features)

In [6]:
# Target: the disease label of each row.
Y = dataset["prognosis"]
# Features: every column except the target. The target is dropped by name
# rather than by position (previously `iloc[:, 1:]`), so the label cannot
# leak into X if the column order of the CSV ever changes. With 'prognosis'
# as the first column the result is identical to the positional slice.
X = dataset.drop(columns=["prognosis"])

In [7]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
# NOTE(review): the split is not stratified, so with this many classes some
# labels may be absent from one side of the split -- consider `stratify=Y`
# if every class has at least two rows (TODO confirm against the data).
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Single decision tree that picks splits by entropy; the seed is fixed so
# repeated runs are reproducible. `fit` returns the estimator itself, so the
# construction and training can be chained.
Tree = DecisionTreeClassifier(random_state=42, criterion="entropy").fit(X_train, Y_train)
# Leave the fitted estimator as the cell's last expression so it is displayed.
Tree

In [9]:
# Score the decision tree on the rows it was trained on.
Y_train_pred = Tree.predict(X_train)
train_accuracy = accuracy_score(Y_train, Y_train_pred)
train_confusion_matrix = confusion_matrix(Y_train, Y_train_pred)
train_classification_report = classification_report(Y_train, Y_train_pred)

# Same report layout as separate print calls: one item per line.
print("TRAIN DATA", "Accuracy Score", sep="\n")
print(train_accuracy * 100, "%")
print(
    "Confusion Matrix",
    train_confusion_matrix,
    "Classification Report",
    train_classification_report,
    sep="\n",
)

TRAIN DATA
Accuracy Score
99.64233115926783 %
Confusion Matrix
[[7 0 0 ... 0 0 0]
 [0 8 0 ... 0 0 0]
 [0 0 7 ... 0 0 0]
 ...
 [0 0 0 ... 8 0 0]
 [0 0 0 ... 0 6 0]
 [0 0 0 ... 0 0 7]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         7
                                        Abdominal hernia       1.00      1.00      1.00         8
                                         Abscess of nose       1.00      1.00      1.00         7
                                     Abscess of the lung       1.00      1.00      1.00         5
                                  Abscess of the pharynx       1.00      1.00      1.00         7
                                    Acanthosis nigricans       1.00      1.00      1.00         8
                                               Acariasis       1.00      1.00      1.00         8
          

In [10]:
# Score the decision tree on the held-out rows.
Y_test_pred = Tree.predict(X_test)
test_accuracy = accuracy_score(Y_test, Y_test_pred)
test_confusion_matrix = confusion_matrix(Y_test, Y_test_pred)
# Some labels are never predicted (or never occur) in the test split, which
# makes their precision/recall undefined. The default zero_division="warn"
# already scores those as 0 but emits an UndefinedMetricWarning for each one;
# passing 0 explicitly keeps the same numbers without flooding the output.
test_classification_report = classification_report(Y_test, Y_test_pred, zero_division=0)

print("TEST DATA")
print("Accuracy Score")
print(test_accuracy * 100, "%")
print("Confusion Matrix")
print(test_confusion_matrix)
print("Classification Report")
print(test_classification_report)

TEST DATA
Accuracy Score
43.29896907216495 %
Confusion Matrix
[[3 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 1]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         3
                                        Abdominal hernia       0.40      1.00      0.57         2
                                         Abscess of nose       0.17      0.33      0.22         3
                                     Abscess of the lung       0.75      0.60      0.67         5
                                  Abscess of the pharynx       0.20      0.33      0.25         3
                                    Acanthosis nigricans       0.50      0.50      0.50         2
                                               Acariasis       0.67      1.00      0.80         2
           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
# Ensemble of 100 trees with a fixed seed for reproducibility. `fit` returns
# the estimator itself, so construction and training are chained.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, Y_train)
# Leave the fitted estimator as the cell's last expression so it is displayed.
random_forest

In [12]:
# Score the random forest on the rows it was trained on.
Y_pred_train_forest = random_forest.predict(X_train)
accuracy_train_forest = accuracy_score(Y_train, Y_pred_train_forest)
confusion_matrix_train_forest = confusion_matrix(Y_train, Y_pred_train_forest)
classification_report_train_forest = classification_report(Y_train, Y_pred_train_forest)

# Same report layout as separate print calls: one item per line.
print("TRAIN DATA", "Accuracy Score", sep="\n")
print(accuracy_train_forest * 100, "%")
print(
    "Confusion Matrix",
    confusion_matrix_train_forest,
    "Classification Report",
    classification_report_train_forest,
    sep="\n",
)

TRAIN DATA
Accuracy Score
99.64233115926783 %
Confusion Matrix
[[7 0 0 ... 0 0 0]
 [0 8 0 ... 0 0 0]
 [0 0 7 ... 0 0 0]
 ...
 [0 0 0 ... 8 0 0]
 [0 0 0 ... 0 6 0]
 [0 0 0 ... 0 0 7]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         7
                                        Abdominal hernia       1.00      1.00      1.00         8
                                         Abscess of nose       1.00      1.00      1.00         7
                                     Abscess of the lung       1.00      1.00      1.00         5
                                  Abscess of the pharynx       1.00      1.00      1.00         7
                                    Acanthosis nigricans       1.00      1.00      1.00         8
                                               Acariasis       1.00      1.00      1.00         8
          

In [13]:
# Score the random forest on the held-out rows.
Y_pred_test_forest = random_forest.predict(X_test)
accuracy_test_forest = accuracy_score(Y_test, Y_pred_test_forest)
confusion_matrix_test_forest = confusion_matrix(Y_test, Y_pred_test_forest)
# Labels with no predicted or no true samples in the test split have undefined
# precision/recall. The default zero_division="warn" scores them as 0 and
# raises an UndefinedMetricWarning each time; passing 0 explicitly reports the
# same values without the repeated warnings.
classification_report_test_forest = classification_report(
    Y_test, Y_pred_test_forest, zero_division=0
)

print("TEST DATA")
print("Accuracy Score")
print(accuracy_test_forest * 100, "%")
print("Confusion Matrix")
print(confusion_matrix_test_forest)
print("Classification Report")
print(classification_report_test_forest)

TEST DATA
Accuracy Score
92.8325969563083 %
Confusion Matrix
[[3 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         3
                                        Abdominal hernia       1.00      1.00      1.00         2
                                         Abscess of nose       0.75      1.00      0.86         3
                                     Abscess of the lung       1.00      1.00      1.00         5
                                  Abscess of the pharynx       1.00      1.00      1.00         3
                                    Acanthosis nigricans       1.00      1.00      1.00         2
                                               Acariasis       1.00      1.00      1.00         2
            

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
# Multi-layer perceptron with one hidden layer of 100 units, up to 1000
# training iterations, and a fixed seed for reproducibility. `fit` returns the
# estimator itself, so construction and training are chained.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    random_state=42,
).fit(X_train, Y_train)
# Leave the fitted estimator as the cell's last expression so it is displayed.
mlp

In [15]:
# Score the MLP on the rows it was trained on.
Y_pred_train_mlp = mlp.predict(X_train)
accuracy_train_mlp = accuracy_score(Y_train, Y_pred_train_mlp)
confusion_matrix_train_mlp = confusion_matrix(Y_train, Y_pred_train_mlp)
# Stored under an MLP-specific name: the report was previously assigned to
# `classification_report_train_forest`, silently overwriting the random
# forest's training report.
classification_report_train_mlp = classification_report(Y_train, Y_pred_train_mlp)

# These are training-set metrics, so the banner says TRAIN (it used to print
# "TEST DATA", a copy-paste slip).
print("TRAIN DATA")
print("Accuracy Score")
print(accuracy_train_mlp * 100, "%")
print("Confusion Matrix")
print(confusion_matrix_train_mlp)
print("Classification Report")
print(classification_report_train_mlp)

TEST DATA
Accuracy Score
99.62129181569534 %
Confusion Matrix
[[7 0 0 ... 0 0 0]
 [0 8 0 ... 0 0 0]
 [0 0 7 ... 0 0 0]
 ...
 [0 0 0 ... 8 0 0]
 [0 0 0 ... 0 6 0]
 [0 0 0 ... 0 0 7]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         7
                                        Abdominal hernia       1.00      1.00      1.00         8
                                         Abscess of nose       1.00      1.00      1.00         7
                                     Abscess of the lung       1.00      1.00      1.00         5
                                  Abscess of the pharynx       1.00      1.00      1.00         7
                                    Acanthosis nigricans       1.00      1.00      1.00         8
                                               Acariasis       1.00      1.00      1.00         8
           

In [17]:
# Score the MLP on the held-out rows.
Y_pred_test_mlp = mlp.predict(X_test)
accuracy_test_mlp = accuracy_score(Y_test, Y_pred_test_mlp)
confusion_matrix_test_mlp = confusion_matrix(Y_test, Y_pred_test_mlp)
# Stored under an MLP-specific name: the report was previously assigned to
# `classification_report_test_forest`, silently overwriting the random
# forest's test report.
# zero_division=0 (instead of 1) scores undefined precision/recall as 0, which
# is what the decision-tree and random-forest reports in this notebook do;
# using 1 would credit never-predicted classes with a perfect score and make
# the MLP's averages incomparable with the other models'.
classification_report_test_mlp = classification_report(
    Y_test, Y_pred_test_mlp, zero_division=0
)

print("TEST DATA")
print("Accuracy Score")
print(accuracy_test_mlp * 100, "%")
print("Confusion Matrix")
print(confusion_matrix_test_mlp)
print("Classification Report")
print(classification_report_test_mlp)

TEST DATA
Accuracy Score
92.8325969563083 %
Confusion Matrix
[[3 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
Classification Report
                                                          precision    recall  f1-score   support

                               Abdominal aortic aneurysm       1.00      1.00      1.00         3
                                        Abdominal hernia       1.00      1.00      1.00         2
                                         Abscess of nose       0.75      1.00      0.86         3
                                     Abscess of the lung       1.00      1.00      1.00         5
                                  Abscess of the pharynx       1.00      1.00      1.00         3
                                    Acanthosis nigricans       1.00      1.00      1.00         2
                                               Acariasis       1.00      1.00      1.00         2
            