# Data Preprocessing

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
# Importing the dependencies
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline


In [9]:
#Reading the data from the drive
data = pd.read_csv("/content/drive/MyDrive/B.Tech Project/Training.csv")

In [10]:
#Checking the dataset
data.head(5)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo


In [11]:
#printing the col names
for col in data.columns:
  print(col)

itching
skin_rash
nodal_skin_eruptions
continuous_sneezing
shivering
chills
joint_pain
stomach_pain
acidity
ulcers_on_tongue
muscle_wasting
vomiting
burning_micturition
spotting_ urination
fatigue
weight_gain
anxiety
cold_hands_and_feets
mood_swings
weight_loss
restlessness
lethargy
patches_in_throat
irregular_sugar_level
cough
high_fever
sunken_eyes
breathlessness
sweating
dehydration
indigestion
headache
yellowish_skin
dark_urine
nausea
loss_of_appetite
pain_behind_the_eyes
back_pain
constipation
abdominal_pain
diarrhoea
mild_fever
yellow_urine
yellowing_of_eyes
acute_liver_failure
fluid_overload
swelling_of_stomach
swelled_lymph_nodes
malaise
blurred_and_distorted_vision
phlegm
throat_irritation
redness_of_eyes
sinus_pressure
runny_nose
congestion
chest_pain
weakness_in_limbs
fast_heart_rate
pain_during_bowel_movements
pain_in_anal_region
bloody_stool
irritation_in_anus
neck_pain
dizziness
cramps
bruising
obesity
swollen_legs
swollen_blood_vessels
puffy_face_and_eyes
enlarged_thyroi

In [12]:
#Dropping the rows based on some specific classes which does not belongs to any hierarchy
data = data[data['prognosis'] != 'Chronic cholestasis']
data = data[data['prognosis'] != 'Paralysis (brain hemorrhage)']
data = data[data['prognosis'] != 'Dimorphic hemmorhoids(piles)'] #This will be the original dataset to work

In [13]:
#copying the data in a new variable for future use
preprocessed_original_data = data.copy()

In [14]:
#Checking the shape again after cleaning irrelevant rows
data.shape

(4560, 133)

In [15]:
#Saving the updated dataset as csv file 
data.to_csv('updated_data.csv')

In [16]:
data.head(5)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo


In [17]:
count = data['prognosis'].unique()
print(count)

['(vertigo) Paroymsal  Positional Vertigo' 'Acne' 'AIDS'
 'Alcoholic hepatitis' 'Allergy' 'Arthritis' 'Bronchial Asthma'
 'Cervical spondylosis' 'Chicken pox' 'Common Cold' 'Dengue' 'Diabetes '
 'Drug Reaction' 'Fungal infection' 'Gastroenteritis' 'GERD'
 'Heart attack' 'hepatitis A' 'Hepatitis B' 'Hepatitis C' 'Hepatitis D'
 'Hepatitis E' 'Hypertension ' 'Hyperthyroidism' 'Hypoglycemia'
 'Hypothyroidism' 'Impetigo' 'Jaundice' 'Malaria' 'Migraine'
 'Osteoarthristis' 'Peptic ulcer diseae' 'Pneumonia' 'Psoriasis'
 'Tuberculosis' 'Typhoid' 'Urinary tract infection' 'Varicose veins']


# Level 1 Data Preprocessing

In [18]:
data1 = data.copy()

In [19]:
#Degenerate
data1 = data.replace(['(vertigo) Paroymsal  Positional Vertigo', 'Acne',
 'Alcoholic hepatitis', 'Allergy', 'Arthritis','Bronchial Asthma',
 'Cervical spondylosis', 'Common Cold',
 'Drug Reaction', 'Gastroenteritis', 'GERD',
 'Heart attack', 'Hypertension ', 'Hyperthyroidism',
 'Hypothyroidism', 'Impetigo', 'Jaundice', 'Migraine',
 'Osteoarthristis', 'Peptic ulcer diseae', 'Pneumonia', 'Psoriasis', 'Varicose veins'], 'Degenerate')


In [20]:
#Immunological
data1 = data1.replace(['AIDS',
 'Chicken pox', 'Dengue', 'Fungal infection', 'hepatitis A' ,'Hepatitis B', 'Hepatitis C', 'Hepatitis D',
 'Hepatitis E', 'Malaria',
 'Tuberculosis', 'Typhoid', 'Urinary tract infection'], 'Immunological')


In [21]:
#Genetic
data1 = data1.replace(['Diabetes ', 'Hypoglycemia'], 'Genetic')

In [22]:
#Checking the target hierarchy
count = data1['prognosis'].unique()
print(count)

['Degenerate' 'Immunological' 'Genetic']


In [23]:
#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

Degenerate
Immunological
Genetic
2


Label Encoding for data1

In [24]:
#Printing the level 1 preprocessed dataset
data1.head(5)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Degenerate
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Degenerate
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Degenerate
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Degenerate
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Degenerate


# Level 1 classifier (rfc1)

In [25]:
X_train, X_test, y_train, y_test = train_test_split(data1.drop('prognosis', axis=1), data1['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_disease = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_disease.fit(X_train, y_train)

# make predictions on the training set
print(f"Accuracy on train data by classifier\
: {accuracy_score(y_train,rfc_disease.predict(X_train))}")

# make predictions on the testing set
y_pred = rfc_disease.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on test data by classifier: ", accuracy)

Accuracy on train data by classifier: 1.0
Accuracy on test data by classifier:  1.0


In [26]:
"""for i in range(len(y_pred)):
  if y_pred[i] == 'Degenerate':
    print("True")"""

'for i in range(len(y_pred)):\n  if y_pred[i] == \'Degenerate\':\n    print("True")'

# Level 2 for Degenrate

In [27]:
data2 = data.copy()
#DegerateSubClasses
#Gastrointestinal
data2 = data2.replace(['Alcoholic hepatitis', 'Gastroenteritis', 'GERD','Peptic ulcer diseae'], 'Gastrointestinal')

#Respiratory
data2 = data2.replace(['Bronchial Asthma','Common Cold','Pneumonia'], 'Respiratory')

#Cardiovascular
data2 = data2.replace(['Heart attack', 'Hypertension ','Varicose veins'], 'Cardiovascular')

#Endocrine
data2 = data2.replace(['Hyperthyroidism','Hypothyroidism'], 'Endocrine')

#Integumentary
data2 = data2.replace(['Acne','Impetigo', 'Jaundice', 'Psoriasis'], 'Integumentary')

#Nervous
data2 = data2.replace(['(vertigo) Paroymsal  Positional Vertigo','Migraine'], 'Nervous')

#Allergy
data2 = data2.replace([ 'Allergy','Drug Reaction'], 'Allergy')

#Musculoskeletal 
data2 = data2.replace(['Arthritis','Cervical spondylosis','Osteoarthristis'], 'Musculoskeletal')

#Selecting the desired rows
data2 = data2[data2['prognosis'].str.contains('Musculoskeletal') | data2['prognosis'].str.contains('Allergy') | data2['prognosis'].str.contains('Nervous') | data2['prognosis'].str.contains('Gastrointestinal') | data2['prognosis'].str.contains('Endocrine') | data2['prognosis'].str.contains('Integumentary') | data2['prognosis'].str.contains('Respiratory') | data2['prognosis'].str.contains('Cardiovascular')]

In [28]:
#Checking the target hierarchy
count = data2['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Nervous' 'Integumentary' 'Gastrointestinal' 'Allergy' 'Musculoskeletal'
 'Respiratory' 'Cardiovascular' 'Endocrine']
Nervous
Integumentary
Gastrointestinal
Allergy
Musculoskeletal
Respiratory
Cardiovascular
Endocrine
7


# Level 2 for Immunological

In [29]:
data3 = data.copy()

#ImmunologicalSubClass
#Fungal
data3 = data3.replace(['Fungal infection'], 'Fungal')

#Bacterial
data3 = data3.replace(['Tuberculosis', 'Typhoid', 'Urinary tract infection'], 'Bacterial')

#Viral
data3 = data3.replace(['AIDS','Chicken pox', 'Dengue', 'hepatitis A' ,'Hepatitis B', 'Hepatitis C', 'Hepatitis D','Hepatitis E'], 'Viral')

#Parasitic
data3 = data3.replace(['Malaria'] , 'Parasitic')

#Selecting the desired rows
data3 = data3[data3['prognosis'].str.contains('Fungal') | data3['prognosis'].str.contains('Bacterial') | data3['prognosis'].str.contains('Viral') | data3['prognosis'].str.contains('Parasitic')]

In [30]:
#Checking the target hierarchy
count = data3['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Viral' 'Fungal' 'Parasitic' 'Bacterial']
Viral
Fungal
Parasitic
Bacterial
3


# Level 2 for Genetic 

In [31]:
data4 = data.copy()
#Genetic subclass
#Inherited
data4 = data4.replace(['Diabetes ', 'Hypoglycemia'], 'Inherited')

#Selecting the desired rows
data4 = data4[data4['prognosis'].str.contains('Inherited')]

In [32]:
#Checking the target hierarchy
count = data4['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Inherited']
Inherited
0


# Level 2 Classifiers

In [33]:
# split data into training and testing sets for data2- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data2.drop('prognosis', axis=1), data2['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_degenerate = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_degenerate.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_degenerate.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [34]:
print(np.unique(y_pred))

['Allergy' 'Cardiovascular' 'Endocrine' 'Gastrointestinal' 'Integumentary'
 'Musculoskeletal' 'Nervous' 'Respiratory']


In [35]:
# split data into training and testing sets for data3-Immunological
X_train, X_test, y_train, y_test = train_test_split(data3.drop('prognosis', axis=1), data3['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_immunological = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_immunological.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_immunological.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [36]:
print(np.unique(y_pred))

['Bacterial' 'Fungal' 'Parasitic' 'Viral']


In [37]:
# split data into training and testing sets for data4-Genetic
X_train, X_test, y_train, y_test = train_test_split(data4.drop('prognosis', axis=1), data4['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_genetic = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_genetic.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_genetic.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [38]:
print(np.unique(y_pred))

['Inherited']


# Level 3 


For immunological

In [39]:
#Fungal- 

data5 = data.copy()
#Selecting the desired rows
data5 = data5[data5['prognosis'].str.contains('Fungal infection')]

In [40]:
#Checking the target hierarchy
count = data5['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Fungal infection']
Fungal infection
0


In [41]:
# split data into training and testing sets for data5- Immunological- Fungal
X_train, X_test, y_train, y_test = train_test_split(data5.drop('prognosis', axis=1), data5['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_fungal = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_fungal.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_fungal.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [42]:
print(np.unique(y_pred))

['Fungal infection']


In [43]:
#Bacterial
data6 = data.copy()
#Selecting the desired rows
data6 = data6[data6['prognosis'].str.contains('Tuberculosis') | data6['prognosis'].str.contains('Typhoid') | data6['prognosis'].str.contains('Urinary tract infection')]

In [44]:
#Checking the target hierarchy
count = data6['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Tuberculosis' 'Typhoid' 'Urinary tract infection']
Tuberculosis
Typhoid
Urinary tract infection
2


In [45]:
# split data into training and testing sets for data6- Immunological- Bacterial
X_train, X_test, y_train, y_test = train_test_split(data6.drop('prognosis', axis=1), data6['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_bacterial = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_bacterial.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_bacterial.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [46]:
print(np.unique(y_pred))

['Tuberculosis' 'Typhoid' 'Urinary tract infection']


In [47]:
#Parasitic
data7 = data.copy()
#Selecting the desired rows
data7 = data7[data7['prognosis'].str.contains('Malaria')]

In [48]:
#Checking the target hierarchy
count = data7['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Malaria']
Malaria
0


In [49]:
# split data into training and testing sets for data7- Immunological-Parasitical
X_train, X_test, y_train, y_test = train_test_split(data7.drop('prognosis', axis=1), data7['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_parasitic = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_parasitic.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_parasitic.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [50]:
print(np.unique(y_pred))

['Malaria']


In [51]:
#Viral
data8 = data.copy()
#Selecting the desired rows
data8 = data8[data8['prognosis'].str.contains('AIDS') | data8['prognosis'].str.contains('Chicken pox') | data8['prognosis'].str.contains('Dengue') | data8['prognosis'].str.contains('hepatitis A') | data8['prognosis'].str.contains('Hepatitis B') | data8['prognosis'].str.contains('Hepatitis C') | data8['prognosis'].str.contains('Hepatitis D') | data8['prognosis'].str.contains('Hepatitis E')]

In [52]:
#Checking the target hierarchy
count = data8['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['AIDS' 'Chicken pox' 'Dengue' 'hepatitis A' 'Hepatitis B' 'Hepatitis C'
 'Hepatitis D' 'Hepatitis E']
AIDS
Chicken pox
Dengue
hepatitis A
Hepatitis B
Hepatitis C
Hepatitis D
Hepatitis E
7


In [53]:
# split data into training and testing sets for data8- Immunological-Viral
X_train, X_test, y_train, y_test = train_test_split(data8.drop('prognosis', axis=1), data8['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_viral = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_viral.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_viral.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [54]:
print(np.unique(y_pred))

['AIDS' 'Chicken pox' 'Dengue' 'Hepatitis B' 'Hepatitis C' 'Hepatitis D'
 'Hepatitis E' 'hepatitis A']


For degenerate sub classes

In [55]:
# for degenerate- Gastrointestinal
data9 = data.copy()
#Selecting the desired rows
data9 = data9[data9['prognosis'].str.contains('GERD') | data9['prognosis'].str.contains('Alcoholic hepatitis') | data9['prognosis'].str.contains('Gastroenteritis') | data9['prognosis'].str.contains('Peptic ulcer diseae')]

In [56]:
#Checking the target hierarchy
count = data9['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Alcoholic hepatitis' 'Gastroenteritis' 'GERD' 'Peptic ulcer diseae']
Alcoholic hepatitis
Gastroenteritis
GERD
Peptic ulcer diseae
3


In [57]:
# split data into training and testing sets for data9- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data9.drop('prognosis', axis=1), data9['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_gastrointestinal = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_gastrointestinal.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_gastrointestinal.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [58]:
print(np.unique(y_pred))

['Alcoholic hepatitis' 'GERD' 'Gastroenteritis' 'Peptic ulcer diseae']


In [59]:
#Respiratory
data10 = data.copy()
#Selecting the desired rows
data10 = data10[data10['prognosis'].str.contains('Bronchial Asthma') | data10['prognosis'].str.contains('Common Cold') | data10['prognosis'].str.contains('Pneumonia')]

In [60]:
#Checking the target hierarchy
count = data10['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Bronchial Asthma' 'Common Cold' 'Pneumonia']
Bronchial Asthma
Common Cold
Pneumonia
2


In [61]:
# split data into training and testing sets for data10- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data10.drop('prognosis', axis=1), data10['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_respiratory = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_respiratory.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_respiratory.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [62]:
print(np.unique(y_pred))

['Bronchial Asthma' 'Common Cold' 'Pneumonia']


In [63]:
#Cardiovascular
data11 = data.copy()
#Selecting the desired rows
data11 = data11[data11['prognosis'].str.contains('Heart attack') | data11['prognosis'].str.contains('Hypertension') | data11['prognosis'].str.contains('Varicose veins')]

In [64]:
#Checking the target hierarchy
count = data11['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Heart attack' 'Hypertension ' 'Varicose veins']
Heart attack
Hypertension 
Varicose veins
2


In [65]:
# split data into training and testing sets for data11- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data11.drop('prognosis', axis=1), data11['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_cardiovascular = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_cardiovascular.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_cardiovascular.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [66]:
print(np.unique(y_pred))

['Heart attack' 'Hypertension ' 'Varicose veins']


In [67]:
""
#Endocrine
data12 = data.copy()
#Selecting the desired rows
data12 = data12[data12['prognosis'].str.contains('Hyperthyroidism') | data12['prognosis'].str.contains('Hypothyroidism')]

In [68]:
#Checking the target hierarchy
count = data12['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Hyperthyroidism' 'Hypothyroidism']
Hyperthyroidism
Hypothyroidism
1


In [69]:
# split data into training and testing sets for data12- Degenerate

X_train, X_test, y_train, y_test = train_test_split(data12.drop('prognosis', axis=1), data12['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_endocrine = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_endocrine.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_endocrine.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [70]:
print(np.unique(y_pred))

['Hyperthyroidism' 'Hypothyroidism']


In [71]:
#Integumentary
data13 = data.copy()
#Selecting the desired rows
data13 = data13[data13['prognosis'].str.contains('Acne') | data13['prognosis'].str.contains('Impetigo') | data13['prognosis'].str.contains('Jaundice') | data13['prognosis'].str.contains('Psoriasis')]

In [72]:
#Checking the target hierarchy
count = data13['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Acne' 'Impetigo' 'Jaundice' 'Psoriasis']
Acne
Impetigo
Jaundice
Psoriasis
3


In [73]:

# split data into training and testing sets for data13- Degenerate

X_train, X_test, y_train, y_test = train_test_split(data13.drop('prognosis', axis=1), data13['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_integumentary = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_integumentary.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_integumentary.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [74]:
print(np.unique(y_pred))

['Acne' 'Impetigo' 'Jaundice' 'Psoriasis']


In [75]:
#Nervous
data14 = data.copy()
#Selecting the desired rows
data14 = data14[data14['prognosis'].str.contains('Paroymsal  Positional Vertigo') | data14['prognosis'].str.contains('Migraine') ]

In [76]:
#Checking the target hierarchy
count = data14['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['(vertigo) Paroymsal  Positional Vertigo' 'Migraine']
(vertigo) Paroymsal  Positional Vertigo
Migraine
1


In [77]:
# split data into training and testing sets for data14- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data14.drop('prognosis', axis=1), data14['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_nervous = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_nervous.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_nervous.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [78]:
print(np.unique(y_pred))

['(vertigo) Paroymsal  Positional Vertigo' 'Migraine']


In [79]:
#Allergy
data15 = data.copy()
#Selecting the desired rows
data15 = data15[data15['prognosis'].str.contains('Allergy') | data15['prognosis'].str.contains('Drug Reaction') ]

In [80]:
#Checking the target hierarchy
count = data15['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Allergy' 'Drug Reaction']
Allergy
Drug Reaction
1


In [81]:
# split data into training and testing sets for data15- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data15.drop('prognosis', axis=1), data15['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_allergy = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_allergy.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_allergy.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [82]:
print(np.unique(y_pred))

['Allergy' 'Drug Reaction']


In [83]:
#Musculoskeletal 
data16 = data.copy()
#Selecting the desired rows
data16 = data16[data16['prognosis'].str.contains('Arthritis') | data16['prognosis'].str.contains('Cervical spondylosis') | data16['prognosis'].str.contains('Osteoarthristis') ]

In [84]:
#Checking the target hierarchy
count = data16['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Arthritis' 'Cervical spondylosis' 'Osteoarthristis']
Arthritis
Cervical spondylosis
Osteoarthristis
2


In [85]:
# split data into training and testing sets for data12- Degenerate
X_train, X_test, y_train, y_test = train_test_split(data16.drop('prognosis', axis=1), data16['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_musculoskeletal = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_musculoskeletal.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_musculoskeletal.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


In [86]:
print(np.unique(y_pred))

['Arthritis' 'Cervical spondylosis' 'Osteoarthristis']


For genetic sub class

In [87]:
#Inherited
data17 = data.copy()
data17 = data17[data17['prognosis'].str.contains('Diabetes') | data17['prognosis'].str.contains('Hypoglycemia')]

In [88]:
#Checking the target hierarchy
count = data17['prognosis'].unique()
print(count)

#Checking the number of target hierarchy
for i in range(len(count)):
  print(count[i])
print(i)

['Diabetes ' 'Hypoglycemia']
Diabetes 
Hypoglycemia
1


In [95]:
# split data into training and testing sets for data17- Genetic
X_train, X_test, y_train, y_test = train_test_split(data17.drop('prognosis', axis=1), data17['prognosis'], test_size=0.2, random_state=42)

# create Random Forest classifier object
rfc_inherited = RandomForestClassifier(n_estimators=100, random_state=42)

# train the classifier
rfc_inherited.fit(X_train, y_train)

# make predictions on the testing set
y_pred = rfc_inherited.predict(X_test)

# evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)


Accuracy:  1.0


# Prediction Part Begins Here

In [96]:
input = data.iloc[1200, 0:132]
print(input)

itching                 0
skin_rash               1
nodal_skin_eruptions    0
continuous_sneezing     0
shivering               0
                       ..
small_dents_in_nails    0
inflammatory_nails      0
blister                 0
red_sore_around_nose    0
yellow_crust_ooze       0
Name: 1320, Length: 132, dtype: object


In [97]:
input =np.array(input).reshape(1,-1)

In [98]:
y_pred = rfc_disease.predict(input)



In [99]:
print(y_pred)

['Immunological']


In [100]:
Output_disease = rfc_disease.predict(input)
if Output_disease == 'Immunological':
   Output_immunological = rfc_immunological.predict(input)
   if Output_immunological == 'Viral':
     Output = rfc_viral.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_immunological, Output))
   elif Output_immunological == 'Bacterial':
      Output = rfc_bacterial.predict(input)
      print('{}--->{}--->{}'.format(Output_disease, Output_immunological, Output))
   elif Output_immunological == 'Fungal':
     Output = rfc_fungal.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_immunological, Output))
   else:
     Output = rfc_parasitic.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_immunological, Output))
elif Output_disease == 'Genetic':
  Output_genetic = rfc_genetic.predict(input)
  if Output_genetic == 'Inherited':
     Output = rfc_inherited.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_genetic, Output))   
else:
   Output_degenerate = rfc_degenerate.predict(input)
   if Output_degenerate == 'Gastrointestinal':
     Output = rfc_gastrointestinal.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Cardiovascular':
      Output = rfc_cardiovascular.predict(input)
      print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Nervous':
     Output = rfc_nervous.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Allergy':
     Output = rfc_allergy.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Musculoskeletal':
      Output = rfc_musculoskeletal.predict(input)
      print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Integumentary':
     Output = rfc_integumentary.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   elif Output_degenerate == 'Endocrine':
     Output = rfc_endocrine.predict(input)
     print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))
   else:
    Output = rfc_respiratory.predict(input)
    print('{}--->{}--->{}'.format(Output_disease, Output_degenerate, Output))

['Immunological']--->['Viral']--->['Dengue']


