In [1]:
import pandas as pd
pd.option_context("display.max_rows",None)
pd.option_context("display.max_columns",None)

from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [2]:
data = pd.read_csv("backend/dataset/train_disease.csv")

In [3]:
data


Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis,Unnamed: 133
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo,
4916,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,Acne,
4917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Urinary tract infection,
4918,0,1,0,0,0,0,1,0,0,0,...,0,1,1,1,1,0,0,0,Psoriasis,


In [4]:
data = data.drop(["Unnamed: 133"],axis=1)

In [5]:
data.shape

(4920, 133)

In [6]:
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [7]:
data.isna().sum()

itching                 0
skin_rash               0
nodal_skin_eruptions    0
continuous_sneezing     0
shivering               0
                       ..
inflammatory_nails      0
blister                 0
red_sore_around_nose    0
yellow_crust_ooze       0
prognosis               0
Length: 133, dtype: int64

In [8]:
data.columns

Index(['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing',
       'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity',
       'ulcers_on_tongue',
       ...
       'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting',
       'small_dents_in_nails', 'inflammatory_nails', 'blister',
       'red_sore_around_nose', 'yellow_crust_ooze', 'prognosis'],
      dtype='object', length=133)

In [9]:
data["prognosis"].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [10]:
data['prognosis'] = data['prognosis'].str.strip().replace({
    'hepatitis A': 'Hepatitis A',
    'Fungal infection': 'Fungal Infection',
    'Chronic cholestasis': 'Chronic Cholestasis',
    'Peptic ulcer diseae': 'Peptic Ulcer Disease',
    'Cervical spondylosis': 'Cervical Spondylosis',
    'Paralysis (brain hemorrhage)' : 'Paralysis (Brain Hemorrhage)',
    'Chicken pox': 'Chicken Pox',
    'Alcoholic hepatitis': 'Alcoholic Hepatitis',
    'Dimorphic hemmorhoids(piles)': 'Dimorphic Hemmorhoids (Piles)',
    'Heart attack': 'Heart Attack',
    'Varicose veins': 'Varicose Veins',
    'Urinary tract infection': 'Urinary Tract Infection',
    '(vertigo) Paroymsal  Positional Vertigo': 'Paroymsal Positional Vertigo'
})

In [11]:
encoder = LabelEncoder()
data['prognosis'] = encoder.fit_transform(data['prognosis'])

In [12]:
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14


In [13]:
X = data.drop(columns=['prognosis'])
y = data['prognosis']

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# Initialize Random Forest Classifier
rf_model = RandomForestClassifier(random_state=24)

# Train Random Forest model
rf_model.fit(X_train, y_train)

# Make predictions
rf_model_preds = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, rf_model_preds)
classification_rep = classification_report(y_test, rf_model_preds)

# Preview model evaluations
print(f'Random Forest Model Accuracy Score: {accuracy}')
print(f'Random Forest Classification Report:\n{classification_rep}')

Random Forest Model Accuracy Score: 1.0
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        25
           3       1.00      1.00      1.00        24
           4       1.00      1.00      1.00        23
           5       1.00      1.00      1.00        33
           6       1.00      1.00      1.00        23
           7       1.00      1.00      1.00        21
           8       1.00      1.00      1.00        15
           9       1.00      1.00      1.00        23
          10       1.00      1.00      1.00        26
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        29
          13       1.00      1.00      1.00        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        28
    

In [16]:
test = pd.read_csv("backend/dataset/test_disease.csv")

In [17]:
test = test.drop(columns=["prognosis"])

test.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
rf_test_preds = rf_model.predict(test)

In [19]:
rf_test_preds

array([14,  3, 15,  8, 13, 34,  0, 11, 16,  5, 23, 30,  6, 32, 28, 29,  7,
       10, 38, 18, 19, 20, 21, 22,  2, 37,  9, 35, 12, 17, 40, 26, 24, 25,
       31,  4, 33,  1, 39, 36, 27,  7])

In [20]:
rf_preds_labels = encoder.inverse_transform(rf_test_preds)

In [21]:
rf_preds_labels

array(['Fungal Infection', 'Allergy', 'GERD', 'Chronic Cholestasis',
       'Drug Reaction', 'Peptic Ulcer Disease', 'AIDS', 'Diabetes',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension', 'Migraine',
       'Cervical Spondylosis', 'Paralysis (Brain Hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken Pox', 'Dengue', 'Typhoid', 'Hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic Hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic Hemmorhoids (Piles)', 'Heart Attack', 'Varicose Veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis', 'Paroymsal Positional Vertigo',
       'Acne', 'Urinary Tract Infection', 'Psoriasis', 'Impetigo',
       'Chicken Pox'], dtype=object)