In [1]:
# Import necessary tools
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer
print("TF version:", tf.__version__)
# print("TF Hub version:", hub.__version__)

# Check for GPU availability
print("GPU", "available (YESSSS!!!!!)" if tf.config.list_physical_devices("GPU") else "not available :(")

TF version: 2.10.0
GPU available (YESSSS!!!!!)


In [2]:
# Read the two CSVs
df = pd.read_csv(r"C:/Users/amang/Downloads/healthcare 1/dataset.csv")
weights_df = pd.read_csv(r"C:/Users/amang/Downloads/healthcare 1/symptom-severity.csv")
precaution_df = pd.read_csv(r"C:/Users/amang/Downloads/healthcare 1/symptom_precaution.csv")
description_df = pd.read_csv(r"C:/Users/amang/Downloads/healthcare 1/symptom_Description.csv")

In [3]:
# Symptom and Weights (Severity)
weights_df.columns = [c.strip().lower() for c in weights_df.columns]   # e.g. ["symptom","weight"]
symptom_weights = dict(zip(weights_df["symptom"], weights_df["weight"]))

In [4]:
# Clean symptom strings in dataset
label_col    = df.columns[0]          # first col is the diagnosis label
symptom_cols = df.columns[1:]         # remaining cols are symptom slots

In [5]:
# Vectorised string clean‑up (no deprecated applymap):
df[symptom_cols] = (
    df[symptom_cols]
      .apply(lambda col: (
          col
            .astype(str)              # handle NaNs safely
            .str.strip()
            .str.lower()
            .str.replace(" ", "_", regex=False)
            .where(col.notna())       # restore NaNs where they were
      ))
)

In [6]:
# Skip Empty Values in the Dataset
df["symptoms"] = df[symptom_cols].apply(
    lambda row: [s for s in row if pd.notna(s)],
    axis=1
)

In [7]:
# One‑hot encode with scikit‑learn
mlb = MultiLabelBinarizer()
symptom_binary = pd.DataFrame(
    mlb.fit_transform(df["symptoms"]),
    columns=mlb.classes_,
    index=df.index
)

In [8]:
# Convert binary matrix to weighted matrix in one vectorised step
weight_series    = pd.Series(symptom_weights)
symptom_weighted = symptom_binary.mul(weight_series, axis=1).fillna(0)

In [9]:
# Concatenate with the label column
final_df = pd.concat(
    [symptom_weighted, df[[label_col]].rename(columns={label_col: "label"})],
    axis=1
)

In [10]:
final_df

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(vertigo) Paroymsal Positional Vertigo
4916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Acne
4917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Urinary tract infection
4918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Psoriasis


In [11]:
weights_df.sort_values(by='symptom', inplace = True)

In [12]:
weights_df

Unnamed: 0,symptom,weight
39,abdominal_pain,4
101,abnormal_menstruation,6
8,acidity,3
44,acute_liver_failure,6
98,altered_sensorium,2
...,...,...
19,weight_loss,3
131,yellow_crust_ooze,3
42,yellow_urine,4
43,yellowing_of_eyes,4


In [13]:
precaution_df

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths
5,GERD,avoid fatty spicy food,avoid lying down after eating,maintain healthy weight,exercise
6,Chronic cholestasis,cold baths,anti itch medicine,consult doctor,eat healthy
7,hepatitis A,Consult nearest hospital,wash hands through,avoid fatty spicy food,medication
8,Osteoarthristis,acetaminophen,consult nearest hospital,follow up,salt baths
9,(vertigo) Paroymsal Positional Vertigo,lie down,avoid sudden change in body,avoid abrupt head movment,relax


In [14]:
description_df

Unnamed: 0,Disease,Description
0,Drug Reaction,An adverse drug reaction (ADR) is an injury ca...
1,Malaria,An infectious disease caused by protozoan para...
2,Allergy,An allergy is an immune system response to a f...
3,Hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
4,Psoriasis,Psoriasis is a common skin disorder that forms...
5,GERD,"Gastroesophageal reflux disease, or GERD, is a..."
6,Chronic cholestasis,"Chronic cholestatic diseases, whether occurrin..."
7,hepatitis A,Hepatitis A is a highly contagious liver infec...
8,Osteoarthristis,Osteoarthritis is the most common form of arth...
9,(vertigo) Paroymsal Positional Vertigo,Benign paroxysmal positional vertigo (BPPV) is...


In [15]:
final_df.columns.to_list()[32]

'dischromic__patches'

In [16]:
final_df.columns.to_list()[33]

'dischromic_patches'

In [17]:
final_df.drop("dischromic__patches", axis = 1, inplace = True)

In [18]:
final_df

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(vertigo) Paroymsal Positional Vertigo
4916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Acne
4917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Urinary tract infection
4918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Psoriasis


In [19]:
final_df.columns.to_list()[106]

'spotting__urination'

In [20]:
final_df.columns.to_list()[107]

'spotting_urination'

In [21]:
final_df.drop("spotting__urination", axis = 1, inplace = True)

In [22]:
final_df

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(vertigo) Paroymsal Positional Vertigo
4916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Acne
4917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Urinary tract infection
4918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Psoriasis


In [23]:
# Save the result
final_df.to_csv(r"C:/Users/amang/Downloads/healthcare 1/final_weighted_dataset.csv", index=False)

In [24]:
X = final_df.drop("label", axis = 1)
y = final_df["label"]

In [25]:
X

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,vomiting,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
y

0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: label, Length: 4920, dtype: object

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [28]:
from  sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators = 100)

clf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [29]:
clf.fit(X_train, y_train)

In [30]:
y_preds = clf.predict(X_test)

In [31]:
clf.score(X_test, y_test)

1.0

In [32]:
clf.score(X_train, y_train)

1.0

In [33]:
def array_to_dataframe(input_array, column_names):
    if len(input_array) != len(column_names):
        raise ValueError("Array length and number of columns must match.")
    return pd.DataFrame([input_array], columns=column_names)

In [34]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import re

# Symptom list
symptoms = X.columns.to_list()

# Create checkboxes
checkboxes = [widgets.Checkbox(value=False, description=symptom, indent=False) for symptom in symptoms]

# UI Elements
checkbox_container = widgets.VBox()
search_box = widgets.Text(
    placeholder='Search symptoms...',
    description='Search:',
    layout=widgets.Layout(width='50%')
)
output = widgets.Output()
submit_btn = widgets.Button(description="Submit", button_style='success')

# Advanced search filter
def filter_checkboxes(change):
    search_term = search_box.value.strip().lower()
    
    if search_term == "":
        filtered = checkboxes
    else:
        pattern = re.compile(r'\b' + re.escape(search_term))
        filtered = [cb for cb in checkboxes if pattern.search(cb.description.lower())]
    
    checkbox_container.children = filtered

# Initial render
filter_checkboxes(None)
search_box.observe(filter_checkboxes, names='value')

def on_submit(b):
    selected = [cb.description for cb in checkboxes if cb.value]
    binary_array = [1 if cb.value else 0 for cb in checkboxes]

    with output:
        clear_output()
        if not selected:
            print("⚠️ Please select at least one symptom.")
            return
        
        print("✅ You selected:")
        for s in selected:
            print(f"- {s}")
        
        # Predict disease
        weighted_input = np.array(binary_array) * np.array(weights_df["weight"].to_numpy())
        x = clf.predict(array_to_dataframe(weighted_input, X_train.columns))

        print("\n According to the model, you might have:")
        print(x[0])
        print("\n")
        
        row1 = description_df[description_df['Disease'] == x[0]].iloc[0]
        for col, val in row1.items():
            if col != "Disease":
                print(f"{col}: {val}")

        print("\n")
        
        row2 = precaution_df[precaution_df['Disease'] == x[0]].iloc[0]
        for col, val in row2.items():
            if col != "Disease":
                print(f"{col}: {val}")

        

# Connect button to function
submit_btn.on_click(on_submit)

# Display UI
display(widgets.HTML("<h4>Enter your symptoms</h4>"),
        search_box,
        widgets.Box([checkbox_container], layout=widgets.Layout(height='400px', overflow_y='scroll')),
        submit_btn,
        output)


HTML(value='<h4>Enter your symptoms</h4>')

Text(value='', description='Search:', layout=Layout(width='50%'), placeholder='Search symptoms...')

Box(children=(VBox(children=(Checkbox(value=False, description='abdominal_pain', indent=False), Checkbox(value…

Button(button_style='success', description='Submit', style=ButtonStyle())

Output()