In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [3]:
# Reading the dataset
data = pd.read_csv("dataset.csv")
print("Disease Set")
data.head()

Disease Set


Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


In [4]:
# Checking for null values and duplicate values
print("\nChecking the null")
print(data.isnull().sum())
print("Data Duplicate")
data.duplicated().sum()


Checking the null
Disease          0
Symptom_1        0
Symptom_2        0
Symptom_3        0
Symptom_4      348
Symptom_5     1206
Symptom_6     1986
Symptom_7     2652
Symptom_8     2976
Symptom_9     3228
Symptom_10    3408
Symptom_11    3726
Symptom_12    4176
Symptom_13    4416
Symptom_14    4614
Symptom_15    4680
Symptom_16    4728
Symptom_17    4848
dtype: int64
Data Duplicate


4616

In [5]:
# Creating a copy of the dataset
data1=data.copy()
data1


Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [6]:
all_symptoms = set(data1.iloc[:, 1:].values.flatten())
all_symptoms

{' abdominal_pain',
 ' abnormal_menstruation',
 ' acidity',
 ' acute_liver_failure',
 ' altered_sensorium',
 ' anxiety',
 ' back_pain',
 ' belly_pain',
 ' blackheads',
 ' bladder_discomfort',
 ' blister',
 ' blood_in_sputum',
 ' bloody_stool',
 ' blurred_and_distorted_vision',
 ' breathlessness',
 ' brittle_nails',
 ' bruising',
 ' burning_micturition',
 ' chest_pain',
 ' chills',
 ' cold_hands_and_feets',
 ' coma',
 ' congestion',
 ' constipation',
 ' continuous_feel_of_urine',
 ' continuous_sneezing',
 ' cough',
 ' cramps',
 ' dark_urine',
 ' dehydration',
 ' depression',
 ' diarrhoea',
 ' dischromic _patches',
 ' distention_of_abdomen',
 ' dizziness',
 ' drying_and_tingling_lips',
 ' enlarged_thyroid',
 ' excessive_hunger',
 ' extra_marital_contacts',
 ' family_history',
 ' fast_heart_rate',
 ' fatigue',
 ' fluid_overload',
 ' foul_smell_of urine',
 ' headache',
 ' high_fever',
 ' hip_joint_pain',
 ' history_of_alcohol_consumption',
 ' increased_appetite',
 ' indigestion',
 ' inflam

In [7]:
# Create a new DataFrame for one-hot encoding
one_hot_data = pd.DataFrame()

In [8]:
# Create a new DataFrame for one-hot encoding
one_hot_data = pd.concat([data1.iloc[:, 0], pd.DataFrame({symptom: data1.apply(lambda row: 1 if symptom in row.values else 0, axis=1) for symptom in all_symptoms})], axis=1)

In [9]:
# Add the 'prognosis' column to the one-hot encoded DataFrame
one_hot_data['prognosis'] = data1['Disease']

In [10]:
# Save the one-hot encoded DataFrame to a new CSV file or use it as needed
one_hot_data.to_csv('one_hot_dataset.csv', index=False)

In [11]:
# Read the one-hot encoded CSV file
one_hot_df = pd.read_csv('one_hot_dataset.csv')

# Print the entire DataFrame
one_hot_df

Unnamed: 0,Disease,swollen_extremeties,bruising,red_spots_over_body,increased_appetite,muscle_weakness,spotting_ urination,weight_gain,dark_urine,malaise,...,watering_from_eyes,hip_joint_pain,toxic_look_(typhos),yellowing_of_eyes,skin_rash,enlarged_thyroid,bloody_stool,receiving_unsterile_injections,shivering,prognosis
0,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
1,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
2,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
4,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
4916,Acne,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Acne
4917,Urinary tract infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Urinary tract infection
4918,Psoriasis,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Psoriasis


In [12]:
one_hot_df.head()

Unnamed: 0,Disease,swollen_extremeties,bruising,red_spots_over_body,increased_appetite,muscle_weakness,spotting_ urination,weight_gain,dark_urine,malaise,...,watering_from_eyes,hip_joint_pain,toxic_look_(typhos),yellowing_of_eyes,skin_rash,enlarged_thyroid,bloody_stool,receiving_unsterile_injections,shivering,prognosis
0,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
1,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
2,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
4,Fungal infection,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection


In [13]:
# Accessing a specific column
one_hot_df['prognosis']

0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: prognosis, Length: 4920, dtype: object

In [14]:
one_hot_df=one_hot_df.drop(columns=["Disease"])
one_hot_df

Unnamed: 0,swollen_extremeties,bruising,red_spots_over_body,increased_appetite,muscle_weakness,spotting_ urination,weight_gain,dark_urine,malaise,swelling_joints,...,watering_from_eyes,hip_joint_pain,toxic_look_(typhos),yellowing_of_eyes,skin_rash,enlarged_thyroid,bloody_stool,receiving_unsterile_injections,shivering,prognosis
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
4916,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Acne
4917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Urinary tract infection
4918,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,Psoriasis


In [15]:
# Splitting the dataset
x = one_hot_df.drop('prognosis', axis = 1)
y = one_hot_df['prognosis']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [16]:
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Logistic Regression Classifier
logreg = LogisticRegression()
logreg.fit(x_train, y_train)
logreg_pred = logreg.predict(x_test)
logreg_acc = accuracy_score(y_test, logreg_pred)

print("Logistic Regression Classifier:")
print("Accuracy on test set: {:.2f}%".format(logreg_acc * 100))

Logistic Regression Classifier:
Accuracy on test set: 100.00%


In [17]:
logreg_confidence = logreg.predict_proba(x_test)
# Print confidence scores for Logistic Regression predictions
print("Confidence scores for Logistic Regression predictions:")
print(logreg_confidence)

Confidence scores for Logistic Regression predictions:
[[1.22808611e-03 2.83049596e-03 9.20279014e-01 ... 3.00140248e-03
  1.58406980e-03 4.13225204e-04]
 [1.70227182e-04 3.75490345e-04 9.89681822e-01 ... 3.96491089e-04
  2.17522351e-04 5.85306105e-05]
 [4.22994046e-05 1.40963799e-04 1.82123188e-04 ... 1.91949795e-04
  2.22990497e-04 6.21754974e-05]
 ...
 [2.47720978e-04 2.34021313e-04 2.26530922e-04 ... 2.47724443e-04
  1.14461556e-04 3.52243957e-05]
 [5.68839030e-05 1.74076521e-04 1.87623456e-04 ... 2.06504651e-04
  2.52151987e-04 1.77737635e-05]
 [8.62838785e-04 2.02390803e-04 3.24345368e-04 ... 7.08377112e-05
  1.71079095e-04 7.91633186e-04]]


In [18]:
# Feature columns from the dataset
feature_columns = ['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing', 'shivering', 'chills',
                   'joint_pain', 'stomach_pain', 'acidity', 'ulcers_on_tongue', 'muscle_wasting', 'vomiting',
                   'burning_micturition', 'spotting_ urination', 'fatigue', 'weight_gain', 'anxiety',
                   'cold_hands_and_feets', 'mood_swings', 'weight_loss', 'restlessness', 'lethargy',
                   'patches_in_throat', 'irregular_sugar_level', 'cough', 'high_fever', 'sunken_eyes',
                   'breathlessness', 'sweating', 'dehydration', 'indigestion', 'headache', 'yellowish_skin',
                   'dark_urine', 'nausea', 'loss_of_appetite', 'pain_behind_the_eyes', 'back_pain', 'constipation',
                   'abdominal_pain', 'diarrhoea', 'mild_fever', 'yellow_urine', 'yellowing_of_eyes',
                   'acute_liver_failure', 'fluid_overload', 'swelling_of_stomach', 'swelled_lymph_nodes',
                   'malaise', 'blurred_and_distorted_vision', 'phlegm', 'throat_irritation', 'redness_of_eyes',
                   'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'weakness_in_limbs', 'fast_heart_rate',
                   'pain_during_bowel_movements', 'pain_in_anal_region', 'bloody_stool', 'irritation_in_anus',
                   'neck_pain', 'dizziness', 'cramps', 'bruising', 'obesity', 'swollen_legs', 'swollen_blood_vessels',
                   'puffy_face_and_eyes', 'enlarged_thyroid', 'brittle_nails', 'swollen_extremeties',
                   'excessive_hunger', 'extra_marital_contacts', 'drying_and_tingling_lips', 'slurred_speech',
                   'knee_pain', 'hip_joint_pain', 'muscle_weakness', 'stiff_neck', 'swelling_joints',
                   'movement_stiffness', 'spinning_movements', 'loss_of_balance', 'unsteadiness',
                   'weakness_of_one_body_side', 'loss_of_smell', 'bladder_discomfort', 'foul_smell_of urine',
                   'continuous_feel_of_urine', 'passage_of_gases', 'internal_itching', 'toxic_look_(typhos)',
                   'depression', 'irritability', 'muscle_pain', 'altered_sensorium', 'red_spots_over_body',
                   'belly_pain', 'abnormal_menstruation', 'dischromic _patches', 'watering_from_eyes',
                   'increased_appetite', 'polyuria', 'family_history', 'mucoid_sputum', 'rusty_sputum',
                   'lack_of_concentration', 'visual_disturbances', 'receiving_blood_transfusion',
                   'receiving_unsterile_injections', 'coma', 'stomach_bleeding', 'distention_of_abdomen',
                   'history_of_alcohol_consumption', 'fluid_overload', 'blood_in_sputum', 'prominent_veins_on_calf',
                   'palpitations', 'painful_walking', 'pus_filled_pimples', 'blackheads', 'scurring', 'skin_peeling',
                   'silver_like_dusting', 'small_dents_in_nails', 'inflammatory_nails', 'blister',
                   'red_sore_around_nose', 'yellow_crust_ooze']

In [19]:
# Function to convert user input symptoms into an array
def symptoms_to_array(symptoms, feature_columns):
    # Create an array with zeros for all features
    user_input_array = np.zeros(len(feature_columns))

    # Set 1 for the features corresponding to the provided symptoms
    for symptom in symptoms:
        if symptom in feature_columns:
            index = feature_columns.index(symptom)
            user_input_array[index] = 1

    return user_input_array.reshape(1, -1)

In [27]:
# Get user input for symptoms
user_symptoms_str = input("Enter symptoms (comma-separated): ")
user_symptoms = [symptom.strip() for symptom in user_symptoms_str.split(',')]

# Convert user symptoms to array
user_input_array = symptoms_to_array(user_symptoms, feature_columns)

# Perform prediction

logreg_pred = logreg.predict(user_input_array)

print("Predicted Disease:", logreg_pred[0])

Enter symptoms (comma-separated):  itching,nodal_skin,joint_pain


Predicted Disease: Allergy




In [24]:
# Confidence Prediction
logreg_confidence = logreg.predict_proba(user_input_array)
print("Prediction Confidence:", logreg_confidence)

Prediction Confidence: [[0.0132694  0.05058991 0.04926685 0.0177201  0.05594123 0.032846
  0.02028053 0.03452059 0.00546661 0.00664069 0.01077997 0.0069203
  0.01203532 0.03851689 0.02744783 0.04943026 0.0191093  0.03927427
  0.03901332 0.00658752 0.0210444  0.00153556 0.00253415 0.02716315
  0.00907191 0.00547132 0.00954365 0.02824064 0.00757676 0.05453332
  0.01445802 0.025137   0.03674573 0.02114533 0.00675639 0.02378136
  0.00250047 0.00323051 0.05600368 0.07876258 0.02910717]]


