In [90]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [91]:
# Load the training data from the CSV file under datasets/.
# If the file is missing we report it and re-raise: every later cell needs
# `dataset`, so continuing would only fail further down with a NameError
# (or, worse, silently reuse a stale `dataset` left over in the kernel).
try:
    dataset = pd.read_csv("datasets/Training.csv")
except FileNotFoundError:
    print("Error: Dataset file not found. Please ensure 'datasets/Training.csv' exists.")
    raise
else:
    # Only announce success when read_csv actually returned.
    print("Dataset loaded successfully.")

Dataset loaded successfully.


In [92]:
# Preview the first rows of the loaded frame (head() defaults to 5 rows).
dataset.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [93]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
dataset.shape

(4920, 133)

In [94]:
# Count the distinct labels in the target column.
# dropna=False keeps a missing value counted as its own label, exactly as
# len(Series.unique()) would.
dataset['prognosis'].nunique(dropna=False)

41

## This means we have a total of 41 unique diseases
# The names of the diseases are:

In [96]:
# List the distinct values of the target column, in order of first appearance.
dataset['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

#  Train Test Split

In [98]:
# Separate the target column from the feature columns.
# y: the 'prognosis' label of every row.
# X: every remaining column of the dataset.
y = dataset['prognosis']
X = dataset.drop(columns=['prognosis'])

In [99]:
# Display the feature matrix (the dataset without the 'prognosis' column).
X

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4916,0,1,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,0,0
4917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4918,0,1,0,0,0,0,1,0,0,0,...,0,0,0,1,1,1,1,0,0,0


## Now we will convert y to a NumPy array and perform label encoding, because the models do not understand strings

In [101]:
# Map each prognosis string to an integer class id.
# `le` is kept so predictions can later be mapped back to label strings
# with le.inverse_transform.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [102]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): the split is not stratified by class; consider
# stratify=y_encoded if per-class balance in the test set matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=20,
)

# Now we will train several models and keep the top one

In [104]:
# Candidate classifiers, keyed by the display name used in the printed report.
# The tree ensembles are seeded so that a Restart & Run All reproduces the
# same fitted models and scores; the remaining estimators are used with
# their default settings.
models = {
    "SVC": SVC(),
    "Random Forest": RandomForestClassifier(random_state=20),
    "Gradient Boosting": GradientBoostingClassifier(random_state=20),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": MultinomialNB()
}

# model name -> accuracy on the held-out test split
results = {}

# Best estimator seen so far and its test accuracy. The comparison below is a
# strict '>', so when several models tie, the first one in `models` is kept.
best_model = None
best_accuracy = 0

# NOTE(review): the "best" model is chosen using the same test split that is
# reported as its score, so that score is an optimistic estimate. A perfect
# score for every model would also be worth investigating (e.g. identical rows
# on both sides of the split -- TODO confirm against the data).
for model_name, model in models.items():
    try:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        results[model_name] = accuracy
        print(f"{model_name}: Accuracy = {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
    except Exception as e:
        # Best-effort: one failing estimator must not stop the comparison of
        # the others, so report the failure and move on.
        print(f"Error training {model_name}: {e}")

SVC: Accuracy = 1.0
Random Forest: Accuracy = 1.0
Gradient Boosting: Accuracy = 1.0
KNN: Accuracy = 1.0
Naive Bayes: Accuracy = 1.0


In [105]:
# Summarise the test accuracy of every trained model, rounded to two decimals.
report_lines = ["\nModel Performance:"]
report_lines.extend(f"{name}: {score:.2f}" for name, score in results.items())
print("\n".join(report_lines))


Model Performance:
SVC: 1.00
Random Forest: 1.00
Gradient Boosting: 1.00
KNN: 1.00
Naive Bayes: 1.00


In [106]:
# Interactive symptom input and prediction
def predict_medicine(symptoms):
    """Predict and print the prognosis for a set of symptoms.

    Parameters
    ----------
    symptoms : iterable of str, or str
        Symptom names matching the feature column names of ``X``. A single
        comma-separated string is also accepted. Surrounding whitespace is
        ignored and blank entries are skipped.

    Returns
    -------
    None
        The outcome is printed: either the predicted prognosis, or a message
        explaining why no prediction was made.

    Notes
    -----
    Relies on the notebook-level names ``best_model``, ``X`` and ``le``.
    """
    if best_model is None:
        print("No trained model available.")
        return

    # Accept "a, b, c" as well as ["a", "b", "c"]; iterating a bare string
    # would otherwise yield single characters.
    if isinstance(symptoms, str):
        symptoms = symptoms.split(",")

    feature_names = set(X.columns)
    recognized = []
    unrecognized = []
    for symptom in symptoms:
        if symptom in feature_names:
            # An exact match wins, so a column name is never altered.
            recognized.append(symptom)
            continue
        cleaned = symptom.strip() if isinstance(symptom, str) else symptom
        if cleaned in feature_names:
            recognized.append(cleaned)
        elif cleaned != "":
            unrecognized.append(symptom)

    # Tell the user what was dropped instead of ignoring it silently.
    if unrecognized:
        print(f"Ignoring unrecognized symptoms: {', '.join(map(str, unrecognized))}")

    # An all-zero vector would still yield "a" prediction, but a meaningless one.
    if not recognized:
        print("No recognized symptoms provided; cannot make a prediction.")
        return

    # Build a one-row frame with the training column names and order, so the
    # model receives the same feature names it was fitted with.
    symptom_vector = pd.DataFrame(0, index=[0], columns=X.columns)
    symptom_vector.loc[0, recognized] = 1

    prediction_encoded = best_model.predict(symptom_vector)
    # Map the integer class id back to the original label string.
    prediction = le.inverse_transform(prediction_encoded)

    print(f"Predicted prognosis: {prediction[0]}")


In [133]:
# Example usage: read a comma-separated list of symptoms from the user,
# trim the whitespace around each entry, and print the predicted prognosis.
raw_symptom_text = input("Enter symptoms separated by commas: ")
user_symptoms = [entry.strip() for entry in raw_symptom_text.split(",")]
predict_medicine(user_symptoms)

Enter symptoms separated by commas:  itching, skin_rash, nodal_skin_eruptions, continuous_sneezing


Predicted prognosis: Fungal infection




itching, skin_rash, nodal_skin_eruptions, continuous_sneezing
stomach_pain, acidity
Sneezing, chills