In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("noorsaeed/medicine-recommendation-system-dataset")

print("Path to dataset files:", path)

Path to dataset files: /Users/whiskey/.cache/kagglehub/datasets/noorsaeed/medicine-recommendation-system-dataset/versions/1


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
df = pd.read_csv('Training.csv')
sym_des = pd.read_csv('symtoms_df.csv')
precautions = pd.read_csv('precautions_df.csv')
workout = pd.read_csv('workout_df.csv')
description = pd.read_csv('description.csv')
medications = pd.read_csv('medications.csv')
diets = pd.read_csv('diets.csv')

In [5]:
df.head


<bound method NDFrame.head of       itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  \
0           1          1                     1                    0   
1           0          1                     1                    0   
2           1          0                     1                    0   
3           1          1                     0                    0   
4           1          1                     1                    0   
...       ...        ...                   ...                  ...   
4915        0          0                     0                    0   
4916        0          1                     0                    0   
4917        0          0                     0                    0   
4918        0          1                     0                    0   
4919        0          1                     0                    0   

      shivering  chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  \
0             0       0           0  

In [6]:
df.tail


<bound method NDFrame.tail of       itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  \
0           1          1                     1                    0   
1           0          1                     1                    0   
2           1          0                     1                    0   
3           1          1                     0                    0   
4           1          1                     1                    0   
...       ...        ...                   ...                  ...   
4915        0          0                     0                    0   
4916        0          1                     0                    0   
4917        0          0                     0                    0   
4918        0          1                     0                    0   
4919        0          1                     0                    0   

      shivering  chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  \
0             0       0           0  

In [7]:
df.shape

(4920, 133)

In [8]:
df.prognosis.unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [9]:
len(df.prognosis.unique())

41

In [10]:
df.prognosis.isnull().sum()

np.int64(0)

In [11]:
X = df.drop("prognosis" , axis = 1)
y = df.prognosis


In [12]:
label_encoder = LabelEncoder()
label_encoder.fit(y)
y = label_encoder.transform(y)
y

array([15, 15, 15, ..., 38, 35, 27])

In [13]:
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB


In [14]:
# Apply KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=5)
kmeans_labels = kmeans.fit_predict(X)

# Add the cluster label as a new feature
X_with_clusters = X.copy()
X_with_clusters = np.column_stack((X_with_clusters, kmeans_labels))  # New feature for cluster labels


In [15]:
X_train , X_valid , y_train , y_valid = train_test_split(X , y , random_state = 5 , test_size = 0.2 , train_size = 0.8)

In [16]:
models = {
      'SVC' : SVC(kernel='linear'),
      'Random Forest' :RandomForestClassifier(random_state=5,n_estimators=100),
      'KNeighbors' : KNeighborsClassifier(n_neighbors=5),
      'Gradient Boosting': GradientBoostingClassifier(random_state=5,n_estimators=100),
      'MultinomialNB' :MultinomialNB() }

In [17]:
from sklearn.metrics import accuracy_score , confusion_matrix , mean_absolute_error

In [18]:
for model in models.values() :
    model.fit(X_train , y_train)
    preds = model.predict(X_valid)
    accuracy = accuracy_score(y_valid , preds)
    cm = confusion_matrix(y_valid , preds)
    print(accuracy)
    print(np.array2string(cm , separator = ","))
    print("------------------------------")
    

1.0
[[17, 0, 0,..., 0, 0, 0],
 [ 0,23, 0,..., 0, 0, 0],
 [ 0, 0,23,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,26, 0, 0],
 [ 0, 0, 0,..., 0,28, 0],
 [ 0, 0, 0,..., 0, 0,29]]
------------------------------
1.0
[[17, 0, 0,..., 0, 0, 0],
 [ 0,23, 0,..., 0, 0, 0],
 [ 0, 0,23,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,26, 0, 0],
 [ 0, 0, 0,..., 0,28, 0],
 [ 0, 0, 0,..., 0, 0,29]]
------------------------------
1.0
[[17, 0, 0,..., 0, 0, 0],
 [ 0,23, 0,..., 0, 0, 0],
 [ 0, 0,23,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,26, 0, 0],
 [ 0, 0, 0,..., 0,28, 0],
 [ 0, 0, 0,..., 0, 0,29]]
------------------------------
1.0
[[17, 0, 0,..., 0, 0, 0],
 [ 0,23, 0,..., 0, 0, 0],
 [ 0, 0,23,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,26, 0, 0],
 [ 0, 0, 0,..., 0,28, 0],
 [ 0, 0, 0,..., 0, 0,29]]
------------------------------
1.0
[[17, 0, 0,..., 0, 0, 0],
 [ 0,23, 0,..., 0, 0, 0],
 [ 0, 0,23,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,26, 0, 0],
 [ 0, 0, 0,..., 0,28, 0],
 [ 0, 0, 0,..., 0, 0,29]]
------------------------------


In [19]:
rf_model = RandomForestClassifier(n_estimators = 100)
rf_model.fit(X_train , y_train)
pred = rf_model.predict(X_valid)
acc = accuracy_score(pred , y_valid)
acc

1.0

In [20]:
import pickle
file_path = "forest_model.pkl"
model_file = open(file_path , "wb")
pickle.dump(rf_model , model_file)
model_file.close()

In [21]:
import pickle

# Load the saved SVC model
with open('forest_model.pkl', 'rb') as file:
    loaded_svc_model = pickle.load(file)

print("SVC model loaded successfully.")


SVC model loaded successfully.


In [22]:
#test 1
prediction1 = rf_model.predict(X_valid.iloc[[0]])
print("Predicted Label :",prediction1)
print("Actual Label :",y_valid[0])

Predicted Label : [18]
Actual Label : 18


In [23]:
print('Model Predictions :',rf_model.predict(X_valid.iloc[5].values.reshape(1,-1)))
print('Actual Labels :', y_valid[5])

Model Predictions : [40]
Actual Labels : 40


In [24]:
print('Model Predictions :',rf_model.predict(X_valid.iloc[13].values.reshape(1,-1)))
print('Actual Labels :', y_valid[13])

Model Predictions : [19]
Actual Labels : 19


In [30]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load necessary files and data
df = pd.read_csv('Training.csv')
medications = pd.read_csv('medications.csv')
description = pd.read_csv('description.csv')
precautions = pd.read_csv('precautions_df.csv')
workout = pd.read_csv('workout_df.csv')
diets = pd.read_csv('diets.csv')

# Load the trained model and label encoder
with open('forest_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Initialize Label Encoder for inverse transforming predictions
label_encoder = LabelEncoder()
label_encoder.fit(df['prognosis'])

# Prepare disease information dictionary
disease_data = {
    row["Disease"]: {
        "medication": row["Medication"],
        "description": description["Description"].get(i, "No description available"),
        "precautions": [
            precautions["Precaution_1"].get(i, "N/A"),
            precautions["Precaution_2"].get(i, "N/A"),
            precautions["Precaution_3"].get(i, "N/A"),
            precautions["Precaution_4"].get(i, "N/A")
        ],
        "workout": workout["workout"].get(i, "No workout information available"),
        "diet": diets["Diet"].get(i, "No diet information available")
    }
    for i, row in medications.iterrows()
}

# List of all symptoms for reference
all_symptoms = list(df.columns.drop("prognosis"))

def display_symptoms():
    """
    Display all symptoms with index numbers for user selection.
    """
    print("\nAvailable Symptoms:")
    for idx, symptom in enumerate(all_symptoms, 1):
        print(f"{idx}. {symptom.replace('_', ' ').capitalize()}")

def get_user_symptoms():
    """
    Prompt the user to select symptoms and return a dictionary of symptom presence.
    """
    display_symptoms()
    print("\nEnter the numbers of your symptoms separated by commas (e.g., 1, 3, 5):")
    
    # Collect user input and handle errors
    try:
        selected_numbers = input("Symptom numbers: ").strip()
        selected_indices = [int(num) - 1 for num in selected_numbers.split(",") if num.isdigit() and 0 < int(num) <= len(all_symptoms)]
    except ValueError:
        print("Invalid input. Please enter numbers only.")
        return get_user_symptoms()
    
    # Initialize all symptoms as 0, mark selected symptoms as 1
    symptom_input = {symptom: 0 for symptom in all_symptoms}
    for idx in selected_indices:
        symptom_input[all_symptoms[idx]] = 1
    
    return symptom_input

def get_disease_recommendations(symptom_input, top_n=3):
    """
    Predicts diseases and provides recommendations for the top N diseases.
    """
    symptoms_df = pd.DataFrame([symptom_input])
    probabilities = model.predict_proba(symptoms_df)[0]
    top_indices = np.argsort(probabilities)[-top_n:][::-1]
    top_diseases = label_encoder.inverse_transform(top_indices)
    
    recommendations = []
    for i, disease in enumerate(top_diseases):
        details = disease_data.get(disease, {})
        recommendations.append({
            "rank": i + 1,
            "disease": disease,
            "confidence": f"{probabilities[top_indices[i]]:.2%}",
            "description": details.get("description", "No description available"),
            "medication": details.get("medication", "No medication information available"),
            "precautions": ", ".join(str(p) for p in details.get("precautions", ["N/A"])),
            "workout": details.get("workout", "No workout information available"),
            "diet": details.get("diet", "No diet information available")
        })
    return recommendations

def display_recommendations(recommendations):
    """
    Print formatted recommendations for the user.
    """
    print("\nDisease Predictions and Recommendations:\n")
    for rec in recommendations:
        print(f"Rank {rec['rank']}: {rec['disease']} (Confidence: {rec['confidence']})")
        print(f"  Description: {rec['description']}")
        print(f"  Medication: {rec['medication']}")
        print(f"  Precautions: {rec['precautions']}")
        print(f"  Workout: {rec['workout']}")
        print(f"  Diet: {rec['diet']}\n")
        print("-" * 40)

def run_disease_recommendation():
    """
    Runs the complete process for user to input symptoms and receive recommendations.
    """
    
    
    print("""
Welcome to the Disease Prediction and Recommendation System!

This tool helps you identify potential health conditions based on your symptoms.
Simply select the symptoms you are experiencing, and the system will analyze them 
to provide the most likely diagnoses along with tailored recommendations for 
medications, precautions, workout suggestions, and dietary advice.

Please follow the instructions to select your symptoms, and receive insights into
possible health concerns and tips for management.

Let's get started!
""")

    symptom_input = get_user_symptoms()
    recommendations = get_disease_recommendations(symptom_input, top_n=3)
    display_recommendations(recommendations)

# Start the recommendation system
run_disease_recommendation()



Welcome to the Disease Prediction and Recommendation System!

This tool helps you identify potential health conditions based on your symptoms.
Simply select the symptoms you are experiencing, and the system will analyze them 
to provide the most likely diagnoses along with tailored recommendations for 
medications, precautions, workout suggestions, and dietary advice.

Please follow the instructions to select your symptoms, and receive insights into
possible health concerns and tips for management.

Let's get started!


Available Symptoms:
1. Itching
2. Skin rash
3. Nodal skin eruptions
4. Continuous sneezing
5. Shivering
6. Chills
7. Joint pain
8. Stomach pain
9. Acidity
10. Ulcers on tongue
11. Muscle wasting
12. Vomiting
13. Burning micturition
14. Spotting  urination
15. Fatigue
16. Weight gain
17. Anxiety
18. Cold hands and feets
19. Mood swings
20. Weight loss
21. Restlessness
22. Lethargy
23. Patches in throat
24. Irregular sugar level
25. Cough
26. High fever
27. Sunken eyes
2