# Title: Personalized Medical Recommendation System with Machine Learning

# Description:

Welcome to our cutting-edge Personalized Medical Recommendation System, a powerful platform designed to assist users in understanding and managing their health. Leveraging the capabilities of machine learning, our system analyzes user-input symptoms to predict potential diseases accurately.

# load dataset & tools

In [21]:
import pandas as pd

In [22]:
dataset = pd.read_csv('../dataset/symtoms_df.csv')

In [23]:
dataset

Unnamed: 0.1,Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4
0,0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches
1,1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,
2,2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,
3,3,Fungal infection,itching,skin_rash,dischromic _patches,
4,4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,
...,...,...,...,...,...,...
4915,4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements
4916,4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring
4917,4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine
4918,4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting


In [24]:
# vals = dataset.values.flatten()
dataset.drop('Unnamed: 0',axis=1,inplace=True)
dataset.drop('Symptom_4',axis=1,inplace=True)

In [25]:
dataset.shape

(4920, 4)

In [26]:
#len(dataset['prognosis'].unique())
dataset.isnull().sum()

Disease      0
Symptom_1    0
Symptom_2    0
Symptom_3    0
dtype: int64

In [27]:
dataset['Symptom_1'].unique()

array(['itching', ' skin_rash', ' continuous_sneezing', ' shivering',
       ' stomach_pain', ' acidity', ' vomiting', ' indigestion',
       ' muscle_wasting', ' patches_in_throat', ' fatigue',
       ' weight_loss', ' sunken_eyes', ' cough', ' headache',
       ' chest_pain', ' back_pain', ' weakness_in_limbs', ' chills',
       ' joint_pain', ' yellowish_skin', ' constipation',
       ' pain_during_bowel_movements', ' breathlessness', ' cramps',
       ' weight_gain', ' mood_swings', ' neck_pain', ' muscle_weakness',
       ' stiff_neck', ' pus_filled_pimples', ' burning_micturition',
       ' bladder_discomfort', ' high_fever'], dtype=object)

In [28]:
dataset['Symptom_2'].unique()

array([' skin_rash', ' nodal_skin_eruptions', ' shivering', ' chills',
       ' acidity', ' ulcers_on_tongue', ' vomiting', ' yellowish_skin',
       ' stomach_pain', ' loss_of_appetite', ' indigestion',
       ' patches_in_throat', ' high_fever', ' weight_loss',
       ' restlessness', ' sunken_eyes', ' dehydration', ' cough',
       ' chest_pain', ' dizziness', ' headache', ' weakness_in_limbs',
       ' neck_pain', ' weakness_of_one_body_side', ' fatigue',
       ' joint_pain', ' lethargy', ' nausea', ' abdominal_pain',
       ' pain_during_bowel_movements', ' pain_in_anal_region',
       ' breathlessness', ' sweating', ' cramps', ' bruising',
       ' weight_gain', ' cold_hands_and_feets', ' mood_swings',
       ' anxiety', ' knee_pain', ' stiff_neck', ' swelling_joints',
       ' pus_filled_pimples', ' blackheads', ' bladder_discomfort',
       ' foul_smell_of urine', ' skin_peeling', ' blister'], dtype=object)

In [29]:
dataset['Symptom_3'].unique()

array([' nodal_skin_eruptions', ' dischromic _patches', ' chills',
       ' watering_from_eyes', ' ulcers_on_tongue', ' vomiting',
       ' yellowish_skin', ' nausea', ' stomach_pain',
       ' burning_micturition', ' abdominal_pain', ' loss_of_appetite',
       ' high_fever', ' extra_marital_contacts', ' restlessness',
       ' lethargy', ' dehydration', ' diarrhoea', ' breathlessness',
       ' dizziness', ' loss_of_balance', ' headache',
       ' blurred_and_distorted_vision', ' neck_pain',
       ' weakness_of_one_body_side', ' altered_sensorium', ' fatigue',
       ' weight_loss', ' sweating', ' joint_pain', ' dark_urine',
       ' swelling_of_stomach', ' cough', ' pain_in_anal_region',
       ' bloody_stool', ' chest_pain', ' bruising', ' obesity',
       ' cold_hands_and_feets', ' mood_swings', ' anxiety', ' knee_pain',
       ' hip_joint_pain', ' swelling_joints', ' movement_stiffness',
       ' spinning_movements', ' blackheads', ' scurring',
       ' foul_smell_of urine', ' c

In [30]:
dataset['Disease'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

# train test split

In [31]:
from sklearn.model_selection import train_test_split

In [32]:
X = dataset.drop('Disease', axis=1)
y = dataset['Disease']

In [33]:
X

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3
0,itching,skin_rash,nodal_skin_eruptions
1,skin_rash,nodal_skin_eruptions,dischromic _patches
2,itching,nodal_skin_eruptions,dischromic _patches
3,itching,skin_rash,dischromic _patches
4,itching,skin_rash,nodal_skin_eruptions
...,...,...,...
4915,vomiting,headache,nausea
4916,skin_rash,pus_filled_pimples,blackheads
4917,burning_micturition,bladder_discomfort,foul_smell_of urine
4918,skin_rash,joint_pain,skin_peeling


In [34]:
# cat_column=X.select_dtypes(include="object").columns
# from sklearn.preprocessing import LabelEncoder
# from sklearn.compose import ColumnTransformer
# le = LabelEncoder()
# preprocessor = ColumnTransformer(
#     [
#         ("LabelEncoder", le,cat_column)
#     ]
# )


In [35]:
X['Symptom_1']=le.fit_transform(X['Symptom_1'])
X['Symptom_2']=le.fit_transform(X['Symptom_2'])
X['Symptom_3']=le.fit_transform(X['Symptom_3'])

In [36]:
X

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3
0,33,35,35
1,24,27,18
2,33,27,18
3,33,35,18
4,33,35,35
...,...,...,...
4915,28,17,33
4916,24,31,3
4917,4,4,22
4918,24,20,42


In [37]:
from sklearn.model_selection import train_test_split  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=20)

In [38]:
X_train.shape , y_train.shape

((3444, 3), (3444,))

In [39]:
X_test.shape , y_test.shape

((1476, 3), (1476,))

# Training top models

In [40]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np


# Create a dictionary to store models
models = {
    'SVC': SVC(kernel='linear'),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'KNeighbors': KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

# Loop through the models, train, test, and print results
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    predictions = model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"{model_name} Accuracy: {accuracy}")

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"{model_name} Confusion Matrix:")
    print(np.array2string(cm, separator=', '))

    print("\n" + "="*40 + "\n")


SVC Accuracy: 0.8611111111111112
SVC Confusion Matrix:
[[32,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 24, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 35,  0],
 [ 0,  0,  0, ...,  0,  0, 29]]


RandomForest Accuracy: 0.9498644986449865
RandomForest Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]


GradientBoosting Accuracy: 0.9498644986449865
GradientBoosting Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]


KNeighbors Accuracy: 0.9234417344173442
KNeighbors Confusion Matrix:
[[35,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 3

# single prediction

In [41]:
# selecting RandomForestClassifier
RandomForest = RandomForestClassifier(n_estimators=100, random_state=42)
RandomForest.fit(X_train,y_train)
ypred = RandomForest.predict(X_test)
accuracy_score(y_test,ypred)

0.9498644986449865

In [42]:
# save svc
import pickle
pickle.dump(RandomForest ,open('RandomForest.pkl','wb'))

In [48]:
# load model
import pickle
RandomForest = pickle.load(open('RandomForest.pkl','rb'))

In [50]:
random = [[0,25,1]]
ypred = RandomForest.predict(random)
ypred



array(['Migraine'], dtype=object)

In [49]:
# random=X_test.iloc[0].values.reshape(1,-1)

ypred = RandomForest.predict(random)
print('Actual Diesease :',ypred)


Actual Diesease : ['Urinary tract infection']




# Recommendation System and Prediction

# load database and use logic for recommendations

In [45]:
sym_des = pd.read_csv("C:/Medicine-Recommendation-System/dataset/symtoms_df.csv")
precautions = pd.read_csv("C:/Medicine-Recommendation-System/dataset/precautions_df.csv")
workout = pd.read_csv("C:/Medicine-Recommendation-System/dataset/workout_df.csv")
description = pd.read_csv("C:/Medicine-Recommendation-System/dataset/description.csv")
medications = pd.read_csv("C:/Medicine-Recommendation-System/dataset/medications.csv")
diets = pd.read_csv("C:/Medicine-Recommendation-System/dataset/diets.csv")


FileNotFoundError: [Errno 2] No such file or directory: 'C:/Medicine-Recommendation-System/dataset/symtoms_df.csv'

In [None]:
import numpy as np

#======================== Helper Functions ========================
def helper(dis):
    # Retrieve description for the disease
    desc = description[description['Disease'] == dis]['Description']
    desc = " ".join([w for w in desc])

    # Retrieve precautions for the disease
    pre = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [col for col in pre.values]

    # Retrieve medications for the disease
    med = medications[medications['Disease'] == dis]['Medication']
    med = [med for med in med.values]

    # Retrieve diets for the disease
    die = diets[diets['Disease'] == dis]['Diet']
    die = [die for die in die.values]

    # Retrieve workouts for the disease
    wrkout = workout[workout['disease'] == dis]['workout']

    return desc, pre, med, die, wrkout

# Dictionary for symptoms and diseases
symptoms_dict = {
    'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 
    'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 
    'burning_micturition': 12, 'spotting_urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 
    'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 
    'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 
    'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 
    'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 
    'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 
    'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 
    'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 
    'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 
    'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 
    'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 
    'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 
    'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 
    'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 
    'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 
    'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 
    'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of_urine': 90, 
    'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 
    'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 
    'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic_patches': 102, 'watering_from_eyes': 103, 
    'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 
    'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 
    'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 
    'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 
    'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 
    'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 
    'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 
    'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131
}

diseases_list = {
    15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 
    33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 
    23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 
    28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 
    19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 
    36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 
    18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 
    25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 
    2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'
}

#======================== Prediction Function =========================
def get_predicted_value(patient_symptoms):
    # Create an input vector with the same length as the number of features in the model
    input_vector = np.zeros(len(symptoms_dict))
    
    for symptom in patient_symptoms:
        # If symptom is not in the dictionary, print a warning
        if symptom in symptoms_dict:
            input_vector[symptoms_dict[symptom]] = 1
        else:
            print(f"Warning: Symptom '{symptom}' not found in symptoms_dict.")
    
    # Predict using the RandomForest model
    predicted_id = RandomForest.predict([input_vector])[0]
    
    # Return the predicted disease name, or an error message if the ID is not in the diseases_list
    return diseases_list.get(predicted_id, "Disease ID not found in diseases_list.")


In [None]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
symptoms = input("Enter your symptoms.......")  # Example input: "itching,skin_rash,nodal_skin_eruptions"
user_symptoms = [s.strip() for s in symptoms.split(',')]

# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]

# Get predicted disease
predicted_disease = get_predicted_value(user_symptoms)

# Retrieve data for the predicted disease
desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)

print("=================description==================")
print(desc if desc else "No description available")

print("=================precautions==================")
if pre and pre[0]:  # Check if precautions are available
    for i, p_i in enumerate(pre[0], 1):
        print(f"{i}: {p_i}")
else:
    print("No precautions available")

print("=================medications==================")
if med:  # Check if medications are available
    for i, m_i in enumerate(med, 1):
        print(f"{i}: {m_i}")
else:
    print("No medications available")

print("=================workout==================")
if wrkout and not wrkout.empty:  # Check if workouts are available and not empty
    for i, w_i in enumerate(wrkout, 1):
        print(f"{i}: {w_i}")
else:
    print("No workouts available")

print("=================diets==================")
if die and not die.empty:  # Check if diets are available and not empty
    for i, d_i in enumerate(die, 1):
        print(f"{i}: {d_i}")
else:
    print("No diets available")




ValueError: X has 132 features, but RandomForestClassifier is expecting 3 features as input.

In [None]:
# let's use pycharm flask app
# but install this version in pycharm
import sklearn
print(sklearn.__version__)

1.4.2


In [52]:
sym_des = pd.read_csv("../dataset/symtoms_df.csv")
precautions = pd.read_csv("../dataset/precautions_df.csv")
workout = pd.read_csv("../dataset/workout_df.csv")
description = pd.read_csv("../dataset/description.csv")
medications = pd.read_csv('../dataset/medications.csv')
diets = pd.read_csv("../dataset/diets.csv")

medications

Unnamed: 0,Disease,Medication
0,Fungal infection,"['Antifungal Cream', 'Fluconazole', 'Terbinafi..."
1,Allergy,"['Antihistamines', 'Decongestants', 'Epinephri..."
2,GERD,"['Proton Pump Inhibitors (PPIs)', 'H2 Blockers..."
3,Chronic cholestasis,"['Ursodeoxycholic acid', 'Cholestyramine', 'Me..."
4,Drug Reaction,"['Antihistamines', 'Epinephrine', 'Corticoster..."
5,Peptic ulcer disease,"['Antibiotics', 'Proton Pump Inhibitors (PPIs)..."
6,AIDS,"['Antiretroviral drugs', 'Protease inhibitors'..."
7,Diabetes,"['Insulin', 'Metformin', 'Sulfonylureas', 'DPP..."
8,Gastroenteritis,"['Antibiotics', 'Antiemetic drugs', 'Antidiarr..."
9,Bronchial Asthma,"['Bronchodilators', 'Inhaled corticosteroids',..."


In [64]:
dis = "Migraine"
med = medications[medications['Disease'] == dis]['Medication'] if not medications[medications['Disease'] == dis].empty else []
# type(med)
med = med.iloc[0]
med[1:-1]

"'Analgesics', 'Triptans', 'Ergotamine derivatives', 'Preventive medications', 'Biofeedback'"

[np.int64(29), 'Migraine', 'meditation', 'reduce stress', 'use poloroid glasses in sun', 'consult doctor'] this data is coming in string format i want to show these values in list in the html page how can i do that


In [71]:
wrkout = workout[workout['disease'] == dis]['workout'] if not workout[workout['disease'] == dis].empty else []
type(wrkout) 
wrkout =  wrkout.tolist()


(pandas.core.series.Series,
 ['Identify and avoid trigger foods',
  'Stay hydrated',
  'Include magnesium-rich foods',
  'Consume omega-3 fatty acids',
  'Limit caffeine and alcohol',
  'Consume riboflavin-rich foods',
  'Limit processed foods',
  'Maintain regular meal times',
  'Consult a healthcare professional',
  'Manage stress'])

In [82]:
die = diets[diets['Disease'] == dis]['Diet'] if not diets[diets['Disease'] == dis].empty else []
die = die.iloc[0][1:-1].replace("'","").split(", ")
die 



['Migraine Diet',
 'Low-Tyramine Diet',
 'Caffeine withdrawal',
 'Hydration',
 'Magnesium-rich foods']