## Title Personlized Medical Recommendation System with Machine Learning

### Load Dataset

In [2]:
import pandas as pd

In [5]:
oDataSet = pd.read_csv("dataset/Training.csv")

In [3]:
oDataSet.shape

(4920, 133)

In [4]:
oDataSet['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

### Train test split

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [6]:
# drop only prognosis column and save all the column record and stored it in the X
X = oDataSet.drop('prognosis',axis=1)

# store only the prognosis in the Y 
Y = oDataSet['prognosis']

In [7]:
LabelEncoderConvert_Y = LabelEncoder()
LabelEncoderConvert_Y.fit(Y)
Updated_Y = LabelEncoderConvert_Y.transform(Y)


In [8]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Updated_Y,test_size=0.3,random_state=20)

In [9]:
X_train.shape,X_test.shape,Y_train.shape,Y_test.shape

((3444, 132), (1476, 132), (3444,), (1476,))

### Training a Top Models

In [10]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score,confusion_matrix
import numpy as np

# create a Dictionary to store model
models = {
    "SVC": SVC(kernel='linear'),
    "RandomForest": RandomForestClassifier(n_estimators=100,random_state=42),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100,random_state=42),
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB" : MultinomialNB()
}

for model_name , model in models.items():
   # train model
   model.fit(X_train,Y_train)

   # test model
   predictions = model.predict(X_test)

   # calculate accuracy
   accuracy = accuracy_score(Y_test,predictions)

   # calculate the Confusion matrix
   cm = confusion_matrix(Y_test,predictions)

   print(f"{model_name} accuracy: {accuracy}")
   print(f"{model_name} Confuction Matricx:")
   print(np.array2string(cm,separator=', '))

SVC accuracy: 1.0
SVC Confuction Matricx:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
RandomForest accuracy: 1.0
RandomForest Confuction Matricx:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
GradientBoosting accuracy: 1.0
GradientBoosting Confuction Matricx:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
KNeighbors accuracy: 1.0
KNeighbors Confuction Matricx:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, 

In [11]:
svc = SVC(kernel='linear')
svc.fit(X_train,Y_train)
yPred = svc.predict(X_test)
accuracy_score(Y_test,yPred)

1.0

In [12]:
# Saving the model
import pickle
# pickle.dump(svc,open('model/svc.pkl','wb'))

In [13]:
# Load the model
svc = pickle.load(open("model/svc.pkl","rb"))

In [14]:
# Test 1
print("Predicted Label :",svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Label :", Y_test[0])

Predicted Label : [40]
Actual Label : 40




In [15]:
# Test 2
print("Predicted Label :",LabelEncoderConvert_Y.inverse_transform(svc.predict(X_test.iloc[100].values.reshape(1,-1))))
print("Actual Label :", LabelEncoderConvert_Y.inverse_transform([Y_test[100]]))


Predicted Label : ['Varicose veins']
Actual Label : ['Varicose veins']




### Recommendation System and Prediction
#### Load dataset and use logic for recommendations

In [16]:
sym_des = pd.read_csv("dataset/symtoms_df.csv")
precaution = pd.read_csv("dataset/precautions_df.csv")
workout = pd.read_csv("dataset/workout_df.csv")
descreption = pd.read_csv("dataset/description.csv")
medication = pd.read_csv("dataset/medications.csv")
diets = pd.read_csv("dataset/diets.csv")

In [20]:
# fetch all the column and symptoms in the vairiable
iii = 0
symptoms_dict = dict()
for column in oDataSet.drop('prognosis',axis=1):
    symptoms_dict.update({column:iii})
    iii = iii + 1

# fetch all the column and symptoms in the vairiable
decodedDiseasesDict = {encoded : original for original, encoded in zip(Y, Updated_Y)}

print(decodedDiseasesDict)


{15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}


In [18]:
#  model prediction function


def getPredictedValue(patientSymtoms):
    input_vector = np.zeros(len(symptoms_dict))

    for item in patientSymtoms:
        input_vector[symptoms_dict[item]] = 1
        
    return decodedDiseasesDict[svc.predict([input_vector])[0]]

In [19]:
#test 1:
symptoms = input("Enter your symptoms......")
sUserSymtoms = [s.strip() for s in symptoms.split(',')]

sUserSymtoms = [sym.strip("[]' ") for  sym in sUserSymtoms]
print(sUserSymtoms)
sPredictedDisease = getPredictedValue(sUserSymtoms)
sPredictedDisease

['']


KeyError: ''

In [None]:
# custome helper function to return the all related value of Disease

def helper(sPredictedDisease):
    sDescr = descreption[descreption['Disease'] == sPredictedDisease]['Description']
    sDescr = " ".join([w for w in sDescr])

    sPrecaution = precaution[precaution['Disease'] == sPredictedDisease][['Precaution_1','Precaution_2','Precaution_3','Precaution_4']]
    sPrecaution = [col for col in sPrecaution.values]

    sMedication = medication[medication['Disease'] == sPredictedDisease]['Medication']
    sMedication = [col for col in sMedication.values]

    sDiets = diets[diets['Disease'] == sPredictedDisease]['Diet']
    sDiets = [col for col in sDiets.values]

    sWorkout = workout[workout['disease'] == sPredictedDisease]['workout']
    sWorkout = [col for col in sWorkout.values]

    aFinalData = {
        "Disease" : sPredictedDisease,
        "Description" : sDescr,
        "Precaution" : sPrecaution,
        "Medication" : sMedication,
        "Diet" : sDiets,
        "Workout" : sWorkout
    }

    return aFinalData


In [None]:
aReturnData = helper(sPredictedDisease)

print("======================Disease==============================\n")
print("Disease: "+ aReturnData['Disease']+"\n")
print("======================Disease Description==============================\n")
print("Description: " +aReturnData['Description']+"\n")
print("======================Disease Precaution==============================\n")
P_i = 1
for item in aReturnData['Precaution'][0]:
    print("("+str(P_i)+") "+str(item))
    P_i = P_i + 1
print("======================Disease Medication==============================\n")
# print("Medication: "+aReturnData['Medication']+"\n")
m_i = 1
for item in aReturnData['Medication']:
    print("("+str(m_i)+") "+str(item))
    m_i = m_i + 1
print("======================Disease Diet==============================\n")
# print("Diet: "+aReturnData['Diet']+"\n")
d_i = 1
for item in aReturnData['Diet']:
    print("("+str(d_i)+") "+str(item))
    d_i = d_i + 1
    
print("======================Disease Workout==============================\n")
# print("Workout: "+aReturnData['Workout']+"\n")
w_i = 1
for item in aReturnData['Workout']:
    print("("+str(w_i)+") "+str(item))
    w_i = w_i + 1



Disease: AIDS


Description: AIDS (Acquired Immunodeficiency Syndrome) is a disease caused by HIV that weakens the immune system.


(1) avoid open cuts
(2) wear ppe if possible
(3) consult doctor
(4) follow up

(1) ['Antiretroviral drugs', 'Protease inhibitors', 'Integrase inhibitors', 'Entry inhibitors', 'Fusion inhibitors']

(1) ['Balanced Diet', 'Protein-rich foods', 'Fruits and vegetables', 'Whole grains', 'Healthy fats']

(1) Follow a balanced and nutritious diet
(2) Include lean proteins
(3) Consume nutrient-rich foods
(4) Stay hydrated
(5) Include healthy fats
(6) Avoid raw or undercooked foods
(7) Limit sugary foods and beverages
(8) Consume immune-boosting foods
(9) Take prescribed supplements
(10) Consult a healthcare professional
