In [57]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import re
from rapidfuzz import process


In [8]:
df = pd.read_csv('../data/training.csv')
df.head()

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,vomiting,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
X = df.drop(columns='prognosis', axis=1)
Y = df['prognosis']
encoder = LabelEncoder()
y = encoder.fit_transform(Y)
X_train,X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, confusion_matrix
from sklearn.model_selection import cross_val_score

logistic_model = LogisticRegression(random_state=42)
logistic_model.fit(X_train, y_train)

y_pred_train = logistic_model.predict(X_train)
y_pred_test = logistic_model.predict(X_test)

accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)

cv = cross_val_score(logistic_model, X_train, y_train, cv=5, scoring='accuracy')

print('Accuracy Train :', accuracy_train)
print('Accuracy Test :', accuracy_test)
print('Cross Validation Score :', cv.mean())

results = {
    'Model': 'Logistic Regression',
    'Accuracy_Train': accuracy_train,
    'Accuracy_Test' : accuracy_test,
    'Cross_Validation_Score': cv.mean()
}


Accuracy Train : 1.0
Accuracy Test : 1.0
Cross Validation Score : 1.0


In [13]:
import joblib

joblib.dump(logistic_model, '../notebook/logistic_model.joblib')

['../notebook/logistic_model.joblib']

In [14]:
model = joblib.load('../notebook/logistic_model.joblib')

In [15]:
print('Predicted Diease:', model.predict(X_test.iloc[0].values.reshape(1,-1)))
print('Actual Diease :', y_test[0])

Predicted Diease: [2]
Actual Diease : 2




## Recommendation System

In [77]:
symptoms = pd.read_csv("../data/symptoms.csv")
precautions = pd.read_csv("../data/precautions.csv")
workout = pd.read_csv("../data/workout.csv")
description = pd.read_csv("../data/description.csv")
medications = pd.read_csv('../data/medications.csv')
diets = pd.read_csv("../data/diets.csv")

In [99]:
def helper(disease):
    desc = description[description['Disease']==disease]['Description']
    desc = desc.iloc[0]

    pre = precautions[precautions['Disease']==disease][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [val for val in pre.values]

    medi = medications[medications['Disease']==disease]['Medication']
    medi = [val for val in medi.values]

    die = diets[diets['Disease']==disease]['Diet']
    die = [val for val in die.values]

    work = workout[workout['disease']==disease]['workout']
    work = [val for val in work.values]

    return desc, pre, medi, die, work

helper('Allergy')

('Allergy is an immune system reaction to a substance in the environment.',
 [array(['apply calamine', 'cover area with bandage', nan,
         'use ice to compress itching'], dtype=object)],
 ["['Antihistamines', 'Decongestants', 'Epinephrine', 'Corticosteroids', 'Immunotherapy']"],
 ["['Elimination Diet', 'Omega-3-rich foods', 'Vitamin C-rich foods', 'Quercetin-rich foods', 'Probiotics']"],
 ['Avoid allergenic foods',
  'Consume anti-inflammatory foods',
  'Include omega-3 fatty acids',
  'Stay hydrated',
  'Eat foods rich in vitamin C',
  'Include quercetin-rich foods',
  'Consume local honey',
  'Limit processed foods',
  'Include ginger in diet',
  'Avoid artificial additives'])

In [80]:
df.columns

Index(['abdominal_pain', 'abnormal_menstruation', 'acidity',
       'acute_liver_failure', 'altered_sensorium', 'anxiety', 'back_pain',
       'belly_pain', 'blackheads', 'bladder_discomfort',
       ...
       'vomiting', 'watering_from_eyes', 'weakness_in_limbs',
       'weakness_of_one_body_side', 'weight_gain', 'weight_loss',
       'yellow_crust_ooze', 'yellow_urine', 'yellowing_of_eyes',
       'yellowish_skin'],
      dtype='object', length=133)

In [81]:
symptoms_dict = {}
for i, col in enumerate(df.columns):
    if col=='prognosis':
        continue
    col = col.replace('_','')
    symptoms_dict[col] = i

symptoms_dict

{'abdominalpain': 0,
 'abnormalmenstruation': 1,
 'acidity': 2,
 'acuteliverfailure': 3,
 'alteredsensorium': 4,
 'anxiety': 5,
 'backpain': 6,
 'bellypain': 7,
 'blackheads': 8,
 'bladderdiscomfort': 9,
 'blister': 10,
 'bloodinsputum': 11,
 'bloodystool': 12,
 'blurredanddistortedvision': 13,
 'breathlessness': 14,
 'brittlenails': 15,
 'bruising': 16,
 'burningmicturition': 17,
 'chestpain': 18,
 'chills': 19,
 'coldhandsandfeets': 20,
 'coma': 21,
 'congestion': 22,
 'constipation': 23,
 'continuousfeelofurine': 24,
 'continuoussneezing': 25,
 'cough': 26,
 'cramps': 27,
 'darkurine': 28,
 'dehydration': 29,
 'depression': 30,
 'diarrhoea': 31,
 'dischromic patches': 32,
 'distentionofabdomen': 33,
 'dizziness': 34,
 'dryingandtinglinglips': 35,
 'enlargedthyroid': 36,
 'excessivehunger': 37,
 'extramaritalcontacts': 38,
 'familyhistory': 39,
 'fastheartrate': 40,
 'fatigue': 41,
 'fluidoverload': 42,
 'fluidoverload.1': 43,
 'foulsmellof urine': 44,
 'headache': 45,
 'highfever': 

In [82]:
disease_dict = dict(zip(range(len(encoder.classes_)), encoder.classes_))
disease_dict

{0: '(vertigo) Paroymsal  Positional Vertigo',
 1: 'AIDS',
 2: 'Acne',
 3: 'Alcoholic hepatitis',
 4: 'Allergy',
 5: 'Arthritis',
 6: 'Bronchial Asthma',
 7: 'Cervical spondylosis',
 8: 'Chicken pox',
 9: 'Chronic cholestasis',
 10: 'Common Cold',
 11: 'Dengue',
 12: 'Diabetes ',
 13: 'Dimorphic hemmorhoids(piles)',
 14: 'Drug Reaction',
 15: 'Fungal infection',
 16: 'GERD',
 17: 'Gastroenteritis',
 18: 'Heart attack',
 19: 'Hepatitis B',
 20: 'Hepatitis C',
 21: 'Hepatitis D',
 22: 'Hepatitis E',
 23: 'Hypertension ',
 24: 'Hyperthyroidism',
 25: 'Hypoglycemia',
 26: 'Hypothyroidism',
 27: 'Impetigo',
 28: 'Jaundice',
 29: 'Malaria',
 30: 'Migraine',
 31: 'Osteoarthristis',
 32: 'Paralysis (brain hemorrhage)',
 33: 'Peptic ulcer diseae',
 34: 'Pneumonia',
 35: 'Psoriasis',
 36: 'Tuberculosis',
 37: 'Typhoid',
 38: 'Urinary tract infection',
 39: 'Varicose veins',
 40: 'hepatitis A'}

In [83]:
def get_predicted_disease(symptoms):
    input_vector = np.zeros(len(symptoms_dict))
    for item in symptoms:
        input_vector[symptoms_dict[item]]=1
    predicted = model.predict([input_vector])[0]
    return disease_dict[predicted]

In [101]:
symptoms = input("Enter your sysmptoms")
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [s.strip("[]' ") for s in user_symptoms]
user_symptoms




['itching', 'backpain']

In [102]:
def get_matched_symptoms(user_symptoms, symptoms_dict, threshold=80):
    matched_symptoms = []
    for symptom in user_symptoms:
        cleaned_symptom = ''.join([char for char in symptom if char.isalpha()])
        match, score,_ = process.extractOne(cleaned_symptom, symptoms_dict.keys())
        if score >= threshold:
            matched_symptoms.append(match)
        else:
            print(f"The symptom {cleaned_symptom} didnt match. Enter the correct symptoms")
    return matched_symptoms

symptoms =get_matched_symptoms(user_symptoms, symptoms_dict)
symptoms

['itching', 'backpain']

In [103]:
predicted_disease = get_predicted_disease(symptoms)


desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("=================medications==================")
for m_i in med:
    print(i, ": ", m_i)
    i += 1

print("=================workout==================")
for w_i in wrkout:
    print(i, ": ", w_i)
    i += 1

print("=================diets==================")
for d_i in die:
    print(i, ": ", d_i)
    i += 1

Fungal infection
Fungal infection is a common skin condition caused by fungi.
1 :  bath twice
2 :  use detol or neem in bathing water
3 :  keep infected area dry
4 :  use clean cloths
5 :  ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']
6 :  Avoid sugary foods
7 :  Consume probiotics
8 :  Increase intake of garlic
9 :  Include yogurt in diet
10 :  Limit processed foods
11 :  Stay hydrated
12 :  Consume green tea
13 :  Eat foods rich in zinc
14 :  Include turmeric in diet
15 :  Eat fruits and vegetables
16 :  ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']


