### Import Necessary Libraries

In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


### Initialization of Variables

In [18]:
drugs_db = pd.read_csv('../../data/drug-dataset/Medicine_Details.csv')

# Synonyms
medication_types_to_exclude = ['Drop'] # Exclude eye drops

heart = ['heart', 'cardiovascular', 'pressure']
alcohol = ['alcohol', 'drinking', 'alcoholic', 'liver']
smoking = ['smoking', 'cigarette', 'nicotine', 'tobacco']
stroke  = ['stroke', 'brain', 'paralysis', 'hemorrhage', 'clot', 'Dizziness', 'Headache', 'Nausea']
physical_health = ['physical', 'health', 'exercise', 'workout', 'gym', 'fitness', 'fit']
mental_health = ['mental', 'health', 'depression', 'anxiety', 'stress', 'psychological']
diff_walking = ['difficulty', 'walking', 'walk', 'move', 'mobility', 'paralysis']
diabetes = ['diabetes', 'sugar', 'insulin', 'glucose']
bmi = ['bmi', 'body', 'mass', 'index', 'weight', 'obesity']
generic_health = ['generic', 'health', 'overall', 'well-being', 'general']
sleep_time = ['sleep', 'time', 'tired', 'fatigue', 'rest', 'nap']
asthma = ['asthma', 'breathing', 'inhaler', 'wheezing', 'cough']
kidney_disease = ['kidney', 'disease', 'renal', 'failure', 'dialysis', 'urine']
skin_cancer = ['skin', 'cancer', 'melanoma', 'mole', 'sun', 'uv']
cholesterol = ['cholesterol', 'hdl', 'ldl', 'triglycerides', 'fat']

### Load User Input

In [19]:
# Test user
user_input = {
    'HeartDisease': 'No',
    'BMI': 23,
    'Smoking': 'No',
    'Alcohol': 'No',
    'Stroke': 'No',
    'PhysicalHealth': 10,
    'MentalHealth': 10,
    'DiffWalking': 1,
    'Sex': 'Male',
    'Age': 21,
    'Diabetes': 'No',
    'PhysicalActivity': 10,
    'GenericHealth': 10,
    'SleepTime': 8,
    'Asthma': 'No',
    'KidneyDisease': 'No',
    'SkinCancer': 'No',
    # 'Cholesterol': 200,
    # 'BloodPressure': 120,
    'Allergies': ['penicillin', 'aspirin', 'Diarrhea', 'Bupropion']  
}

# User to df
user_df: pd.DataFrame = pd.DataFrame([user_input])


### Define Health Conditions

In [20]:
# Filter conditions for dataset
conditions = []
if user_input['Smoking'] == 'Yes':
    conditions.append('|'.join(smoking))
if user_input['Stroke'] == 'Yes':
    conditions.append('|'.join(stroke))
if user_input['Alcohol'] == 'Yes':
    conditions.append('|'.join(alcohol))
if user_input['Diabetes'] == 'Yes':
    conditions.append('|'.join(diabetes))
if user_input['Asthma'] == 'Yes':
    conditions.append('|'.join(asthma))
if user_input['KidneyDisease'] == 'Yes':
    conditions.append('|'.join(kidney_disease))
if user_input['SkinCancer'] == 'Yes':
    conditions.append('|'.join(skin_cancer))
# if user_input['Cholesterol'] > 200:
#     conditions.append('|'.join(cholesterol))
if user_input['BMI'] > 25:
    conditions.append('|'.join(bmi))
if user_input['PhysicalHealth'] < 3:
    conditions.append('|'.join(physical_health))
if user_input['MentalHealth'] < 3:
    conditions.append('|'.join(mental_health))
if user_input['GenericHealth'] < 3:
    conditions.append('|'.join(generic_health))
if user_input['SleepTime'] < 7:
    conditions.append('|'.join(sleep_time))
if user_input['DiffWalking'] > 3:
    conditions.append('|'.join(diff_walking))


### Filter Drug Database

In [21]:
# Filter dataset
filtered_drugs_db = drugs_db[~drugs_db['Medicine Name'].str.contains('|'.join(medication_types_to_exclude), case=False, na=False)]

filtered_drugs_db = filtered_drugs_db[filtered_drugs_db['Uses'].str.contains('|'.join(heart), case=False, na=False)]

# Further filter based on conditions and heart-related side effects
if conditions:
    combined_conditions = '|'.join(conditions + heart)
    filtered_drugs_db = filtered_drugs_db[~filtered_drugs_db['Side_effects'].str.contains(combined_conditions, case=False, na=False)]

print("Filtered Drugs:")
print(filtered_drugs_db)


Filtered Drugs:
                     Medicine Name  \
18                  Arkamin Tablet   
23                Aldactone Tablet   
34              Axcer  90mg Tablet   
59                Atorva 40 Tablet   
65     Angispan - TR 2.5mg Capsule   
...                            ...   
11781              Zorem 5 Capsule   
11792           Zolahart 40 Tablet   
11804            Zilarta 80 Tablet   
11818            Zilokem 40 Tablet   
11820    Zilarta-CT 40/6.25 Tablet   

                                             Composition  \
18                                    Clonidine (100mcg)   
23                                 Spironolactone (25mg)   
34                                     Ticagrelor (90mg)   
59                                   Atorvastatin (40mg)   
65                                 Nitroglycerin (2.5mg)   
...                                                  ...   
11781                                     Ramipril (5mg)   
11792                        Azilsartan medoxom

### Exclusions

In [22]:

# Exclude words from Composition column
allergies = user_input['Allergies']
if allergies:
    allergy_pattern = '|'.join(allergies)
    filtered_drugs_db = filtered_drugs_db[~filtered_drugs_db['Composition'].str.contains(allergy_pattern, case=False, na=False)]

# Exclude words from Side_effects column
allergies = user_input['Allergies']
if allergies:
    allergy_pattern = '|'.join(allergies)
    filtered_drugs_db = filtered_drugs_db[~filtered_drugs_db['Side_effects'].str.contains(allergy_pattern, case=False, na=False)]

# User input to string
user_conditions = ' '.join([f'{k}:{v}' for k, v in user_input.items()])
print(user_conditions)

HeartDisease:No BMI:23 Smoking:No Alcohol:No Stroke:No PhysicalHealth:10 MentalHealth:10 DiffWalking:1 Sex:Male Age:21 Diabetes:No PhysicalActivity:10 GenericHealth:10 SleepTime:8 Asthma:No KidneyDisease:No SkinCancer:No Allergies:['penicillin', 'aspirin', 'Diarrhea', 'Bupropion']


### Feature Vectorization

In [23]:
# Vectorize the drug descriptions
vectorizer = TfidfVectorizer()
drug_vectors = vectorizer.fit_transform(filtered_drugs_db['Uses'])
print("Drug Vectors:")
print(drug_vectors)

# Vectorize the user conditions
user_vector = vectorizer.transform([user_conditions])
print("User Vectors:")
print(user_vector)

Drug Vectors:
<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 7380 stored elements and shape (1005, 57)>
  Coords	Values
  (0, 52)	0.4163791262319044
  (0, 38)	0.3946015237701766
  (0, 28)	0.42922109260333396
  (0, 26)	0.35693418062583937
  (0, 10)	0.423842721443917
  (0, 42)	0.423842721443917
  (1, 28)	0.13040973107933
  (1, 26)	0.10844688509158124
  (1, 10)	0.12877562700422437
  (1, 42)	0.12877562700422437
  (1, 18)	0.39242860086058023
  (1, 34)	0.5402294988584887
  (1, 40)	0.6168651055101744
  (1, 23)	0.16057337415838563
  (1, 20)	0.29310708936856866
  (2, 38)	0.23844140516339143
  (2, 23)	0.3193504293049628
  (2, 43)	0.4204398550529352
  (2, 6)	0.4073868798714513
  (2, 2)	0.4878448865022383
  (2, 49)	0.5103279579953779
  (3, 52)	0.22529323441054436
  (3, 38)	0.213509871155257
  (3, 23)	0.2859590135679471
  (3, 3)	0.453289073331618
  :	:
  (1002, 52)	0.19401765039148997
  (1002, 38)	0.3677401467053902
  (1002, 26)	0.16631845043756363
  (1002, 23)	0.2462617045109306
  (

### Cosine similarity

In [24]:
# Cosine sim
similarity_scores = cosine_similarity(user_vector, drug_vectors).flatten()


### Finalize Recommendations

In [25]:
# Recommend top 5 drugs
top_indices = similarity_scores.argsort()[:5][::-1]
recommended_drugs = filtered_drugs_db.iloc[top_indices]

print("Recommended Drugs:")
for i in range(len(recommended_drugs)):
    print(f"{i+1}. {recommended_drugs.iloc[i]['Medicine Name']}", ", ", f"{recommended_drugs.iloc[i]['Uses']}")

print(recommended_drugs)

Recommended Drugs:
1. Xstan-AMH Tablet ,   Hypertension (high blood pressure)
2. Xstan 20mg Tablet ,  Treatment of Hypertension (high blood pressure) Prevention of heart attack and strokeTreatment of Heart failure
3. Xirtam H Tablet ,   Hypertension (high blood pressure)
4. Aquazide 12.5 Tablet ,  Treatment of Hypertension (high blood pressure)Treatment of Edema
5. Amlodac 5 Tablet ,  Treatment of Hypertension (high blood pressure)Prevention of Angina (heart-related chest pain)
              Medicine Name  \
11359      Xstan-AMH Tablet   
11383     Xstan 20mg Tablet   
11389       Xirtam H Tablet   
124    Aquazide 12.5 Tablet   
135        Amlodac 5 Tablet   

                                             Composition  \
11359  Telmisartan (40mg) + Amlodipine (5mg) + Hydroc...   
11383                                 Telmisartan (20mg)   
11389  Hydrochlorothiazide (12.5mg) + Olmesartan Medo...   
124                         Hydrochlorothiazide (12.5mg)   
135                           

### Save everything for the HealthPredictorAndRecommender

In [26]:
# Python
# Import necessary library for saving with pickle
import pickle

# Specify the file path to store all objects
combined_path = "recommender_model.pkl"

# Save all objects into a single file
with open(combined_path, 'wb') as file:
    pickle.dump({
        "vectorizer": vectorizer,
        "filtered_drugs_db": filtered_drugs_db,
        "drug_vectors": drug_vectors
    }, file)

print("All objects have been successfully saved into one file!")


All objects have been successfully saved into one file!
