In [1]:
import pandas as pd

In [2]:
# Load the cleaned medicine dataset; the path is relative to the notebook's working directory.
dt = pd.read_csv("../Dataset/clean_medicine_data.csv")

In [3]:
# Preview the first five rows to check which columns were loaded.
dt.head(5)

Unnamed: 0,id,name,price,Is_discontinued,manufacturer_name,type,pack_size_label,medicine_desc,side_effects,drug_interactions,composition,cleaned_name,document
0,1,Augmentin 625 Duo Tablet,223,False,glaxo smithkline pharmaceuticals ltd,allopathy,strip of 10 tablets,Augmentin 625 Duo Tablet is a penicillin-type ...,"Vomiting,Nausea,Diarrhea","{""drug"": [], ""brand"": [], ""effect"": []}",amoxycillin (500mg) clavulanic acid (125mg),augmentin 625 duo tablet,"\n Medicine : augmentin 625 duo tablet,\n ..."
1,2,Azithral 500 Tablet,132,False,alembic pharmaceuticals ltd,allopathy,strip of 5 tablets,Azithral 500 Tablet is an antibiotic used to t...,"Vomiting,Nausea,Abdominal pain,Diarrhea","{""drug"": [""Astemizole"", ""Ebastine"", ""Mizolasti...",azithromycin (500mg),azithral 500 tablet,"\n Medicine : azithral 500 tablet,\n Com..."
2,3,Ascoril LS Syrup,118,False,glenmark pharmaceuticals ltd,allopathy,bottle of 100 ml syrup,Ascoril LS Syrup is a combination medicine use...,"Nausea,Vomiting,Diarrhea,Upset stomach,Stomach...","{""drug"": [], ""brand"": [], ""effect"": []}",ambroxol (30mg/5ml) levosalbutamol (1mg/5ml),ascoril ls syrup,"\n Medicine : ascoril ls syrup,\n Compos..."
3,4,Allegra 120mg Tablet,218,False,sanofi india ltd,allopathy,strip of 10 tablets,Allegra 120mg Tablet is an anti-allergy medici...,"Headache,Drowsiness,Dizziness,Nausea","{""drug"": [""Aprepitant"", ""Fosaprepitant"", ""Maga...",fexofenadine (120mg),allegra 120mg tablet,"\n Medicine : allegra 120mg tablet,\n Co..."
4,5,Avil 25 Tablet,10,False,sanofi india ltd,allopathy,strip of 15 tablets,Avil 25 Tablet is an antiallergic medication u...,Sleepiness,"{""drug"": [""Zafirlukast"", ""Alprazolam"", ""Betahi...",pheniramine (25mg),avil 25 tablet,"\n Medicine : avil 25 tablet,\n Composit..."


#### Build dictionaries that map each cleaned medicine name to its row index, and vice versa, for fast lookup

In [18]:
# Lookup tables keyed on the cleaned medicine name.
# Values are enumerate-style row positions (0, 1, 2, ...) in dt's current order;
# if a cleaned name occurs more than once, the last occurrence wins.
name_to_index = dict(zip(dt['cleaned_name'], range(len(dt))))

# Reverse direction: row position -> cleaned medicine name.
index_to_name = dict(enumerate(dt['cleaned_name']))

In [19]:
def get_composition(medicine:str):
    """Return the composition recorded for a medicine.

    The name is lower-cased and stripped of surrounding whitespace before it
    is looked up in ``name_to_index``.

    Args:
        medicine: Medicine name to look up.

    Returns:
        The value of the ``composition`` column for the matching row, or
        ``None`` when the normalised name is not in ``name_to_index``.
    """
    key = medicine.lower().strip()

    idx = name_to_index.get(key)
    if idx is None:
        return None

    # name_to_index holds enumerate() positions, not index labels, so the row
    # is addressed by position. This matches .loc only while dt keeps a
    # default RangeIndex, and stays correct if dt is ever re-indexed.
    return dt['composition'].iloc[idx]

In [48]:
def find_exact_substitutes(medicine:str, top_k=5):
    """List the cheapest products whose composition equals that of ``medicine``.

    Args:
        medicine: Medicine name; resolved through ``get_composition``.
        top_k: Maximum number of rows to return (default 5).

    Returns:
        A DataFrame with the name, manufacturer, price, pack size and
        composition of the matching rows, sorted by ascending price and cut
        to ``top_k`` rows. The string ``"Medicine not found."`` is returned
        instead when ``get_composition`` yields ``None``.

    Note:
        The row of the queried medicine has the same composition as itself,
        so it is part of the candidate set and can appear in the result.
    """
    target_composition = get_composition(medicine)
    if target_composition is None:
        return "Medicine not found."

    output_columns = [
        "name",
        "manufacturer_name",
        "price",
        "pack_size_label",
        "composition",
    ]

    # Keep only rows with an identical composition string, cheapest first.
    same_composition = dt['composition'] == target_composition
    cheapest_first = dt[same_composition].sort_values("price")

    return cheapest_first[output_columns].head(top_k)

In [49]:
# Mixed-case input works because the lookup lower-cases the name first.
find_exact_substitutes("Allegra 120mg Tablet")

Unnamed: 0,name,manufacturer_name,price,pack_size_label,composition
87339,Faxodas 120mg Tablet,shreya life sciences pvt ltd,19,strip of 6 tablets,fexofenadine (120mg)
236774,Zelgra 120 Tablet,zeelab pharmacy pvt ltd,22,strip of 10 tablets,fexofenadine (120mg)
1265,Alert 120mg Tablet,alkem laboratories ltd,28,strip of 10 tablets,fexofenadine (120mg)
60041,DavaIndia Fexofenadine 120mg Tablet,davaindia generic pharmacy,29,strip of 10 tablets,fexofenadine (120mg)
4458,Allegix 120mg Tablet,intas pharmaceuticals ltd,30,strip of 6 tablets,fexofenadine (120mg)


In [50]:
# Second example: a syrup whose composition lists more than one ingredient.
find_exact_substitutes("benadryl syrup")

Unnamed: 0,name,manufacturer_name,price,pack_size_label,composition
103810,Intadryl Syrup,intas pharmaceuticals ltd,38,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...
156341,Odaril Syrup,moraceae pharmaceuticals pvt ltd,43,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...
207575,Tussberry-N Syrup,acinta pharmaceuticals pvt ltd,51,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...
145633,New Brethese Syrup,galpha laboratories ltd,68,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...
193007,Suntuss CF Syrup,suncare formulations pvt ltd,70,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...


### Similarity Matching (TF-IDF)

In [47]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fit a TF-IDF vocabulary on the composition strings and vectorise every row.
# Row i of comp_vectors corresponds to row i of dt (same order as the input),
# which is what the positional lookup in find_similar_substitutes relies on.
# NOTE(review): fit_transform expects string documents — confirm that the
# composition column has no missing values.
tfidf = TfidfVectorizer()
comp_vectors = tfidf.fit_transform(dt['composition'])

In [56]:
def find_similar_substitutes(medicine:str, top_k=5):
    """Rank medicines by TF-IDF cosine similarity of their composition text.

    Args:
        medicine: Medicine name. It is lower-cased and stripped before being
            looked up in ``name_to_index``, the same normalisation that
            ``get_composition`` applies.
        top_k: Maximum number of rows to return (default 5).

    Returns:
        A DataFrame with the name, manufacturer, price, pack size,
        composition and ``similarity`` score of the best matches, ordered by
        descending similarity and then ascending price. The string
        ``"Medicine not found."`` is returned instead when the name is
        unknown.

    Note:
        The queried medicine is compared against every row, including its
        own, so it is part of the ranked candidates.
    """
    idx = name_to_index.get(medicine.lower().strip())

    if idx is None:
        return "Medicine not found."

    # idx is a row position, and comp_vectors rows follow dt's row order.
    query_vec = comp_vectors[idx]

    # cosine_similarity returns a (1, n_rows) array; take its only row.
    similarities = cosine_similarity(query_vec, comp_vectors)[0]

    output_columns = [
        "name",
        "manufacturer_name",
        "price",
        "pack_size_label",
        "composition",
    ]

    # Score a column-subset copy instead of writing a "similarity" column
    # into the shared dt, so repeated calls leave the global frame unchanged.
    scored = dt[output_columns].assign(similarity=similarities)

    result = scored.sort_values(['similarity', 'price'], ascending=[False, True])

    return result.head(top_k)

In [57]:
# Same query as the exact-match example, this time ranked by TF-IDF cosine similarity.
find_similar_substitutes("benadryl syrup")

Unnamed: 0,name,manufacturer_name,price,pack_size_label,composition,similarity
103810,Intadryl Syrup,intas pharmaceuticals ltd,38,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...,1.0
156341,Odaril Syrup,moraceae pharmaceuticals pvt ltd,43,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...,1.0
207575,Tussberry-N Syrup,acinta pharmaceuticals pvt ltd,51,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...,1.0
145633,New Brethese Syrup,galpha laboratories ltd,68,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...,1.0
193007,Suntuss CF Syrup,suncare formulations pvt ltd,70,bottle of 100 ml syrup,diphenhydramine (14.08mg/5ml) ammonium chlori...,1.0
