In [19]:
!pip install rapidfuzz


Collecting rapidfuzz
  Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.14.3


In [8]:
import pandas as pd
import numpy as np
from itertools import combinations

!pip install streamlit pyngrok




Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m60.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m115.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.5.0 streamlit-1.52.2


In [2]:
disease_data = {
    "General": {
        "symptoms": ["fever", "headache", "body pain", "fatigue"],
        "medicines": ["Paracetamol", "Dolo 650", "Crocin", "Calpol"]
    },
    "Cold & Allergy": {
        "symptoms": ["cold", "cough", "sneezing", "runny nose"],
        "medicines": ["Cetirizine", "Levocetirizine", "Benadryl", "Cheston Cold"]
    },
    "Gastrointestinal": {
        "symptoms": ["stomach pain", "acidity", "indigestion", "nausea"],
        "medicines": ["Pan D", "Omez", "Gelusil", "Digene"]
    },
    "Infection": {
        "symptoms": ["fever", "infection", "inflammation"],
        "medicines": ["Azithromycin", "Amoxicillin", "Ciprofloxacin"]
    },
    "Diabetes": {
        "symptoms": ["high blood sugar", "frequent urination", "thirst"],
        "medicines": ["Metformin", "Glycomet", "Glibenclamide"]
    },
    "Hypertension": {
        "symptoms": ["high blood pressure", "dizziness", "chest pain"],
        "medicines": ["Amlodipine", "Losartan", "Telmisartan"]
    },
    "Pain & Inflammation": {
        "symptoms": ["joint pain", "swelling", "muscle pain"],
        "medicines": ["Ibuprofen", "Diclofenac", "Voveran"]
    }
}


In [3]:
rows = []

for disease, data in disease_data.items():
    symptoms = data["symptoms"]
    medicines = data["medicines"]

    for med in medicines:
        for r in range(1, len(symptoms)+1):
            for combo in combinations(symptoms, r):
                rows.append({
                    "medicine_name": med,
                    "symptoms": ",".join(combo),
                    "disease_category": disease,
                    "prescription_required": disease in ["Infection", "Diabetes", "Hypertension"]
                })

df = pd.DataFrame(rows)
df = df.drop_duplicates().reset_index(drop=True)

df.head(), df.shape


(  medicine_name        symptoms disease_category  prescription_required
 0   Paracetamol           fever          General                  False
 1   Paracetamol        headache          General                  False
 2   Paracetamol       body pain          General                  False
 3   Paracetamol         fatigue          General                  False
 4   Paracetamol  fever,headache          General                  False,
 (264, 4))

In [16]:
df.to_csv("medicine_dataset.csv", index=False)
print("Dataset saved as medicine_dataset.csv")


Dataset saved as medicine_dataset.csv


In [17]:
import pandas as pd

df = pd.read_csv("medicine_dataset.csv")

df["search_text"] = (
    df["medicine_name"].str.lower() + " " +
    df["symptoms"].str.replace(",", " ", regex=False).str.lower()
)

df.to_csv("medicine_dataset_search.csv", index=False)


In [20]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import fuzz

df = pd.read_csv("medicine_dataset_search.csv")

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["search_text"])

def smart_search(user_query, top_k=5):
    user_query = user_query.lower()

    # NLP similarity
    user_vector = vectorizer.transform([user_query])
    nlp_scores = cosine_similarity(user_vector, tfidf_matrix)[0]

    # Fuzzy matching with medicine names
    fuzzy_scores = df["medicine_name"].apply(
        lambda x: fuzz.partial_ratio(user_query, x.lower()) / 100
    )

    # Combine scores
    df["final_score"] = (0.7 * nlp_scores) + (0.3 * fuzzy_scores)

    result = (
        df[df["final_score"] > 0.1]
        .sort_values("final_score", ascending=False)
        .drop_duplicates("medicine_name")
        .head(top_k)
    )

    return result[[
        "medicine_name",
        "final_score",
        "disease_category",
        "prescription_required"
    ]]


In [6]:
test_symptoms = ["fever", "headache"]
recommend_medicines(test_symptoms, df)


Unnamed: 0,medicine,score,category,prescription_required
2,Paracetamol,2,General,False
20,Dolo 650,2,General,False
26,Crocin,2,General,False
38,Calpol,2,General,False
48,Azithromycin,1,Infection,True


In [12]:
# %%writefile streamlit_app.py

# import streamlit as st
# import pandas as pd

# df = pd.read_csv("medicine_dataset.csv")

# def recommend_medicines(input_symptoms, df):
#     input_symptoms = set([s.strip().lower() for s in input_symptoms])

#     scores = []
#     for _, row in df.iterrows():
#         med_symptoms = set(row["symptoms"].lower().split(","))
#         match_count = len(input_symptoms & med_symptoms)

#         if match_count > 0:
#             scores.append((
#                 row["medicine_name"],
#                 match_count,
#                 row["disease_category"],
#                 row["prescription_required"]
#             ))

#     result = pd.DataFrame(
#         scores,
#         columns=["Medicine", "Match Score", "Category", "Prescription Required"]
#     )

#     return result.sort_values("Match Score", ascending=False).drop_duplicates("Medicine")

# st.title("Medicine Recommendation System")
# st.warning("For educational purposes only. Not medical advice.")

# user_input = st.text_input("Enter symptoms (comma separated)")

# if user_input:
#     symptoms = user_input.split(",")
#     result = recommend_medicines(symptoms, df)
#     st.dataframe(result)
# import pandas as pd

# df = pd.read_csv("medicine_dataset.csv")

# df["symptom_description"] = (
#     df["symptoms"]
#     .str.replace(",", " ", regex=False)
# )

# df.to_csv("medicine_dataset_nlp.csv", index=False)


Overwriting streamlit_app.py


In [13]:
import pandas as pd

df = pd.read_csv("medicine_dataset.csv")

df["symptom_description"] = (
    df["symptoms"]
    .str.replace(",", " ", regex=False)
)

df.to_csv("medicine_dataset_nlp.csv", index=False)


In [14]:
pip install scikit-learn

