In [1]:
import pandas as pd
from collections import defaultdict
import pickle

# Number of related symptoms kept per symptom in the saved map.
TOP_N_RELATED = 5

# Step 1: Load the dataset
df = pd.read_csv("../data/Final_Augmented_dataset_Diseases_and_Symptoms.csv")

# Step 2: Get list of all symptoms
symptoms = df.columns[1:]  # excluding the 'diseases' column

# Step 3: Build co-occurrence matrix
# symptom_cooccurrence[a][b] counts the rows in which both a and b equal 1.
symptom_cooccurrence = defaultdict(lambda: defaultdict(int))

# Compare the whole frame against 1 once instead of doing a label lookup per
# (row, symptom) inside df.iterrows(). Rows and columns are visited in the same
# order as before, so the counts and their insertion order are unchanged.
symptom_names = list(symptoms)
is_present = df.iloc[:, 1:].eq(1).to_numpy()

for row_flags in is_present:
    # nonzero() yields the positions of the True flags in column order.
    present_symptoms = [symptom_names[pos] for pos in row_flags.nonzero()[0]]
    for i in present_symptoms:
        for j in present_symptoms:
            if i != j:
                symptom_cooccurrence[i][j] += 1

# Step 4: Convert to top N related symptoms (sorted)
top_related_symptoms = {}

for symptom, related in symptom_cooccurrence.items():
    # sorted() is stable, so symptoms with equal counts keep first-seen order.
    sorted_related = sorted(related.items(), key=lambda x: x[1], reverse=True)
    top_related_symptoms[symptom] = [s for s, _ in sorted_related[:TOP_N_RELATED]]

# Step 5: Save the result
with open("../model/symptom_related_map.pkl", "wb") as f:
    pickle.dump(top_related_symptoms, f)

print("✅ Related symptoms model saved as 'symptom_related_map.pkl'")

✅ Related symptoms model saved as 'symptom_related_map.pkl'


In [2]:
import pickle
# Read the pickled symptom map back from disk.
with open("../model/symptom_related_map.pkl", "rb") as map_file:
    symptom_related = pickle.load(map_file)

# Spot-check one entry; swap in any other symptom name to inspect it.
symptom_related["headache"]

['vomiting', 'nausea', 'dizziness', 'sharp chest pain', 'fever']

In [3]:
import pickle
from collections import Counter

# Reload the pickled dictionary into `symptom_related`.
with open("../model/symptom_related_map.pkl", "rb") as map_file:
    symptom_related = pickle.load(map_file)

In [4]:
def suggest_related_symptoms(confirmed_symptoms, top_k=5, related_map=None):
    """
    Suggest co-occurring symptoms based on already confirmed symptoms.

    Each confirmed symptom contributes its list of related symptoms; a
    candidate's score is the number of distinct confirmed symptoms that list it.

    Parameters
    ----------
    confirmed_symptoms : iterable of str, or a single str
        Symptoms already confirmed. A bare string is treated as one symptom
        rather than being iterated character by character. Repeated entries
        are counted once so a duplicate cannot inflate the scores.
    top_k : int or None, default 5
        Maximum number of suggestions to return; None returns all of them.
    related_map : mapping of str -> list of str, optional
        Lookup from a symptom to its related symptoms. When omitted, the
        module-level `symptom_related` dictionary is used.

    Returns
    -------
    list of (str, int)
        (symptom, score) pairs sorted by descending score, excluding any
        symptom that is already confirmed. Ties keep first-seen order.
    """
    if related_map is None:
        # Fall back to the dictionary loaded into the notebook namespace.
        related_map = symptom_related

    if isinstance(confirmed_symptoms, str):
        confirmed_symptoms = [confirmed_symptoms]

    # dict.fromkeys drops duplicates while preserving the caller's order.
    confirmed = list(dict.fromkeys(confirmed_symptoms))
    already_confirmed = set(confirmed)

    # Count how often each not-yet-confirmed related symptom appears.
    freq = Counter(
        candidate
        for sym in confirmed
        for candidate in related_map.get(sym, [])
        if candidate not in already_confirmed
    )

    # Return top K suggestions
    return freq.most_common(top_k)

In [5]:
# Example run: rank candidate symptoms for three confirmed ones.
confirmed_symptoms = ["cough", "sore throat", "fever"]
suggestions = suggest_related_symptoms(confirmed_symptoms)

print("Confirmed Symptoms:", confirmed_symptoms)
print("Suggested Co-occurring Symptoms:")
# Each suggestion is a (symptom, count) pair.
for symptom_name, times_seen in suggestions:
    print(f"→ {symptom_name} ({times_seen}x)")

Confirmed Symptoms: ['cough', 'sore throat', 'fever']
Suggested Co-occurring Symptoms:
→ nasal congestion (3x)
→ coryza (3x)
→ shortness of breath (1x)
→ ear pain (1x)
→ vomiting (1x)
