In [3]:
import csv
import re
import tiktoken
import json



In [4]:
tokenizer = tiktoken.get_encoding('gpt2')

In [5]:
symptom_vocabulary = set()
try:
    with open('health_dataset.csv', 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        for row in reader:
            for phrase in row:
                cleaned_phrase = phrase.strip().lower()
                if cleaned_phrase:
                    symptom_vocabulary.add(cleaned_phrase)
    print(f" Loaded {len(symptom_vocabulary)} symptom phrases.")
    print("-" * 30)
except FileNotFoundError:
    print(" ERROR: health_dataset.csv not found!")
    exit()
except Exception as e:
    print(f" An error occurred reading the CSV: {e}")
    exit()


 Loaded 145 symptom phrases.
------------------------------


In [6]:
user_query = input("Hey! What's on your mind today: ")
user_query_lower = user_query.lower()

In [7]:
matched_symptoms_strings = set()
for symptom_phrase in symptom_vocabulary:
    if symptom_phrase in user_query_lower:
        matched_symptoms_strings.add(symptom_phrase)

if not matched_symptoms_strings:
    print("\nNo symptoms from the vocabulary were found in the query.")
else:
    print(f"\n Matched Symptoms (as strings): {matched_symptoms_strings}")


 Matched Symptoms (as strings): {'runny nose', 'fever', 'blisters', 'mild fever', 'rash', 'rashes'}


In [8]:
symptom_token_map = {}
if matched_symptoms_strings:
    print("\n" + "="*40)
    print("Symptom Token ID Analysis")
    print("="*40)
    for symptom in matched_symptoms_strings:
        tokens = tokenizer.encode(symptom)
        symptom_token_map[symptom] = tokens
        print(f"'{symptom}': {tokens}")



Symptom Token ID Analysis
'runny nose': [5143, 3281, 9686]
'fever': [69, 964]
'blisters': [2436, 6223]
'mild fever': [76, 688, 17372]
'rash': [81, 1077]
'rashes': [81, 7465]


In [9]:
print("\n" + "="*40)
print("ðŸ©º Finding Potential Treatments...")
print("="*40)


ðŸ©º Finding Potential Treatments...


In [10]:
# Check if we have symptoms to match
if not matched_symptoms_strings:
    print("No symptoms were matched, cannot suggest treatment.")
else:
    try:
        # Open the JSON database of diseases
        with open('updatedSolution.json', 'r', encoding='utf-8') as sol:
            disease_database = json.load(sol)

        # This new list will store all potential matches
        all_matches = []

        # This helper function calculates the match score for one disease
        def get_match_data(disease_entry):
            symptom_list = disease_entry.get('symptoms')
            if not symptom_list:
                return None  # No symptoms in JSON to compare

            # Find the intersection between user query symptoms and DB symptoms
            json_symptoms = set(s.strip().lower() for s in symptom_list)
            overlapping_symptoms = matched_symptoms_strings.intersection(json_symptoms)

            # The "score" is simply the number of overlapping symptoms
            match_score = len(overlapping_symptoms)

            # Only return data if there is at least one match
            if match_score > 0:
                disease_name = disease_entry.get('disease', 'Unknown Disease')
                aid_1 = disease_entry.get('first_aid_treatment_1', 'No first aid 1 listed.')
                aid_2 = disease_entry.get('first_aid_treatment_2', 'No first aid 2 listed.')

                # Return a dictionary with all info needed for sorting and printing
                return {
                    'disease': disease_name,
                    'score': match_score,
                    'matched_symptoms': list(overlapping_symptoms),
                    'aid_1': aid_1,
                    'aid_2': aid_2
                }

            return None # No overlap found

        # --- Main loop to gather all matches ---

        # Case 1: The JSON is a flat list of diseases
        if isinstance(disease_database, list):
            for disease_entry in disease_database:
                match_data = get_match_data(disease_entry)
                if match_data:
                    all_matches.append(match_data) # Add to our list

        # Case 2: The JSON is a dictionary with a 'data' key (like in your original code)
        elif isinstance(disease_database, dict):
            disease_list = disease_database.get('data')
            if isinstance(disease_list, list):
                for disease_entry in disease_list:
                    match_data = get_match_data(disease_entry)
                    if match_data:
                        all_matches.append(match_data) # Add to our list
            else:
                print(" ERROR: JSON is a dictionary but has no 'data' key with a list.")

        # --- Sorting and Printing ---

        if not all_matches:
            print("\nYour symptoms did not match a specific disease in the database.")
        else:
            # 1. Sort the list by 'score' in descending (highest-to-lowest) order
            sorted_matches = sorted(all_matches, key=lambda x: x['score'], reverse=True)

            # 2. Get the top 3 (or fewer if less than 3 matched)
            top_matches = sorted_matches[:3]

            print(f"\nDisplaying top {len(top_matches)} most likely matches based on symptom count:")

            # 3. Print only the top matches
            for match in top_matches:
                print(f"\n--- Possible Match Found (Score: {match['score']}) ---")
                print(f"Disease: {match['disease']}")
                print(f"Matched Symptoms: {match['matched_symptoms']}")
                print(f"  First Aid 1: {match['aid_1']}")
                print(f"  First Aid 2: {match['aid_2']}")

    except FileNotFoundError:
        print(" ERROR: updatedSolution.json not found!")
    except json.JSONDecodeError:
        print(" ERROR: updatedSolution.json is not a valid JSON file.")
    except Exception as e:
        print(f" An error occurred reading or processing updatedSolution.json: {e}")


Displaying top 3 most likely matches based on symptom count:

--- Possible Match Found (Score: 3) ---
Disease: Roseola
Matched Symptoms: ['rash', 'rashes', 'runny nose']
  First Aid 1: Give acetaminophen (Tylenol) or ibuprofen (Advil, Motrin) to reduce fever and pain; do not give aspirin to children
  First Aid 2: Ensure plenty of fluids (water, clear liquids, electrolyte solutions like Pedialyte) to prevent dehydration; offer small sips frequently

--- Possible Match Found (Score: 3) ---
Disease: Hand, Foot, and Mouth Disease
Matched Symptoms: ['fever', 'mild fever', 'blisters']
  First Aid 1: Give acetaminophen (Tylenol) or ibuprofen (Advil, Motrin) to reduce fever and mouth pain; do not give aspirin to children
  First Aid 2: Offer plenty of cool fluids and soft foods (yogurt, pudding, smoothies, ice pops) to ease mouth pain and prevent dehydration

--- Possible Match Found (Score: 2) ---
Disease: Norovirus Infection
Matched Symptoms: ['fever', 'rashes']
  First Aid 1: Drink plenty