In [1]:
from pandas import read_csv
import re

In [2]:
# Load the dataset CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = read_csv('../dataset/symptoSense.csv')

In [3]:
# Every column except the 'prognosis' label is treated as a symptom feature.
features = df.drop(columns='prognosis').columns

# Qualifier words that are allowed through input cleaning (they are added to
# the whitelist) and are then stripped again by remove_descriptors().
pain_descriptors = [
    "dull",
    "sharp",
    "severe",
    "mild",
    "constant",
    "throbbing",
    "aching",
    "burning",
    "stabbing",
    "intense",
    "intermittent",
    "chronic",
    "acute",
]

# Words whose presence makes match_symptom() record a symptom as 0 instead of 1.
negations = [
    "no",
    "not",
    "without",
]

In [None]:
# Vocabulary of words that survive input cleaning: every whitespace-separated
# word of every feature name, plus the negation and pain-descriptor words.
whitelist = {word for feature in features for word in feature.split()}
whitelist |= set(negations) | set(pain_descriptors)

def remove_unnecessary_words(user_input):
    """Reduce ``user_input`` to the words found in the module-level ``whitelist``.

    The text is split on whitespace and each token is compared to the
    whitelist exactly as typed (no case folding, no punctuation stripping).

    Args:
        user_input: Free-text string entered by the user.

    Returns:
        The retained words, in their original order, joined by single spaces.
        An empty string if no word is whitelisted.
    """
    kept = []
    for token in user_input.split():
        if token in whitelist:
            kept.append(token)
    return ' '.join(kept)

def match_symptom(user_input, feature_dict):
    """Update ``feature_dict`` in place from one free-text symptom description.

    The input is reduced to whitelisted words, pain descriptors and negation
    words are removed, and the remaining phrase is looked up in every feature
    name. Each feature containing the phrase as a run of whole words is set
    to 1, or to 0 when the input contained a negation.

    Args:
        user_input: Text typed by the user for a single symptom.
        feature_dict: Mapping of feature name -> 0/1 flag; mutated in place.

    Returns:
        The same ``feature_dict`` object, after any updates.
    """
    cleaned_input = remove_unnecessary_words(user_input)

    negated = contains_negation(cleaned_input)
    if negated:
        print(f"Negation detected in input: '{cleaned_input}'")

    # Drop the negation words themselves: a phrase such as "no chest pain"
    # can never be found inside the feature name "chest pain". Splitting and
    # re-joining also collapses any gaps left behind by remove_descriptors().
    words = [
        word
        for word in remove_descriptors(cleaned_input).split()
        if word not in negations
    ]
    if not words:
        # An empty phrase is a substring of every feature name, so matching
        # on it would flip the whole dictionary. Leave it untouched instead.
        return feature_dict

    # Pad both sides with spaces so the phrase only matches whole words
    # (plain substring search would let "ear" match "appearing").
    phrase = ' ' + ' '.join(words) + ' '
    value = 0 if negated else 1
    for feature in feature_dict:
        padded_feature = ' ' + ' '.join(feature.split()) + ' '
        if phrase in padded_feature:
            feature_dict[feature] = value

    return feature_dict


def remove_descriptors(user_input):
    """Strip pain-descriptor words from ``user_input``.

    Every whole-word, case-insensitive occurrence of an entry in the
    module-level ``pain_descriptors`` list is deleted. Whitespace is then
    normalised so that removing a word from the middle of a phrase does not
    leave a double space behind ("chest sharp pain" -> "chest pain").

    Args:
        user_input: Text to clean.

    Returns:
        The text without descriptor words, with single spaces between the
        remaining words and no leading or trailing whitespace.
    """
    # Escape each descriptor so it is always matched literally.
    alternatives = '|'.join(re.escape(descriptor) for descriptor in pain_descriptors)
    pattern = r'\b(?:' + alternatives + r')\b'
    without_descriptors = re.sub(pattern, '', user_input, flags=re.IGNORECASE)
    # split() + join collapses runs of whitespace and trims both ends.
    return ' '.join(without_descriptors.split())

def contains_negation(user_input):
    """Report whether ``user_input`` contains a negation word.

    The check is done on whole words, case-insensitively, against the
    module-level ``negations`` list. A plain substring test would give false
    positives such as "no" inside "abnormal" or "not" inside "knot".

    Args:
        user_input: Text to inspect.

    Returns:
        True if any entry of ``negations`` appears as a standalone word,
        otherwise False.
    """
    words = set(re.findall(r"\w+", user_input.lower()))
    return any(negation.lower() in words for negation in negations)


def print_present_features(feature_dict):
    """Print a header followed by every feature whose value equals 1.

    Args:
        feature_dict: Mapping of feature name -> flag value. Entries are
            printed in the mapping's iteration order as ``name: value``.
    """
    print("Present features (value 1):")
    present = [(name, flag) for name, flag in feature_dict.items() if flag == 1]
    for name, flag in present:
        print(f"{name}: {flag}")


# Start from an all-zero symptom vector. Without this the cell only works when
# `feature_dict` happens to be left over in the kernel from an earlier run.
feature_dict = {feature: 0 for feature in features}

print("Enter your symptoms one by one. Type 'done' when finished.")

while True:
    user_input = input("Enter symptom: ").lower().strip()

    if user_input == "done":
        break
    if not user_input:
        # Nothing was typed; ask again instead of matching on an empty string.
        continue

    feature_dict = match_symptom(user_input, feature_dict)

print("\nUpdated feature dictionary:")
for feature, value in feature_dict.items():
    print(f"{feature}: {value}")

print_present_features(feature_dict)

Enter your symptoms one by one. Type 'done' when finished.

Updated feature dictionary:
abdominal distention: 0
abnormal appearing skin: 0
abnormal appearing tongue: 0
abnormal breathing sounds: 1
abnormal involuntary movements: 0
abnormal movement of eyelid: 0
abnormal size or shape of ear: 0
absence of menstruation: 0
abusing alcohol: 0
ache all over: 0
acne or pimples: 0
allergic reaction: 0
ankle stiffness or tightness: 0
ankle weakness: 0
antisocial behavior: 0
anxiety and nervousness: 0
apnea: 0
arm cramps or spasms: 0
arm lump or mass: 0
arm pain: 0
arm stiffness or tightness: 0
arm swelling: 0
arm weakness: 0
back cramps or spasms: 0
back mass or lump: 0
back pain: 0
back swelling: 0
back weakness: 0
bedwetting: 0
bladder mass: 0
bleeding from ear: 0
bleeding from eye: 0
bleeding gums: 0
bleeding in mouth: 0
bleeding or discharge from nipple: 0
blindness: 0
blood clots during menstrual periods: 0
blood in stool: 0
blood in urine: 0
bones are painful: 0
bowlegged or knock-kneed:

['I have chest pain', ' I have pain in kidney', ' What should I do?']
