In [127]:
import joblib
import spacy
import medspacy
from medspacy.ner import TargetRule
from medspacy.context import ConTextRule
from medspacy.visualization import visualize_ent, visualize_dep
from spellchecker import SpellChecker
from rapidfuzz import process, fuzz

In [128]:
nlp = medspacy.load()

# Load valid symptoms
symptom_columns = joblib.load('../models/symptom_columns.pkl')
valid_symptoms = [symptom for symptom in symptom_columns]

input_text = (
    "I have no migraine but I do have a fevver and depression. "
    "I do experience anxiety as well as abnormal involuntary movements. "
    "I have dont feel shortness of breath as well."
)

In [129]:
# Adding target rules
target_matcher = nlp.get_pipe("medspacy_target_matcher")
target_rules = []
for symptom in valid_symptoms:
    target_rules.append(TargetRule(literal=symptom, category="SYMPTOM"))
target_matcher.add(target_rules)


In [130]:
# Preprocess the input text for typos
def correct_typos(input_text):
    corrected_tokens = []
    spell = SpellChecker()
    tokens = input_text.split()
    for token in tokens:
        if token.lower() not in spell:
            correction = spell.correction(token)
            corrected_tokens.append(correction if correction else token)
        else:
            corrected_tokens.append(token)
    return " ".join(corrected_tokens)

In [None]:
# After correcting typos, map symptoms to valid symptoms
def correct_symptom_candidate(candidate: str, valid_symptoms: list, threshold: int = 80) -> str:
    """
    Given a candidate word from the input text (e.g. "anxiety"), fuzzy-match it
    against the valid_symptoms list (which may contain multi-word phrases like 
    "anxiety and nervousness") and return the best matching valid symptom if the 
    fuzzy score is above the threshold. Otherwise, return the candidate as-is.
    """
    candidate = candidate.lower()
    best_match, score, _ = process.extractOne(candidate, valid_symptoms, scorer=fuzz.token_set_ratio)
    if score >= threshold:
        return best_match.lower()
    return candidate

def fuzzy_match_text(text: str, valid_symptoms: list, threshold: int = 80) -> str:
    tokens = text.split()
    corrected_tokens = [correct_symptom_candidate(token, valid_symptoms, threshold) for token in tokens]
    return " ".join(corrected_tokens)

# Fix dupicate issue

In [132]:
# Use medspacy to locate valid symptoms
# Apply contextual anlysis to filter out negated symptoms
corrected_text = correct_typos(input_text)
fuzzy_text = fuzzy_match_text(corrected_text, valid_symptoms)
print(fuzzy_text)

doc = nlp(fuzzy_text)
visualize_dep(doc)

i have no migraine but i do have a fever anxiety and nervousness depression i do experience anxiety and nervousness as well as abnormal involuntary movements abnormal involuntary movements abnormal involuntary movements i have don't feel shortness of breath shortness of breath shortness of breath as well


In [133]:
final_symptoms = set()
for ent in doc.ents:
    if ent.label_ == "SYMPTOM" and not ent._.is_negated:
        final_symptoms.add(ent.text.lower())
print("Final Extracted Valid Symptoms:", list(final_symptoms))

Final Extracted Valid Symptoms: ['fever', 'abnormal involuntary movements', 'depression', 'anxiety and nervousness']


In [134]:
# Return the valid symptoms