# Named Entity Recognition
1. Preprocess and clean the extracted recipes further
    - Fix any typos in the recipes using `SymSpell`.
    - Perform Named Entity Recognition (NER) to extract ingredients and techniques used.
    - Analyze recipe lengths; if recipes are often very long, chunk the recipe instructions.

2. Implement NER on user queries to improve retrieval via filtering.


In [None]:
# Load the stored recipe JSON files

In [None]:
# simple method to clean the recipes via SymSpell
from symspellpy.symspellpy import SymSpell, Verbosity
import re

# Initialize the SymSpell object and load the custom recipe vocabulary.
# The dictionary file is read with the term in column 0 and its count in column 1.
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
# load_dictionary() reports a missing file by returning False rather than
# raising; without this check every later lookup would silently return no
# suggestions and the cleaning step would become a no-op.
if not sym_spell.load_dictionary("custom_recipe_dict.txt", term_index=0, count_index=1):
    raise FileNotFoundError(
        "SymSpell dictionary 'custom_recipe_dict.txt' could not be loaded"
    )

def clean_and_correct(text):
    """Spell-correct the word tokens of ``text`` and return the rebuilt string.

    The text is split into alternating runs of word characters and non-word
    characters, so punctuation and whitespace survive unchanged when the
    pieces are joined back together.

    Handling per token:
      * Non-word runs (spaces, punctuation, symbols) are copied verbatim.
      * Word tokens containing a numeric character (quantities, temperatures,
        fractions) are copied verbatim; the spell checker would otherwise be
        free to replace them with a short dictionary word.
      * Every other word token is looked up in the module-level ``sym_spell``
        with ``Verbosity.CLOSEST`` and a maximum edit distance of 2. The first
        suggestion is used; if there is none, the token is kept as written.
      * Casing of the original token is preserved: a suggestion that differs
        only by case is ignored, and an all-caps or capitalized token passes
        that casing on to its correction.

    Args:
        text: The string to clean.

    Returns:
        The corrected string, with the same token layout as the input.
    """
    pieces = []
    for token in re.findall(r"\w+|\W+", text):
        # Each token is entirely word-like or entirely non-word, so checking
        # the first character is enough to classify it.
        is_word = re.match(r"\w", token) is not None
        if not is_word or any(ch.isnumeric() for ch in token):
            pieces.append(token)
            continue

        suggestions = sym_spell.lookup(token, Verbosity.CLOSEST, max_edit_distance=2)
        if not suggestions:
            pieces.append(token)
            continue

        best = suggestions[0].term
        if best.lower() == token.lower():
            # Only the casing differs (e.g. sentence-initial "Add" vs "add"):
            # the word is spelled correctly, keep it as written.
            best = token
        elif len(token) > 1 and token.isupper():
            best = best.upper()
        elif token[0].isupper():
            best = best[:1].upper() + best[1:]
        pieces.append(best)
    return "".join(pieces)

# Example use: run the cleaner on one sample sentence and print the result.
# The tokenizer in clean_and_correct splits "consomm^" into the word run
# "consomm" and the non-word run "^ "; only "consomm" is sent to SymSpell,
# while "^ " is copied into the output unchanged.
text = "Add 1 cup of consomm^ to the roux."
fixed = clean_and_correct(text)
print(fixed)
