In [15]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer_vi2en = AutoTokenizer.from_pretrained(
    "vinai/vinai-translate-vi2en-v2",
    use_fast=False,
    src_lang="vi_VN",
    tgt_lang="en_XX"
)
model_vi2en = AutoModelForSeq2SeqLM.from_pretrained("vinai/vinai-translate-vi2en-v2")

def translate_vi2en(vi_text: str) -> str:
    inputs = tokenizer_vi2en(vi_text, return_tensors="pt")
    decoder_start_token_id = tokenizer_vi2en.lang_code_to_id["en_XX"]
    outputs = model_vi2en.generate(
        **inputs,
        decoder_start_token_id=decoder_start_token_id,
        num_beams=5,
        early_stopping=True
    )
    return tokenizer_vi2en.decode(outputs[0], skip_special_tokens=True)

In [None]:
from food_model import FoodModel
model = FoodModel()

In [17]:
examples = """3 tablespoons (21 grams) blanched almond flour
... ¾ teaspoon pumpkin spice blend
... ⅛ teaspoon baking soda
... ⅛ teaspoon Diamond Crystal kosher salt
... 1½ tablespoons maple syrup or 1 tablespoon honey
... 1 tablespoon (15 grams) canned pumpkin puree
... 1 teaspoon avocado oil or melted coconut oil
... ⅛ teaspoon vanilla extract
... 1 large egg""".split("\n")

model.extract_foods(examples)

[{'Product': [],
  'Ingredient': [{'text': 'almond flour',
    'span': [34, 46],
    'conf': 0.9803279836972555}]},
 {'Product': [],
  'Ingredient': [{'text': 'pumpkin spice blend',
    'span': [11, 30],
    'conf': 0.8877270817756653}]},
 {'Product': [],
  'Ingredient': [{'text': 'baking soda',
    'span': [11, 22],
    'conf': 0.8989846706390381}]},
 {'Product': [{'text': 'Diamond Crystal kosher salt',
    'span': [11, 38],
    'conf': 0.7700595160325369}],
  'Ingredient': []},
 {'Product': [],
  'Ingredient': [{'text': 'maple syrup',
    'span': [15, 26],
    'conf': 0.9884961545467377},
   {'text': 'honey', 'span': [43, 48], 'conf': 0.9858396053314209}]},
 {'Product': [],
  'Ingredient': [{'text': 'pumpkin puree',
    'span': [31, 44],
    'conf': 0.9875507950782776}]},
 {'Product': [],
  'Ingredient': [{'text': 'avocado oil',
    'span': [11, 22],
    'conf': 0.9926817119121552},
   {'text': 'coconut oil', 'span': [33, 44], 'conf': 0.9395932555198669}]},
 {'Product': [],
  'Ingred

In [18]:
test_text = """Banchan-style home cooking is cumulative, which is to say, you might make one or two dishes at a time and keep leftovers in the fridge. 
The point is that you’re amassing a store of banchan so that, come dinnertime, all that’s left to do is steam the rice and take out your stash.
Some banchan can be eaten as soon as you make them. But others are meant to be eaten later, stemming from historic methods of preservation. 
On the Korean Peninsula, food often had to be preserved, especially with salt, to last through the long, grueling winters. 
That’s why fermentation is central to many banchan, like kimchi, pickles and jeotgal, or salted seafood.""".split("\n\n")

model.extract_foods(test_text)

[{'Product': [],
  'Ingredient': [{'text': 'banchan',
    'span': [182, 189],
    'conf': 0.9883177876472473},
   {'text': 'rice', 'span': [251, 255], 'conf': 0.9737038016319275},
   {'text': 'banchan', 'span': [286, 293], 'conf': 0.9947269856929779},
   {'text': 'salt', 'span': [495, 499], 'conf': 0.9849796891212463},
   {'text': 'banchan', 'span': [589, 596], 'conf': 0.9946950078010559},
   {'text': 'kimchi', 'span': [603, 609], 'conf': 0.9959975679715475},
   {'text': 'pickles', 'span': [611, 618], 'conf': 0.994708776473999},
   {'text': 'jeotgal', 'span': [623, 630], 'conf': 0.9903813749551773},
   {'text': 'salt', 'span': [635, 639], 'conf': 0.6304224729537964},
   {'text': 'seafood', 'span': [642, 649], 'conf': 0.9943801462650299}]}]

In [19]:
res = model.extract_foods(test_text)
unique_ingredients = [
    ing['text']
    for record in res
    for ing in record.get('Ingredient', [])
]

print(unique_ingredients)

['banchan', 'rice', 'banchan', 'salt', 'banchan', 'kimchi', 'pickles', 'jeotgal', 'salt', 'seafood']


In [20]:
test = """Tôi muốn làm 1 món pizza với thịt bò, phô mai và nấm."""
text = translate_vi2en(test)
print(text)

I'd like to make a pizza with beef, cheese and mushrooms.


In [21]:
res = model.extract_foods(text)
unique_ingredients = [
    ing['text']
    for record in res
    for ing in record.get('Ingredient', [])
]

print(unique_ingredients)

['pizza', 'beef', 'cheese', 'mushrooms']
