In [None]:
%time
%load_ext autotime
%load_ext autoreload
%autoreload 2

# if cannot import the modules, add the parent directory to system path might help
import os, tqdm, sys
parent_dir = os.path.abspath(os.getcwd()+'/..')+'/'
sys.path.append(parent_dir)

from utils.path import dir_HugeFiles
from utils.save import make_dir, save_pickle, load_pickle, save
from utils.tree import instr2tree, tree_distance, build_tree, stem
from utils.evaluation import metrics, spacy_extension

import pandas as pd
import numpy as np
import re

treemaker = instr2tree()
sp = spacy_extension()

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 8.58 µs
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
class evaluation:
    def __init__(self, filename, tag):
        self.dic = self.load_dic({}, filename, tag)
        self.ori = tag
        self.gens = []
        #from utils.evaluation import metrics
    '''
    loading data
    '''
    def append_dic(self, filename, tag):
        if tag in self.gens:
            print('already exist, will not load again')
            self.gen = tag
        else:
            self.dic = self.load_dic(self.dic, filename, tag)
            self.gen = tag
            self.gens += [tag]
        
    def load_dic(self, dic, filename, tag):
        if os.path.isdir(filename):
            print('load', filename)
            for (dirpath, _, fnames) in os.walk(filename):
                for fname in fnames:
                    path = os.path.join(dirpath, fname)
                    with open(path, 'r') as fp:
                        raw_text = fp.read()
                        raw_text = self.remove_end(raw_text)

                    name, field = int(fname[:-5]), fname[-5]

                    if name not in dic.keys() and field in ['d','i']:
                        dic.update({name: {}})

                    if field == 'd':
                        dic[name].update({'%s_instr'%(tag): raw_text})

                    if field == 'i':
                        raw_text = self.reverse_list(raw_text.split('$'))
                        dic[name].update({'%s_ingr'%(tag): raw_text})
        return dic
    
    '''
    exporting data
    '''
    def to_bleu(self):
        to_write = {'%s_i'%(self.ori):'',
                    '%s_i'%(self.gen):'',
                    '%s_d'%(self.ori):'',
                    '%s_d'%(self.gen):''}
        
        for i, v in self.dic.items():
            to_write['%s_i'%(self.ori)] += self.add_space(' $ '.join(v['%s_ingr'%(self.ori)]))+ ' $ \n'
            to_write['%s_i'%(self.gen)] += self.add_space(' $ '.join(v['%s_ingr'%(self.gen)])) + ' $ \n'
            
            to_write['%s_d'%(self.ori)] += self.add_space(v['%s_instr'%(self.ori)])+ '\n'
            to_write['%s_d'%(self.gen)] += self.add_space(v['%s_instr'%(self.gen)])+ '\n'
        
        for k, v in to_write.items():
            save('../../to_gpt2/generation_%s.txt'%(k), v ,overwrite = True)
        !eval {"perl multi-bleu.perl ../../to_gpt2/generation_%s_i.txt < ../../to_gpt2/generation_%s_i.txt" %(self.ori, self.gen)}
        !eval {"perl multi-bleu.perl ../../to_gpt2/generation_%s_d.txt < ../../to_gpt2/generation_%s_d.txt" %(self.ori, self.gen)}
    
        !eval {"rouge -f ../../to_gpt2/generation_%s_i.txt ../../to_gpt2/generation_%s_i.txt --avg"%(self.ori, self.gen)}
        !eval {"rouge -f ../../to_gpt2/generation_%s_d.txt ../../to_gpt2/generation_%s_d.txt --avg"%(self.ori, self.gen)}
        
        print()
    

    def ingr_f1_freq(self, root = False):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            true, pred = v['%s_ingr'%(self.ori)], v['%s_ingr'%(self.gen)]
            if root:
                true, pred = sp.root(true), sp.root(pred)
            scores = metrics(true, pred)
            value.append(scores.f1_freq())
        avg = sum(value)/len(value)
        print(avg)
        return avg
    '''
    instruction evaluation
    '''
    def instr_tree(self, stem_only = False):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            ori_instr, gen_instr = v['%s_instr'%(self.ori)], v['%s_instr'%(self.gen)]
            score = self.norm_dist(ori_instr, gen_instr, stem_only = stem_only)
            value.append(score)
        avg = sum(value)/len(value)
        print(avg)
        return avg
    
    '''
    cleaning data
    '''
    def remove_end(self, text):
        return text.replace('\n','').split('<')[0]
    
    def reverse(self, text):
        '''
        Important data cleaning before NY times parser
        '''
        # replace things in brace
        text = re.sub(r'\([^)]*\)', '', text)

        # remove space before punct
        text = re.sub(r'\s([?.!,"](?:\s|$))', r'\1', text)

        # remove consecutive spaces
        text = re.sub(' +',' ',text).strip()
        return text
    
    def reverse_list(self, listoftext):
        output = []
        for text in listoftext:
            rev = self.reverse(text)
            if rev:
                output.append(rev)
        return output
    
    def add_space(self, line):
        # add space before punct
        line = re.sub('([.,!?()])', r' \1 ', line)
        line = re.sub('\s{2,}', ' ', line)
        return line
    
    '''
    tree edit distance
    '''

    def str2tree(self, instr, stem_only):
        instr = [x for x in instr.split('. ') if x]
        instr = treemaker.sents2tree(instr)
        if stem_only:
            instr = stem(instr)
        n_nodes = sum([len(line['ingredient']) +1 for line in instr])
        return build_tree(instr), n_nodes

    def norm_dist(self, ori_instr, gen_instr, stem_only):
        '''
        Args: ori_instr: str
        Args: gen_instr: str
        '''
        ori_tree, ori_nodes = self.str2tree(ori_instr, stem_only = stem_only)
        gen_tree, gen_nodes = self.str2tree(gen_instr, stem_only = stem_only)
        tree_dist = tree_distance(ori_tree, gen_tree)
        normed = tree_dist/(ori_nodes+gen_nodes)
        return normed

In [5]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')

data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.to_bleu()

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.to_bleu()

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k5')
data.to_bleu()

data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.to_bleu()

data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.to_bleu()

data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')
data.to_bleu()

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/
saved ../../to_gpt2/generation_ori_i.txt
saved ../../to_gpt2/generation_k1_i.txt
saved ../../to_gpt2/generation_ori_d.txt
saved ../../to_gpt2/generation_k1_d.txt
BLEU = 25.27, 54.9/40.4/21.5/8.6 (BP=1.000, ratio=1.067, hyp_len=108507, ref_len=101657)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.
BLEU = 10.34, 54.1/21.4/9.4/4.7 (BP=0.689, ratio=0.728, hyp_len=380434, ref_len=522308)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups

In [117]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')

data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.ingr_f1_freq(root=False)

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.ingr_f1_freq(root=False)

data.append_dic('../../to_gpt2/generation_201911118_k5_val/', 'k5')
data.ingr_f1_freq(root=False)

data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.ingr_f1_freq(root=False)

data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.ingr_f1_freq(root=False)

data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')
data.ingr_f1_freq(root=False)

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/


  self.warn()
100%|██████████| 4000/4000 [00:00<00:00, 8812.46it/s]


0.4162089278961664
load ../../to_gpt2/generation_201911118_k3_val/


100%|██████████| 4000/4000 [00:00<00:00, 10025.25it/s]


0.39568148253596147
load ../../to_gpt2/generation_201911118_k5_val/


100%|██████████| 4000/4000 [00:00<00:00, 10040.19it/s]


0.3923144156092605
load ../../to_gpt2/generation_201911118_k10_val/


100%|██████████| 4000/4000 [00:00<00:00, 8910.83it/s]


0.37357185692462935
load ../../to_gpt2/generation_201911118_k30_val/


100%|██████████| 4000/4000 [00:00<00:00, 8993.37it/s]


0.3433319541036354
load ../../to_gpt2/generation_201911118_p99_val/


100%|██████████| 4000/4000 [00:00<00:00, 9114.11it/s]

0.32027630918170885





0.32027630918170885

time: 12.6 s


In [119]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.append_dic('../../to_gpt2/generation_201911118_k5_val/', 'k5')
data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/
load ../../to_gpt2/generation_201911118_k3_val/
load ../../to_gpt2/generation_201911118_k5_val/
load ../../to_gpt2/generation_201911118_k10_val/
load ../../to_gpt2/generation_201911118_k30_val/
load ../../to_gpt2/generation_201911118_p99_val/
time: 9.73 s


In [130]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.to_bleu()

k1
saved ../../to_gpt2/generation_ori_i.txt
saved ../../to_gpt2/generation_k1_i.txt
saved ../../to_gpt2/generation_ori_d.txt
saved ../../to_gpt2/generation_k1_d.txt
BLEU = 25.27, 54.9/40.4/21.5/8.6 (BP=1.000, ratio=1.067, hyp_len=108507, ref_len=101657)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.
BLEU = 10.34, 54.1/21.4/9.4/4.7 (BP=0.689, ratio=0.728, hyp_len=380434, ref_len=522308)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokeniz

In [None]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')

data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.ingr_f1_freq(root=True)

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.ingr_f1_freq(root=True)

data.append_dic('../../to_gpt2/generation_201911118_k5_val/', 'k5')
data.ingr_f1_freq(root=True)

data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.ingr_f1_freq(root=True)

data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.ingr_f1_freq(root=True)

data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')
data.ingr_f1_freq(root=True)

In [4]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')

data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.instr_tree(stem_only = False)

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.instr_tree(stem_only = False)

data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.instr_tree(stem_only = False)

data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.instr_tree(stem_only = False)

data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')
data.instr_tree(stem_only = False)

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/


100%|██████████| 4000/4000 [26:39<00:00,  2.40it/s]


0.5186078582567383
load ../../to_gpt2/generation_201911118_k3_val/


100%|██████████| 4000/4000 [26:54<00:00,  2.43it/s]


0.5270206439868736
load ../../to_gpt2/generation_201911118_k10_val/


100%|██████████| 4000/4000 [28:50<00:00,  1.76it/s]


0.53722605176261
load ../../to_gpt2/generation_201911118_k30_val/


100%|██████████| 4000/4000 [30:39<00:00,  1.64it/s]


0.5432887709524692
load ../../to_gpt2/generation_201911118_p99_val/


100%|██████████| 4000/4000 [33:03<00:00,  1.67it/s]

0.5555177256269578





0.5555177256269578

time: 2h 26min 12s


In [5]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')

data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.instr_tree(stem_only = True)

data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.instr_tree(stem_only = True)

data.append_dic('../../to_gpt2/generation_201911118_k5_val/', 'k5')
data.instr_tree(stem_only = True)

data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.instr_tree(stem_only = True)

data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.instr_tree(stem_only = True)

data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')
data.instr_tree(stem_only = True)

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/


100%|██████████| 4000/4000 [16:27<00:00,  4.12it/s]


0.48086849711629653
load ../../to_gpt2/generation_201911118_k3_val/


100%|██████████| 4000/4000 [15:56<00:00,  4.18it/s]


0.4834859582582718
load ../../to_gpt2/generation_201911118_k10_val/


100%|██████████| 4000/4000 [16:41<00:00,  3.27it/s]


0.4921781118409965
load ../../to_gpt2/generation_201911118_k30_val/


100%|██████████| 4000/4000 [17:24<00:00,  3.23it/s]


0.49803296322476104
load ../../to_gpt2/generation_201911118_p99_val/


100%|██████████| 4000/4000 [18:11<00:00,  3.08it/s]

0.5114497360289872





0.5114497360289872

time: 1h 24min 45s


In [36]:
data.dic[786989]['ori_ingr']

['water', 'pepper', 'rice', 'butter', 'juice']

time: 61.4 ms


In [6]:
data.dic[786989]['k1_ingr']

['butter', 'salt', 'lemon juice', 'long grain rice']

time: 51.3 ms


In [7]:
data.dic[786989]['k3_instr'].split('. ')

['put rice and water in a large pan and bring to a boil',
 'reduce heat and simmer, covered, about 20 to 25 minutes or until rice is tender and the water is absorbed',
 'add butter and stir until melted',
 'add lemon juice, salt and pepper and stir to blend',
 ' ']

time: 52.8 ms


In [12]:
data.dic[786989]['ori_instr'].split('. ')

['bring the water to a boil in a saucepan and add salt and rice',
 'when the water returns to a boil, let rice boil vigorously for exactly 17 minutes',
 'drain in a colander run hot water over the rice',
 'drain again',
 'add the butter, salt and pepper',
 'sprinkle with lemon juice and toss until the grains are coated',
 ' ']

time: 21.3 ms


### human judgements

In [40]:
import json
layer1 = json.load(open('/data/yueliu/RecipeAnalytics_201906/raw_data/recipe1M/layer1.json','r'))

recipe1M_ny = load_pickle('../big_data/recipe1M_ny.pickle')
new_data, idx = [], []
for i, v in tqdm.tqdm(enumerate(recipe1M_ny)):
    ingr = []
    for ny_full_ingredients in v['ny_full_ingredients']:
        if 'half and half' in ny_full_ingredients['input']:
            ingr.append('half and half')
        elif type(ny_full_ingredients['name'])==float:
            break
        else:
            ingr.append(ny_full_ingredients['name'])
    if len(ingr)>=2 and len(v['instructions']) >=2:
        recipe = {'ingredients':ingr, 'title':v['title'], 'instructions': v['instructions']}
        new_data.append(recipe)
        idx.append(i)

1029720it [00:32, 31819.53it/s]

time: 33.9 s





In [83]:
# Compare with ground truth
import pprint
pp = pprint.PrettyPrinter(indent=4)
'''Usuage:
pp.pprint(data.dic[786989]['ori_ingr'])
'''

from random import sample 
sampled_id = sample(list(data.dic.keys()),2)

def show(true, pred):
    pp.pprint(true)
    pp.pprint(pred)
    true, pred = sp.root(true), sp.root(pred)
    scores = metrics(true, pred)
    print(scores.precision_freq())
    print(scores.recall_freq())
    print(scores.f1_freq())
        
for k in sampled_id:
    print(k)
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k1_ingr']
    show(true, pred)
    
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k3_ingr']
    show(true, pred)
    
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k10_ingr']
    show(true, pred)
        
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['p99_ingr']
    show(true, pred)
    print()

908471
[   'shallots',
    'garlic',
    'peanut oil',
    'shrimps',
    'sesame seeds',
    'peanuts',
    'cilantro',
    'lime']
[   'garlic',
    'shallots',
    'salt',
    'olive oil',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'garlic',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sal

In [131]:
sampled_id = sample(list(data.dic.keys()),10)
for k in sampled_id:
    print(layer1[idx[k]]['title'])
    pp.pprint(layer1[idx[k]]['ingredients'])
    print(data.dic[k]['ori_instr'])
    print()
    print(data.dic[k]['k1_instr'])
    print(data.dic[k]['k3_instr'])
    print(data.dic[k]['k5_instr'])
    print('---'*5)

Make Ur Own Herbal Shampoo
[   {'text': '14 cup of your favorite herbal tea (strongly brewed)'},
    {'text': '8 ounces liquid castile soap'}]
add soap to tea. stir over low heat until well blended. store in a capped bottle.  

mix all ingredients together. apply to your face. rinse off with warm water.  
pour all ingredients into a blender. blend until smooth. pour into a glass and enjoy.  
mix all ingredients together well. apply to your face and neck. rinse with warm water and pat dry .  
---------------
Mojito
[   {'text': '6 fresh mint leaves, shredded & mashed'},
    {'text': '1 tablespoon sugar'},
    {'text': '12 fresh lime, juice of'},
    {'text': '2 ounces light rum'},
    {'text': 'chilled club soda'},
    {'text': 'ice cube'},
    {'text': 'fresh mint sprig (for garnish)'},
    {'text': 'lime slice (for garnish)'}]
in a tall glass, stir mint, sugar and lime juice until sugar is dissolved and stir in rum. add ice cubes and top off drink with club soda or seltzer water. stir

In [116]:
# Retrieve prompt
sampled_id = sample(list(data.dic.keys()),10)
for k in sampled_id:
    pp.pprint(recipe1M_ny[idx[k]]['title'])
    x = recipe1M_ny[idx[k]]['ny__ingredients']['exact']
    print('\n'.join(x))
    print('----'*5)

'melt away peppermint wreaths'
butter
confectioner sugar
all purpose flour
peppermint extract
drop
red food coloring
green food coloring
--------------------
's0 buttery brioche'
flour bread
sugar
yeast
salt
egg
butter
--------------------
'brussels sprout salad with avocado pumpkin seeds'
lemon juice
dijon mustard
olive oil
salt
pepper
brussels
pumpkin seed
avocado
--------------------
'mint chocolate chip cookies'
butter
sugar
vanilla extract
mint extract
green food coloring
whole egg
13 cup
all purpose flour
baking powder
bag
semi sweet chocolate chip
--------------------
'venison bites'
small venison roast
milk
flour
salt
black pepper
garlic powder
--------------------
'four cheese white broccoli pizza'
olive oil
pizza dough
cheese
provolone cheese
broccoli floret
ricotta cheese
mozzarella cheese
garlic powder
salt
oregano
--------------------
'tuna pasta with salad cream red onions'
gram
pasta
gram
tin tuna
red onion
--------------------
'lahmahjoon pizza'
olive oil
shallot
lamb
p

In [106]:
# explore the change of ingredient preprocessing
sampled_id = sample(list(data.dic.keys()),2)
for k in sampled_id:
    pp.pprint(layer1[idx[k]]['ingredients'])
    pp.pprint(recipe1M_ny[idx[k]]['ingredients'])
    pp.pprint(data.dic[k]['ori_ingr'])
    pp.pprint(data.dic[k]['k1_ingr'])
    print('----'*5)

[   {'text': '4 boneless skinless chicken breasts'},
    {'text': '1 teaspoon olive oil'},
    {'text': '12 teaspoon onion powder'},
    {'text': '1 pinch salt'},
    {'text': '1 pinch ground black pepper'},
    {'text': '2 avocados, peeled, pitted and sliced'},
    {'text': '2 tomatoes, sliced'},
    {'text': '1 (8 ounce) package monterey jack cheese, cut into 10 slices'}]
[   '4 boneless skinless chicken breasts',
    '1 teaspoon olive oil',
    '12 teaspoon onion powder',
    '1 pinch salt',
    '1 pinch ground black pepper',
    '2 avocados, peeled, pitted and sliced',
    '2 tomatoes, sliced',
    '1 package monterey jack cheese, cut into 10 slices']
[   'chicken breasts',
    'olive oil',
    'onion powder',
    'salt',
    'black pepper',
    'avocados',
    'tomatoes',
    'jack cheese']
[   'avocado peeled',
    'salt and pepper',
    'tomato',
    'onion',
    'cheddar cheese',
    'chicken breast halves',
    'getable oil']
--------------------
[   {   'text': '2 pounds beef