In [61]:
%time
%load_ext autotime
%load_ext autoreload
%autoreload 2

# if cannot import the modules, add the parent directory to system path might help
import os, tqdm, sys
parent_dir = os.path.abspath(os.getcwd()+'/..')+'/'
sys.path.append(parent_dir)

from utils.path import dir_HugeFiles
from utils.save import make_dir, save_pickle, load_pickle, save
from utils.tree import instr2tree, tree_distance, build_tree, stem
from utils.evaluation import metrics, spacy_extension

import pandas as pd
import numpy as np
import re

treemaker = instr2tree()
sp = spacy_extension()

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 9.54 µs
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
time: 33.6 s


In [62]:
class evaluation:
    def __init__(self, filename, tag):
        self.dic = self.load_dic({}, filename, tag)
        self.ori = tag
        self.gens = []
        #from utils.evaluation import metrics
    '''
    loading data
    '''
    def append_dic(self, filename, tag):
        if tag in self.gens:
            print('already exist, will not load again')
            self.gen = tag
        else:
            self.dic = self.load_dic(self.dic, filename, tag)
            self.gen = tag
            self.gens += [tag]
        
    def load_dic(self, dic, filename, tag):
        if os.path.isdir(filename):
            print('load', filename)
            for (dirpath, _, fnames) in os.walk(filename):
                for fname in fnames:
                    path = os.path.join(dirpath, fname)
                    with open(path, 'r') as fp:
                        raw_text = fp.read()
                        raw_text = self.remove_end(raw_text)

                    name, field = int(fname[:-5]), fname[-5]

                    if name not in dic.keys() and field in ['d','i']:
                        dic.update({name: {}})

                    if field == 'd':
                        dic[name].update({'%s_instr'%(tag): raw_text})

                    if field == 'i':
                        raw_text = self.reverse_list(raw_text.split('$'))
                        dic[name].update({'%s_ingr'%(tag): raw_text})
        return dic
    
    '''
    exporting data
    '''
    def to_bleu(self):
        to_write = {'%s_i'%(self.ori):'',
                    '%s_i'%(self.gen):'',
                    '%s_d'%(self.ori):'',
                    '%s_d'%(self.gen):''}
        
        for i, v in self.dic.items():
            to_write['%s_i'%(self.ori)] += self.add_space(' $ '.join(v['%s_ingr'%(self.ori)]))+ ' $ \n'
            to_write['%s_i'%(self.gen)] += self.add_space(' $ '.join(v['%s_ingr'%(self.gen)])) + ' $ \n'
            
            to_write['%s_d'%(self.ori)] += self.add_space(v['%s_instr'%(self.ori)])+ '\n'
            to_write['%s_d'%(self.gen)] += self.add_space(v['%s_instr'%(self.gen)])+ '\n'
        
        for k, v in to_write.items():
            save('../../to_gpt2/generation_%s.txt'%(k), v ,overwrite = True)
        !eval {"perl multi-bleu.perl ../../to_gpt2/generation_%s_i.txt < ../../to_gpt2/generation_%s_i.txt" %(self.ori, self.gen)}
        !eval {"perl multi-bleu.perl ../../to_gpt2/generation_%s_d.txt < ../../to_gpt2/generation_%s_d.txt" %(self.ori, self.gen)}
    
        !eval {"rouge -f ../../to_gpt2/generation_%s_i.txt ../../to_gpt2/generation_%s_i.txt --avg"%(self.ori, self.gen)}
        !eval {"rouge -f ../../to_gpt2/generation_%s_d.txt ../../to_gpt2/generation_%s_d.txt --avg"%(self.ori, self.gen)}
        
        print()

    def ingr_f1_freq(self, root = False):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            true, pred = v['%s_ingr'%(self.ori)], v['%s_ingr'%(self.gen)]
            if root:
                true, pred = sp.root(true), sp.root(pred)
            scores = metrics(true, pred)
            value.append(scores.f1_freq())
        avg = sum(value)/len(value)
        print(avg)
        return avg
    '''
    instruction evaluation
    '''
    def instr_tree(self, stem_only = False):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            ori_instr, gen_instr = v['%s_instr'%(self.ori)], v['%s_instr'%(self.gen)]
            score = self.norm_dist(ori_instr, gen_instr, stem_only = stem_only)
            value.append(score)
        avg = sum(value)/len(value)
        print(avg)
        return avg

    def state_f1_freq(self):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            true, pred = v['%s_instr'%(self.ori)], v['%s_instr'%(self.gen)]
            true, pred = sp.match_state(true), sp.match_state(pred)
            scores = metrics(true, pred)
            value.append(scores.f1_freq())
        avg = sum(value)/len(value)
        print(avg)
        return avg
    
    def verb_f1_freq(self):
        value = []
        for i, v in tqdm.tqdm(self.dic.items()):
            true, pred = v['%s_instr'%(self.ori)], v['%s_instr'%(self.gen)]
            true, pred = sp.instructions(true)[1], sp.instructions(pred)[1]
            scores = metrics(true, pred)
            value.append(scores.f1_freq())
        avg = sum(value)/len(value)
        print(avg)
        return avg
    
    '''
    cleaning data
    '''
    def remove_end(self, text):
        return text.replace('\n','').split('<')[0]
    
    def reverse(self, text):
        '''
        Important data cleaning before NY times parser
        '''
        # replace things in brace
        text = re.sub(r'\([^)]*\)', '', text)

        # remove space before punct
        text = re.sub(r'\s([?.!,"](?:\s|$))', r'\1', text)

        # remove consecutive spaces
        text = re.sub(' +',' ',text).strip()
        return text
    
    def reverse_list(self, listoftext):
        output = []
        for text in listoftext:
            rev = self.reverse(text)
            if rev:
                output.append(rev)
        return output
    
    def add_space(self, line):
        # add space before punct
        line = re.sub('([.,!?()])', r' \1 ', line)
        line = re.sub('\s{2,}', ' ', line)
        return line
    
    '''
    tree edit distance
    '''

    def str2tree(self, instr, stem_only):
        instr = [x for x in instr.split('. ') if x]
        instr = treemaker.sents2tree(instr)
        if stem_only:
            instr = stem(instr)
        n_nodes = sum([len(line['ingredient']) +1 for line in instr])
        return build_tree(instr), n_nodes

    def norm_dist(self, ori_instr, gen_instr, stem_only):
        '''
        Args: ori_instr: str
        Args: gen_instr: str
        '''
        ori_tree, ori_nodes = self.str2tree(ori_instr, stem_only = stem_only)
        gen_tree, gen_nodes = self.str2tree(gen_instr, stem_only = stem_only)
        tree_dist = tree_distance(ori_tree, gen_tree)
        normed = tree_dist/(ori_nodes+gen_nodes)
        return normed

time: 82.6 ms


In [63]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k1_val/', 'k1')
data.append_dic('../../to_gpt2/generation_201911118_k3_val/', 'k3')
data.append_dic('../../to_gpt2/generation_201911118_k5_val/', 'k5')
data.append_dic('../../to_gpt2/generation_201911118_k10_val/', 'k10')
data.append_dic('../../to_gpt2/generation_201911118_k30_val/', 'k30')
data.append_dic('../../to_gpt2/generation_201911118_p99_val/', 'p99')

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/
load ../../to_gpt2/generation_201911118_k3_val/
load ../../to_gpt2/generation_201911118_k5_val/
load ../../to_gpt2/generation_201911118_k10_val/
load ../../to_gpt2/generation_201911118_k30_val/
load ../../to_gpt2/generation_201911118_p99_val/
time: 9.93 s


In [16]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k5_val/', '117_k5')
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.to_bleu()
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.ingr_f1_freq(root=True)

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k5_val/
117_k5
saved ../../to_gpt2/generation_ori_i.txt
saved ../../to_gpt2/generation_117_k5_i.txt
saved ../../to_gpt2/generation_ori_d.txt
saved ../../to_gpt2/generation_117_k5_d.txt
BLEU = 27.73, 70.9/51.2/26.8/10.6 (BP=0.870, ratio=0.878, hyp_len=89249, ref_len=101657)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.
BLEU = 8.17, 49.9/16.4/5.8/2.3 (BP=0.796, ratio=0.814, hyp_len=425065, ref_len=522308)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across r

  0%|          | 0/4000 [00:00<?, ?it/s]


117_k5


  self.warn()
100%|██████████| 4000/4000 [04:10<00:00, 14.27it/s]

0.6254981632223141
time: 6min 45s





In [None]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k1_val/', '117_k1')
data.append_dic('../../to_gpt2/generation_201911118_k3_val/', '117_k3')
data.append_dic('../../to_gpt2/generation_201911118_k5_val/', '117_k5')

for tag in data.gens:
    print(tag)
    data.gen = tag
    data.to_bleu()
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.ingr_f1_freq(root=True)

In [28]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k1_val/', '117_k1')
data.append_dic('../../to_gpt2/generation_201911118_k3_val/', '117_k3')
data.append_dic('../../to_gpt2/generation_201911118_k5_val/', '117_k5')

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/
load ../../to_gpt2/generation_201911118_k3_val/
load ../../to_gpt2/generation_201911118_k5_val/
time: 5.74 s


In [57]:
tag = 'ori_instr'
np.mean([len(v[tag].split(' ')) for i, v in data.dic.items()])

115.68425

time: 103 ms


In [58]:
tag = '117_k1_instr'
np.mean([len(v[tag].split(' ')) for i, v in data.dic.items()])

81.81525

time: 83.2 ms


In [59]:
tag = '117_k3_instr'
np.mean([len(v[tag].split(' ')) for i, v in data.dic.items()])

87.9125

time: 86.1 ms


In [5]:
data = evaluation('../../to_gpt2/recipe1M_1118/val/y/', 'ori')
data.append_dic('../../to_gpt2/generation_201911118_k1_val/', '117_k1')
data.append_dic('../../to_gpt2/generation_201911118_k3_val/', '117_k3')
data.append_dic('../../to_gpt2/generation_20191127_k1_val/', '345_k1')

load ../../to_gpt2/recipe1M_1118/val/y/
load ../../to_gpt2/generation_201911118_k1_val/
load ../../to_gpt2/generation_201911118_k3_val/
load ../../to_gpt2/generation_20191127_k1_val/
time: 5.76 s


In [11]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.to_bleu()

117_k1
saved ../../to_gpt2/generation_ori_i.txt
saved ../../to_gpt2/generation_117_k1_i.txt
saved ../../to_gpt2/generation_ori_d.txt
saved ../../to_gpt2/generation_117_k1_d.txt
BLEU = 25.27, 54.9/40.4/21.5/8.6 (BP=1.000, ratio=1.067, hyp_len=108507, ref_len=101657)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.
BLEU = 10.34, 54.1/21.4/9.4/4.7 (BP=0.689, ratio=0.728, hyp_len=380434, ref_len=522308)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a stan

In [None]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.ingr_f1_freq(root=False)

In [12]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.ingr_f1_freq(root=True)

  0%|          | 2/4000 [00:00<05:28, 12.17it/s]

117_k1


  self.warn()
100%|██████████| 4000/4000 [04:56<00:00, 13.50it/s]
  0%|          | 2/4000 [00:00<04:47, 13.88it/s]

0.6241537564179083
117_k3


100%|██████████| 4000/4000 [04:09<00:00, 14.31it/s]
  0%|          | 2/4000 [00:00<05:09, 12.93it/s]

0.6275425132190814
345_k1


100%|██████████| 4000/4000 [04:36<00:00, 14.49it/s]

0.6500015924262921
time: 13min 42s





In [13]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.instr_tree(stem_only = False)

  0%|          | 0/4000 [00:00<?, ?it/s]

117_k1


100%|██████████| 4000/4000 [42:02<00:00,  1.47it/s]
  0%|          | 0/4000 [00:00<?, ?it/s]

0.5186078582567383
117_k3


100%|██████████| 4000/4000 [42:23<00:00,  1.54it/s]
  0%|          | 0/4000 [00:00<?, ?it/s]

0.5270206439868736
345_k1


100%|██████████| 4000/4000 [39:48<00:00,  1.44it/s]

0.5176564111401535
time: 2h 4min 13s





In [None]:
for tag in data.gens:
    print(tag)
    data.gen = tag
    data.instr_tree(stem_only = True)

### human judgements

In [83]:
# Compare with ground truth
import pprint
pp = pprint.PrettyPrinter(indent=4)
'''Usuage:
pp.pprint(data.dic[786989]['ori_ingr'])
'''

from random import sample 
sampled_id = sample(list(data.dic.keys()),2)

def show(true, pred):
    pp.pprint(true)
    pp.pprint(pred)
    true, pred = sp.root(true), sp.root(pred)
    scores = metrics(true, pred)
    print(scores.precision_freq())
    print(scores.recall_freq())
    print(scores.f1_freq())
        
for k in sampled_id:
    print(k)
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k1_ingr']
    show(true, pred)
    
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k3_ingr']
    show(true, pred)
    
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['k10_ingr']
    show(true, pred)
        
    true = data.dic[k]['ori_ingr']
    pred = data.dic[k]['p99_ingr']
    show(true, pred)
    print()

908471
[   'shallots',
    'garlic',
    'peanut oil',
    'shrimps',
    'sesame seeds',
    'peanuts',
    'cilantro',
    'lime']
[   'garlic',
    'shallots',
    'salt',
    'olive oil',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'garlic',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sour cream',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'salt',
    'sal

In [131]:
sampled_id = sample(list(data.dic.keys()),10)
for k in sampled_id:
    print(layer1[idx[k]]['title'])
    pp.pprint(layer1[idx[k]]['ingredients'])
    print(data.dic[k]['ori_instr'])
    print()
    print(data.dic[k]['k1_instr'])
    print(data.dic[k]['k3_instr'])
    print(data.dic[k]['k5_instr'])
    print('---'*5)

Make Ur Own Herbal Shampoo
[   {'text': '14 cup of your favorite herbal tea (strongly brewed)'},
    {'text': '8 ounces liquid castile soap'}]
add soap to tea. stir over low heat until well blended. store in a capped bottle.  

mix all ingredients together. apply to your face. rinse off with warm water.  
pour all ingredients into a blender. blend until smooth. pour into a glass and enjoy.  
mix all ingredients together well. apply to your face and neck. rinse with warm water and pat dry .  
---------------
Mojito
[   {'text': '6 fresh mint leaves, shredded & mashed'},
    {'text': '1 tablespoon sugar'},
    {'text': '12 fresh lime, juice of'},
    {'text': '2 ounces light rum'},
    {'text': 'chilled club soda'},
    {'text': 'ice cube'},
    {'text': 'fresh mint sprig (for garnish)'},
    {'text': 'lime slice (for garnish)'}]
in a tall glass, stir mint, sugar and lime juice until sugar is dissolved and stir in rum. add ice cubes and top off drink with club soda or seltzer water. stir

In [116]:
# Retrieve prompt
sampled_id = sample(list(data.dic.keys()),10)
for k in sampled_id:
    pp.pprint(recipe1M_ny[idx[k]]['title'])
    x = recipe1M_ny[idx[k]]['ny__ingredients']['exact']
    print('\n'.join(x))
    print('----'*5)

'melt away peppermint wreaths'
butter
confectioner sugar
all purpose flour
peppermint extract
drop
red food coloring
green food coloring
--------------------
's0 buttery brioche'
flour bread
sugar
yeast
salt
egg
butter
--------------------
'brussels sprout salad with avocado pumpkin seeds'
lemon juice
dijon mustard
olive oil
salt
pepper
brussels
pumpkin seed
avocado
--------------------
'mint chocolate chip cookies'
butter
sugar
vanilla extract
mint extract
green food coloring
whole egg
13 cup
all purpose flour
baking powder
bag
semi sweet chocolate chip
--------------------
'venison bites'
small venison roast
milk
flour
salt
black pepper
garlic powder
--------------------
'four cheese white broccoli pizza'
olive oil
pizza dough
cheese
provolone cheese
broccoli floret
ricotta cheese
mozzarella cheese
garlic powder
salt
oregano
--------------------
'tuna pasta with salad cream red onions'
gram
pasta
gram
tin tuna
red onion
--------------------
'lahmahjoon pizza'
olive oil
shallot
lamb
p

In [60]:
# explore the change of ingredient preprocessing
sampled_id = sample(list(data.dic.keys()),2)
for k in sampled_id:
    pp.pprint(layer1[idx[k]]['ingredients'])
    pp.pprint(recipe1M_ny[idx[k]]['ingredients'])
    pp.pprint(data.dic[k]['ori_ingr'])
    pp.pprint(data.dic[k]['k1_ingr'])
    print('----'*5)

NameError: name 'sample' is not defined

time: 60.4 ms
