In [1]:
from nlgeval import NLGEval

# Shared evaluator used by every scoring function in this notebook.
# The skip-thoughts and GloVe options are switched off; the metric dicts shown
# in the outputs below contain only BLEU-1..4, CIDEr, METEOR and ROUGE_L.
nlgeval = NLGEval(no_skipthoughts=True, no_glove=True)

In [2]:
import os
import re

class StopwordFilter(object):
    """Token filter driven by a whitespace-separated rule file.

    Each line of the file holds either ``PATTERN`` (matching tokens are
    dropped) or ``PATTERN REPLACEMENT`` (matching tokens are rewritten).
    Blank lines and lines with more than two fields are ignored.

    By default ``PATTERN`` is a regular expression anchored to the whole
    token, so metacharacters keep their regex meaning: a bare ``.`` line is
    compiled to ``^.$`` and matches *any* one-character token, not just a
    full stop. Escape it in the file (``\\.``) or pass ``literal=True``.
    """

    def __init__(self, filename, literal=False):
        """Load the rules from ``filename`` into ``self.pats``.

        Args:
            filename: Path of the rule file. If it does not exist the filter
                has no rules and passes every token through unchanged.
            literal: When true, both fields of each rule are treated as plain
                text instead of a regex / regex replacement template. The
                default (False) keeps the original regex behaviour.
        """
        self.pats = []
        if not os.path.exists(filename):
            return
        # The context manager closes the rule file even if a pattern fails to compile.
        with open(filename, 'r') as handle:
            for ln in handle:
                ww = ln.split()
                if len(ww) not in (1, 2):
                    continue
                pattern = ww[0]
                replacement = ww[1] if len(ww) == 2 else ''
                if literal:
                    pattern = re.escape(pattern)
                    # Backslash is the only special character in a replacement template.
                    replacement = replacement.replace('\\', '\\\\')
                self.pats.append((re.compile(r'^' + pattern + r'$'), replacement))

    def _filter(self, input_words):
        """Apply the rules to a sequence of tokens and return the surviving tokens.

        For each token the first rule whose substitution changes it wins;
        tokens rewritten to the empty string are dropped.
        """
        output_words = []
        for w in input_words:
            target = w
            for p in self.pats:
                v = p[0].sub(p[1], w)
                if v != w:
                    target = v
                    break
            if target != '':
                output_words.append(target)
        return output_words

    def __call__(self, input_words):
        """Filter a sentence or a token list.

        Args:
            input_words: A ``str`` (split on whitespace, result re-joined with
                single spaces) or a ``list`` of tokens (filtered list returned).

        Returns:
            The filtered string or list; ``None`` for any other input type.
        """
        if isinstance(input_words, str):
            return ' '.join(self._filter(input_words.split()))
        elif isinstance(input_words, list):
            return self._filter(input_words)
        else:
            return None
        
# Build the notebook-wide filter from the rule file 'punc' and sanity-check it.
# NOTE(review): rules are compiled as regular expressions, so the '.' entry
# becomes '^.$' (see the printed patterns) and drops *every* one-character
# token, not only full stops. Escape it as '\.' in the file if only the period
# should be removed.
swfilter = StopwordFilter('punc')
print(swfilter.pats)
print(swfilter('yes , you sucks .'))

[(re.compile('^,$'), ''), (re.compile('^.$'), '')]
yes you sucks


In [6]:
import json
from pathlib import Path
# Directory (relative to the working directory) holding the prediction JSON files.
root = Path('generate')
def eval(data):
    """Score the model predictions in ``data`` against the reference answers.

    NOTE: this name shadows the built-in ``eval`` for the rest of the notebook;
    it is kept because other cells call the function under this name.

    Args:
        data: Mapping with a 'dialogs' list; every dialog has a 'dialog' list
            of turns, each carrying an 'answer' (reference) and a 'predict'
            (hypothesis) entry.

    Returns:
        The result of ``nlgeval.compute_metrics`` for one reference set and
        the hypotheses, both passed through ``swfilter`` first.
    """
    references = []
    hypotheses = []
    for dialog in data['dialogs']:
        for qa in dialog['dialog']:
            # Only the answer and the prediction are scored; the question is
            # not read, so turns without a 'question' key no longer fail.
            references.append(swfilter(qa['answer']))
            hypotheses.append(swfilter(qa['predict']))
    # compute_metrics takes a list of reference sets; there is exactly one here.
    return nlgeval.compute_metrics([references], hypotheses)

# Disabled batch run, kept as a bare string literal so it is not executed: it
# would score every file under `root` and write each metric dict to
# 'metrics/<file name>'.
# NOTE(review): if this is re-enabled, the bare `except:` hides the real error
# and the handle returned by `file.open()` is never closed.
'''for file in root.iterdir():
    try:
        f =  file.open()
        data = json.load(f)
        metrics_dict = eval(data)
        with open('metrics/' + file.name, 'w') as f:
            json.dump(metrics_dict, f)
        print(file.name)
    except:
        print('fail')
'''

"for file in root.iterdir():\n    try:\n        f =  file.open()\n        data = json.load(f)\n        metrics_dict = eval(data)\n        with open('metrics/' + file.name, 'w') as f:\n            json.dump(metrics_dict, f)\n        print(file.name)\n    except:\n        print('fail')\n"

In [8]:
import json
# Score one prediction file and store the metrics under the same file name.
name = 'test_set_predicted_11141648.json'
# `eval` is the notebook's metric function defined in an earlier cell, not the
# built-in. The context manager closes the input file instead of leaking it.
with open('generate/' + name) as f:
    metric_dict = eval(json.load(f))
with open('metrics/' + name, 'w') as f:
    json.dump(metric_dict, f)

In [11]:
def eval_released(data, ground):
    """Score answers from ``data`` against the reference answers in ``ground``.

    Dialogs and turns are paired positionally. ``zip`` stops at the shorter
    sequence, so extra dialogs or turns on either side are silently ignored.

    Args:
        data: Mapping with a 'dialogs' list whose turns hold the hypothesis
            under the 'answer' key.
        ground: Mapping of the same layout whose turns hold the reference
            under the 'answer' key.

    Returns:
        The result of ``nlgeval.compute_metrics`` for one reference set and
        the hypotheses, both passed through ``swfilter`` first.
    """
    references = []
    hypotheses = []
    for dialog, g_dialog in zip(data['dialogs'], ground['dialogs']):
        for qa, g_qa in zip(dialog['dialog'], g_dialog['dialog']):
            # The question is not part of the score, so it is not read and
            # turns without a 'question' key no longer fail.
            references.append(swfilter(g_qa['answer']))
            hypotheses.append(swfilter(qa['answer']))
    # compute_metrics takes a list of reference sets; there is exactly one here.
    return nlgeval.compute_metrics([references], hypotheses)
# Evaluate the answers in the released result file against the 'answer'
# entries of one of our prediction files. Context managers close both inputs.
with open('result_test_set_b5_p1.0.json') as f:
    data = json.load(f)
with open('generate/test_set_predicted_10280020.json') as f:
    ground = json.load(f)
eval_released(data, ground)

{'Bleu_1': 0.2704460967213893,
 'Bleu_2': 0.1722203552529856,
 'Bleu_3': 0.1183584507736195,
 'Bleu_4': 0.08518808526572841,
 'CIDEr': 0.78996472324051348,
 'METEOR': 0.11581314027100997,
 'ROUGE_L': 0.29202615766102058}

In [17]:
def copy_baseline(data):
    """Score the trivial baseline that repeats each question as its answer.

    Every turn's 'question' is used as the hypothesis and its 'answer' as the
    reference; both are passed through ``swfilter`` and handed to
    ``nlgeval.compute_metrics`` as a single reference set.
    """
    references = []
    hypotheses = []
    # Flatten the dialogs into one lazy stream of turns.
    turns = (turn for entry in data['dialogs'] for turn in entry['dialog'])
    for turn in turns:
        echoed = turn['question']
        gold = turn['answer']
        references.append(swfilter(gold))
        hypotheses.append(swfilter(echoed))
    return nlgeval.compute_metrics([references], hypotheses)
# Run the copy-the-question baseline on one prediction file; the context
# manager closes the input file instead of leaking the handle.
with open('generate/test_set_predicted_102.json') as f:
    data = json.load(f)
copy_baseline(data)

{'Bleu_1': 0.2316765364353914,
 'Bleu_2': 0.12436788056021685,
 'Bleu_3': 0.07763834895019554,
 'Bleu_4': 0.049947666507955665,
 'CIDEr': 0.63786497275859277,
 'METEOR': 0.11131560813254061,
 'ROUGE_L': 0.2356145004554919}

In [21]:
# Merge the predictions of several models into one side-by-side file, result.json.
names = ['conv11.json', 'attn_decode.json', 'top_down.json', 'm_stage.json', 'all.json', 'simple.json']
with open('generate/released_baseline.json') as f:
    results = json.load(f)
for dialog in results['dialogs']:
    for qa in dialog['dialog']:
        # Keep the baseline's own answer before 'answer' is overwritten below.
        # NOTE(review): 'relased' looks like a typo for 'released'; the key is
        # left unchanged because it is part of the written output - confirm
        # with consumers of result.json before renaming.
        qa['relased'] = qa['answer']

for name in names:
    with open('generate/' + name) as f:
        data = json.load(f)
    # Entries are matched to the baseline purely by position.
    for d_idx, dialog in enumerate(data['dialogs']):
        results['dialogs'][d_idx]['caption'] = dialog['caption']
        for q_idx, qa in enumerate(dialog['dialog']):
            # Each model's prediction is stored under its file name.
            results['dialogs'][d_idx]['dialog'][q_idx][name] = qa['predict']
            results['dialogs'][d_idx]['dialog'][q_idx]['answer'] = qa['answer']

# Closing the output file explicitly guarantees the JSON is flushed to disk.
with open('result.json', 'w') as f:
    json.dump(results, f, indent=4)