In [5]:
import json
from fuzzywuzzy import fuzz

In [6]:
import sys
import re
import string
from collections import Counter
import pickle

def normalize_answer(s):
    """Canonicalise an answer string so two answers can be compared.

    Steps, applied in this order:
      1. lowercase the text;
      2. delete every character found in ``string.punctuation``;
      3. replace the standalone words ``a``, ``an`` and ``the`` with a space;
      4. collapse whitespace runs to single spaces (this also trims the ends).

    Returns the normalised string.
    """
    lowered = s.lower()
    # A deletion table removes the same characters as filtering them one by one.
    without_punctuation = lowered.translate(str.maketrans('', '', string.punctuation))
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
    return ' '.join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-overlap F1 between a predicted answer and the reference answer.

    Both strings are passed through ``normalize_answer`` and then split on
    whitespace. Returns a ``(f1, precision, recall)`` tuple. The result is
    ``(0, 0, 0)`` when either normalised string is one of 'yes', 'no',
    'noanswer' and the two differ, or when the two share no token.
    """
    pred_norm = normalize_answer(prediction)
    gold_norm = normalize_answer(ground_truth)

    zero = (0, 0, 0)
    special_answers = ('yes', 'no', 'noanswer')

    # Special answers only score when they match exactly; partial token
    # overlap with them is never credited.
    if pred_norm != gold_norm and (pred_norm in special_answers or gold_norm in special_answers):
        return zero

    pred_tokens = pred_norm.split()
    gold_tokens = gold_norm.split()
    # Multiset intersection: a repeated token is credited at most as often as
    # it appears on both sides.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if overlap == 0:
        return zero

    precision = 1.0 * overlap / len(pred_tokens)
    recall = 1.0 * overlap / len(gold_tokens)
    return (2 * precision * recall) / (precision + recall), precision, recall


def exact_match_score(prediction, ground_truth):
    """Return True when both answers are identical after ``normalize_answer``."""
    normalized_prediction, normalized_truth = (
        normalize_answer(text) for text in (prediction, ground_truth)
    )
    return normalized_prediction == normalized_truth

In [45]:
# Predictions to analyse; later cells read the 'answer' mapping (example id -> predicted text).
# The encoding is given explicitly: without it open() falls back to the platform locale,
# which can mis-decode non-ASCII answer text on some systems.
with open('pred_codalab.json', 'r', encoding='utf-8') as f:
    pred = json.load(f)

In [7]:
# Dev-set records; later cells read each record's '_id', 'answer' and 'question' fields.
# The encoding is given explicitly: without it open() falls back to the platform locale,
# which can mis-decode non-ASCII text on some systems.
with open('../../data/external/hotpot_dev_distractor_v1.json', 'r', encoding='utf-8') as f:
    dev = json.load(f)

In [10]:
# Lookup table: example id -> gold answer string.
dict_id2ans = {}
for ins in dev:
    _id, ans = ins['_id'], ins['answer']
    dict_id2ans[_id] = ans

# Longest answer spans (predicted vs. gold)

In [48]:
# Lengths, in whitespace-separated tokens, of every predicted span and of the
# matching gold answer; each list ends up sorted longest-first.
list_len_span, list_len_lbl = [], []
for _id, span in pred['answer'].items():
    list_len_span.append(len(span.split()))
    list_len_lbl.append(len(dict_id2ans[_id].split()))
for lengths in (list_len_span, list_len_lbl):
    lengths.sort(reverse=True)

In [49]:
# Ten largest gold-answer lengths (whitespace-separated tokens).
list_len_lbl[0:10]

[29, 26, 19, 19, 18, 18, 17, 16, 16, 16]

In [50]:
# Fifty largest predicted-span lengths (whitespace-separated tokens).
list_len_span[0:50]

[214,
 182,
 170,
 154,
 153,
 149,
 141,
 135,
 120,
 119,
 117,
 117,
 117,
 116,
 104,
 101,
 100,
 99,
 98,
 97,
 95,
 90,
 89,
 88,
 88,
 87,
 82,
 79,
 75,
 75,
 71,
 67,
 67,
 62,
 62,
 55,
 55,
 55,
 51,
 49,
 49,
 44,
 44,
 41,
 40,
 39,
 37,
 37,
 37,
 34]

# Answer length by wh-word type

In [9]:
# Lookup table: example id -> question text.
dict_id2query = {}
for ins in dev:
    _id, q = ins['_id'], ins['question']
    dict_id2query[_id] = q

In [57]:
import spacy
# NOTE(review): the same import and load are repeated at the top of the next cell, which
# rebinds `nlp`; one of the two is redundant.
# NOTE(review): 'en' is a shortcut model name; spaCy 3 reportedly dropped shortcut names in
# favour of full package names (e.g. 'en_core_web_sm') — confirm against the installed version.
nlp = spacy.load('en')

In [69]:
import spacy
# `nlp` is the module-level pipeline that findWHword (defined just below) calls.
# NOTE(review): this repeats the import and load from the previous cell.
# NOTE(review): 'en' is a shortcut model name; spaCy 3 reportedly dropped shortcut names in
# favour of full package names (e.g. 'en_core_web_sm') — confirm against the installed version.
nlp = spacy.load('en')
def findWHword(sentence, nlp_pipeline=None):
    """Guess the interrogative ("wh") word that characterises a question.

    Args:
        sentence: Question text. It is lowercased before any matching.
        nlp_pipeline: Optional callable mapping the lowercased text to a
            sequence of tokens exposing ``text``, ``tag_``, ``pos_`` and
            ``lemma_``. When omitted, the notebook-level ``nlp`` object is
            used, so existing one-argument calls behave as before.

    Returns:
        One of 'when', 'how', 'where', 'which', 'what', 'who', 'how many',
        'whose', 'whom', or ``None`` when no rule applies.

    Rules, in order:
      1. 'how many' if the text contains that phrase and 'how' is a word.
      2. Walking backwards over the tokens, skip singular-noun tokens and test
         the first other token for a candidate.
      3. The first token, left to right, containing a candidate.
      4. 'what' if the text contains 'name', or the last token's lemma is
         'be', or the last token is an adposition.
    """
    candidate = ['when', 'how', 'where', 'which', 'what', 'who', 'how many', 'whose', 'whom']
    # Matching is by substring, so longer candidates must be tried first:
    # otherwise 'who' matches inside 'whose'/'whom' and those two can never
    # be returned. sorted() is stable, so equal-length candidates keep their
    # original relative order.
    # NOTE: substring matching still lets e.g. 'show' match 'how'.
    ordered_candidates = sorted(candidate, key=len, reverse=True)

    pipeline = nlp if nlp_pipeline is None else nlp_pipeline
    sentence = sentence.lower()
    doc = pipeline(sentence)

    if 'how' in sentence.split() and 'how many' in sentence:
        return 'how many'

    for w in reversed(doc):
        # 'NN' is a fine-grained (Penn Treebank) tag, which spaCy exposes as
        # `tag_`; `pos_` holds coarse tags such as 'NOUN', so comparing it to
        # 'NN' never matched and no trailing noun was ever skipped.
        if w.tag_ == 'NN':
            continue
        for can in ordered_candidates:
            if can in w.text:
                return can
        break

    for token in doc:
        for can in ordered_candidates:
            if can in token.text:
                return can

    if 'name' in sentence:
        return 'what'
    # Guard the last-token lookup: an empty doc has no doc[-1].
    if len(doc) > 0 and (doc[-1].lemma_ == 'be' or doc[-1].pos_ == 'ADP'):
        return 'what'
    return None

In [71]:
from tqdm.notebook import tqdm

In [75]:
# Group gold-answer lengths (whitespace-separated tokens) by the wh-word that
# findWHword assigns to the question; questions with no wh-word go to 'others'.
list_len_span, list_len_lbl = [], []
list_wh = ['when', 'how', 'where', 'which', 'what', 'who', 'how many', 'whose', 'whom', 'others']
dict_wh2list_len = {wh: [] for wh in list_wh}

for _id in tqdm(pred['answer']):
    question = dict_id2query[_id]
    wh = findWHword(question)
    ans = dict_id2ans[_id]
    ans_len = len(ans.split())
    # Surface unusually long gold answers for manual inspection.
    if ans_len > 15:
        print(wh, "###", question, "###", ans)
    dict_wh2list_len['others' if wh is None else wh].append(ans_len)

# Per wh-word: sort longest-first and show the ten largest lengths.
for wh, lengths in dict_wh2list_len.items():
    lengths.sort(reverse=True)
    print(wh, lengths[0:10])

HBox(children=(FloatProgress(value=0.0, max=7405.0), HTML(value='')))

which ###  Which 1970's film was released first, Charley and the Angel or The Boatniks? ### The Boatniks is a 1970 American comedy film starring Robert Morse, Stefanie Powers, Don Ameche and Phil Silvers.
what ### What city was the capital of the Ostrogothic Kingdom and the birth place of Ornella Fiorentini? ### Ravenna (] , also locally ] ; Romagnol: "Ravèna" ) is the capital city of the Province of Ravenna
who ### Who had more of their English novels turned into Oscar-nominated films, Raja Rao or Pat Conroy? ### Two of his novels, "The Prince of Tides" and "The Great Santini", were made into Oscar-nominated films.
what ### J. Searle Dawley and Ken Annakin were both directors of what? ### Kenneth Cooper "Ken" Annakin, OBE (10 August 1914 – 22 April 2009) was a prolific English film director.
which ### If the Charhki  Dadri crash was less dangerous than the Tenerife airport disaster, which occured firat?  ### On March 27, 1977, two Boeing 747 passenger jets, KLM Flight 4805 and Pan Am 

In [64]:
# Ad-hoc check of the value `wh` currently holds; `wh` is not assigned in this cell,
# so the result depends on which cell ran last.
wh is None

False

# Check answers

In [6]:
# em_cnt: predictions equal to the label after normalisation.
# cnt: near-misses, i.e. not an exact match but fuzz.ratio(label, prediction) >= 90;
# each near-miss is printed as "label ### prediction".
cnt = 0
em_cnt = 0
for _id, ans in pred['answer'].items():
    lbl = dict_id2ans[_id]
    if exact_match_score(ans, lbl):
        em_cnt += 1
    elif fuzz.ratio(lbl, ans) >= 90:
        print(lbl, "###", ans)
        cnt += 1

9,984 ### 9, 984
Robert Erskine Childers DSC ### Robert Erskine Childers
1,462 ### 1, 462
35,124 ### 35, 124
$10.5 million ### $ 10. 5 million
right-hand ### right - hand
super-regional shopping mall ### super - regional shopping mall
Slaughterhouse-Five ### Slaughterhouse - Five
3,384,569 ### 3, 384, 569
Pakistani ### Pakistan
76,416 ### 76, 416
728,000 ft² ### 728, 000 ft²
51,271 ### 51, 271
1.95 m ### 1. 95 m
4,613 ### 4, 613
The R-8 Human Rhythm Composer ### The R - 8 Human Rhythm Composer
Queen In-hyun's Man ### Queen In - hyun's Man
Symphony No. 7 ### Symphony No. 1
Jean-Loup Jacques Marie Chrétien ### Jean - Loup Jacques Marie Chrétien
natural-ingredients-only personal care products ### natural - ingredients - only personal care products
Hessians ### Hessian
video game ### videogame
gull-wing doors ### gull - wing doors
1,800 ### 1, 800
Regional Rural Bank ### Regional Rural Banks
media for the 65.8 million ### media for the 65. 8 million
Thomas Warburton ### Tom Warburton
26–30

In [7]:
# Current value of `cnt`; presumably the near-miss count from the preceding loop
# (depends on execution order).
cnt

250

In [8]:
# Exact-match rate.
# NOTE(review): em_cnt is counted over pred['answer'] but divided by len(dev); the two
# only agree if every dev example has a prediction — confirm.
em_cnt/len(dev)

0.5665091154625254

In [9]:
# Rate obtained if the `cnt` extra predictions were also counted as correct.
# NOTE(review): relies on `cnt` still holding the near-miss count (fuzz.ratio >= 90,
# not an exact match); `cnt` is reassigned by later cells.
(em_cnt+cnt)/len(dev)

0.600270087778528

In [10]:
# Leftover inspection: `lbl` is not assigned in this cell, so this shows whatever
# label the most recently run loop visited last.
lbl

'Norwood, Massachusetts'

In [11]:
# Count predictions that are exactly the empty string (this rebinds `cnt`).
cnt = 0
for _id, ans in pred['answer'].items():
    lbl = dict_id2ans[_id]
    cnt += int(ans == "")

In [12]:
# Current value of `cnt`; presumably the empty-prediction count from the preceding loop
# (depends on execution order).
cnt

151

In [13]:
# Same expression as the earlier near-miss rate, but `cnt` was since rebound to the
# number of empty predictions, so this adds empty predictions to the exact matches.
# NOTE(review): presumably an estimate of the score if every empty prediction had been
# answered correctly — confirm intent.
(em_cnt+cnt)/len(dev)

0.5869007427413909

In [11]:
# Rebinds `pred` to a different predictions file; later cells read its 'answer', 'ent'
# and 'srl' mappings.
# The encoding is given explicitly: without it open() falls back to the platform locale,
# which can mis-decode non-ASCII answer text on some systems.
with open('preds500.json', 'r', encoding='utf-8') as f:
    pred = json.load(f)

In [12]:
from termcolor import colored

In [13]:
# Among wrong span predictions (label is not yes/no, no exact match), list those whose
# span has no case-insensitive containment relation with the entity prediction or,
# failing that, with the SRL prediction. Only the first such disagreement is printed
# and counted per example.
cnt = 0
for k, span in pred['answer'].items():
    ent = pred['ent'][k]
    srl = pred['srl'][k]
    lbl = dict_id2ans[k]
    if lbl in ('yes', 'no') or exact_match_score(span, lbl):
        continue
    for other in (ent, srl):
        if span.lower() not in other.lower() and other.lower() not in span.lower():
            print(span, "###", other, "###", colored(lbl, 'green'), end="\n\n")
            cnt += 1
            break

Annie Morton ### Terry Richardson ### [32mTerry Richardson[0m

Henry Roth ### Erskine Childers ### [32mRobert Erskine Childers DSC[0m

Peter Schmeichel ### Peter Bolesław Schmeichel MBE ### [32mWorld's Best Goalkeeper[0m

Henry John ### Kaiser Ventures corporation ### [32mHenry J. Kaiser[0m

Charles Nungesser ### Charles Eugène Jules Marie Nungesser ### [32mCharles Eugène[0m

forest ### the Firth of Forth ### [32mYellowcraig[0m

Jerry Glanville ### Jerry Michael Glanville ### [32mKeith Bostic[0m

point guard ### 5 ### [32mshortest player ever to play in the National Basketball Association[0m

Strasbourg's metropolitan area ### Alsace ### [32m276,170 inhabitants[0m

Sela Ann Ward ### What ### [32mAnn[0m

drafted ### the United States Armed Forces ### [32mConscription[0m

Monica Samille Lewinsky ### Monica Lewinsky ### [32mMonica Lewinsky[0m

film editor and director ### English ### [32mdirector[0m

Coahuila, Mexico. Centered on the town of El Nacimiento in Múzq

In [38]:
# Current value of `cnt`; presumably the number of examples reported by the preceding
# span-vs-ent/srl loop (depends on execution order).
cnt

102

# Wrong span but correct ent/srl

In [16]:
def correct_ans(ans, lbl):
    """Loose correctness check: does one string contain the other?

    The comparison is case-insensitive and ignores leading and trailing
    whitespace.

    Args:
        ans: Candidate answer text.
        lbl: Gold answer text.

    Returns:
        True when the cleaned ``ans`` is a substring of the cleaned ``lbl`` or
        vice versa. False when either string is empty after stripping.
    """
    ans = ans.strip().lower()
    lbl = lbl.strip().lower()
    # The empty string is a substring of every string, so without this guard
    # an empty or blank prediction would be reported as correct.
    if not ans or not lbl:
        return False
    return ans in lbl or lbl in ans

In [19]:
# Wrong span predictions (label is not yes/no, no exact match) for which the entity
# or the SRL prediction passes correct_ans against the label.
cnt = 0
for k, span in pred['answer'].items():
    ent, srl, lbl = pred['ent'][k], pred['srl'][k], dict_id2ans[k]
    if lbl in ('yes', 'no') or exact_match_score(span, lbl):
        continue
    if any(correct_ans(candidate, lbl) for candidate in (ent, srl)):
        print(k, span, "### ent:", ent, "### srl: ", srl, "### lbl:", colored(lbl, 'green'), end="\n\n")
        cnt += 1

5a8c7595554299585d9e36b6 Chief of Protocol of the United States ### ent: the United States ### srl:  as Chief of Protocol of the United States. ### lbl: [32mChief of Protocol[0m

5a8e3ea95542995a26add48d Greenwich Village ### ent: Greenwich Village ### srl:  an Italian American best-selling author of sixteen books, television writer, film director, and entrepreneur based in Greenwich Village, New York City. ### lbl: [32mGreenwich Village, New York City[0m

5a87ab905542996e4f3088c1 4,000 capacity (3,677 ### ent: 4,000 ### srl:  a 4,000 capacity (3,677 seated) multi-purpose arena, in Lewiston, Maine, that opened in 1958. ### lbl: [32m3,677 seated[0m

5a7bbb64554299042af8f7cc Annie Morton ### ent: Terry Richardson ### srl:  Annie Morton (born October 8, 1970) ### lbl: [32mTerry Richardson[0m

5ab6d09255429954757d337d 1986 to 2013 ### ent: 1986 ### srl:  a Scottish former football manager and player who managed Manchester United from 1986 to 2013. ### lbl: [32mfrom 1986 to 2013[0

# SRL is right but ent is not

In [20]:
# Wrong span predictions (label is not yes/no, no exact match) for which the SRL
# prediction passes correct_ans against the label but the entity prediction does not.
cnt = 0
for k, span in pred['answer'].items():
    ent, srl, lbl = pred['ent'][k], pred['srl'][k], dict_id2ans[k]
    if lbl in ('yes', 'no') or exact_match_score(span, lbl):
        continue
    if correct_ans(srl, lbl) and not correct_ans(ent, lbl):
        print(k, span, "### ent:", ent, "### srl: ", srl, "### lbl:", colored(lbl, 'green'), end="\n\n")
        cnt += 1

5a8c7595554299585d9e36b6 Chief of Protocol of the United States ### ent: the United States ### srl:  as Chief of Protocol of the United States. ### lbl: [32mChief of Protocol[0m

5a87ab905542996e4f3088c1 4,000 capacity (3,677 ### ent: 4,000 ### srl:  a 4,000 capacity (3,677 seated) multi-purpose arena, in Lewiston, Maine, that opened in 1958. ### lbl: [32m3,677 seated[0m

5ae22b8d554299234fd0440f Peter Schmeichel ### ent: Peter Bolesław Schmeichel MBE ### srl:  the IFFHS World's Best Goalkeeper ### lbl: [32mWorld's Best Goalkeeper[0m

5a7d54165542995f4f402256 forest ### ent: the Firth of Forth ### srl:  Yellowcraig, ### lbl: [32mYellowcraig[0m

5a7759fc5542993569682d60 Tenerife ### ent: Tenerife ### srl:  a national park located in Tenerife (Canary Islands, Spain). ### lbl: [32mCanary Islands, Spain[0m

5aba749055429901930fa7d8 film editor and director ### ent: English ### srl:  an Armenian-American film editor and director. ### lbl: [32mdirector[0m

5a8e0a005542995085b373a

In [21]:
# Current value of `cnt`; presumably the number of examples printed by the preceding
# "SRL right, ent wrong" loop (depends on execution order).
cnt

36

# ent is right but SRL is not

In [22]:
# Wrong span predictions (label is not yes/no, no exact match) for which the entity
# prediction passes correct_ans against the label but the SRL prediction does not.
cnt = 0
for k, span in pred['answer'].items():
    ent, srl, lbl = pred['ent'][k], pred['srl'][k], dict_id2ans[k]
    if lbl in ('yes', 'no') or exact_match_score(span, lbl):
        continue
    if correct_ans(ent, lbl) and not correct_ans(srl, lbl):
        print(k, span, "### ent:", ent, "### srl: ", srl, "### lbl:", colored(lbl, 'green'), end="\n\n")
        cnt += 1

5a7bbb64554299042af8f7cc Annie Morton ### ent: Terry Richardson ### srl:  Annie Morton (born October 8, 1970) ### lbl: [32mTerry Richardson[0m

5a8979f4554299669944a52e Sela Ann Ward ### ent: Sela Ann Ward ### srl:  What ### lbl: [32mAnn[0m

5ab2a186554299295394677b 1884 ### ent: 2009 ### srl:  to commemorate the deaths of 329 men from the 66th (Berkshire) Regiment of Foot during the campaign in the Second Anglo-Afghan War in Afghanistan between 1878 and 1880. ### lbl: [32m2009[0m

5a8eea4a5542990e94052bb7 8th and 16th centuries ### ent: the 8th and 16th centuries ### srl:  into Middle Frisian, spoken from the 16th to the 19th century. ### lbl: [32mbetween the 8th and 16th centuries[0m

5add673e5542992ae4cec54d Ducks and Beavers ### ent: Oregon Ducks ### srl:  The Ducks and Beavers ### lbl: [32mOregon Ducks football[0m

5adf65555542992d7e9f9334 Barry Magid ### ent: Wendell Berry ### srl:  by Barry Magid ### lbl: [32mWendell Berry[0m

5ac289ff5542996366519a02 Brabejum ### en

In [23]:
# Current value of `cnt`; presumably the number of examples printed by the preceding
# "ent right, SRL wrong" loop (depends on execution order).
cnt

12

# entity as answer prediction

In [25]:
# Build an alternative prediction set that uses the entity prediction as the answer,
# falling back to the span prediction when the entity is the empty string.
# The 'sp' mapping is created but left empty here.
new_pred = {'answer': {}, 'sp': {}}
cnt = 0
for k, span in pred['answer'].items():
    ent, srl, lbl = pred['ent'][k], pred['srl'][k], dict_id2ans[k]
    new_pred['answer'][k] = span if ent == "" else ent

In [28]:
# Persist the entity-as-answer predictions.
# Plain 'w' is enough: the file is only written, so the read access requested by 'w+'
# was unnecessary. The encoding is explicit so the output does not depend on the
# platform locale.
with open('entity_ans_pred.json', 'w', encoding='utf-8') as f:
    json.dump(new_pred, f)