In [35]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datasets import load_from_disk

import numpy as np
np.random.seed(42)

import json, copy, os, sys
from tqdm import tqdm

import logging
# Named logger used by every cell of this notebook; messages below INFO are dropped.
logger = logging.getLogger('logger')
logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

# Put the current working directory on the import path before importing the local module.
sys.path.append(os.path.abspath(os.getcwd()))
# NOTE(review): the helpers called below without a local definition (tokenize, diff,
# diff_flexible, diff_char_level, ref_or_good, ...) presumably come from this star import — confirm.
from annotation_utilities import *

# Phenomenon name -> annotation mode. process_sample() dispatches on the mode:
# every mode except "?" has an automatic branch there; "?" samples are only
# reachable through the manual annotation prompt.
phenomena = {
    "addition": "add-omit",
    "ambiguous-translation-wrong-discourse-connective-since-causal": "diff_flexible",
    "ambiguous-translation-wrong-discourse-connective-since-temporal": "diff_flexible",
    "ambiguous-translation-wrong-discourse-connective-while-contrast": "diff_flexible",
    "ambiguous-translation-wrong-discourse-connective-while-temporal": "diff_flexible",
    "ambiguous-translation-wrong-gender-female-anti": "diff_flexible",
    "ambiguous-translation-wrong-gender-female-pro": "diff_flexible",
    "ambiguous-translation-wrong-gender-male-anti": "diff_flexible",
    "ambiguous-translation-wrong-gender-male-pro": "diff_flexible",
    "ambiguous-translation-wrong-sense-frequent": "diff_flexible",
    "ambiguous-translation-wrong-sense-infrequent": "diff_flexible",
    "anaphoric_group_it-they:deletion": "annotate_word",
    "anaphoric_group_it-they:substitution": "annotate_word",
    "anaphoric_intra_non-subject_it:deletion": "annotate_word",
    "anaphoric_intra_non-subject_it:substitution": "annotate_word",
    "anaphoric_intra_subject_it:deletion": "annotate_word",
    "anaphoric_intra_subject_it:substitution": "annotate_word",
    "anaphoric_intra_they:deletion": "annotate_word",
    "anaphoric_intra_they:substitution": "annotate_word",
    "anaphoric_singular_they:deletion": "annotate_word",
    "anaphoric_singular_they:substitution": "annotate_word",
    "antonym-replacement": "REF_flexible",
    "commonsense-only-ref-ambiguous": "diff_flexible",
    "commonsense-src-and-ref-ambiguous": "diff_flexible",
    "copy-source": "whole_sentence",
    "coreference-based-on-commonsense": "mixed_flexible",
    "do-not-translate": "whole_sentence",
    "hallucination-date-time": "date",
    "hallucination-named-entity-level-1": "diff_flexible",
    "hallucination-named-entity-level-2": "REF_flexible",
    "hallucination-named-entity-level-3": "REF_flexible",
    "hallucination-number-level-1": "diff_flexible",
    "hallucination-number-level-2": "REF_flexible",
    "hallucination-number-level-3": "REF_flexible",
    "hallucination-real-data-vs-ref-word": "diff_flexible",
    "hallucination-real-data-vs-synonym": "diff_flexible",
    "hallucination-unit-conversion-amount-matches-ref": "units",
    "hallucination-unit-conversion-unit-matches-ref": "units",
    "hypernym-replacement": "REF_flexible",
    "hyponym-replacement": "REF_flexible",
    "lexical-overlap": "?",
    "modal_verb:deletion": "add-omit",
    "modal_verb:substitution": "diff_flexible",
    "nonsense": "REF_flexible",
    "omission": "add-omit",
    "ordering-mismatch": "swap",
    "overly-literal-vs-correct-idiom": "diff_flexible",
    "overly-literal-vs-explanation": "diff_flexible",
    "overly-literal-vs-ref-word": "diff_flexible",
    "overly-literal-vs-synonym": "diff_flexible",
    "pleonastic_it:deletion": "annotate_word",
    "pleonastic_it:substitution": "annotate_word",
    "punctuation:deletion_all": "add-omit",
    "punctuation:deletion_commas": "add-omit",
    "punctuation:deletion_quotes": "add-omit",
    "punctuation:statement-to-question": "add-omit",
    "real-world-knowledge-entailment": "diff_flexible",
    "real-world-knowledge-hypernym-vs-distractor": "diff_flexible",
    "real-world-knowledge-hypernym-vs-hyponym": "diff_flexible",
    "real-world-knowledge-synonym-vs-antonym": "diff_flexible",
    "similar-language-high": "whole_sentence",
    "similar-language-low": "whole_sentence",
    # add-omit could be used here as well to get character-level replacements
    "untranslated-vs-ref-word": "diff_flexible",
    "untranslated-vs-synonym": "diff_flexible",
    "xnli-addition-contradiction": "?",
    "xnli-addition-neutral": "?",
    "xnli-omission-contradiction": "?",
    "xnli-omission-neutral": "?",
}

# Ask which single phenomenon to annotate in this session. Surrounding
# whitespace is dropped so a stray space does not reject a valid name.
phenomena_tobe_processed = input("enter the phenomena: ").strip()
if phenomena_tobe_processed not in phenomena:
    # Show the valid names themselves: the previous call passed sys.argv[1] as the
    # only formatted argument, which is unrelated to the prompt and may not exist.
    logger.error("The phenomena should be one of these: {}".format(list(phenomena.keys())))
    exit()

# The dataset is expected two directories above the working directory.
folder = os.getcwd()
dataset_path = os.path.join(folder, '../../dataset')
if not os.path.exists(dataset_path):
    logger.error('No dataset path: %s', dataset_path)
    exit()

logger.info('Loading the dataset...')
dataset = load_from_disk(dataset_path)
logger.info('Dataset loaded.')


# Reuse annotations saved by an earlier run when the file exists; otherwise start empty.
# JSON object keys are strings, so they are converted back to integer sample ids.
annotated_dataset_path = os.path.join(folder, 'annotated.txt')
if not os.path.exists(annotated_dataset_path):
    logger.info('Creating new annotations.txt file at {}'.format(annotated_dataset_path))
    annotations = dict()
else:
    logger.info('Path {} already exists. Loading..'.format(annotated_dataset_path))
    with open(annotated_dataset_path, "r") as f:
        annotations = json.load(f)
    annotations = {int(sample_id): entry for sample_id, entry in annotations.items()}

# Statistics about the annotations, kept per phenomenon:
# 'total' and 'success' are counters; the other entries collect the
# (sample id, language pair) pairs that ended up in that category.
stats_template = {
            'total':0,
            'success':0,
            'too_long':[],
            'no_change':[],
            'error':[],
            'skipped':[],
            'other':[]
        }
stats_path = os.path.join(folder, 'stats.txt')
if os.path.exists(stats_path):
    logger.info('Path {} already exists. Loading..'.format(stats_path))
    with open(stats_path, "r") as f:
        stats = json.load(f)
    # Start the statistics of the selected phenomenon from scratch.
    # phenomena_tobe_processed is one name (a string); looping over it would walk
    # its characters and create one bogus entry per letter instead.
    stats[phenomena_tobe_processed] = copy.deepcopy(stats_template)
    # A stats file written before a phenomenon was added lacks its entry; fill it in
    # so later lookups by phenomenon name cannot fail.
    for key in phenomena.keys():
        if key not in stats:
            stats[key] = copy.deepcopy(stats_template)
else:
    logger.info('Creating new stats.txt file at {}'.format(stats_path))
    stats = {}
    for key in phenomena.keys():
        stats[key] = copy.deepcopy(stats_template)
logger.info("READY")

def _is_balanced_single_replacement(change):
    """Return True when `change` is exactly one span pair covering equally many tokens on both sides."""
    if len(change) != 1:
        return False
    good_side, bad_side = change[0]["in_good"], change[0]["in_bad"]
    # A missing side cannot be a one-to-one replacement.
    if good_side is None or bad_side is None:
        return False
    good_index, bad_index = good_side['token_index'], bad_side['token_index']
    # token_index may be None; only index lists can be compared by length.
    if not isinstance(good_index, (list, tuple)) or not isinstance(bad_index, (list, tuple)):
        return False
    return len(good_index) == len(bad_index)


# the UI (?) part of the annotation in general (ask if they want to accept the annotation, call manual_annotation if no)
def manual_annotation_io(idx):
    """Show sample `idx` with its suggested annotation and let the annotator accept, skip or redo it.

    Reads the sample from ``dataset['train']`` and the suggestion from ``annotations``.

    Returns:
        0    - the stored annotation is a single balanced replacement and was kept without asking.
        1    - the annotator typed "skip"; the stored annotation is removed.
        -1   - the annotator typed "exit" during manual annotation; a stored suggestion is removed.
        None - the suggestion was accepted or a manual annotation was completed.
    """
    sample = dataset['train'][idx]
    if idx in annotations:
        # Stored annotations are already normalized, so their shape can be inspected directly.
        if _is_balanced_single_replacement(annotations[idx]['annotation']):
            return 0
    mode = phenomena[sample["phenomena"]]
    if mode in ['?', 'mixed_flexible']:
        print("-----> For this sample we can compare the Incorrect translation with either Reference or Good translation.")
    elif mode in ['REF_flexible']:
        print("-----> For this sample we compare the Incorrect translation with the Reference.")
    else:
        print("-----> For this sample we compare the Incorrect translation with the Good translation.\n")
    if idx in annotations:
        print("Source sentence: ", sample['source'])
        print("Reference: ", sample['reference'])
        print("Good Translation: ", sample['good-translation'])
        print("Incorrect Translation: ", sample['incorrect-translation'])
        print('Suggested annotation:')
        print(annotations[idx]['annotation'], '\n')
        inp = input('To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:')
        if inp == "skip":
            # Skipping discards the suggestion so the sample stays unannotated.
            annotations.pop(idx)
            return 1
        # An empty answer makes manual_annotation return immediately, i.e. the suggestion is kept.
        res = manual_annotation(idx, inp)
        if res == -1:
            # Stopping here must not leave the unconfirmed suggestion behind.
            annotations.pop(idx)
            return -1
    else:
        print("No automatic translations for this sample.")
        res = manual_annotation(idx)
        if res == -1:
            return -1

# the UI (?) part of the manual annotation
def manual_annotation(idx, inp="."):
    """Ask the annotator to mark error spans by hand until an annotation is accepted.

    Args:
        idx: index of the sample in ``dataset['train']``.
        inp: previous answer of the annotator; an empty string means there is
            nothing to annotate and the stored annotation is returned directly.

    Returns:
        -1 when the annotator types "exit", otherwise ``annotations[idx]``.
    """
    while inp != "":
        sample = dataset['train'][idx]
        print("Source sentence: ", sample['source'])
        print("Reference: ", sample['reference'])
        print("Good Translation: ", sample['good-translation'])
        print("Incorrect Translation: ", sample['incorrect-translation'])
        bad = input("Enter the incorrect translation with the < and > to show the error spans (exit to stop): \n")
        if bad == "exit":
            return -1
        good = input("Enter the correct/reference translation with the < and > to show the error spans (exit to stop): \n")
        if good == "exit":
            return -1
        try:
            change = calculate_change(good, bad, sample)
        except ValueError as err:
            # Badly nested markers: report it and ask for both sentences again.
            logger.error("{} Try again.\n".format(err))
            continue
        print("Annotation: ", change)
        inp = input("\n To accept it press enter or to annotate again enter any other string: ")
        if inp == "":
            sample['annotation'] = change
            sample['method'] = "manual annotation"
            annotations[idx] = sample
    return annotations[idx]


def _marked_spans(text):
    """Return ``((start, end), token)`` for every ``<...>`` span in `text`.

    Positions are counted in the sentence without the markers (end exclusive);
    the token is the text between the two markers.

    Raises:
        ValueError: if a ``<`` appears inside an open span, a ``>`` has no
            opening ``<``, or a span is still open at the end of the text.
    """
    spans = []
    plain_pos = 0        # position in the sentence with the markers removed
    span_start = None    # plain position of the currently open span, if any
    marker_pos = None    # raw position of the opening marker
    for raw_pos, char in enumerate(text):
        if char == "<":
            if span_start is not None:
                raise ValueError("< not closed.")
            span_start, marker_pos = plain_pos, raw_pos
        elif char == ">":
            if span_start is None:
                raise ValueError("No opening <")
            spans.append(((span_start, plain_pos), text[marker_pos + 1:raw_pos]))
            span_start = None
        else:
            plain_pos += 1
    if span_start is not None:
        raise ValueError("< not closed.")
    return spans


# given a manually annotated sample (where there are <> in incorrect and good/reference sentences)
# calculate the character spans in the original sentences and return the change in our annotation format
def calculate_change(good, bad, sample):
    """Turn the hand-marked sentences into a list of changes.

    Args:
        good: correct/reference sentence with error spans wrapped in ``<`` and ``>``.
        bad: incorrect sentence marked the same way.
        sample: the sample being annotated (kept for interface compatibility, not read).

    Returns:
        One entry per marked span, spans of `bad` first and spans of `good` after;
        each entry fills only its own side and has ``token_index`` set to None.

    Raises:
        ValueError: if the markers in either sentence are not properly paired.
    """
    change = []
    for span, token in _marked_spans(bad):
        change.append({"in_good":None,
                       "in_bad":{'token_index':None,
                                 'character_span':span,
                                 'token':token}})
    for span, token in _marked_spans(good):
        change.append({"in_good":{'token_index':None,
                                  'character_span':span,
                                  'token':token},
                       "in_bad":None})
    return change

# If same ref and incorrect sentence was annotated before then just copy the annotation
def check_seen_before(sample):
    """Look for an already annotated sample with the same reference and incorrect translation.

    Returns the ``(annotation, method)`` pair of the first match in ``annotations``,
    or None when no stored sample matches.
    """
    match = next(
        (
            earlier
            for earlier in annotations.values()
            if earlier["reference"] == sample["reference"]
            and earlier["incorrect-translation"] == sample["incorrect-translation"]
        ),
        None,
    )
    if match is None:
        return None
    return (match["annotation"], match["method"])

def _flag_sample(idx, sample, bucket):
    """Record (idx, language pair) in the `bucket` list of the sample's phenomenon in ``stats``."""
    stats[sample["phenomena"]][bucket].append((idx, sample['langpair']))


def _store_annotation(idx, sample, change):
    """Attach `change` and the annotation mode to `sample` and register it in ``annotations``."""
    sample['annotation'] = change
    sample['method'] = phenomena[sample["phenomena"]]
    annotations[idx] = sample


def _first_change_longer_than(change, limit):
    """Return True when either side of the first change has a token longer than `limit` characters."""
    first = change[0]
    for side in ('in_good', 'in_bad'):
        if first[side] is not None and len(first[side]['token']) > limit:
            return True
    return False


def process_sample(idx, sample, manual=False):
    """Annotate one sample automatically and optionally hand it to the annotator.

    The annotation mode is looked up in ``phenomena``; modes without an automatic
    branch (e.g. '?') are only handled by the manual step. Outcomes are recorded
    in ``stats`` and successful results in ``annotations``.

    Args:
        idx: index of the sample in the dataset, used as key in ``annotations``.
        sample: the sample dict; it is updated in place with 'annotation' and 'method'.
        manual: when True, manual_annotation_io(idx) is called after the automatic step.

    Returns:
        -1 when the annotator asked to stop (the annotations are first written to
        ``checkpoint``), otherwise 1.
    """
    name = sample["phenomena"]
    mode = phenomena[name]
    good_mt = sample["good-translation"]
    bad_mt = sample["incorrect-translation"]

    if mode in ('add-omit', 'annotate_word'):
        level = 'char' if mode == 'add-omit' else 'word'
        try:
            annotate = diff_char_level if mode == 'add-omit' else annotate_word
            change = annotate(good_mt, bad_mt)
            if len(change) == 0:
                logger.warning('No change in id {}'.format(idx))
                _flag_sample(idx, sample, "no_change")
            else:
                stats[name]["success"] += 1
                change = standardize_annotation(change, good_mt, bad_mt)
            _store_annotation(idx, sample, change)
        except Exception:
            logger.warning('error in {} level annotate, id {}'.format(level, idx))
            _flag_sample(idx, sample, "error")

    elif mode in ('diff_flexible', 'REF_flexible', 'mixed_flexible'):
        # Pick the sentence the incorrect translation is compared against.
        if mode == 'diff_flexible':
            good = good_mt
        elif mode == 'mixed_flexible':
            good, _, _ = ref_or_good(sample["reference"], good_mt, bad_mt)
        else:
            good = sample["reference"]
        bad = bad_mt
        g, g_spans = tokenize(good)
        b, b_spans = tokenize(bad)

        # Japanese, Chinese and Thai do not separate words with spaces, so they are
        # compared at character level instead of token level.
        if sample['langpair'][-2:] not in ['ja', 'zh', 'th']:
            # Defined up front so a failure in diff_flexible below leaves an empty
            # result instead of an unbound variable.
            change = []
            if len(g) == len(b):   # if there are multiple one word replacements
                change = diff(g, g_spans, b, b_spans, phenomena="replacement")
            if len(g) != len(b) or len(change) == 0:
                try:
                    change = diff_flexible(good, g, g_spans, bad, b, b_spans)
                    if len(change) == 0 and good != bad:
                        change = diff_char_level(good, bad)
                except Exception:
                    logger.warning('error in id {}'.format(idx))
                    _flag_sample(idx, sample, "error")
            if len(change) == 0:
                logger.warning('No change in id {}'.format(idx))
                _flag_sample(idx, sample, "no_change")
            elif _first_change_longer_than(change, 50):
                logger.warning('check this - too long: %s' %idx)
                _flag_sample(idx, sample, "too_long")
            else:
                stats[name]["success"] += 1
                change = standardize_annotation(change, good, bad)
            _store_annotation(idx, sample, change)
        else:
            try:
                change = diff_char_level(good, bad)
                if len(change) == 0 and good != bad:
                    logger.warning('No change in id {}'.format(idx))
                    _flag_sample(idx, sample, "no_change")
                elif len(change) != 0 and _first_change_longer_than(change, 30):
                    logger.warning('check this - too long: %s' %idx)
                    _flag_sample(idx, sample, "too_long")
                else:
                    stats[name]["success"] += 1
                    change = standardize_annotation(change, good, bad)
                _store_annotation(idx, sample, change)
            except Exception:
                logger.warning('error in id {}'.format(idx))
                _flag_sample(idx, sample, "error")

    elif mode == 'units':
        try:
            g, b, change = annotate_units(good_mt, bad_mt)
            if len(change) == 0 and g != b:
                # The message has exactly as many placeholders as arguments; an extra
                # one used to raise here and turned every such sample into an "error".
                logger.warning('No change in id {}, \ng: {}, \nb: {}'.format(idx, g, b))
                _flag_sample(idx, sample, "no_change")
            elif len(change) > 1:
                logger.warning('Multiple changes in {} id {}'.format(name, idx))
                _flag_sample(idx, sample, "other")
            else:
                stats[name]["success"] += 1
                change = standardize_annotation(change, good_mt, bad_mt)
            _store_annotation(idx, sample, change)
        except Exception:
            logger.warning('error in id {}'.format(idx))
            _flag_sample(idx, sample, "error")

    elif mode == 'swap':
        try:
            change = annotate_swap_word_lvl(good_mt, bad_mt)
            if len(change) < 2 and good_mt != bad_mt:
                logger.warning('No change in id {}, \ng: {}, \nb: {}'.format(idx, good_mt, bad_mt))
                _flag_sample(idx, sample, "no_change")
            elif change[0]['in_good'] != None and change[1]['in_good'] != None and change[0]['in_good'] == change[1]['in_good']:
                logger.warning('check this: %s - swapped words are the same!' %idx)
                _flag_sample(idx, sample, "other")
            elif _first_change_longer_than(change, 50):
                logger.warning('check this: %s' %idx)
                _flag_sample(idx, sample, "too_long")
            else:
                stats[name]["success"] += 1
                change = standardize_annotation(change, good_mt, bad_mt)
            _store_annotation(idx, sample, change)
        except Exception:
            logger.warning('error in id {}'.format(idx))
            _flag_sample(idx, sample, "error")

    elif mode == 'date':
        try:
            change = diff_dates(good_mt, bad_mt)
            stats[name]["success"] += 1
            change = standardize_annotation(change, good_mt, bad_mt)
            _store_annotation(idx, sample, change)
        except Exception:
            logger.warning('error in id {}'.format(idx))
            _flag_sample(idx, sample, "error")

    elif mode == 'whole_sentence':
        change = whole_sentence(good_mt, bad_mt)
        stats[name]["success"] += 1
        change = standardize_annotation(change, good_mt, bad_mt)
        _store_annotation(idx, sample, change)

    if manual:
        res = manual_annotation_io(idx)
        if res == 1:  # SKIPPING
            return 1
        # if exit, first save a new annotations file to save progress and then exit
        if res == -1:
            with open(checkpoint, "w+") as f:
                json.dump(annotations, f, indent=2, ensure_ascii=False)  # encode dict into JSON
            return -1
    return 1  # 1 for success
        
def process_phenomena(samples, manual=False):
    """Annotate every not yet annotated ``(idx, sample)`` pair in `samples`.

    Samples whose reference and incorrect translation were annotated before get
    a copy of that annotation; all others go through process_sample. Returns -1
    as soon as process_sample reports -1, otherwise None after the last sample.
    """
    for idx, sample in tqdm(samples):
        # here don't worry about stats - it will be probably completely wrong
        if idx in annotations.keys():
            continue
        stats[sample["phenomena"]]["total"] += 1

        # check if it was annotated before
        earlier = check_seen_before(sample)
        if earlier is None:
            if process_sample(idx, sample, manual) == -1:
                return -1
            continue
        sample['annotation'] = earlier[0]
        sample['method'] = earlier[1]
        annotations[idx] = sample

enter the phenomena: hallucination-named-entity-level-1


INFO:logger:Loading the dataset...
INFO:logger:Dataset loaded.
INFO:logger:Path /mnt/c/Users/user/OneDrive/Masaüstü/work/ACES_private/challenge_set_annotation/annotated.txt already exists. Loading..
INFO:logger:Path /mnt/c/Users/user/OneDrive/Masaüstü/work/ACES_private/challenge_set_annotation/stats.txt already exists. Loading..
INFO:logger:READY


In [36]:

# if we have multiple word spans next to each other, then concatenate them in one span.
# no need for this when we have token_index: None or token_index:list because then it is already one big span
# also make sure token_index is a list for all changes
def standardize_annotation(change, good, bad):
    """Normalize a list of changes so every token_index is a list and adjacent word spans are merged.

    Args:
        change: list of dicts with the keys 'in_good' and 'in_bad'; each side is
            None or a dict with 'token_index', 'character_span' and 'token'.
        good: the sentence the 'in_good' character spans refer to.
        bad: the sentence the 'in_bad' character spans refer to.

    Returns:
        * an empty list for an empty `change`;
        * `change` itself, with integer token indices wrapped into lists in place,
          when any side is None or already has a None/list token_index;
        * otherwise a new list in which changes whose token indices are consecutive
          on both sides are merged into one span, with the token text re-cut
          from `good` / `bad`.
    """
    # Nothing to merge; without this guard the final append below would index an empty span.
    if len(change) == 0:
        return []

    def _is_single_span(side):
        # True when the side is missing or already expressed as one span.
        return side is None or side['token_index'] is None or isinstance(side['token_index'], list)

    if any(_is_single_span(c['in_good']) or _is_single_span(c['in_bad']) for c in change):
        logger.debug("first check")
        # No merging in this case: only make sure integer indices become lists.
        for c in change:
            for side in (c['in_good'], c['in_bad']):
                if side is not None and side['token_index'] is not None and not isinstance(side['token_index'], list):
                    side['token_index'] = [side['token_index']]
        return change

    def _merged_entry(good_tokens, good_span, bad_tokens, bad_span):
        # Build one change from the collected token indices and character spans.
        return {'in_good': {'token_index': good_tokens,
                            'character_span': good_span,
                            'token': good[good_span[0]:good_span[1]]},
                'in_bad': {'token_index': bad_tokens,
                           'character_span': bad_span,
                           'token': bad[bad_span[0]:bad_span[1]]}}

    good_tokens = []
    bad_tokens = []
    good_span = ()   # char span
    bad_span = ()
    change_new = []
    for c in change:
        g = c['in_good']
        b = c['in_bad']
        if len(good_tokens) == 0 and len(bad_tokens) == 0:
            # first change: open the first run
            good_tokens.append(g['token_index'])
            bad_tokens.append(b['token_index'])
            good_span = g['character_span']
            bad_span = b['character_span']
        elif g['token_index'] == good_tokens[-1] + 1 and b['token_index'] == bad_tokens[-1] + 1:
            # directly follows the current run on both sides: extend it
            good_tokens.append(g['token_index'])
            bad_tokens.append(b['token_index'])
            good_span = (good_span[0], g['character_span'][1])
            bad_span = (bad_span[0], b['character_span'][1])
        else:
            # gap on at least one side: close the current run and start a new one
            change_new.append(_merged_entry(good_tokens, good_span, bad_tokens, bad_span))
            good_tokens = [g['token_index']]
            bad_tokens = [b['token_index']]
            good_span = g['character_span']
            bad_span = b['character_span']

    change_new.append(_merged_entry(good_tokens, good_span, bad_tokens, bad_span))

    return change_new

In [37]:
# set up continuing
# Collect the (dataset index, sample) pairs of the selected phenomenon.
samples = []
for idx, sample in enumerate(dataset['train']):
    # Exact comparison: phenomena_tobe_processed is a single name, and `in` on a string
    # is a substring test that would also pick up e.g. 'omission' for 'xnli-omission-neutral'.
    if sample['phenomena'] == phenomena_tobe_processed:
        samples.append((idx, sample))
        
# if manual:
# Manual annotation keeps one checkpoint file per phenomenon in 'manual_annotations';
# an existing checkpoint replaces the in-memory annotations, otherwise they start empty.
if not os.path.exists(os.path.join(folder, 'manual_annotations')):
    os.mkdir(os.path.join(folder, 'manual_annotations'))
checkpoint = os.path.join(folder, 'manual_annotations/annotated_checkpoint_{}.txt'.format(phenomena_tobe_processed))
if not os.path.exists(checkpoint):
    annotations = dict()
else:
    logger.info('Path {} already exists. Loading..'.format(checkpoint))
    with open(checkpoint, "r") as f:
        annotations = json.load(f)
    # JSON object keys are strings; sample ids are integers.
    annotations = {int(sample_id): entry for sample_id, entry in annotations.items()}

In [104]:
# to start from the beginning
# annotations = dict()

In [38]:
# THE ACTUAL PART
# Runs the interactive annotation over the selected samples. process_phenomena
# returns -1 when the annotator stops early, which is the value shown below the cell.
logger.setLevel(logging.INFO)
process_phenomena(samples, manual=True)

  0%|                                                                                          | 0/999 [00:00<?, ?it/s]

-----> For this sample we compare the Incorrect translation with the Good translation.

Source sentence:  The song was written and composed by Gala produced by Filippo Andrea Carmeni and Maurizio Molella.
Reference:  La canción fue escrita y compuesta por Gala producida por Filippo Andrea Carmeni y Maurizio Molella.
Good Translation:  Esta canción fue escrita y compuesta por la gala producida por Filippo Andrea Carmeni y Maurizio Molella.
Incorrect Translation:  Esta canción fue escrita y compuesta por la gala producida por Karen Gamboa y Maurizio Molella.
Suggested annotation:
[{'in_good': {'token_index': [11, 12, 13], 'character_span': (63, 85), 'token': 'Filippo Andrea Carmeni'}, 'in_bad': {'token_index': [11, 12], 'character_span': (63, 75), 'token': 'Karen Gamboa'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:


  0%|▏                                                                                 | 2/999 [00:07<59:26,  3.58s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.

Source sentence:  On November 13, 2016, David Machado was murdered in Fox Grove Park near the city of Hughson by Dennis Wallace.
Reference:  El 13 de noviembre de 2016, David Machado fue asesinado en Fox Grove Park cerca de la ciudad de Hughson en manos de Dennis Wallace.
Good Translation:  El 13 de noviembre de 2016, el Adjunto David Machado fue asesinado en Fox Grove Park cerca de la ciudad de Hughson por Dennis Wallace.
Incorrect Translation:  El 13 de noviembre de 2016, el Magnolia Harvey fue asesinado en Fox Grove Park cerca de la ciudad de Hughson por Dennis Wallace.
Suggested annotation:
[{'in_good': {'token_index': [7, 8, 9], 'character_span': (31, 52), 'token': 'Adjunto David Machado'}, 'in_bad': {'token_index': [7, 8], 'character_span': (31, 46), 'token': 'Magnolia Harvey'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:




-----> For this sample we compare the Incorrect translation with the Good translation.

Source sentence:  Blake has been married to Patricia Meyer since 1984 and together they have two sons: Ryan (born 1988) and Dale (born 1992).
Reference:  Blake está casado con Patricia Meyer desde 1984 y juntos tienen dos hijos: Ryan (nacido en 1988) y Dale (nacido en 1992).
Good Translation:  Desde 1984 Blake está casado con Patricia Meyer y tienen dos hijos: Ryan (nacido en 1988) y Dale (nacido en 1992).
Incorrect Translation:  Desde 1984 Blake está casado con Patricia Meyer y tienen dos hijos: Ryan (nacido en 1988) y Dale (nacido en 1992).
Suggested annotation:
[] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:s
Source sentence:  Blake has been married to Patricia Meyer since 1984 and together they have two sons: Ryan (born 1988) and Dale (born 1992).
Reference:  Blake está casado con Patricia Meyer desde 1984 y juntos tienen dos hi

  1%|▋                                                                               | 8/999 [00:31<1:05:08,  3.94s/it]


-1

In [25]:
# Quick check of the logger level: with the level at INFO this DEBUG message is not emitted.
logger.setLevel(logging.INFO)
logger.debug('hi')

In [109]:
# To run normalization over the already annotated samples in coreference-based-on-commonsense
for idx in annotations:
    sample = annotations[idx]
    # ref_or_good returns the sentence to compare against as its first value.
    good, _, _ = ref_or_good(sample["reference"], sample["good-translation"], sample["incorrect-translation"])
    bad = sample['incorrect-translation']
    change = sample['annotation']
    try:
        sample['annotation'] = standardize_annotation(change, good, bad)
    except Exception as err:
        # Keep going with the remaining samples, but show which one failed and why.
        # (A bare except would also swallow an interrupt of the cell.)
        print(idx, repr(err))

In [6]:
# to count the number of samples in any phenomena
# (here: the phenomena whose annotation mode is '?')
count = 0
for sample in dataset["train"]:
    # a boolean adds as 0 or 1
    count += sample["phenomena"] in ["lexical-overlap", 'xnli-omission-neutral', 'xnli-omission-contradiction', 'xnli-addition-neutral', 'xnli-addition-contradiction']

In [6]:
# Hand-written example for trying out standardize_annotation: three one-token
# replacements, the last two on neighbouring tokens (18 and 19).
good = "J' ai déplacé mes vêtements mouillés de la valise au sac à linge et, par conséquent, il est devenu lourd."
bad =  "J' ai déplacé mes vêtements mouillés de la valise au sac à linge et, par conséquent, elle est devenue lourde."
change = [
    {'in_good': {'token_index': [token], 'character_span': [good_start, good_end], 'token': good_token},
     'in_bad': {'token_index': [token], 'character_span': [bad_start, bad_end], 'token': bad_token}}
    for token, (good_start, good_end, good_token), (bad_start, bad_end, bad_token) in [
        (16, (85, 87, 'il'), (85, 89, 'elle')),
        (18, (92, 98, 'devenu'), (94, 101, 'devenue')),
        (19, (99, 104, 'lourd'), (102, 108, 'lourde')),
    ]
]

In [17]:
# Run the normalization on the example above with DEBUG logging enabled,
# so the logger.debug message inside standardize_annotation is not filtered out.
logger.setLevel(logging.DEBUG)
standardize_annotation(change, good, bad)

[{'in_good': {'token_index': [16], 'character_span': (85, 87), 'token': 'il'},
  'in_bad': {'token_index': [16],
   'character_span': (85, 89),
   'token': 'elle'}},
 {'in_good': {'token_index': [18, 19],
   'character_span': (92, 104),
   'token': 'devenu lourd'},
  'in_bad': {'token_index': [18, 19],
   'character_span': (94, 108),
   'token': 'devenue lourde'}}]

In [112]:
# Write the in-memory annotations to the checkpoint file as indented JSON,
# keeping non-ASCII characters as they are.
with open(checkpoint, "w+") as f:
    json.dump(annotations, f, ensure_ascii=False, indent=2)

In [27]:
# Display the complete in-memory annotations dict (sample id -> annotated sample).
annotations

{1067: {'source': 'The song took longer to sing than the ballad because it was more words.',
  'good-translation': "La chanson a pris plus de temps à chanter que la ballade parce que c'était plus de mots.",
  'incorrect-translation': 'La chanson a pris plus de temps à chanter que la ballade parce que la ballade était plus de mots.',
  'reference': 'La chanson a pris plus de temps à chanter que la ballade parce que la chanson était plus de mots.',
  'phenomena': 'coreference-based-on-commonsense',
  'langpair': 'en-fr',
  'annotation': [{'in_good': {'token_index': [15],
     'character_span': [70, 77],
     'token': 'chanson'},
    'in_bad': {'token_index': [15],
     'character_span': [70, 77],
     'token': 'ballade'}}],
  'method': 'mixed_flexible'},
 1068: {'source': 'The song took longer to sing than the ballad because it was less words.',
  'good-translation': "La chanson a pris plus de temps à chanter que la ballade parce que c'était moins de mots.",
  'incorrect-translation': 'L