In [9]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datasets import load_from_disk

import numpy as np
np.random.seed(42)

import json, copy, os, sys
from tqdm import tqdm

import logging
logger = logging.getLogger('logger')
logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

sys.path.append(os.path.abspath(os.getcwd()))
from annotation_utilities import *

# Annotation method used for every phenomenon.
# The table is written grouped by method and then flattened into `phenomena`,
# a phenomenon -> method lookup whose keys are in alphabetical order.
_phenomena_by_method = {
    'add-omit': [
        'addition',
        'modal_verb:deletion',
        'omission',
        'punctuation:deletion_all',
        'punctuation:deletion_commas',
        'punctuation:deletion_quotes',
        'punctuation:statement-to-question',
    ],
    'annotate_word': [
        'anaphoric_group_it-they:deletion',
        'anaphoric_group_it-they:substitution',
        'anaphoric_intra_non-subject_it:deletion',
        'anaphoric_intra_non-subject_it:substitution',
        'anaphoric_intra_subject_it:deletion',
        'anaphoric_intra_subject_it:substitution',
        'anaphoric_intra_they:deletion',
        'anaphoric_intra_they:substitution',
        'anaphoric_singular_they:deletion',
        'anaphoric_singular_they:substitution',
        'pleonastic_it:deletion',
        'pleonastic_it:substitution',
    ],
    'diff_flexible': [
        'ambiguous-translation-wrong-discourse-connective-since-causal',
        'ambiguous-translation-wrong-discourse-connective-since-temporal',
        'ambiguous-translation-wrong-discourse-connective-while-contrast',
        'ambiguous-translation-wrong-discourse-connective-while-temporal',
        'ambiguous-translation-wrong-gender-female-anti',
        'ambiguous-translation-wrong-gender-female-pro',
        'ambiguous-translation-wrong-gender-male-anti',
        'ambiguous-translation-wrong-gender-male-pro',
        'ambiguous-translation-wrong-sense-frequent',
        'ambiguous-translation-wrong-sense-infrequent',
        'commonsense-only-ref-ambiguous',
        'commonsense-src-and-ref-ambiguous',
        'hallucination-named-entity-level-1',
        'hallucination-number-level-1',
        'hallucination-real-data-vs-ref-word',
        'hallucination-real-data-vs-synonym',
        'modal_verb:substitution',
        'overly-literal-vs-correct-idiom',
        'overly-literal-vs-explanation',
        'overly-literal-vs-ref-word',
        'overly-literal-vs-synonym',
        'real-world-knowledge-entailment',
        'real-world-knowledge-hypernym-vs-distractor',
        'real-world-knowledge-hypernym-vs-hyponym',
        'real-world-knowledge-synonym-vs-antonym',
        'untranslated-vs-ref-word',   # here add-omit can be used for getting character level replacements too
        'untranslated-vs-synonym',
    ],
    'REF_flexible': [
        'antonym-replacement',
        'hallucination-named-entity-level-2',
        'hallucination-named-entity-level-3',
        'hallucination-number-level-2',
        'hallucination-number-level-3',
        'hypernym-replacement',
        'hyponym-replacement',
        'nonsense',
    ],
    'mixed_flexible': [
        'coreference-based-on-commonsense',
    ],
    'whole_sentence': [
        'copy-source',
        'do-not-translate',
        'similar-language-high',
        'similar-language-low',
    ],
    'date': [
        'hallucination-date-time',
    ],
    'units': [
        'hallucination-unit-conversion-amount-matches-ref',
        'hallucination-unit-conversion-unit-matches-ref',
    ],
    'swap': [
        'ordering-mismatch',
    ],
    # '?' marks phenomena without an automatic annotation method
    '?': [
        'lexical-overlap',
        'xnli-addition-contradiction',
        'xnli-addition-neutral',
        'xnli-omission-contradiction',
        'xnli-omission-neutral',
    ],
}

# Flatten to {phenomenon: method}; sorting by name keeps the keys in alphabetical order.
phenomena = dict(sorted(
    (name, method)
    for method, names in _phenomena_by_method.items()
    for name in names
))

# Everything is resolved relative to the current working directory.
folder = os.getcwd()
manual_annotations = os.path.join(folder, 'manual_annotations')
# exist_ok=True replaces the exists()-then-mkdir() pair in a single call.
os.makedirs(manual_annotations, exist_ok=True)

# Surrounding whitespace is stripped so that a pasted name with a trailing
# space or newline still matches a key of `phenomena`.
phenomena_tobe_processed = input("enter the phenomena: ").strip()
if phenomena_tobe_processed == 'test':
    # 'test' mode: work on the samples stored in manual_annotations/subset.json
    dataset_path = os.path.join(manual_annotations, 'subset.json')
    if not os.path.exists(dataset_path):
        logger.error('No dataset path: %s' %(dataset_path))
        # sys.exit raises SystemExit, which stops this cell/script with a
        # non-zero status; the bare exit() helper is meant for interactive use.
        sys.exit(1)
    logger.info('Loading the test dataset...')
    with open(dataset_path, "r") as f:
        samples = json.load(f)
    # NOTE(review): keys of a JSON object are strings, while the checkpoint keys
    # are converted to int below - confirm the ids line up in 'test' mode.
    logger.info('Test dataset loaded.')
elif phenomena_tobe_processed not in phenomena:
    # The original message formatted sys.argv[1] into the single placeholder,
    # so the list of valid names was never shown.
    logger.error("The phenomena should be one of these: {} (got {!r})".format(
        list(phenomena), phenomena_tobe_processed))
    sys.exit(1)
else:
    dataset_path = os.path.join(folder, '../../dataset')
    if not os.path.exists(dataset_path):
        logger.error('No dataset path: %s' %(dataset_path))
        sys.exit(1)
    logger.info('Loading the dataset...')
    dataset = load_from_disk(dataset_path)
    logger.info('Dataset loaded.')
    # Keep only the requested phenomenon, keyed by its index in the train split.
    # Equality is required here: `in` on a string is a substring test, which
    # would e.g. also select 'omission' samples for 'xnli-omission-neutral'.
    samples = {
        idx: sample
        for idx, sample in enumerate(dataset['train'])
        if sample['phenomena'] == phenomena_tobe_processed
    }

# Resume from an earlier checkpoint of this phenomenon if there is one.
checkpoint = os.path.join(folder, 'manual_annotations/annotated_checkpoint_{}.txt'.format(phenomena_tobe_processed))
if os.path.exists(checkpoint):
    logger.info('Path {} already exists. Loading..'.format(checkpoint))
    with open(checkpoint, "r") as f:
        annotations = json.load(f)
    # JSON turns the integer sample ids into strings; convert them back.
    annotations = {int(k):v for k,v in annotations.items()}
else:
    annotations = dict()

# calculate statistics about the annotations:
# for every mode, calculate no. of skipped, no. of unsure and ids, and no. of done.
stats_template = {
            'total':0,
            'success':0,
            'too_long':[],
            'no_change':[],
            'error':[],
            'other':[]
        }
# NOTE(review): when the notebook runs from inside
# ACES_private/challenge_set_annotation this path repeats that folder (see the
# path logged by "Creating new stats.txt file") - confirm the intended location.
stats_path = os.path.join(folder, 'ACES_private/challenge_set_annotation/stats.txt')
if os.path.exists(stats_path):
    logger.info('Path {} already exists. Loading..'.format(stats_path))
    with open(stats_path, "r") as f:
        stats = json.load(f)
    # we want to overwrite the statistics for the new phenomena
    # (one entry for the whole name - iterating over the string would create
    # one bogus entry per character)
    stats[phenomena_tobe_processed] = copy.deepcopy(stats_template)
else:
    logger.info('Creating new stats.txt file at {}'.format(stats_path))
    stats = {key: copy.deepcopy(stats_template) for key in phenomena}
    stats['test'] = copy.deepcopy(stats_template)


logger.info("READY")

# the UI (?) part of the annotation in general (ask if they want to accept the annotation, call manual_annotation if no)
def manual_annotation_io(idx):
    """Show the suggested annotation of sample `idx` and let the annotator accept, skip or redo it.

    Reads the module-level `samples`, `annotations` and `phenomena`.

    A suggestion consisting of a single change whose good and bad side cover the
    same number of tokens is accepted without asking.

    Returns:
        0    the suggestion was accepted automatically,
        1    the annotator skipped the sample (its entry is removed from `annotations`),
        -1   the annotator typed "exit" during manual annotation,
        None the sample went through the interactive flow (accepted or re-annotated).
    """
    def token_count(side):
        # A side can be missing (None), and its token_index can be None as well;
        # neither case can be compared by length.
        if side is None or side.get('token_index') is None:
            return None
        return len(side['token_index'])

    sample = samples[idx]
    if idx in annotations:
        change = annotations[idx]['annotation']   # now it's normalized annotation.
        if len(change) == 1:
            n_good = token_count(change[0]["in_good"])
            n_bad = token_count(change[0]["in_bad"])
            # Only auto-accept when both sides exist; one-sided changes are
            # shown to the annotator instead of raising a TypeError here.
            if n_good is not None and n_good == n_bad:
                return 0

    method = phenomena[sample["phenomena"]]
    if method in ['?', 'mixed_flexible']:
        print("-----> For this sample we can compare the Incorrect translation with either Reference or Good translation.")
    elif method in ['REF_flexible']:
        print("-----> For this sample we compare the Incorrect translation with the Reference.")
    else:
        print("-----> For this sample we compare the Incorrect translation with the Good translation.\n")

    if idx in annotations:
        print("\nID: ", idx)
        print("Source sentence: ", sample['source'])
        print("Reference: ", sample['reference'])
        print("Good Translation: ", sample['good-translation'])
        print("Incorrect Translation: ", sample['incorrect-translation'])
        print('Suggested annotation:')
        print(annotations[idx]['annotation'], '\n')
        inp = input('To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:')
        if inp == "skip":
            annotations.pop(idx)
            return 1  # this means, we are skipping, so should delete this annotation and then continue with the next.
        # An empty answer accepts the suggestion; anything else starts manual annotation.
        res = manual_annotation(idx, inp)
        if res == -1:
            # do not add the annotation if you stop at this point
            annotations.pop(idx)
            return -1
    else:
        print("No automatic translations for this sample.")
        res = manual_annotation(idx)
        if res == -1:
            return -1
    return None

# the UI (?) part of the manual annotation
def manual_annotation(idx, inp="."):
    """Interactively annotate sample `idx` until the annotator accepts the result.

    The annotator re-types the incorrect and the correct/reference translation
    with `<` and `>` around every error span. Malformed markers are reported and
    the annotator is asked again.

    Args:
        idx: key of the sample in the module-level `samples`.
        inp: previous answer of the annotator; an empty string means "nothing to
            do", so the prompt loop is skipped.

    Returns:
        -1 if the annotator typed "exit", otherwise `annotations[idx]` (the sample
        with its 'annotation' and 'method' fields).
    """
    while inp != "":
        sample = samples[idx]
        print("Source sentence: ", sample['source'])
        print("Reference: ", sample['reference'])
        print("Good Translation: ", sample['good-translation'])
        print("Incorrect Translation: ", sample['incorrect-translation'])
        bad = input("Enter the incorrect translation with the < and > to show the error spans (exit to stop): \n")
        if bad == "exit":
            return -1
        good = input("Enter the correct/reference translation with the < and > to show the error spans (exit to stop): \n")
        if good == "exit":
            return -1
        try:
            change = calculate_change(good, bad, sample)
        except ValueError as err:
            # Malformed < > markers: report and ask for both sentences again.
            logger.error("%s Try again.\n", err)
            inp = "."
            continue
        print("Annotation: ", change)
        inp = input("\n To accept it press enter or to annotate again enter any other string: ")
        if inp == "":
            sample['annotation'] = change
            sample['method'] = "manual annotation"
            annotations[idx] = sample
    return annotations[idx]


def _extract_marked_spans(marked):
    """Return (start, end, token) for every <...> span of `marked`.

    `start`/`end` are character offsets in the text with the `<` and `>` markers
    removed (`end` is exclusive).

    Raises:
        ValueError: on a nested `<`, a `>` without an opening `<`, or a `<` that
            is never closed.
    """
    spans = []
    pos = 0            # offset in the text without markers
    open_span = None   # (offset without markers, index in `marked`) of the open '<'
    for i, c in enumerate(marked):
        if c == "<":
            if open_span is not None:
                raise ValueError("< not closed.")
            open_span = (pos, i)
        elif c == ">":
            if open_span is None:
                raise ValueError("No opening <")
            start, marker_index = open_span
            spans.append((start, pos, marked[marker_index + 1:i]))
            open_span = None
        else:
            # Only real characters advance the offset; markers do not count.
            pos += 1
    if open_span is not None:
        raise ValueError("< not closed.")
    return spans


# given a manually annotated sample (where there are <> in incorrect and good/reference sentences)
# calculate the character spans in the original sentences and return the change in our annotation format
def calculate_change(good, bad, sample):
    """Convert `<`/`>`-marked sentences into the annotation format.

    Args:
        good: correct/reference translation with error spans wrapped in < >.
        bad: incorrect translation with error spans wrapped in < >.
        sample: the sample being annotated (kept for interface compatibility; unused).

    Returns:
        A list with one entry per marked span: first the spans of `bad`
        (`in_good` is None), then the spans of `good` (`in_bad` is None). Each
        filled side holds 'token_index' (None), 'character_span' (start, end) in
        the unmarked sentence and 'token' (the marked text).

    Raises:
        ValueError: if the markers of either sentence are malformed.
    """
    change = []
    for start, end, token in _extract_marked_spans(bad):
        change.append({"in_good":None,
                       "in_bad":{'token_index':None,
                                 'character_span':(start, end),
                                 'token':token}})
    for start, end, token in _extract_marked_spans(good):
        change.append({"in_good":{'token_index':None,
                                  'character_span':(start, end),
                                  'token':token},
                       "in_bad":None})
    return change

# process given sample, annotate or do manual annotation (only in the annotations.ipynb, in process_dataset.py only automatic annotation)
def _store_annotation(idx, sample, change):
    """Attach `change` and the method name to `sample` and register it in `annotations`."""
    sample['annotation'] = change
    sample['method'] = phenomena[sample["phenomena"]]
    annotations[idx] = sample


def _flag_sample(idx, sample, bucket):
    """Append (idx, langpair) to the `bucket` list of this phenomenon's stats."""
    stats[sample["phenomena"]][bucket].append((idx, sample['langpair']))


def _record_error(idx, sample, message):
    """Log `message` as a warning (traceback at DEBUG level) and count the sample as an error."""
    logger.warning(message)
    logger.debug('traceback for id %s', idx, exc_info=True)
    _flag_sample(idx, sample, "error")


def _first_change_too_long(change, limit):
    """True if the token of the first change is longer than `limit` characters on either side."""
    first = change[0]
    return any(first[side] is not None and len(first[side]['token']) > limit
               for side in ('in_good', 'in_bad'))


def process_sample(idx, sample, manual=False, detokenize=False):
    """Automatically annotate one sample and optionally hand it to the annotator.

    The annotation method is looked up in `phenomena`. The result is stored in
    the module-level `annotations` and counted in `stats`. Phenomena mapped to
    '?' have no automatic method and only go through the manual step.

    Args:
        idx: id of the sample (key in `samples` / `annotations`).
        sample: the sample dict; gets 'annotation' and 'method' fields.
        manual: if True, `manual_annotation_io` is called after the automatic step.
        detokenize: if True, the sentences are detokenized before annotating and
            the maps are passed on to `standardize_annotation`.

    Returns:
        -1 if the annotator asked to stop (the checkpoint is written first),
        otherwise 1.
    """
    method = phenomena[sample["phenomena"]]
    if method == 'mixed_flexible':
        # NOTE(review): another cell of this notebook unpacks three values from
        # ref_or_good(); here the result is used as the sentence itself - confirm
        # which matches the current annotation_utilities.
        good_og = ref_or_good(sample["reference"], sample["good-translation"], sample["incorrect-translation"])
    elif method == 'REF_flexible':
        good_og = sample["reference"]
    else:
        good_og = sample["good-translation"]
    bad_og = sample["incorrect-translation"]

    # if detokenize we just annotate the detokenized sentences, then map the character span back to the original sentence
    # in the standardize_annotation function in annotation_utilities.py
    # maps=None tells standardize_annotation that there is nothing to revert.
    good, bad, maps = good_og, bad_og, None
    if detokenize:
        try:
            target_lang = sample["langpair"].split('-')[1]
            good_detok, good_map = detokenize_text(good_og, lang=target_lang)
            bad_detok, bad_map = detokenize_text(bad_og, lang=target_lang)
        except Exception:
            # best effort: fall back to the original sentences
            pass
        else:
            good, bad, maps = good_detok, bad_detok, (good_map, bad_map)
    originals = (good_og, bad_og)

    if method == 'add-omit':
        try:
            change = diff_char_level(good, bad)
            if len(change) == 0:
                logger.warning('No change in id {}'.format(idx))
                _flag_sample(idx, sample, "no_change")
            else:
                stats[sample["phenomena"]]["success"] += 1
                change = standardize_annotation(change, good, bad, maps, originals)
            _store_annotation(idx, sample, change)
        except Exception:
            _record_error(idx, sample, 'error in char level annotate, id {}'.format(idx))

    elif method == 'annotate_word':
        try:
            change = annotate_word(good, bad)
            if len(change) == 0:
                logger.warning('No change in id {}'.format(idx))
                _flag_sample(idx, sample, "no_change")
            else:
                stats[sample["phenomena"]]["success"] += 1
                change = standardize_annotation(change, good, bad, maps, originals)
            _store_annotation(idx, sample, change)
        except Exception:
            _record_error(idx, sample, 'error in word level annotate, id {}'.format(idx))

    elif method in ['diff_flexible', 'REF_flexible', 'mixed_flexible']:
        # special treatment to Japanese, Chinese and Thai because they don't use spaces, so can't be split
        if sample['langpair'][-2:] not in ['ja', 'zh', 'th']:
            # The whole branch is guarded like its sibling branches: a failure is
            # counted as an error and nothing is stored, so `change` can never be
            # read while unbound and the manual step can still take over.
            try:
                g, g_spans = tokenize(good)
                b, b_spans = tokenize(bad)
                change = []
                if len(g) == len(b):   # if there are multiple one word replacements
                    change = diff(g, g_spans, b, b_spans, phenomena="replacement")
                if len(change) == 0:
                    change = diff_flexible(good, g, g_spans, bad, b, b_spans)
                    if len(change) == 0 and good != bad:
                        change = diff_char_level(good, bad)
                if len(change) == 0:
                    logger.warning('No change in id {}'.format(idx))
                    _flag_sample(idx, sample, "no_change")
                elif _first_change_too_long(change, 50):
                    logger.warning('check this - too long: %s' %idx)
                    _flag_sample(idx, sample, "too_long")
                else:
                    stats[sample["phenomena"]]["success"] += 1
                    change = standardize_annotation(change, good, bad, maps, originals)
                _store_annotation(idx, sample, change)
            except Exception:
                _record_error(idx, sample, 'error in id {}'.format(idx))
        else:
            # character level only; no tokenization is needed here
            try:
                change = diff_char_level(good, bad)
                if len(change) == 0 and good != bad:
                    logger.warning('No change in id {}'.format(idx))
                    _flag_sample(idx, sample, "no_change")
                elif len(change) != 0 and _first_change_too_long(change, 30):
                    logger.warning('check this - too long: %s' %idx)
                    _flag_sample(idx, sample, "too_long")
                else:
                    stats[sample["phenomena"]]["success"] += 1
                    change = standardize_annotation(change, good, bad, maps, originals)
                _store_annotation(idx, sample, change)
            except Exception:
                _record_error(idx, sample, 'error in id {}'.format(idx))

    elif method == 'units':
        try:
            g, b, change = annotate_units(good,bad)
            if len(change) == 0 and g != b:
                # three placeholders for three values; a fourth one used to raise
                # IndexError and turned every "no change" into an "error"
                logger.warning('No change in id {}, \ng: {}, \nb: {}'.format(idx, g, b))
                _flag_sample(idx, sample, "no_change")
            elif len(change) > 1:
                logger.warning('Multiple changes in {} id {}'.format(sample["phenomena"], idx))
                _flag_sample(idx, sample, "other")
            else:
                stats[sample["phenomena"]]["success"] += 1
                change = standardize_annotation(change, good, bad, maps, originals)
            _store_annotation(idx, sample, change)
        except Exception:
            _record_error(idx, sample, 'error in id {}'.format(idx))

    elif method == 'swap':
        try:
            change = annotate_swap_word_lvl(good,bad)
            if len(change) < 2 and good != bad:
                logger.warning('No change in id {}, \ng: {}, \nb: {}'.format(idx, good, bad))
                _flag_sample(idx, sample, "no_change")
            # NOTE(review): with fewer than two changes and good == bad the
            # indexing below raises and the sample is counted as an error.
            elif change[0]['in_good'] != None and change[1]['in_good'] != None and change[0]['in_good'] == change[1]['in_good']:
                logger.warning('check this: %s - swapped words are the same!' %idx)
                _flag_sample(idx, sample, "other")
            elif _first_change_too_long(change, 50):
                logger.warning('check this: %s' %idx)
                _flag_sample(idx, sample, "too_long")
            else:
                stats[sample["phenomena"]]["success"] += 1
                change = standardize_annotation(change, good, bad, maps, originals)
            _store_annotation(idx, sample, change)
        except Exception:
            _record_error(idx, sample, 'error in id {}'.format(idx))

    elif method == 'date':
        try:
            change = diff_dates(good,bad)
            stats[sample["phenomena"]]["success"] += 1
            change = standardize_annotation(change, good, bad, maps, originals)
            _store_annotation(idx, sample, change)
        except Exception:
            _record_error(idx, sample, 'error in id {}'.format(idx))

    elif method == 'whole_sentence':
        change = whole_sentence(good, bad)
        stats[sample["phenomena"]]["success"] += 1
        change = standardize_annotation(change, good, bad, maps, originals)
        _store_annotation(idx, sample, change)

    if manual:
        res = manual_annotation_io(idx)
        if res == 1:  # SKIPPING
            return 1
        # if exit, first save a new annotations file to save progress and then exit
        if res == -1:
            with open(checkpoint, "w+") as f:
                json.dump(annotations, f, indent=2, ensure_ascii=False)  # encode dict into JSON
            return -1
    return 1  # 1 for success
        
def process_phenomena(samples, manual=False, detokenize=False):
    """Annotate every sample of `samples` that is not in `annotations` yet.

    A sample that `check_seen_before` recognises reuses the earlier annotation
    and method; every other sample goes through `process_sample`. The checkpoint
    file is written whenever the function is left (finished, stopped by the
    annotator, or interrupted), so finished annotations are not lost.

    Args:
        samples: dict mapping sample id to sample dict.
        manual: passed on to `process_sample` (interactive review).
        detokenize: passed on to `process_sample`.

    Returns:
        -1 if the annotator asked to stop, None after a complete pass.
    """
    try:
        for idx, sample in tqdm(samples.items()):
            # here don't worry about stats - it will be probably completely wrong
            if idx in annotations:
                continue
            stats[sample["phenomena"]]["total"] += 1

            # check if it was annotated before
            seen = check_seen_before(sample, annotations)
            if seen is not None:
                sample['annotation'] = seen[0]
                sample['method'] = seen[1]
                annotations[idx] = sample
                continue

            try:
                res = process_sample(idx, sample, manual, detokenize)
            except Exception:
                # `except Exception` lets Ctrl-C / kernel interrupt through (a bare
                # except swallowed it) and the traceback shows why the sample failed.
                logger.exception('Unexpected error while processing id %s', idx)
                continue
            if res == -1:
                return -1
    finally:
        # save all annotations after finished (or when leaving early)
        with open(checkpoint, "w+") as f:
            json.dump(annotations, f, indent=2, ensure_ascii=False)  # encode dict into JSON

enter the phenomena: real-world-knowledge-hypernym-vs-distractor


INFO:logger:Loading the dataset...
INFO:logger:Dataset loaded.
INFO:logger:Creating new stats.txt file at /mnt/c/Users/user/OneDrive/Masaüstü/work/ACES_private/challenge_set_annotation/ACES_private/challenge_set_annotation/stats.txt
INFO:logger:READY


In [135]:
# Optional reset: run this cell to start over with no annotations instead of
# continuing from a checkpoint (the previous checkpoint will be overwritten on the next save).
annotations = {}

In [10]:
# THE ACTUAL PART
# Runs the annotation loop over `samples` (loaded by the setup cell).
# manual=True asks the annotator to review each suggestion; detokenize=True
# annotates detokenized sentences. Progress is written to `checkpoint` when the
# loop finishes or when the annotator types "exit".
logger.setLevel(logging.INFO)
process_phenomena(samples, manual=True, detokenize=True)

  0%|                                                                                      | 0/40 [00:00<?, ?it/s]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  16390
Source sentence:  In einem Wirtschaftssystem der Reinvestition, was geschieht, wenn wir unsere Handelvorteile verwenden und alle Überschüsse wiederinvestieren, damit wir mehr Äpfel und Birnen herstellen können, als wir zu Beginn hatten?
Reference:  In an economy with reinvestment, what happens if we can take our advantages of trade and reinvest any excess so that we can create more apples and pears than we had to start with?
Good Translation:  In an economic system of reinvestment, what if we use our trading advantages and reinvest any surpluses so we can produce more fruit than we started with?
Incorrect Translation:  In an economic system of reinvestment, what if we use our trading advantages and reinvest any surpluses so we can produce more bananas and oranges than we started with?
Suggested annotation:
[{'in_good': {'token_index': [22], 'character_span': (127, 132), 'token': 'fruit'}

  2%|█▉                                                                            | 1/40 [00:08<05:12,  8.02s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  16392
Source sentence:  Sie müssen durch zahlreiche Luftdruckkammern, eh, Fahrten gehen, ehe sie überhaupt U2's fliegen oder mit Druckanzug fliegen.
Reference:  They have to go through a number of altitude chambers, uh, rides before they start even flying U2's or flying with pressure suits.
Good Translation:  They have to go through numerous air pressure chambers, eh, rides before they even fly spy planes or fly in a pressure suit.
Incorrect Translation:  They have to go through numerous air pressure chambers, eh, rides before they even fly SR-71's or fly in a pressure suit.
Suggested annotation:
[{'in_good': {'token_index': [15, 16], 'character_span': (87, 97), 'token': 'spy planes'}, 'in_bad': {'token_index': [15], 'character_span': (87, 94), 'token': "SR-71's"}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:


  8%|█████▊                                                                        | 3/40 [00:22<04:38,  7.52s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  16401
Source sentence:  Sie haben gewährt, ich habe gesagt, das Engagement des Königs zu diesem Mann. Sein Ton verriet die Bitterkeit seines Grolls. Ich bin sicher, du hast diesem Mann nicht die Genehmigung des Königs erteilt, sagte er fröhlich.
Reference:  You have granted, I am told, the King's commission to this man. His very tone betrayed the bitterness of his rancour. I'm sure you didn't grant the King's commission to this man, he said cheerfully.
Good Translation:  You have granted, I have said, the monarch’s commitment to this man. His tone betrayed the bitterness of his resentment. I'm sure you didn't give this man the monarch's approval, he said cheerfully.
Incorrect Translation:  You have granted, I have said, the Queen’s commitment to this man. His tone betrayed the bitterness of his resentment. I'm sure you didn't give this man the Queen's approval, he said cheerfully.
Suggested an

 30%|███████████████████████                                                      | 12/40 [01:05<02:27,  5.27s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  16404
Source sentence:  Es gibt einen Ausschlag, der mit einigen Geschlechtskrankheiten einhergeht.
Reference:  There is a rash that comes along with some STIs.
Good Translation:  There is a rash that is associated with some infections.
Incorrect Translation:  There is a rash that is associated with some prion diseases.
Suggested annotation:
[{'in_good': {'token_index': [9], 'character_span': (45, 55), 'token': 'infections'}, 'in_bad': {'token_index': [9, 10], 'character_span': (45, 59), 'token': 'prion diseases'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:


 38%|████████████████████████████▉                                                | 15/40 [01:35<02:45,  6.60s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  16407
Source sentence:  Ich möchte wissen, sind sie oft in England?
Reference:  I want to know, are they often in England?
Good Translation:  I want to know, are you often in the UK?
Incorrect Translation:  I want to know, are you often in Scotland?
Suggested annotation:
[{'in_good': {'token_index': [8, 9], 'character_span': (33, 39), 'token': 'the UK'}, 'in_bad': {'token_index': [8], 'character_span': (33, 41), 'token': 'Scotland'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:


 45%|██████████████████████████████████▋                                          | 18/40 [01:39<01:52,  5.09s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  24599
Source sentence:  In an economy with reinvestment, what happens if we can take our advantages of trade and reinvest any excess so that we can create more apples and pears than we had to start with?
Reference:  In einem Wirtschaftssystem der Reinvestition, was geschieht, wenn wir unsere Handelvorteile verwenden und alle Überschüsse wiederinvestieren, damit wir mehr Äpfel und Birnen herstellen können, als wir zu Beginn hatten?
Good Translation:  Was wäre, wenn wir in einem Wirtschaftssystem der Reinvestition unsere Handelsvorteile nutzen und alle Überschüsse reinvestieren, damit wir mehr Früchte produzieren können, als wir begonnen haben?
Incorrect Translation:  Was wäre, wenn wir in einem Wirtschaftssystem der Reinvestition unsere Handelsvorteile nutzen und alle Überschüsse reinvestieren, damit wir mehr Bananen und Orangen produzieren können, als wir begonnen haben?
Suggested annotation:


 60%|██████████████████████████████████████████████▏                              | 24/40 [01:49<00:56,  3.51s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  24600
Source sentence:  You have granted, I am told, the King's commission to this man. His very tone betrayed the bitterness of his rancour. I'm sure you didn't grant the King's commission to this man, he said cheerfully.
Reference:  Sie haben gewährt, ich habe gesagt, das Engagement des Königs zu diesem Mann. Sein Ton verriet die Bitterkeit seines Grolls. Ich bin sicher, du hast diesem Mann nicht die Genehmigung des Königs erteilt, sagte er fröhlich.
Good Translation:  Sie haben, sagte ich, die Verpflichtung des Monarchen gegenüber diesem Mann gewährt. Sein Ton verriet die Bitterkeit seines Grolls. Ich bin sicher, Sie haben diesem Mann nicht die Zustimmung des Monarchen gegeben, sagte er fröhlich.
Incorrect Translation:  Sie haben, sagte ich, die Verpflichtung der Königin gegenüber diesem Mann gewährt. Sein Ton verriet die Bitterkeit seines Grolls. Ich bin sicher, Sie haben diesem Mann nicht

 62%|████████████████████████████████████████████████▏                            | 25/40 [01:58<01:01,  4.10s/it]

-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  24603
Source sentence:  and we keep our cans in one thing and glass in another and paper in another it once it gets full getting it loaded into the car and taking it is a pain. It's hard to load up the paper, glass and can after separating them into their own container.
Reference:  und wir trennen unsere Dosen, Glas und Papier und wenn die verschiedenen Behälter voll sind, werden sie ins Auto geladen und wir bringen sie weg, es ist nervig. Es ist schwierig, das Papier, Glas und die Dosen aufzuladen, nachdem man sie in ihre eigenen Behälter getrennt hat.
Good Translation:  und wir trennen unsere dosen, glas und papier und wenn die verschiedenen container voll sind, wird es ins fahrzeug geladen und wir nehmen es mit, das ist ärgerlich. Es ist schwierig, das Papier, das Glas und die Dose zu füllen, nachdem sie in ihre eigenen Behälter getrennt wurden.
Incorrect Translation:  und wir trennen unser



-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  24607
Source sentence:  The Republican National Convention in Houston was in August.
Reference:  Die republikanische Nationalversammlung in Houston fand im August statt.
Good Translation:  Im Sommer fand die Republikanische Nationalversammlung in Houston statt.
Incorrect Translation:  Im Juli fand die Republikanische Nationalversammlung in statt.
Suggested annotation:
[{'in_good': {'token_index': [1, 2, 3, 4, 5, 6, 7], 'character_span': (3, 65), 'token': 'Sommer fand die Republikanische Nationalversammlung in Houston'}, 'in_bad': {'token_index': [1, 2, 3, 4, 5, 6], 'character_span': (3, 55), 'token': 'Juli fand die Republikanische Nationalversammlung in'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:Im <Juli> fand die Republikanische Nationalversammlung in statt.
Source sentence:  The Republican National Convention in Houston



-----> For this sample we compare the Incorrect translation with the Good translation.


ID:  24612
Source sentence:  There is a rash that comes along with some STIs.
Reference:  Es gibt einen Ausschlag, der mit einigen Geschlechtskrankheiten einhergeht.
Good Translation:  Es gibt einen Ausschlag, der mit einigen Infektionen verbunden ist.
Incorrect Translation:  Es gibt einen Hautausschlag, der mit einigen Prionenerkrankungen in Verbindung gebracht wird.
Suggested annotation:
[{'in_good': {'token_index': [3, 4, 5, 6, 7, 8, 9], 'character_span': (14, 66), 'token': 'Ausschlag, der mit einigen Infektionen verbunden ist'}, 'in_bad': {'token_index': [3, 4, 5, 6, 7, 8, 9, 10, 11], 'character_span': (14, 92), 'token': 'Hautausschlag, der mit einigen Prionenerkrankungen in Verbindung gebracht wird'}}] 

To accept the suggested annotation click on enter. To skip this one enter skip. Otherwise enter anything else:d
Source sentence:  There is a rash that comes along with some STIs.
Reference:  E

100%|█████████████████████████████████████████████████████████████████████████████| 40/40 [04:08<00:00,  6.21s/it]


In [None]:
# The cells below are optional extras - there is no need to run them.

In [109]:
# To run normalization over the already annotated samples in coreference-based-on-commonsense
# No need to do that for manual annotation!
for idx, sample in annotations.items():
    # NOTE(review): process_sample() uses the return value of ref_or_good() as the
    # sentence itself, whereas three values are unpacked here - confirm which
    # matches the current annotation_utilities.
    good, _, _ = ref_or_good(sample["reference"], sample["good-translation"], sample["incorrect-translation"])
    bad = sample['incorrect-translation']
    change = sample['annotation']
    try:
        # NOTE(review): process_sample() also passes `maps` and `originals` to
        # standardize_annotation - confirm that they are optional.
        sample['annotation'] = standardize_annotation(change, good, bad)
    except Exception as err:
        # Report which sample failed and why; `except Exception` keeps a kernel
        # interrupt working, which a bare except would swallow.
        print(idx, repr(err))

In [112]:
# Write the current annotations to the checkpoint file as indented JSON,
# keeping non-ASCII characters as they are.
with open(checkpoint, "w+") as checkpoint_file:
    json.dump(
        annotations,
        checkpoint_file,
        indent=2,
        ensure_ascii=False,
    )

In [6]:
# Count the train samples of the phenomena listed below (the ones mapped to '?'
# in `phenomena`). Needs `dataset` from the setup cell, which is only loaded
# when a real phenomenon (not 'test') was entered.
# Not needed for manual annotation.
count = 0
for sample in dataset["train"]:
    if sample["phenomena"] not in (
        "lexical-overlap",
        'xnli-omission-neutral',
        'xnli-omission-contradiction',
        'xnli-addition-neutral',
        'xnli-addition-contradiction',
    ):
        continue
    count += 1