In [22]:
%%html
<style>
table {float:left}
</style>

In [13]:
import os
from tqdm import tqdm
import xml.etree.ElementTree as et

#### Functions

In [4]:
import copy

def calc_precision(tp, fp):
    return tp/(tp + fp)

def calc_recall(tp, fn):
    return tp/(tp + fn)

def calc_fscore(precision, recall):
    return 2 * (precision * recall) / (precision + recall)

def evaluate(gold_truth_labels, predictions):
    # Counts of true positives, false positives & false negatives
    tp, fp, fn = 0, 0, 0
    
    # List with false positives and false negatives
    fps, fns = [], []
    
    for gold, pred in zip(gold_truth_labels, predictions):
        
        tp_tmp, fp_tmp, fn_tmp, fns_temp, fps_temp  = evaluate_one_article(gold, pred)
        
        tp += tp_tmp
        fp += fp_tmp
        fn += fn_tmp
        
        fns.extend(fns_temp)
        fps.extend(fps_temp) 
        
    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    f_score = calc_fscore(precision, recall)    
    
    print(f'fp: {fp} | tp: {tp} | fn: {fn}')
    print(f'precision: {precision} | recall: {recall} | f-score: {f_score}')
    
    return fps, fns  
    

def evaluate_one_article(gold_truth, prediction):
    
    gold = gold_truth['entities'].copy()
    pred = prediction['entities'].copy()
    
    # Counts of true positives, false positives & false negatives
    tp, fp, fn = 0, 0, 0
    
    # List with false positives and false negatives
    fps, fns = [], []
    
    
    i = 0
    
    while len(gold) > 0 and len(pred) > 0:
        i += 1

        # Check if the first two elements are the same
        if gold[0] == pred[0]:
            tp += 1
            gold.pop(0)
            pred.pop(0)
        
        else:
            # Grab the first appearing element
            element, source = (gold[0], 'gold') if gold[0]['start_pos'] < pred[0]['start_pos'] else (pred[0], 'pred')
            
            # Remove the element first appearing element
            if source == 'gold':
                fn += 1
                fns.append(element['text'])
                gold.remove(element)
            elif source == 'pred':
                fp += 1
                fps.append(element['text'])
                pred.remove(element)
    
    if len(gold) > 0:
        fn += 1
    elif len(pred) > 0:
        fp += 1
        
    return tp, fp, fn, fns, fps       

def run_flair(text):

    # make a sentence
    sentence = Sentence(text)

    # run NER over sentence
    tagger.predict(sentence)
    
    for entity in sentence.to_dict(tag_type='ner')['entities']:
        print(entity)

### Load the ner-multi model

In [10]:
from flair.data import Sentence
from flair.models import SequenceTagger

# load tagger
tagger = SequenceTagger.load("flair/ner-multi")

# make example sentence in any of the four languages
sentence = Sentence("George Washington ging nach Washington")

# predict NER tags
tagger.predict(sentence)

## TR-News

In [7]:
# Get file path LGL dataset
file_path = '../../data/TR-News/TR-News.xml'

# Load the data
tree = et.parse(file_path)
root = tree.getroot()

# Grab example title
example = root[0][0].text
example

'Policeman shot dead after assassinating Russian ambassador to Turkey, shouting ‘Don’t forget Aleppo!’\n        '

#### Ground truth labels

In [19]:
all_ground_truth = []

for article in root:
    
    gold_truth = {'text': article.find('text').text,
                  'entities': sorted([{'text': top.find('phrase').text,
                                'start_pos': int(top.find('start').text),
                                'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')
                                 if top.find('gaztag/lat') != None and top.find('gaztag/lon') != None
                                     ], key=lambda k: k['start_pos'])}
    
    
    all_ground_truth.append(gold_truth)

#### Predictions for TR-News

In [20]:
predictions = []

for article in tqdm(all_ground_truth):
    
    text = article['text']
    
    # make a sentence
    sentence = Sentence(text)
    
    # run NER over sentence
    tagger.predict(sentence)
    
    pred = sentence.to_dict(tag_type='ner')
    pred['entities'] = [entity for entity in pred['entities'] if entity['labels'][0].value == 'LOC']
    [entity.pop('labels') for entity in pred['entities']]
    pred.pop('labels')
    
    predictions.append(pred)

100%|████████████████████████████████████████████████████████████████████████████████| 118/118 [01:28<00:00,  1.33it/s]


#### Results TR-News & Comparison

In [21]:
# only toponyms w/ lat/long
fps, fns = evaluate(all_ground_truth, predictions)

fp: 272 | tp: 866 | fn: 387
precision: 0.7609841827768014 | recall: 0.6911412609736632 | f-score: 0.7243831033040569


In [18]:
# all toponyms
fps, fns = evaluate(all_ground_truth, predictions)

fp: 241 | tp: 900 | fn: 396
precision: 0.7887817703768624 | recall: 0.6944444444444444 | f-score: 0.7386130488305294


## LGL

In [33]:
# Get file path LGL dataset
file_path = '../../data/LGL/LGL.xml'

# Load the data
tree = et.parse(file_path)
root = tree.getroot()

#### Ground truth labels

In [37]:
all_ground_truth = []

for article in root:
    
    gold_truth = {'text': article.find('text').text,
                  'entities': sorted([{'text': top.find('phrase').text,
                                'start_pos': int(top.find('start').text),
                                'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')
                                              if top.find('gaztag/lat') != None and top.find('gaztag/lon') != None
                                     ], key=lambda k: k['start_pos'])}
    
    
    all_ground_truth.append(gold_truth)

#### Predictions for LGL

In [38]:
predictions = []

for article in tqdm(all_ground_truth):
    
    text = article['text']
    
    # make a sentence
    sentence = Sentence(text)
    
    # run NER over sentence
    tagger.predict(sentence)
    
    pred = sentence.to_dict(tag_type='ner')
    pred['entities'] = [entity for entity in pred['entities'] if entity['labels'][0].value == 'LOC']
    [entity.pop('labels') for entity in pred['entities']]
    pred.pop('labels')
    
    predictions.append(pred)
    
    

100%|████████████████████████████████████████████████████████████████████████████████| 588/588 [07:24<00:00,  1.32it/s]


#### Results LGL & comparison

In [39]:
# filter toponyms (only w/ lat & long)
fps, fns = evaluate(all_ground_truth, predictions)

fp: 1411 | tp: 2818 | fn: 1541
precision: 0.6663513833057461 | recall: 0.6464785501261757 | f-score: 0.656264555193293


In [36]:
# all toponyms
fps, fns = evaluate(all_ground_truth, predictions)

fp: 1035 | tp: 3298 | fn: 1679
precision: 0.7611354719593815 | recall: 0.6626481816355234 | f-score: 0.708485499462943


In [None]:
fns

In [None]:
fps

## GeoWebNews

In [26]:
# Get file path LGL dataset
file_path = '../../data/GeoWebNews/GeoWebNews.xml'

# Load the data
tree = et.parse(file_path)
root = tree.getroot()

#### Ground truth labels

In [30]:
all_ground_truth = []

for article in root:
    
    gold_truth = {'text': article.find('text').text,
                  'entities': sorted([{'text': top.find('extractedName').text,
                                'start_pos': int(top.find('start').text),
                                'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')
#                                               if top.find('latitude') != None and top.find('longitude') != None
                                     ], key=lambda k: k['start_pos'])}
    
    
    all_ground_truth.append(gold_truth)

#### Predictions GeoWebNews

In [31]:
predictions = []

for article in tqdm(all_ground_truth):
    
    text = article['text']
    
    # make a sentence
    sentence = Sentence(text)
    
    # run NER over sentence
    tagger.predict(sentence)
    
    pred = sentence.to_dict(tag_type='ner')
    pred['entities'] = [entity for entity in pred['entities'] if entity['labels'][0].value == 'LOC']
    [entity.pop('labels') for entity in pred['entities']]
    pred.pop('labels')
    
    predictions.append(pred)
    
    

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [03:12<00:00,  1.04it/s]


#### Results GeoWebNews

In [29]:
# only toponyms with long / lat info
fps, fns = evaluate(all_ground_truth, predictions)

fp: 181 | tp: 1652 | fn: 876
precision: 0.9012547735951991 | recall: 0.6534810126582279 | f-score: 0.7576243980738363


In [32]:
# all toponyms --> fn much higher because many annotated toponyms aren't locations (not sure why this is)
fps, fns = evaluate(all_ground_truth, predictions)

fp: 162 | tp: 1674 | fn: 4121
precision: 0.9117647058823529 | recall: 0.288869715271786 | f-score: 0.438736731752064
