In [1]:
%%html
<style>
table {float:left}
</style>

In [2]:
import os
import spacy
from tqdm import tqdm
import xml.etree.ElementTree as et

In [3]:
import copy

def calc_precision(tp, fp):
    return tp/(tp + fp)

def calc_recall(tp, fn):
    return tp/(tp + fn)

def calc_fscore(precision, recall):
    return 2 * (precision * recall) / (precision + recall)

def evaluate(gold_truth_labels, predictions):
    # Counts of true positives, false positives & false negatives
    tp, fp, fn = 0, 0, 0
    
    # List with false positives and false negatives
    fps, fns = [], []
    
    for gold, pred in zip(gold_truth_labels, predictions):
        
        tp_tmp, fp_tmp, fn_tmp, fns_temp, fps_temp  = evaluate_one_article(gold, pred)
        
        tp += tp_tmp
        fp += fp_tmp
        fn += fn_tmp
        
        fns.extend(fns_temp)
        fps.extend(fps_temp) 
        
    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    f_score = calc_fscore(precision, recall)    
    
    print(f'fp: {fp} | tp: {tp} | fn: {fn}')
    print(f'precision: {precision:.3f} | recall: {recall:.3f} | f-score: {f_score:.3f}')
    
    return fps, fns  
    

def evaluate_one_article(gold_truth, prediction):
    
    gold = gold_truth['entities'].copy()
    pred = prediction['entities'].copy()
    
    # Counts of true positives, false positives & false negatives
    tp, fp, fn = 0, 0, 0
    
    # List with false positives and false negatives
    fps, fns = [], []
    
    
    i = 0
    
    while len(gold) > 0 and len(pred) > 0:
        i += 1

        # Check if the first two elements are the same
        if gold[0] == pred[0]:
            tp += 1
            gold.pop(0)
            pred.pop(0)
        
        else:
            # Grab the first appearing element
            element, source = (gold[0], 'gold') if gold[0]['start_pos'] < pred[0]['start_pos'] else (pred[0], 'pred')
            
            # Remove the element first appearing element
            if source == 'gold':
                fn += 1
                fns.append(element['text'])
                gold.remove(element)
            elif source == 'pred':
                fp += 1
                fps.append(element['text'])
                pred.remove(element)
    
    if len(gold) > 0:
        fn += 1
    elif len(pred) > 0:
        fp += 1
        
    return tp, fp, fn, fns, fps       

def run_flair(text):

    # make a sentence
    sentence = Sentence(text)

    # run NER over sentence
    tagger.predict(sentence)
    
    for entity in sentence.to_dict(tag_type='ner')['entities']:
        print(entity)

In [4]:
def load_file(file_path):
    """
    Loads file and returns all the articles
    """
    # Load the data
    tree = et.parse(file_path)
    root = tree.getroot()

    return root

def process_article(article, filtered, file_path):
    """
    Takes article and process into desired structure
    """
    if 'GeoWebNews' in file_path:
        if filtered:
            return {'text': article.find('text').text, 
                    'entities': sorted([{'text': top.find('extractedName').text, 
                                                      'start_pos': int(top.find('start').text), 
                                                      'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym') 
                                                     if top.find('latitude') != None and top.find('longitude') != None], key=lambda k: k['start_pos'])}
        
        else:
            return {'text': article.find('text').text, 
                    'entities': sorted([{'text': top.find('extractedName').text, 
                                                      'start_pos': int(top.find('start').text), 
                                                      'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')], key=lambda k: k['start_pos'])}
    
    
    elif not filtered:
        return {'text': article.find('text').text,
                'entities': sorted([{'text': top.find('phrase').text,
                            'start_pos': int(top.find('start').text),
                            'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')
                                 ], key=lambda k: k['start_pos'])}
        
    else:
        return {'text': article.find('text').text,
                'entities': sorted([{'text': top.find('phrase').text,
                            'start_pos': int(top.find('start').text),
                            'end_pos': int(top.find('end').text)} for top in article.findall('toponyms/toponym')
                             if top.find('gaztag/lat') != None and top.find('gaztag/lon') != None
                                 ], key=lambda k: k['start_pos'])}

def process_articles(root, filtered, file_path):
    """
    Takes articles and processes them into desired structure
    """
    data = []
    
    for article in root:
        
        data.append(process_article(article, filtered, file_path))
    
    return data

def prepare_data(file_path, filtered):
    
    root = load_file(file_path)
    
    data = process_articles(root, filtered, file_path)
    
    return data

In [5]:
def make_predictions_spacy(data):
    
    predictions = []
    
    texts = [article['text'] for article in data]
    
    for doc in tqdm(nlp.pipe(texts)):
        # Do something with the doc here
        
        pred = {'entities': [{'text': ent.text, 
                              'start_pos': len(doc[0:ent.end].text) - len(doc[ent.start]), 
                              'end_pos': len(doc[0:ent.end].text)} for ent in doc.ents if ent.label_ == 'LOC']}
        
        if pred:
            pred['text'] = doc
            
            predictions.append(pred)
        
    return predictions


### Load the default transfomer spacy model

In [6]:
# Only enable the ner tagger
nlp = spacy.load("en_core_web_lg", disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"])

## TR-News

In [7]:
# Get file path LGL dataset
file_path = '../../data/TR-News/TR-News.xml'

In [8]:
data_all_toponyms = prepare_data(file_path, filtered=False)
data_filtered_toponyms = prepare_data(file_path, filtered=True)

#### Predictions for TR-News

In [9]:
predictions_all_toponyms = make_predictions_spacy(data_all_toponyms)
predictions_filtered_toponyms = make_predictions_spacy(data_filtered_toponyms)

118it [00:01, 71.91it/s]
118it [00:01, 79.46it/s]


#### Results TR-News & Comparison

In [10]:
# only toponyms w/ lat/long
fps, fns = evaluate(data_filtered_toponyms, predictions_filtered_toponyms)

fp: 38 | tp: 18 | fn: 378
precision: 0.321 | recall: 0.045 | f-score: 0.080


In [11]:
fps

['the South Regional Jail',
 'Atlantic',
 'Southern District',
 'Eastside',
 'Central\n            Anatolia',
 'the South Pacific',
 'the Central Coast',
 'West Coast',
 'the Saint Peter and Saint Paul Coptic Orthodox Church',
 'Southern California',
 'Red Deer',
 'Western Canada’s',
 'Pacific',
 'West',
 'North America',
 'Easter',
 'Easter',
 'the Sunshine Coast',
 'Western Australia',
 'the Northern Territory',
 'Cedar Flat Rd',
 'the Peace River',
 'Peace River Valley',
 'Mile Hill',
 'the South Plains College',
 'Eastern Cape',
 'Europe',
 'Plantation Lakes Apartments',
 'Lakes Edge Apartments',
 'Lakes Edge',
 'Hypermarket',
 'South']

In [12]:
fns

['Oxford',
 'North Carolina',
 'West Virginia',
 'Oxford',
 'Granville County',
 'West Virginia',
 'Lewisburg',
 'West Virginia',
 'North Carolina',
 'West Virginia',
 'AUSTIN',
 'Texas',
 'Washington',
 'Texas',
 'College Station',
 'Texas',
 'Russia',
 'Texas',
 'Texas',
 'Ohio',
 'U.S.',
 'U.S.',
 'Cincinnati',
 'West Chester',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'Xavier University',
 'United States',
 'British',
 'U.S.',
 'European',
 'Syria',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'U.S.',
 'Alberta',
 'Calgary',
 'Calgary',
 'Alberta',
 'Red Deer',
 'Vancouver',
 'Canada',
 'Vancouver',
 'Turkey',
 'Kayseri',
 'Istanbul',
 'Kayseri',
 'Anatolia',
 'German',
 'Iraqi',
 'Ludwigshafen',
 'Ludwigshafen',
 'Rhineland-Palatinate',
 'Ludwigshafen',
 'Iraqi',
 'German',
 'Germany',
 'Iran',
 'Australia',
 'South Pacific',
 'Singapore',
 'Singapore',
 'Singapore',
 'Singapore',
 'South East Asian',
 'BERLIN',
 'Berlin',
 'Beijing',
 'China',
 'Berlin',
 'German',
 'European',
 'American',
 'China

In [13]:
# all toponyms
fps, fns = evaluate(data_all_toponyms, predictions_all_toponyms)

fp: 38 | tp: 18 | fn: 391
precision: 0.321 | recall: 0.044 | f-score: 0.077


In [14]:
fps

['the South Regional Jail',
 'Atlantic',
 'Southern District',
 'Eastside',
 'Central\n            Anatolia',
 'the South Pacific',
 'the Central Coast',
 'West Coast',
 'the Saint Peter and Saint Paul Coptic Orthodox Church',
 'Southern California',
 'Red Deer',
 'Western Canada’s',
 'Pacific',
 'West',
 'North America',
 'Easter',
 'Easter',
 'the Sunshine Coast',
 'Western Australia',
 'the Northern Territory',
 'Cedar Flat Rd',
 'the Peace River',
 'Peace River Valley',
 'Mile Hill',
 'the South Plains College',
 'Eastern Cape',
 'Europe',
 'Plantation Lakes Apartments',
 'Lakes Edge Apartments',
 'Lakes Edge',
 'Hypermarket',
 'South']

In [15]:
fns

['Oxford',
 'North Carolina',
 'West Virginia',
 'Oxford',
 'Granville County',
 'West Virginia',
 'Lewisburg',
 'West Virginia',
 'North Carolina',
 'West Virginia',
 'AUSTIN',
 'Texas',
 'Washington',
 'Texas',
 'College Station',
 'Texas',
 'Russia',
 'Texas',
 'Texas',
 'Ohio',
 'U.S.',
 'U.S.',
 'Cincinnati',
 'West Chester',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'Xavier University',
 'United States',
 'British',
 'U.S.',
 'European',
 'Syria',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'U.S.',
 'Alberta',
 'Calgary',
 'Calgary',
 'Alberta',
 'Red Deer',
 'Vancouver',
 'Canada',
 'Vancouver',
 'Turkey',
 'Kayseri',
 'Istanbul',
 'Kayseri',
 'Anatolia',
 'German',
 'Iraqi',
 'Ludwigshafen',
 'Ludwigshafen',
 'Rhineland-Palatinate',
 'Ludwigshafen',
 'Iraqi',
 'German',
 'Germany',
 'Iran',
 'Australia',
 'South Pacific',
 'Singapore',
 'Singapore',
 'Singapore',
 'Singapore',
 'South East Asian',
 'BERLIN',
 'Berlin',
 'Beijing',
 'China',
 'Berlin',
 'German',
 'European',
 'American',
 'China

##### scoring overview

|   Geoparser Name                 | Precision | Recall | F-Score |
|:-----------------               |:---------:|:------:|:-------:|
| StanfordNERparser                     |   0.890   |  0.731 |  0.803  |
| TopoClusterparser                     |   0.883   |  0.714 |  0.790  |
|__Flair (all toponyms)__               |__0.803__  |__0.699__|__0.748__|
|   CamCoderparser                      |   0.897   |  0.638 |  0.746  |
|__Flair MULTI ner (all toponyms)__     |__0.779__  |__0.694__|__0.739__|
|__Flair MULTI ner FAST (all toponyms)__|__0.802__  |__0.683__|__0.738__|
|__Flair (filtered)__                   |__0.773__  |__0.695__|__0.732__|
|   DBpediaparser                       |   0.861   |  0.631 |  0.728  |
|__Flair MULTI ner (filtered)__         |__0.761__  |__0.691__|__0.724__|
|__Flair MULTI ner FAST (filtered)__    |__0.772__  |__0.679__|__0.722__|
|    CLAVINparser                       |   0.908   |  0.505 |  0.649  |
|  Edinburghparser                      |   0.709   |  0.538 |  0.612  |
|__SpaCy MULTI ner (filtered)__         |__0.612__  |__0.561__|__0.586__|
|__SpaCy MULTI ner  (all)__             |__0.611__  |__0.542__|__0.575__|
|   SpaCyNERparser                      |   0.659   |  0.402 |  0.500  |

## LGL

In [16]:
# Get file path LGL dataset
file_path = '../../data/LGL/LGL.xml'

In [17]:
data_all_toponyms = prepare_data(file_path, filtered=False)
data_filtered_toponyms = prepare_data(file_path, filtered=True)

#### Predictions for LGL

In [18]:
predictions_all_toponyms = make_predictions_spacy(data_all_toponyms)
predictions_filtered_toponyms = make_predictions_spacy(data_filtered_toponyms)

588it [00:07, 83.40it/s]
588it [00:06, 88.58it/s]


#### Results LGL & Comparison

In [19]:
# only toponyms w/ lat/long
fps, fns = evaluate(data_filtered_toponyms, predictions_filtered_toponyms)

fp: 271 | tp: 27 | fn: 1748
precision: 0.091 | recall: 0.015 | f-score: 0.026


In [20]:
fps

['Twin Lakes - Highway',
 'the Red River',
 'Red River',
 'the Red River Valley',
 'Red River',
 'Red River',
 'North Nokomis Street',
 'Old Town',
 'Old Town',
 'Bronx Latin',
 'Old Town',
 'Southern Sudan',
 'Sub-Saharan Africa',
 'the Middle East',
 'Gulf',
 'the Middle East',
 'Gulf',
 'Gulf',
 'Gulf',
 'the Middle East',
 'the High Dam',
 'Hudson',
 'Evergreen Lake',
 'North Texas',
 'North Texas',
 'North Arlington Neighbors',
 'South Arlington',
 'Northeast Georgia',
 'Northeast Georgia',
 'North Lumpkin Street',
 'Northeast Georgia',
 'Northeast Georgia',
 'West',
 'Metro Nashville',
 'Illinois Route',
 'District Drive',
 'New England',
 'the Naugatuck River',
 'North Main Street',
 'the Huntingdon Sea Cadets',
 'Mount Gilead',
 'the Kanawha Valley',
 'Knights Inn',
 'Kanawha River',
 'the Kanawha River',
 'Tri-State',
 'Tri-State',
 'Tri-State',
 'Plains West Wilderness',
 'the Greenbrier River Valley',
 'the Cranberry Wilderness',
 'the Dry Fork River',
 'the Otter Creek Wild

In [21]:
fns

['Mahnomen',
 'Twin Lakes',
 'Fargo',
 'Alexandria',
 'Alexandria',
 'Red River',
 'Fargo',
 'Moorhead',
 'Red River',
 'Fargo',
 'Nebraska',
 'Fargo',
 'Fargo',
 'Iraq',
 'Afghanistan',
 'North Dakota',
 'Oakport Township',
 'Moorhead',
 'Red River',
 'Minnesota',
 'Oakport',
 'Moorhead',
 'Minnesota',
 'Fargo',
 'Fargo',
 'Red River',
 'Douglas County',
 'Alexandria',
 'Lake Vermont',
 'Brandon',
 'Kensington',
 'Freeborn Lake',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Washington',
 'D.C.',
 'New York City',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Egypt',
 'Sudan',
 'Darfur',
 'Egypt',
 'Sudan',
 'Sudan',
 'Egypt',
 'Egypt',
 'Sudanese',
 'Cairo',
 'Sudan',
 'Chinese',
 'Egypt',
 'Sudan',
 'Africa',
 'Egypt',
 'Middle East',
 'Egypt',
 'Egyptian',
 'US',
 'Egyptian',
 'US',
 'Egyptian',
 'Israeli',
 'Middle East',
 'Iraq',
 'Kuwait',
 'Egypt',
 'Egypt',
 'Egypt',
 'Palestinian',
 'Israeli',
 'US',
 'Palestinian',
 'US',
 'Palestini

In [22]:
# all toponyms
fps, fns = evaluate(data_all_toponyms, predictions_all_toponyms)

fp: 257 | tp: 44 | fn: 1978
precision: 0.146 | recall: 0.022 | f-score: 0.038


In [23]:
fps

['Twin Lakes - Highway',
 'the Red River',
 'Red River',
 'the Red River Valley',
 'Red River',
 'Red River',
 'Freeborn Lake',
 'North Nokomis Street',
 'Old Town',
 'Old Town',
 'Bronx Latin',
 'Old Town',
 'Southern Sudan',
 'Sub-Saharan Africa',
 'the Middle East',
 'Gulf',
 'the Middle East',
 'Gulf',
 'Gulf',
 'Gulf',
 'the Middle East',
 'the High Dam',
 'Hudson',
 'Evergreen Lake',
 'North Texas',
 'North Texas',
 'North Arlington Neighbors',
 'South Arlington',
 'Northeast Georgia',
 'Northeast Georgia',
 'North Lumpkin Street',
 'Sunny Hills Drive',
 'Northeast Georgia',
 'Northeast Georgia',
 'West',
 'Metro Nashville',
 'Illinois Route',
 'District Drive',
 'Hot Springs Drive',
 'New England',
 'the Naugatuck River',
 'North Main Street',
 'the Huntingdon Sea Cadets',
 'Mount Gilead',
 'Recreation District',
 'the Kanawha Valley',
 'Knights Inn',
 'Kanawha River',
 'the Kanawha River',
 'Tri-State',
 'Tri-State',
 'Tri-State',
 'Plains West Wilderness',
 'the Greenbrier Riv

In [24]:
fns

['Highway 200',
 'Mahnomen',
 'Mahnomen County Road',
 'Mahnomen County Road',
 'Twin Lakes',
 'Highway 10',
 'Fargo',
 'Alexandria',
 'Alexandria',
 'Red River',
 'Fargo',
 'Moorhead',
 'Red River',
 'Fargo',
 'Red River Valley',
 'Nebraska',
 'Fargo',
 'Fargo',
 'Iraq',
 'Afghanistan',
 '40th Avenue South',
 'North Dakota',
 'Oakport Township',
 'Moorhead',
 'Red River',
 'Minnesota',
 'Oakport',
 'Moorhead',
 'Minnesota',
 'Fargo',
 'Fargo',
 'Red River',
 'Douglas County',
 'County Road 35',
 'Alexandria',
 'Lake Vermont',
 'County Road 56',
 'County Road 15',
 'Brandon',
 'County Road 96',
 'Kensington',
 'Freeborn Lake',
 'Nokomis',
 'Alexandria',
 'North Nokomis Street',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Alexandria',
 'Old Town',
 'Alexandria',
 'Old Town',
 'Washington',
 'D.C.',
 'New York City',
 'Old Town',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Egypt',
 'Sudan',
 'Darfur',
 'Egypt',
 'Sudan',
 'Sudan',
 'Egypt',
 'Egypt',
 'Sudanese',
 'Cairo',
 'Sudan',
 'C

##### scoring overview
 
| Geoparser Name                          | Precision | Recall  | F-Score |
|:-------------------                     |-----------|-------- |---------|
|__Flair (all)__                          |__0.763__  |__0.676__|__0.717__|
| DBpediaparser                           | 0.813     | 0.635   | 0.713   |
|__Flair MULTI(all)__                     |__0.761__  |__0.663__|__0.708__|
|__Flair MULTI FAST(all)__                |__0.770__  |__0.644__|__0.701__|
| StanfordNERparser                       | 0.744     | 0.622   | 0.677   |
| TopoClusterparser                       | 0.763     | 0.577   | 0.657   |
|__Flair (filtered)__                     |__0.660__  |__0.653__|__0.657__|
|__Flair MULTI(filtered)__                |__0.666__  |__0.646__|__0.656__|
| CamCoderparser                          | 0.811     | 0.548   | 0.654   |
|__Flair MULTI FAST (filtered)__          |__0.674__  |__0.629__|__0.651__|
| CLAVINparser                            | 0.808     | 0.444   | 0.573   |
| Edinburghparser                         | 0.723     | 0.383   | 0.501   |
| SpaCyNERparser                          | 0.493     | 0.371   | 0.423   |
|__SpaCy MULTI(filtered)__                |__0.415__  |__0.416__|__0.416__|
|__SpaCy MULTI FAST (all)__               |__0.419__  |__0.376__|__0.397__|

## GeoWebNews

In [25]:
# Get file path LGL dataset
file_path = '../../data/GeoWebNews/GeoWebNews.xml'

In [26]:
data_all_toponyms = prepare_data(file_path, filtered=False)
data_filtered_toponyms = prepare_data(file_path, filtered=True)

#### Predictions for GeoWebNews

In [27]:
predictions_all_toponyms = make_predictions_spacy(data_all_toponyms)
predictions_filtered_toponyms = make_predictions_spacy(data_filtered_toponyms)

200it [00:03, 65.04it/s]
200it [00:02, 69.07it/s]


#### Results GeoWebNews & Comparison

In [28]:
# only toponyms w/ lat/long
fps, fns = evaluate(data_filtered_toponyms, predictions_filtered_toponyms)

fp: 68 | tp: 69 | fn: 852
precision: 0.504 | recall: 0.075 | f-score: 0.130


In [29]:
fps

['the Mississippi River',
 'Frenchmen Street',
 'Fire Island',
 'West Coast',
 'North America',
 'West Coast',
 'West Coast',
 'the Middle East',
 'West',
 'Asia-Pacific',
 'Tamil Nadu',
 'Kamenné Square',
 'the Middle East',
 'the Middle East',
 'the Middle East',
 'the Middle East',
 'Ocean Infinity',
 'the Indian Ocean',
 'Indian Ocean',
 'the North West',
 'the North West',
 'New Quay',
 'New Quay',
 'Vatican Gardens',
 'the Greek islands',
 'the Nile Delta',
 'the Korean Peninsula',
 'Korean Peninsula',
 'Korean Peninsula',
 'the Korean Peninsula',
 'Connecting California',
 'South America',
 'North Fourth Street',
 'Red Oak',
 'the Pacific Ocean',
 'West Surrey',
 'Western Cape SAPS',
 'Western Cape',
 'the Eastern Cape',
 'the Tijuana River',
 'North America’s',
 'Eastern Ghouta',
 'Eastern Ghouta',
 'Glenrock Lagoon',
 'the Middle East',
 'the Middle East',
 'West Coast',
 'Thames Valley',
 'Northern Europe',
 'the Safety Bay',
 'South Side',
 'River North',
 'North State Stree

In [30]:
fns

['Louisiana',
 'French',
 'French Quarter',
 'Mississippi River',
 'Faubourg Marigny',
 'German',
 'Irish',
 'Rue d’Enghein',
 'Lafayette',
 'Almonaster',
 'Franklin',
 'Marigny Plantation',
 'Press Street',
 'Faubourg D’Aunoy',
 'Chartres',
 'Franklin',
 'Methodist church',
 'Royal Street',
 'Royal Street',
 'Rue Casa Calvo',
 'Faubourg Marigny',
 '2231 Royal',
 'Greek',
 'Elysian Fields',
 'Royal Street',
 'Bourbon',
 'Dauphine',
 'Desire Street',
 'Bywater',
 'Elysian Fields',
 'New Orleans Railways and Light Company Claiborne Power House',
 'French',
 'Marigny Canal',
 'Washington Square',
 'Champs-Élysées',
 'Elysian Fields',
 'Pontchartrain Railroad',
 'Appalachians',
 'Royal Street',
 'Washington Square',
 'Holy Redeemer Church',
 'Third Presbyterian Church',
 'Frenchmen Street',
 'Abuja',
 'African',
 'Nigeria',
 'African',
 'African',
 'Nigeria',
 'African',
 'African',
 'African',
 'African',
 'Mediterranean',
 'Oceanside',
 'Fire Island',
 'France',
 'EU',
 'Brussels',
 'Bel

In [31]:
# all toponyms
fps, fns = evaluate(data_all_toponyms, predictions_all_toponyms)

fp: 67 | tp: 70 | fn: 1618
precision: 0.511 | recall: 0.041 | f-score: 0.077


In [32]:
fps

['the Mississippi River',
 'Frenchmen Street',
 'Fire Island',
 'West Coast',
 'North America',
 'West Coast',
 'West Coast',
 'the Middle East',
 'West',
 'Asia-Pacific',
 'Tamil Nadu',
 'Kamenné Square',
 'the Middle East',
 'the Middle East',
 'the Middle East',
 'the Middle East',
 'Ocean Infinity',
 'the Indian Ocean',
 'Indian Ocean',
 'the North West',
 'the North West',
 'the Pilanesburg Mountains',
 'New Quay',
 'New Quay',
 'New Quay',
 'Conleth Hill',
 'Vatican Gardens',
 'the Greek islands',
 'the Nile Delta',
 'the Korean Peninsula',
 'Korean Peninsula',
 'Korean Peninsula',
 'the Korean Peninsula',
 'the Korean Peninsula',
 'Latin America',
 'Connecting California',
 'South America',
 'North Fourth Street',
 'Red Oak',
 'the Pacific Ocean',
 'West Surrey',
 'Western Cape SAPS',
 'Western Cape',
 'the Eastern Cape',
 'Eastern Cape',
 'the Tijuana River',
 'North America’s',
 'Eastern Ghouta',
 'Eastern Ghouta',
 'Glenrock Lagoon',
 'the Middle East',
 'the Middle East',
 '

In [33]:
fns

['area',
 'plantation',
 'mansion',
 'substation',
 'Louisiana',
 'Louisiana Purchase',
 'parcel',
 'French',
 'plat',
 'French Quarter',
 'Mississippi River',
 'squares',
 'neighborhood',
 'city',
 'Faubourg Marigny',
 'community',
 'African Americans',
 'German',
 'Irish',
 'populations',
 'blocks',
 'residents',
 'blocks',
 'intersection',
 'mills',
 'plant',
 'stables',
 'factory',
 'barn',
 'streets',
 'Rue d’Enghein',
 'street',
 'Lafayette',
 'Almonaster',
 'Franklin',
 'avenue',
 'Marigny Plantation',
 'faubourg',
 'Press Street',
 'area',
 'Faubourg D’Aunoy',
 'neighbor',
 'building',
 'Chartres',
 'Franklin',
 'Methodist church',
 'restaurant',
 'street',
 'complex',
 'Royal Street',
 'Royal Street',
 'Rue Casa Calvo',
 'Faubourg Marigny',
 '2231 Royal',
 '2231 Royal',
 'townhouse',
 'basement',
 'Greek',
 'structures',
 'Elysian Fields',
 'Royal Street',
 'Bourbon',
 'Dauphine',
 'Desire Street',
 'Bywater',
 'system',
 'neighborhoods',
 'edifice',
 'Elysian Fields',
 'New O

##### scoring overview
|   Geoparser Name                 | Precision | Recall  | F-Score |
|:-----------------                |:---------:|:------: |:-------:|
|__Flair(filtered)__               |__0.901__  |__0.662__|__0.763__|
|__Flair MULTI ner(filtered)__     |__0.901__  |__0.662__|__0.763__|
|__Flair MULTI FAST ner(filtered)__|__0.906__  |__0.638__|__0.749__|
| StanfordNERparser                |   0.885   |  0.635  |  0.739  |
|   CamCoderparser                 |   0.895   |  0.562  |  0.691  |
| TopoClusterparser                |   0.838   |  0.559  |  0.670  |
|  Edinburghparser                 |   0.819   |  0.538  |  0.650  |
|   DBpediaparser                  |   0.847   |  0.510  |  0.637  |
|    CLAVINparser                  |   0.909   |  0.394  |  0.549  |
|__SpaCy MULTI ner(filtered)__     |__0.564__  |__0.454__|__0.503__|
|   SpaCyNERparser                 |   0.561   |  0.389  |  0.460  |
|__Flair(all)__                    |__0.911__  |__0.293__|__0.443__|
|__Flair MULTI ner(all)__          |__0.912__  |__0.289__|__0.439__|
|__Flair MULTI FAST ner(all)__     |__0.912__  |__0.280__|__0.428__|
|__SpaCy MULTI ner(all)__          |__0.582__  |__0.200__|__0.298__|