In [1]:
import sys

# Make the helper modules in the utils folder importable.
# The path is relative, so it is resolved against the kernel's working directory.
# The membership check keeps the cell idempotent: re-running it in a live kernel
# no longer pushes a duplicate '../utils' entry onto sys.path each time.
utils_dir = '../utils'
if utils_dir not in sys.path:
    sys.path.insert(1, utils_dir)

## Loading Fine-tuned mBERT model and predictions for non-labelled dataset

In [2]:
from transformers import AutoModelForTokenClassification

# Directory of the fine-tuned checkpoint; the tokenizer cells load from this path as well.
model_path = '../models/ner-multilingual-bert-fine-tuned-conll-2003'

# BIO tag inventory: 'O' plus a B-/I- pair for each of PER, ORG, LOC and MISC.
# Its length sets num_labels below.
label_list = [
    'O',
    'B-PER', 'I-PER',
    'B-ORG', 'I-ORG',
    'B-LOC', 'I-LOC',
    'B-MISC', 'I-MISC',
]

model = AutoModelForTokenClassification.from_pretrained(
    model_path,
    num_labels=len(label_list),
)

## TR-News

### Loading the dataset

In [3]:
import loading_functions

# Location of the TR-News corpus (relative to the kernel's working directory)
file_path = '../../../data/TR-News/TR-News.xml'

# Load the corpus twice with split=True: first with filtered=False, then with filtered=True.
data_all_toponyms, data_filtered_toponyms = (
    loading_functions.prepare_data(file_path, filtered=use_filter, split=True)
    for use_filter in (False, True)
)

### Processing the data for Huggingface Trainer

In [4]:
# Load the tokenizer from the same model_path the fine-tuned model was loaded from,
# so tokenizer and model come from one checkpoint directory.
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [5]:
import preparing_dataset

# Build the Trainer-ready datasets for TR-News: unfiltered annotations first,
# then the filtered ones, both with the same tokenizer.
TRN, TRN_filtered = (
    preparing_dataset.prepare_dataset(corpus, tokenizer)
    for corpus in (data_all_toponyms, data_filtered_toponyms)
)

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

### Prepare evaluation trainer for predictions

In [6]:
from transformers import DataCollatorForTokenClassification, Trainer

# Collator that batches the tokenized examples for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Prediction-only Trainer: no TrainingArguments are supplied, so the library defaults apply.
test_trainer = Trainer(model=model, data_collator=data_collator)

### Make Predictions

In [7]:
import numpy as np

# predict() returns a (predictions, label_ids, metrics) result; only the raw model
# outputs are kept. argmax over axis 2 turns them into one label id per position
# (presumably the array is batch x token x label - confirm).
raw_pred = test_trainer.predict(TRN).predictions
predictions = np.argmax(raw_pred, axis=2)

raw_pred_filtered = test_trainer.predict(TRN_filtered).predictions
predictions_filtered = np.argmax(raw_pred_filtered, axis=2)

### Process predictions

In [8]:
import process_predictions

# Post-process the predicted label ids of each TR-News variant together with the
# dataset they were predicted on; label_list and tokenizer are shared by both calls.
processed_results, processed_results_filtered = (
    process_predictions.process_predictions(label_ids, dataset, label_list, tokenizer)
    for label_ids, dataset in (
        (predictions, TRN),
        (predictions_filtered, TRN_filtered),
    )
)

### Evaluation TR-News

In [9]:
# NOTE(review): `evaluate` is expected to be the project module made importable by the
# sys.path.insert(1, '../utils') call in the first cell, not the Hugging Face package of
# the same name - confirm which one is picked up in this environment.
import evaluate

# All toponyms: score the processed predictions against the annotations loaded with
# filtered=False. The call prints the fp/tp/fn counts and precision/recall/f-score,
# and returns two lists that are kept as fps and fns for inspection in later cells.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 298 | tp: 954 | fn: 341
precision: 0.762 | recall: 0.737 | f-score: 0.749


In [10]:
# Display the first list returned by the all-toponym TR-News evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['-',
 'White House',
 'South Regional Jail',
 'U',
 '.',
 'S',
 '.',
 'Rose Garden',
 'Southern Poverty Law Center',
 'Ronald Reagan Building',
 'U',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'Islamic State',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'Southern',
 'Wooster St.',
 'Cumberland Farms',
 'South Main St',
 '-',
 'North West',
 'Kremlin',
 'The',
 'Kremlin',
 'Downtown Eastside',
 'London City',
 'London City',
 'Central Anatolia',
 'Rhineland',
 '-',
 'Palatinate',
 'Dolby Theatre',
 'Air Berlin',
 'B',
 '.',
 'C',
 '.',
 'Macdonald',
 '-',
 'Cartier International Airport',
 'Montreal',
 '-',
 'Pierre Elliott Trudeau International Airport',
 'Mont',
 '-',
 'Royal Avenue',
 'Sage House',
 "Children's Hospital of Manitoba",
 "' s Hospital",
 'St',
 'Phoenix',
 'Phoenix',
 'Que',
 '.',
 'Crusader',
 'Karak',
 'Paris Town Hall',
 'Nazi Germany',
 'Petit Cambodge',
 'St',
 '. Peter',
 'St. Paul',
 'St',
 '. Mark',
 'C

In [11]:
# Display the second list returned by the all-toponym TR-News evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['Turkish',
 'Turkish',
 'Syrian',
 'Syrian',
 'U.S.',
 'Turkish',
 'Kurdish',
 'Turkish',
 'Russian',
 'Syrian',
 'Russian',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Russian',
 'Turkish',
 'Russian',
 'Russian',
 'Russian',
 'Syrian',
 'Granville County',
 'WASHINGTON',
 'U.S.',
 'Texas',
 'Texas',
 'Texas',
 'U.S.',
 'U.S.',
 'U.S.',
 'U.S.',
 'U.S.',
 'Xavier University',
 'British',
 'U.S.',
 'European',
 'U.S.',
 'U.S.',
 'U.S.',
 'DETROIT',
 'U.S.',
 'U.S.',
 'New York',
 'Michigan',
 'Cuban',
 'BANTAM',
 'Bantam',
 'New Milford',
 'TORRINGTON',
 'Russian',
 'Russian',
 'Russian',
 'Russia',
 'Russian',
 'London',
 'London',
 'Anatolia',
 'German',
 'Iraqi',
 'Rhineland-Palatinate',
 'Iraqi',
 'German',
 'Spanish',
 'Venice',
 'Canadian',
 'Cannes',
 'Spanish',
 'France',
 'French',
 'French',
 'Spanish',
 'BERLIN',
 'Berlin',
 'German',
 'European',
 'American',
 'Chinese',
 'B.C.',
 'Macdonald-Cartier International Airpor

In [12]:
# Filtered toponyms: the same evaluation, run against the annotations loaded with
# filtered=True and the predictions made on the filtered dataset.
# fps and fns are rebound here, so the all-toponym lists are no longer available afterwards.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results_filtered)

fp: 332 | tp: 918 | fn: 333
precision: 0.734 | recall: 0.734 | f-score: 0.734


In [13]:
# Display the first list returned by the filtered TR-News evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['-',
 'White House',
 'South Regional Jail',
 'U',
 '.',
 'S',
 '.',
 'Rose Garden',
 'Southern Poverty Law Center',
 'Ronald Reagan Building',
 'U',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'Islamic State',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'Southern',
 'Wooster St.',
 'Cumberland Farms',
 'South Main St',
 '-',
 'North West',
 'Kremlin',
 'The',
 'Kremlin',
 'Downtown Eastside',
 'London City',
 'London City',
 'Central Anatolia',
 'Rhineland',
 '-',
 'Palatinate',
 'Dolby Theatre',
 'Air Berlin',
 'B',
 '.',
 'C',
 '.',
 'West Coast',
 'Macdonald',
 '-',
 'Cartier International Airport',
 'Montreal',
 '-',
 'Pierre Elliott Trudeau International Airport',
 'Mont',
 '-',
 'Royal Avenue',
 'Sage House',
 "Children's Hospital of Manitoba",
 "' s Hospital",
 'College Street',
 'Dufferin Street',
 'St',
 'Phoenix',
 'Phoenix',
 'Que',
 '.',
 'Crusader',
 'Karak',
 'Karak Castle',
 'Paris Town Hall',
 'Nazi Germany',
 

In [14]:
# Display the second list returned by the filtered TR-News evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['Turkish',
 'Turkish',
 'Syrian',
 'Syrian',
 'U.S.',
 'Turkish',
 'Turkish',
 'Russian',
 'Syrian',
 'Russian',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Russian',
 'Turkish',
 'Russian',
 'Russian',
 'Russian',
 'Syrian',
 'Granville County',
 'WASHINGTON',
 'U.S.',
 'Texas',
 'Texas',
 'Texas',
 'U.S.',
 'U.S.',
 'U.S.',
 'U.S.',
 'U.S.',
 'Xavier University',
 'British',
 'U.S.',
 'European',
 'U.S.',
 'U.S.',
 'U.S.',
 'DETROIT',
 'U.S.',
 'U.S.',
 'New York',
 'Michigan',
 'Cuban',
 'BANTAM',
 'Bantam',
 'New Milford',
 'TORRINGTON',
 'Russian',
 'Russian',
 'Russian',
 'Russia',
 'Russian',
 'London',
 'London',
 'Anatolia',
 'German',
 'Iraqi',
 'Rhineland-Palatinate',
 'Iraqi',
 'German',
 'Spanish',
 'Venice',
 'Canadian',
 'Cannes',
 'Spanish',
 'France',
 'French',
 'French',
 'Spanish',
 'BERLIN',
 'Berlin',
 'German',
 'European',
 'American',
 'Chinese',
 'B.C.',
 'Macdonald-Cartier International Airport',
 'Montre

## LGL

### Loading the dataset

In [15]:
import loading_functions

# Location of the LGL corpus (relative to the kernel's working directory)
file_path = '../../../data/LGL/LGL.xml'

# Load the corpus twice with split=True: first with filtered=False, then with filtered=True.
# This rebinds the names that previously held the TR-News data.
data_all_toponyms, data_filtered_toponyms = (
    loading_functions.prepare_data(file_path, filtered=use_filter, split=True)
    for use_filter in (False, True)
)

### Processing the data for Huggingface Trainer

In [16]:
# Load the tokenizer from model_path for the LGL section.
# NOTE(review): model_path is the same value used by the earlier tokenizer cell, so this
# reload looks redundant when the notebook is run top to bottom.
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [17]:
import preparing_dataset

# Build the Trainer-ready datasets for LGL: unfiltered annotations first,
# then the filtered ones, both with the same tokenizer.
LGL, LGL_filtered = (
    preparing_dataset.prepare_dataset(corpus, tokenizer)
    for corpus in (data_all_toponyms, data_filtered_toponyms)
)

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

### Prepare evaluation trainer for predictions

In [18]:
from transformers import DataCollatorForTokenClassification, Trainer

# Collator that batches the tokenized examples for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Prediction-only Trainer: no TrainingArguments are supplied, so the library defaults apply.
test_trainer = Trainer(model=model, data_collator=data_collator)

### Make Predictions

In [19]:
import numpy as np

# predict() returns a (predictions, label_ids, metrics) result; only the raw model
# outputs are kept. argmax over axis 2 turns them into one label id per position
# (presumably the array is batch x token x label - confirm).
raw_pred = test_trainer.predict(LGL).predictions
predictions = np.argmax(raw_pred, axis=2)

raw_pred_filtered = test_trainer.predict(LGL_filtered).predictions
predictions_filtered = np.argmax(raw_pred_filtered, axis=2)

### Process predictions

In [20]:
import process_predictions

# Post-process the predicted label ids of each LGL variant together with the
# dataset they were predicted on; label_list and tokenizer are shared by both calls.
processed_results, processed_results_filtered = (
    process_predictions.process_predictions(label_ids, dataset, label_list, tokenizer)
    for label_ids, dataset in (
        (predictions, LGL),
        (predictions_filtered, LGL_filtered),
    )
)

### Evaluation LGL

In [21]:
# NOTE(review): `evaluate` is expected to be the project module made importable by the
# sys.path.insert(1, '../utils') call in the first cell, not the Hugging Face package of
# the same name - confirm which one is picked up in this environment.
import evaluate

# All toponyms: score the processed LGL predictions against the annotations loaded with
# filtered=False (data_all_toponyms was rebound in the LGL loading cell). The call prints
# the fp/tp/fn counts and precision/recall/f-score, and returns two lists kept as fps and fns.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 1676 | tp: 3256 | fn: 1680
precision: 0.660 | recall: 0.660 | f-score: 0.660


In [22]:
# Display the first list returned by the all-toponym LGL evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['Orchard St.',
 'Cottonport Fire Station',
 'St',
 '. James Youth Detention Center',
 'Minnesota House',
 'R',
 '-',
 'Otter Tail',
 'Highway',
 'Otter Tail / Grant',
 'Grant /',
 'Wilkin',
 'Otter',
 'Tail',
 'Highway',
 '-',
 'Hesco',
 'Fargo City',
 'Fargodome',
 'Sandbag Central',
 'Douglas County Hospital',
 'St',
 'R',
 '-',
 'Ky',
 '.',
 'Conn',
 '.',
 'S',
 '.',
 'D',
 '.',
 'Ariz',
 '.',
 'Minnie Howard',
 'Durant Center',
 'Washington,',
 'D. C',
 '.',
 'Northwest D. C.',
 'D',
 '.',
 'C',
 '.',
 'East End',
 'Southern Sudan',
 'Sub',
 '-',
 'Saharan Africa',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Gulf',
 'Gulf',
 'US',
 'US',
 'US',
 'Gulf',
 'US',
 'Gulf',
 'US',
 'Sharm El - Sheikh',
 'St',
 ". John's Lutheran School",
 'Sheldon Peck Homestead',
 'Woodfield Shopping Center',
 'Streets of Woodfield',
 'Hudson',
 'Decatur',
 'U',
 '.',
 'S',
 '.',
 'Ill',
 '.',
 'Pe',
 'Tarrant',
 'Houston',
 'Big',
 'Law Enforcement Center',
 'Cit

In [23]:
# Display the second list returned by the all-toponym LGL evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['Rapides Parish',
 'Cottonport',
 'Alexandria',
 'MANSFIELD',
 'Mansfield',
 'Shreveport',
 'DeSoto Parish',
 'Cook',
 'Minnesota',
 'Minnesota',
 'Marshall',
 'Chisholm',
 'Highway 200',
 'Mahnomen County Road',
 'Mahnomen County Road',
 'Highway 10',
 'Otter Tail County',
 'Highway 108',
 'Otter Tail/Grant',
 'Grant/Wilkin',
 'County Road 43',
 'Highway 114',
 'Douglas County',
 'Minnesota',
 'Minnesota',
 'Otter Tail',
 'Alexandria',
 'Minnesota',
 'Fargo',
 'Douglas County',
 'County Road 35',
 'County Road 56',
 'County Road 15',
 'County Road 96',
 'Nokomis',
 'Douglas County',
 'St. Cloud',
 'Alexandria',
 'Alexandria',
 'Ky.',
 'Conn.',
 'S.D.',
 'Ariz.',
 'Alexandria',
 'Alexandria',
 'Washington',
 'D.C.',
 'Washington',
 'D.C.',
 'D.C.',
 'D.C.',
 'Virginia',
 'Sudanese',
 'Sudan',
 'Chinese',
 'Africa',
 'African',
 'Egyptian',
 'Egyptian',
 'Sudan',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Egyptian',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Egypt

In [24]:
# Filtered toponyms: the same evaluation, run against the LGL annotations loaded with
# filtered=True and the predictions made on the filtered dataset.
# fps and fns are rebound here, so the all-toponym lists are no longer available afterwards.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results_filtered)

fp: 1992 | tp: 2775 | fn: 1545
precision: 0.582 | recall: 0.642 | f-score: 0.611


In [25]:
# Display the first list returned by the filtered LGL evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['Orchard St.',
 'Cottonport Fire Station',
 'Memphis St.',
 'Augusta St.',
 'St',
 '. James Youth Detention Center',
 'Minnesota House',
 'R',
 '-',
 'Otter Tail',
 'Highway',
 'Otter Tail / Grant',
 'Grant /',
 'Wilkin',
 'Lake Mary',
 'Benson',
 'Otter',
 'Tail',
 'Highway',
 '-',
 'Red River Valley',
 '40th Avenue South',
 'Hesco',
 'Fargo City',
 'Fargodome',
 'Sandbag Central',
 'North Nokomis Street',
 'Darling Avenue',
 'Douglas County Hospital',
 'St',
 'County Road 109',
 'R',
 '-',
 'Van Dorn Street',
 'Ky',
 '.',
 'Conn',
 '.',
 'S',
 '.',
 'D',
 '.',
 'Ariz',
 '.',
 'Minnie Howard',
 'Durant Center',
 'Old Town',
 'Old Town',
 'Washington,',
 'D. C',
 '.',
 'Old Town',
 'Northwest D. C.',
 'Penn Quarter',
 'D',
 '.',
 'C',
 '.',
 'East End',
 'Southern Sudan',
 'Sub',
 '-',
 'Saharan Africa',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Egypt',
 'Sudan',
 'Gulf',
 'Gulf',
 'US',
 'US',
 'US',
 'Gulf',
 'US',
 'Gulf',
 'US',
 'Sharm El - Sheikh',
 'High Dam

In [26]:
# Display the second list returned by the filtered LGL evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['Rapides Parish',
 'Cottonport',
 'Alexandria',
 'MANSFIELD',
 'Mansfield',
 'Shreveport',
 'Cook',
 'Minnesota',
 'Minnesota',
 'Marshall',
 'Chisholm',
 'Otter Tail County',
 'Douglas County',
 'Minnesota',
 'Minnesota',
 'Otter Tail',
 'Alexandria',
 'Minnesota',
 'Fargo',
 'Douglas County',
 'Douglas County',
 'St. Cloud',
 'Alexandria',
 'Alexandria',
 'Ky.',
 'Conn.',
 'S.D.',
 'Ariz.',
 'Alexandria',
 'Alexandria',
 'Washington',
 'D.C.',
 'D.C.',
 'D.C.',
 'D.C.',
 'Virginia',
 'Sudanese',
 'Sudan',
 'Chinese',
 'Africa',
 'African',
 'Egyptian',
 'Egyptian',
 'Sudan',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Egyptian',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Egyptian',
 'Sudanese',
 'Sudanese',
 'Alexandria',
 'Sudanese',
 'Sri Lankan',
 'Sri Lankan',
 'Sinhalese',
 'Sinhalese',
 'Sinhalese',
 'Sri Lankan',
 'Sri Lankan',
 'Sri Lankan',
 'Sri Lankan',
 'Sri Lankan',
 'Sri Lankan',
 'Sri Lankan',
 'Sinhalese',
 'Sinhalese',
 'Sri Lankan',
 'Sri Lanka

## GeoWebNews

### Loading the dataset

In [27]:
import loading_functions

# Location of the GeoWebNews (GWN) corpus (relative to the kernel's working directory)
file_path = '../../../data/GeoWebNews/GeoWebNews.xml'

# Load the corpus twice with split=True: first with filtered=False, then with filtered=True.
# This rebinds the names that previously held the LGL data.
data_all_toponyms, data_filtered_toponyms = (
    loading_functions.prepare_data(file_path, filtered=use_filter, split=True)
    for use_filter in (False, True)
)

### Processing the data for Huggingface Trainer

In [28]:
# Load the tokenizer from model_path for the GeoWebNews section.
# NOTE(review): model_path is the same value used by the earlier tokenizer cells, so this
# reload looks redundant when the notebook is run top to bottom.
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [29]:
import preparing_dataset

# Build the Trainer-ready datasets for GeoWebNews: unfiltered annotations first,
# then the filtered ones, both with the same tokenizer.
GWN, GWN_filtered = (
    preparing_dataset.prepare_dataset(corpus, tokenizer)
    for corpus in (data_all_toponyms, data_filtered_toponyms)
)



  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

### Prepare evaluation trainer for predictions

In [30]:
from transformers import DataCollatorForTokenClassification, Trainer

# Collator that batches the tokenized examples for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Prediction-only Trainer: no TrainingArguments are supplied, so the library defaults apply.
test_trainer = Trainer(model=model, data_collator=data_collator)

### Make Predictions

In [31]:
import numpy as np

# predict() returns a (predictions, label_ids, metrics) result; only the raw model
# outputs are kept. argmax over axis 2 turns them into one label id per position
# (presumably the array is batch x token x label - confirm).
raw_pred = test_trainer.predict(GWN).predictions
predictions = np.argmax(raw_pred, axis=2)

raw_pred_filtered = test_trainer.predict(GWN_filtered).predictions
predictions_filtered = np.argmax(raw_pred_filtered, axis=2)

### Process predictions

In [32]:
import process_predictions

# Post-process the predicted label ids of each GeoWebNews variant together with the
# dataset they were predicted on; label_list and tokenizer are shared by both calls.
processed_results, processed_results_filtered = (
    process_predictions.process_predictions(label_ids, dataset, label_list, tokenizer)
    for label_ids, dataset in (
        (predictions, GWN),
        (predictions_filtered, GWN_filtered),
    )
)

### Evaluation GWN

In [33]:
# NOTE(review): `evaluate` is expected to be the project module made importable by the
# sys.path.insert(1, '../utils') call in the first cell, not the Hugging Face package of
# the same name - confirm which one is picked up in this environment.
import evaluate

# All toponyms: score the processed GeoWebNews predictions against the annotations loaded
# with filtered=False (data_all_toponyms was rebound in the GeoWebNews loading cell). The call
# prints the fp/tp/fn counts and precision/recall/f-score, and returns two lists kept as fps and fns.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 441 | tp: 1608 | fn: 3706
precision: 0.785 | recall: 0.303 | f-score: 0.437


In [34]:
# Display the first list returned by the all-toponym GeoWebNews evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['Royal',
 'Desire',
 'Champs',
 '-',
 'Élysées',
 'Smoky Mary',
 'Virginia',
 'Buckland',
 'Ronald',
 'Patriot',
 'Vienna',
 'Fairfax',
 '-',
 'GMU',
 'Orange',
 'Turtle Creek Cir.',
 'Va',
 'Marjory Stoneman',
 'Kremlin',
 'St',
 ". Peter's Basilica",
 'White House',
 'White House',
 'Bethany',
 'Ayton',
 'Commonwealth of Independent States',
 'CIS',
 'Dubai',
 'North America',
 'Treasure Beach',
 'Washington, D. C.',
 'U',
 'Sheikh Akil',
 'Islamic State',
 'West',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'Aeroflot',
 'Kremlin',
 'VNA',
 'Tamil',
 'No',
 '2',
 'Eva',
 'Seasons',
 'Polisario',
 'Vanni',
 'DWTC',
 'MEA',
 'Bakgatla',
 '-',
 'ba',
 '-',
 'Kgafela',
 'Bakgatla',
 'Washington, D. C.',
 'D',
 '.',
 'C',
 '.',
 'U',
 'GA',
 'CT',
 'CT',
 '-',
 'U',
 'Pontifex',
 'CALAM',
 'Calamba',
 'Read More',
 'Benue',
 'Benue',
 'Guma',
 'Peshawar',
 'Park Wana',
 'New Quay',
 'Gilgit',
 '-',
 'Baltistan',
 'Gilgit',
 '-',
 'Baltistan',
 'Gilgit',
 '-',
 'Baltistan',
 'Gilgit'

In [35]:
# Display the second list returned by the all-toponym GeoWebNews evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['area',
 'plantation',
 'mansion',
 'substation',
 'Louisiana',
 'Louisiana Purchase',
 'parcel',
 'French',
 'plat',
 'squares',
 'neighborhood',
 'city',
 'street',
 'avenue',
 'faubourg',
 'area',
 'neighbor',
 'building',
 'Methodist church',
 'restaurant',
 'street',
 'complex',
 '2231 Royal',
 '2231 Royal',
 'townhouse',
 'basement',
 'Greek',
 'structures',
 'system',
 'neighborhoods',
 'edifice',
 'grid',
 'avenue',
 'park',
 'Champs-Élysées',
 'Pontchartrain Railroad',
 'Pontchartrain Railroad',
 'Appalachians',
 'line',
 'block',
 'edifice',
 'Carnegie Library',
 'church',
 'library',
 'lot',
 'BEIRUT',
 'Kurdish',
 'city',
 'Turkish',
 'Syrian',
 'Kurdish',
 'towns',
 'Syrian',
 'Turkish',
 'campaign',
 'Syrian',
 'Kurdish',
 'frontier',
 'forces',
 'Turkish',
 'Britain',
 'Syrian',
 'Syrian Observatory for Human Rights Monitoring',
 'group',
 'Kurdish',
 'Turkish',
 'Syrian',
 'clashes',
 'community',
 'style',
 'design',
 'residences',
 'garage',
 'clubroom',
 'terrace',


In [36]:
# Filtered toponyms: the same evaluation, run against the GeoWebNews annotations loaded with
# filtered=True and the predictions made on the filtered dataset.
# fps and fns are rebound here, so the all-toponym lists are no longer available afterwards.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results_filtered)

fp: 449 | tp: 1584 | fn: 892
precision: 0.779 | recall: 0.640 | f-score: 0.703


In [37]:
# Display the first list returned by the filtered GeoWebNews evaluation
# (presumably the false-positive strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fps

['Faubourg',
 'Royal',
 'Desire',
 'Claiborne Power House',
 'Champs',
 '-',
 'Élysées',
 'Smoky Mary',
 'Jamison',
 'Norbury',
 'Granville',
 'Somerville',
 'Jamison',
 'Norbury',
 'Norbury',
 'Club',
 'Bed',
 'Buckland',
 'Ronald',
 'Patriot',
 'Vienna',
 'Fairfax',
 '-',
 'GMU',
 'Orange',
 'Turtle Creek Cir.',
 'Va',
 'Marjory Stoneman',
 'Kremlin',
 'St',
 ". Peter's Basilica",
 'White House',
 'Commonwealth of Independent States',
 'CIS',
 'Dubai',
 'North America',
 'Treasure Beach',
 'Washington, D. C.',
 'U',
 'Sheikh Akil',
 'Islamic State',
 'CTV Saskatoon',
 'U',
 '.',
 'S',
 '.',
 'U',
 '.',
 'S',
 '.',
 'West',
 'Aeroflot',
 'Kremlin',
 'VNA',
 'Tamil',
 'Tamil',
 'States',
 'No',
 '2',
 '2',
 'Eva',
 'Seasons',
 'DWTC',
 'Bakgatla',
 '-',
 'ba',
 '-',
 'Kgafela',
 'Bakgatla',
 'Washington,',
 'D',
 '. C.',
 'In',
 'D',
 '.',
 'C',
 '.',
 'U',
 'GA',
 'CT',
 'CT',
 '-',
 'U',
 'Pontifex',
 'CALAM',
 'Calamba',
 'Read More',
 'Umuanunu',
 'Benue',
 'Benue',
 'Guma',
 'Pesh

In [38]:
# Display the second list returned by the filtered GeoWebNews evaluation
# (presumably the false-negative strings, going by the name - confirm in evaluate).
# NOTE(review): this renders every entry; a slice or a collections.Counter summary would be shorter.
fns

['Louisiana',
 'French',
 'German',
 'Irish',
 'Methodist church',
 'Faubourg Marigny',
 '2231 Royal',
 'Greek',
 'New Orleans Railways and Light Company Claiborne Power House',
 'French',
 'Champs-Élysées',
 'Pontchartrain Railroad',
 'Appalachians',
 'BEIRUT',
 'Kurdish',
 'Turkish',
 'Syrian',
 'Kurdish',
 'Syrian',
 'Turkish',
 'Syrian',
 'Kurdish',
 'Turkish',
 'Britain',
 'Syrian',
 'Kurdish',
 'Turkish',
 'Syrian',
 'Robert Trent Jones Golf Club',
 'Virginia Gateway',
 'Buckland Elementary',
 'Ronald Wilson Reagan Middle',
 'Patriot High',
 'Vienna/Fairfax-GMU Metro',
 'Washington',
 'Washington',
 'Marjory Stoneman Douglas High School',
 'Parkland',
 'Washington',
 'Parkland',
 'African',
 'African',
 'African',
 'African',
 'African',
 'African',
 'Russian',
 'Russian',
 'Russian Higher School of Economics',
 'Mississippi',
 'Louisville',
 "St. Peter's Basilica",
 'Nigerian',
 'Nigerian',
 'Muscat',
 'Omani',
 'Oman',
 'New York',
 'Mediterranean',
 'France',
 'EU',
 'EU',
 'E