In [2]:
import sys

# Put the ../utils folder on the module search path so the notebook's local
# helper modules can be imported (presumably loading_functions, XLM_predictions
# and evaluate live there — confirm). Inserting at index 1 leaves sys.path[0]
# as the first lookup location. The path is relative, so it is resolved against
# the kernel's current working directory.
sys.path.insert(1, '../utils')

## Loading the fine-tuned prediction pipeline: XLM-RoBERTa model

In [3]:
from transformers import pipeline

# Checkpoint identifier, used for both the model weights and the tokenizer.
model_path = 'jplu/tf-xlm-r-ner-40-lang'

# NOTE(review): label_list is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
label_list = ['LOC', 'ORG', 'PER', 'O']

# Build the "ner" pipeline on the TensorFlow backend. The tokenizer is given as
# a (name, kwargs) tuple so that the fast tokenizer implementation is requested.
nlp_ner = pipeline(
    "ner",
    model=model_path,
    tokenizer=(model_path, {"use_fast": True}),
    framework="tf",
)

Some layers from the model checkpoint at jplu/tf-xlm-r-ner-40-lang were not used when initializing TFXLMRobertaForTokenClassification: ['dropout_38']
- This IS expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFXLMRobertaForTokenClassification were initialized from the model checkpoint at jplu/tf-xlm-r-ner-40-lang.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaForTokenClassification for predictions without further training.


## DutchPolicyDocs

In [4]:
import loading_functions

# Location of the DutchPolicyDocs JSON file, relative to the working directory.
file_path = '../../../data/DutchPolicyDocs/DutchPolicyDocs.json'

# Load the dataset; this section calls prepare_data with filtered=False and
# split=False.
toponym_data = loading_functions.prepare_data(
    file_path,
    filtered=False,
    split=False,
)

### Make Predictions

In [7]:
import XLM_predictions

# Run the NER pipeline (nlp_ner) over the loaded DutchPolicyDocs data and keep
# the processed predictions for the evaluation cell. In the recorded run this
# processed 1044 items in roughly 4.5 minutes.
processed_results = XLM_predictions.make_predictions(nlp_ner, toponym_data)

  0%|                                                                                         | 0/1044 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Instructions for updating:
Use tf.identity instead.


100%|██████████████████████████████████████████████████████████████████████████████| 1044/1044 [04:22<00:00,  3.98it/s]


### Evaluation DPD

In [8]:
import evaluate

# All toponyms
# Compare the predictions with the loaded annotations. The call reports
# fp/tp/fn counts plus precision, recall and f-score, and returns two values
# that are unpacked into fps and fns for inspection.
fps, fns = evaluate.evaluate(toponym_data, processed_results)

fp: 1613 | tp: 2186 | fn: 2185
precision: 0.575 | recall: 0.500 | f-score: 0.535


In [9]:
# Display fps, the first value returned by evaluate.evaluate for DutchPolicyDocs
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['kmaar',
 'Amsterdam aan het IJ',
 'Maastricht aan de Maas',
 'srivier',
 'Al',
 'ma',
 'Alkmaar-',
 'Provin',
 'gemeente Alkmaar',
 'kmaar',
 'Noordhollands',
 'Kana',
 'km',
 'Regio Alkmaar',
 'Amsterdam',
 'Noord-Holland',
 'kmaar',
 'Alkmaar',
 'kmaar',
 'Alkmaarse',
 'kmaar',
 'Alkmaarse',
 'kmaar',
 'Alkmaar',
 'Alkmaar Noord',
 'kmaar',
 'De Schermer',
 'melo',
 'Noordbroek',
 'Buitenhaven',
 'Aadorp',
 'Fri',
 'Almelose',
 'Exoosche Aa',
 'Almelo',
 'Ha',
 'rik',
 'ze Almelo',
 'Scho',
 'land',
 'Bart',
 'hoek',
 "Bloom'n Ven",
 'bos',
 'Almelo-De Haandrik',
 'De Woesten',
 'Bleskolk',
 'Almelo',
 'Almelo',
 'Twent',
 'Almelose kern',
 'Kerkelanden',
 'Schelfhorst',
 'Windmolenbroek',
 'me',
 'Ossen',
 'hoek',
 'nerbroek',
 'Baarle-',
 'Baarle-Nassau',
 'Breda',
 'Belgische',
 'Nederlandse',
 'le-Nassau',
 'Bels Lijntje',
 'Baarle',
 'Bels Lijntje',
 'Merkske',
 'Baarle',
 'Bels Lijntje',
 'Merkske',
 'le-Nassau',
 'Tilburg',
 'Turnhout',
 'Baarle',
 'Chaam',
 'Baarle- Hertog'

## TR-News

### Loading the dataset

In [4]:
import loading_functions

# Location of the TR-News XML file, relative to the working directory.
file_path = '../../../data/TR-News/TR-News.xml'

# Load the same file twice with split=True: once with filtered=False and once
# with filtered=True.
data_all_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=False,
    split=True,
)
data_filtered_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=True,
    split=True,
)

### Make Predictions

In [None]:
import XLM_predictions

# Run the NER pipeline (nlp_ner) over the unfiltered TR-News data and keep the
# processed predictions for the evaluation cells.
# NOTE(review): this cell was saved without an execution count or progress
# output, yet the evaluation cells show results — re-run from a fresh kernel
# to confirm the reported numbers.
processed_results = XLM_predictions.make_predictions(nlp_ner, data_all_toponyms)

### Evaluation TR-News

In [12]:
import evaluate

# All toponyms
# Compare the predictions with the unfiltered TR-News annotations. The call
# reports fp/tp/fn counts plus precision, recall and f-score, and returns two
# values that are unpacked into fps and fns for inspection.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 182 | tp: 754 | fn: 495
precision: 0.806 | recall: 0.604 | f-score: 0.690


In [13]:
# Display fps from the TR-News "all toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['Turki',
 'Turki',
 'Syria',
 'Syrian conflict',
 'Turkish',
 'Syria',
 'Turki',
 'Syria',
 'ville',
 'Carolina',
 'Beaver, West Virginia',
 'Alvin, Texas',
 'Houston suburb',
 'W',
 'HINGTON',
 'College Station, Texas',
 'west Ohio',
 'Cincinnati suburb',
 'Southern District of Ohio',
 'Redding, northern California',
 'Wooster St.',
 'ham',
 'gar',
 'Calgar',
 'Canadian',
 'Downtown Eastside',
 'Heath',
 'Central Anatolia',
 'Turki',
 'Iraq',
 'LIN',
 'Winnipe',
 'Churchill, Man.',
 'Phoenix',
 'Phoenix',
 'Gatineau, Que.,',
 'Syria',
 'Central European Time',
 'Nazi Germany',
 'French',
 'Abbassiya district',
 'Egypt',
 'Islamic Sharia',
 'Giza',
 's Haram district',
 'Abbas',
 'Kenya',
 'Surrey, B.C.',
 'Barrie, Ont.',
 'Great Recession',
 'Toronto area',
 'Trudeau',
 'Trudeau',
 'GTA',
 'Paul',
 '2016-17',
 'Red',
 'er',
 'Western Canada',
 'Pacific',
 'Government of Alberta',
 'U.',
 'Albert',
 'ern Alberta',
 'monton',
 'Ed',
 'Edmont',
 'Far North Queensland',
 'Far North Queen

In [14]:
# Display fns from the TR-News "all toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['Turkish',
 'Aleppo',
 'Turkish',
 'Syrian',
 'Syrian',
 'Aleppo',
 'U.S.',
 'Turkish',
 'Kurdish',
 'Turkish',
 'Russian',
 'Syrian',
 'Russia',
 'Turkey',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Russian',
 'Turkish',
 'Russian',
 'Russian',
 'Russian',
 'Syrian',
 'Granville County',
 'North Carolina',
 'Beaver',
 'West Virginia',
 'Alvin',
 'Texas',
 'Houston',
 'New York',
 'New York',
 'White House',
 'WASHINGTON',
 'U.S.',
 'AUSTIN',
 'Texas',
 'Texas',
 'College Station',
 'Texas',
 'Ohio',
 'U.S.',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'U.S.',
 'Xavier University',
 'British',
 'U.S.',
 'European',
 'Syria',
 'U.S.',
 'U.S.',
 'U.S.',
 'Ohio',
 'DETROIT',
 'U.S.',
 'U.S.',
 'US',
 'New York',
 'Michigan',
 'US',
 'US',
 'US',
 'Cuban',
 'Wisconsin',
 'US',
 'US',
 'US',
 'BANTAM',
 'Danbury',
 'Bantam',
 'New Milford',
 'Calgary',
 'Calgary',
 'US',
 'Russian',
 'US',
 'London',
 'Heathrow',
 'London',
 'Anatolia',
 'German

In [15]:
# Filtered toponyms
# Score the predictions against the filtered TR-News annotations. This cell
# previously passed `processed_results_filtered`, a name that no cell in this
# notebook defines, so it raised NameError after Restart & Run All. The LGL and
# GeoWebNews sections evaluate their filtered annotations against
# `processed_results`, so the same variable is used here.
# NOTE(review): if a separate prediction run on data_filtered_toponyms was
# intended, add that prediction cell explicitly instead.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results)

fp: 186 | tp: 750 | fn: 461
precision: 0.801 | recall: 0.619 | f-score: 0.699


In [16]:
# Display fps from the TR-News "filtered toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['Turki',
 'Turki',
 'Syria',
 'Syrian conflict',
 'Turkish',
 'Syria',
 'Turki',
 'Syria',
 'ville',
 'Carolina',
 'Beaver, West Virginia',
 'Alvin, Texas',
 'Houston suburb',
 'W',
 'HINGTON',
 'College Station, Texas',
 'west Ohio',
 'Cincinnati suburb',
 'Southern District of Ohio',
 'Redding, northern California',
 'Wooster St.',
 'ham',
 'gar',
 'Calgar',
 'Canadian',
 'Downtown Eastside',
 'Heath',
 'Central Anatolia',
 'Turki',
 'Iraq',
 'LIN',
 'West Coast',
 'Winnipe',
 'Churchill, Man.',
 'Phoenix',
 'Phoenix',
 'Gatineau, Que.,',
 'Syria',
 'Central European Time',
 'Nazi Germany',
 'French',
 'Abbassiya district',
 'Egypt',
 'Islamic Sharia',
 'Giza',
 's Haram district',
 'Abbas',
 'Kenya',
 'Surrey, B.C.',
 'Barrie, Ont.',
 'Great Recession',
 'Toronto area',
 'Trudeau',
 'Trudeau',
 'GTA',
 'Paul',
 '2016-17',
 'Red',
 'er',
 'Western Canada',
 'Pacific',
 'Government of Alberta',
 'U.',
 'Albert',
 'ern Alberta',
 'monton',
 'Ed',
 'Edmont',
 'Far North Queensland',
 '

In [17]:
# Display fns from the TR-News "filtered toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['Turkish',
 'Aleppo',
 'Turkish',
 'Syrian',
 'Syrian',
 'Aleppo',
 'U.S.',
 'Turkish',
 'Turkish',
 'Russian',
 'Syrian',
 'Russia',
 'Turkey',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Turkish',
 'Turkish',
 'Russian',
 'Russian',
 'Turkish',
 'Russian',
 'Russian',
 'Russian',
 'Syrian',
 'Granville County',
 'North Carolina',
 'Beaver',
 'West Virginia',
 'Alvin',
 'Texas',
 'Houston',
 'New York',
 'New York',
 'White House',
 'WASHINGTON',
 'U.S.',
 'AUSTIN',
 'Texas',
 'Texas',
 'College Station',
 'Texas',
 'Ohio',
 'U.S.',
 'U.S.',
 'Cincinnati',
 'U.S.',
 'U.S.',
 'Xavier University',
 'British',
 'U.S.',
 'European',
 'Syria',
 'U.S.',
 'U.S.',
 'U.S.',
 'Ohio',
 'DETROIT',
 'U.S.',
 'U.S.',
 'US',
 'New York',
 'Michigan',
 'US',
 'US',
 'US',
 'Cuban',
 'Wisconsin',
 'US',
 'US',
 'US',
 'BANTAM',
 'Danbury',
 'Bantam',
 'New Milford',
 'Calgary',
 'Calgary',
 'US',
 'Russian',
 'US',
 'London',
 'Heathrow',
 'London',
 'Anatolia',
 'German',
 'Iraqi',

## LGL

### Loading the dataset

In [3]:
import loading_functions

# Location of the LGL XML file, relative to the working directory.
file_path = '../../../data/LGL/LGL.xml'

# Load the same file twice with split=True and word_limit=1800: once with
# filtered=False and once with filtered=True.
# NOTE(review): the reason for word_limit=1800 is not documented here.
data_all_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=False,
    split=True,
    word_limit=1800,
)
data_filtered_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=True,
    split=True,
    word_limit=1800,
)

### Make Predictions

In [4]:
import XLM_predictions

# Run the NER pipeline (nlp_ner) over the unfiltered LGL data and keep the
# processed predictions for the evaluation cells. In the recorded run this
# processed 938 items in roughly 7.5 minutes.
processed_results = XLM_predictions.make_predictions(nlp_ner, data_all_toponyms)

  0%|                                                                                          | 0/938 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Instructions for updating:
Use tf.identity instead.


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [07:29<00:00,  2.09it/s]


### Evaluation LGL

In [5]:
import evaluate

# All toponyms
# Compare the predictions with the unfiltered LGL annotations. The call
# reports fp/tp/fn counts plus precision, recall and f-score, and returns two
# values that are unpacked into fps and fns for inspection.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 722 | tp: 2260 | fn: 2320
precision: 0.758 | recall: 0.493 | f-score: 0.598


In [6]:
# Display fps from the LGL "all toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['ia',
 'Orchard St., Alexandria',
 'ia',
 'Sham',
 'reveport',
 'polis',
 'Minnesota',
 'R',
 'Mah',
 'en',
 'Mah',
 'Otter Ta',
 'Otter Tail',
 'G',
 'Douglas',
 'Alexandr',
 'head',
 'Farg',
 'Fargo Mayor',
 'Minnesota',
 'R-Elbow Lake',
 'Surprise, Ariz.',
 'Washington, D.C.',
 'Northwest D.C.',
 'Sudan',
 'Southern Sudan',
 'Sub-Saharan Africa',
 'fur',
 'Sudan',
 'Egypt',
 'Sudan',
 'Sudan',
 'Sudan',
 'Sudan',
 'Tamil',
 'Sri Lanka',
 'Sri',
 'Matara district',
 'Sri Lanka',
 'Sri',
 'Egypt',
 'Gulf',
 'invasion of Kuwait',
 'Gulf',
 'Damasc',
 'Arab world',
 'Israel',
 'Egypt',
 'Egypt',
 'Meadows',
 'Rolling',
 'ws',
 'Page County',
 'Page',
 'ville',
 'Hudson',
 'Decatur',
 'Le',
 'Bolingbrook, Ill.',
 'Jolie',
 'Wintersville, Ohio',
 'Houston',
 'field',
 'field',
 'Kermanshah province',
 'North Texas',
 'lake',
 'lington',
 'ton',
 'Mayor',
 'north Arlington',
 'South Arlington',
 'Prairie',
 'ton',
 'Northeast Georgia',
 'Northeast Georgia',
 'Barrow counties',
 'Athens-Cl

In [7]:
# Display fns from the LGL "all toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['Alexandria',
 'Avoyelles',
 'Cottonport',
 'Alexandria',
 'Mansfield',
 'MANSFIELD',
 'Mansfield',
 'Shreveport',
 'Mansfield',
 'Shreveport',
 'Shreveport',
 'Shreveport',
 'DeSoto Parish',
 'Cook',
 'Minneapolis',
 'Marshall',
 'Chisholm',
 'Highway 200',
 'Mahnomen County Road',
 'Mahnomen County Road',
 'Highway 10',
 'Otter Tail County',
 'Highway 108',
 'Otter Tail/Grant',
 'Grant/Wilkin',
 'County Road 43',
 'Highway 114',
 'Douglas County',
 'Minnesota',
 'Minnesota',
 'Parkers Prairie',
 'Alexandria',
 'Alexandria',
 'Red River',
 'Fargo',
 'Moorhead',
 'Red River',
 'Fargo',
 'Fargo',
 'Fargo',
 '40th Avenue South',
 'Oakport',
 'Moorhead',
 'Minnesota',
 'Fargo',
 'Fargo',
 'U.S.',
 'Douglas County',
 'County Road 35',
 'County Road 56',
 'County Road 15',
 'County Road 96',
 'Nokomis',
 'North Nokomis Street',
 'Darling Avenue',
 'Albany',
 'County Road 109',
 'Van Dorn Street',
 'Alexandria',
 'Alexandria',
 'America',
 'Alexandria',
 'Surprise',
 'Ariz.',
 'Alexandria',

In [9]:
# Filtered toponyms
# Re-score the same processed_results, this time against the annotations
# loaded with filtered=True. fps and fns are overwritten with the new values.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results)

fp: 861 | tp: 2105 | fn: 2013
precision: 0.710 | recall: 0.511 | f-score: 0.594


In [10]:
# Display fps from the LGL "filtered toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['ia',
 'Orchard St., Alexandria',
 'ia',
 'Memphis St.',
 'Augusta St.',
 'reveport',
 'polis',
 'Minnesota',
 'R',
 'Mah',
 'en',
 'Mah',
 'Otter Ta',
 'Otter Tail',
 'G',
 'Douglas',
 'Lake Mary',
 'Benson',
 'Alexandr',
 'head',
 'Farg',
 'Red River Valley',
 'Fargo Mayor',
 'Minnesota',
 'R-Elbow Lake',
 'Surprise, Ariz.',
 'Old Town',
 'Old Town',
 'Washington, D.C.',
 'Northwest D.C.',
 'Penn Quarter',
 'Sudan',
 'Southern Sudan',
 'Sub-Saharan Africa',
 'fur',
 'Sudan',
 'Egypt',
 'Sudan',
 'Sudan',
 'Sudan',
 'Sudan',
 'Tamil',
 'Sri Lanka',
 'Sri',
 'Matara district',
 'Sri Lanka',
 'Sri',
 'Egypt',
 'Gulf',
 'invasion of Kuwait',
 'Gulf',
 'Damasc',
 'Arab world',
 'Israel',
 'Egypt',
 'Egypt',
 'Meadows',
 'Rolling',
 'ws',
 'North Monticello',
 'Page County',
 'Babcock Grove',
 'Page',
 'ville',
 'Hudson',
 'Evergreen Lake',
 'Decatur',
 'Le',
 'Bolingbrook, Ill.',
 'Jolie',
 'North Texas',
 "Burger's Lake",
 'Wintersville, Ohio',
 'Houston',
 'field',
 'field',
 'Kermansh

In [11]:
# Display fns from the LGL "filtered toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['Alexandria',
 'Avoyelles',
 'Cottonport',
 'Alexandria',
 'Mansfield',
 'MANSFIELD',
 'Mansfield',
 'Shreveport',
 'Mansfield',
 'Shreveport',
 'Shreveport',
 'Shreveport',
 'Cook',
 'Minneapolis',
 'Marshall',
 'Chisholm',
 'Otter Tail County',
 'Douglas County',
 'Minnesota',
 'Minnesota',
 'Parkers Prairie',
 'Alexandria',
 'Alexandria',
 'Red River',
 'Fargo',
 'Moorhead',
 'Red River',
 'Fargo',
 'Fargo',
 'Fargo',
 'Oakport',
 'Moorhead',
 'Minnesota',
 'Fargo',
 'Fargo',
 'U.S.',
 'Douglas County',
 'Albany',
 'Alexandria',
 'Alexandria',
 'America',
 'Alexandria',
 'Surprise',
 'Ariz.',
 'Alexandria',
 'Washington',
 'D.C.',
 'D.C.',
 'D.C.',
 'Egypt',
 'Sudanese',
 'Cairo',
 'Sudan',
 'Chinese',
 'Africa',
 'African',
 'Darfur',
 'Egyptian',
 'Egyptian',
 'Sudan',
 'Darfur',
 'Egypt',
 'Sudan',
 'Sudanese',
 'Sudanese',
 'Egyptian',
 'Sudanese',
 'Sudanese',
 'Sudanese',
 'Sudan',
 'Sudanese',
 'Sudanese',
 'Sudan',
 'Sudanese',
 'Egyptian',
 'Sudan',
 'Sudanese',
 'Sudanese

## GeoWebNews

### Loading the dataset

In [3]:
import loading_functions

# Location of the GeoWebNews XML file, relative to the working directory.
file_path = '../../../data/GeoWebNews/GeoWebNews.xml'

# Load the same file twice with split=True and word_limit=1850: once with
# filtered=False and once with filtered=True.
# NOTE(review): the reason for word_limit=1850 is not documented here.
data_all_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=False,
    split=True,
    word_limit=1850,
)
data_filtered_toponyms = loading_functions.prepare_data(
    file_path,
    filtered=True,
    split=True,
    word_limit=1850,
)

### Make Predictions

In [4]:
import XLM_predictions

# Run the NER pipeline (nlp_ner) over the unfiltered GeoWebNews data and keep
# the processed predictions for the evaluation cells. In the recorded run this
# processed 371 items in a little over 3 minutes.
processed_results = XLM_predictions.make_predictions(nlp_ner, data_all_toponyms)

  0%|                                                                                          | 0/371 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Instructions for updating:
Use tf.identity instead.


100%|████████████████████████████████████████████████████████████████████████████████| 371/371 [03:12<00:00,  1.93it/s]


### Evaluation GeoWebNews

In [5]:
import evaluate

# All toponyms
# Compare the predictions with the unfiltered GeoWebNews annotations. The call
# reports fp/tp/fn counts plus precision, recall and f-score, and returns two
# values that are unpacked into fps and fns for inspection.
fps, fns = evaluate.evaluate(data_all_toponyms, processed_results)

fp: 233 | tp: 1229 | fn: 3676
precision: 0.841 | recall: 0.251 | f-score: 0.386


In [6]:
# Display fps from the GeoWebNews "all toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['African American',
 'Syrian Kurdish enclave',
 'Manassa',
 'Gainesville, Va',
 'Parkland, Florida',
 'Nigeria',
 'Columbus, Ohio',
 'COLUMBUS, Ohio',
 'Nigeria',
 'Bollywood',
 'Greece Italy',
 'Emirat',
 'West',
 'Syria',
 'western Syria',
 'ea',
 'Idlib province',
 'dale',
 'dale',
 'Austin, Texas',
 'L',
 'LON',
 'oi',
 'Asia-Pacific',
 'Nadu',
 'State of Tamil Nadu',
 'Pittsburgh, Pennsylvania',
 'Venezuela',
 'Orinoco area',
 'United',
 'ANGELES',
 'Middle East and Africa',
 'Malaysia',
 'South African',
 'apartheid',
 'platinum belt',
 'platinum belt',
 'Bafokeng community',
 'fokeng',
 'eng',
 'Nigeria',
 'D',
 'Cork city',
 '22nd St. North',
 'LON',
 'Laguna 2nd District',
 'hall',
 'hall',
 'Sokoto',
 'Guma Local Government Area of the state',
 'OUTH WAZIRISTAN',
 'New Quay, Ceredigion',
 'Beverly Hills, California',
 'Gilgit',
 'Baltistan',
 'Boise, Idaho',
 'Klamath Falls, Oregon',
 'Sacramento, California',
 'YORK',
 'M',
 'ICO CITY',
 'South Kilburn',
 'Grannum',
 'ALEIG

In [7]:
# Display fns from the GeoWebNews "all toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['area',
 'plantation',
 'mansion',
 'substation',
 'Louisiana',
 'Louisiana Purchase',
 'parcel',
 'French',
 'plat',
 'French Quarter',
 'squares',
 'neighborhood',
 'city',
 'Faubourg Marigny',
 'community',
 'African Americans',
 'BEIRUT',
 'Kurdish',
 'city',
 'Turkish',
 'Syrian',
 'Kurdish',
 'Qamishli',
 'towns',
 'Syrian',
 'Turkish',
 'campaign',
 'Syrian',
 'frontier',
 'forces',
 'Turkish',
 'Syrian',
 'Syrian Observatory for Human Rights Monitoring',
 'group',
 'Kurdish',
 'Turkish',
 'Syrian',
 'clashes',
 'community',
 'style',
 'design',
 'residences',
 'garage',
 'clubroom',
 'terrace',
 'yard',
 'Jamison',
 'amenities',
 'Lake Manassas',
 'reservoir',
 'homes',
 'lake',
 'Residents',
 'clubhouse',
 'courts',
 'fields',
 'playground',
 'trails',
 'Robert Trent Jones Golf Club',
 'Robert Trent Jones Golf Club',
 'Stonewall Golf Club',
 'Stonewall Golf Club',
 'Stonewall',
 'Stonewall',
 'Virginia Gateway',
 'Virginia Gateway',
 'Haymarket Village Center',
 'Haymarket Vi

In [9]:
# Filtered toponyms
# Re-score the same processed_results, this time against the annotations
# loaded with filtered=True. fps and fns are overwritten with the new values.
fps, fns = evaluate.evaluate(data_filtered_toponyms, processed_results)

fp: 344 | tp: 1076 | fn: 1187
precision: 0.758 | recall: 0.475 | f-score: 0.584


In [10]:
# Display fps from the GeoWebNews "filtered toponyms" evaluation
# (presumably the false-positive strings, going by the name and the reported fp count).
fps

['African American',
 'Syrian Kurdish enclave',
 'Manassa',
 'Gainesville, Va',
 'Parkland, Florida',
 'Nigeria',
 'Columbus, Ohio',
 'COLUMBUS, Ohio',
 'Nigeria',
 'Bollywood',
 'Greece Italy',
 'Emirat',
 'West',
 'Syria',
 'western Syria',
 'ea',
 'Idlib province',
 'dale',
 'dale',
 'Austin, Texas',
 'L',
 'LON',
 'West',
 'oi',
 'Asia-Pacific',
 'Nadu',
 'Tamil Nadu',
 'Karnataka',
 'Tamil Nadu',
 'Karnataka',
 'Tamil Nadu',
 'Tamil Nadu',
 'Tamil Nadu',
 'China',
 'Chinese',
 'Pittsburgh, Pennsylvania',
 'Venezuela',
 'Orinoco area',
 'United',
 'Middle East and Africa',
 'Malaysia',
 'Norwegian',
 'South African',
 'apartheid',
 'platinum belt',
 'platinum belt',
 'Bafokeng community',
 'fokeng',
 'eng',
 'D',
 'Cork city',
 '22nd St. North',
 'LON',
 'Britain',
 'U.S.',
 'British',
 'Laguna 2nd District',
 'hall',
 'hall',
 'Umuanunu',
 'Sokoto',
 'Guma Local Government Area of the state',
 'OUTH WAZIRISTAN',
 'New Quay, Ceredigion',
 'Beverly Hills, California',
 'Gilgit',
 'B

In [11]:
# Display fns from the GeoWebNews "filtered toponyms" evaluation
# (presumably the false-negative strings, going by the name and the reported fn count).
fns

['Louisiana',
 'French',
 'French Quarter',
 'Faubourg Marigny',
 'BEIRUT',
 'Kurdish',
 'Turkish',
 'Syrian',
 'Kurdish',
 'Qamishli',
 'Syrian',
 'Turkish',
 'Syrian',
 'Turkish',
 'Syrian',
 'Kurdish',
 'Turkish',
 'Syrian',
 'Lake Manassas',
 'Robert Trent Jones Golf Club',
 'Stonewall Golf Club',
 'Stonewall',
 'Virginia Gateway',
 'Haymarket Village Center',
 'Buckland Elementary',
 'Ronald Wilson Reagan Middle',
 'Patriot High',
 'Turtle Point',
 'Wegmans',
 'Old Town Manassas',
 'Vienna/Fairfax-GMU Metro',
 'Dulles International Airport',
 'Washington',
 'Washington',
 'Parkland',
 'African',
 'African',
 'African',
 'Russian',
 'Hollywood',
 'Connecticut',
 'Columbus',
 'Ohio',
 "St. Peter's Basilica",
 'Nigerian',
 'Muscat',
 'Muscat',
 'New York',
 'Mediterranean',
 'Manila',
 'Manila Cathedral',
 'Europe',
 'France',
 'EU',
 'Europe',
 'EU',
 'EU',
 'Europe',
 'European',
 'European',
 'European',
 'France',
 'Europe',
 'European',
 'Europe',
 'Greece',
 'Italy',
 'August',