In [2]:
# Import necessary packages
# Ensure installation of nltk package in conda environment
import os
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Add directory above current directory to path so we can import our pre-built package
import sys; sys.path.insert(0, '../..')
from entity_disambiguation.preprocessing import process_input, normalize_text

# Import NLP stopwords
from nltk.corpus import stopwords

# Import progress bar package
from tqdm import tqdm

In [3]:
try: 
    stop = stopwords.words('english')
except LookupError:
    # Download stopwords if it's your first time
    import nltk
    nltk.download('stopwords')

## 1. Process Aida-Conll-Yago (ACY) dataset into train/test split

In [4]:
# relative path to ACY dataset, should be the same for all users within `entity-disambiguation` repository
acy_path = '../../data/aida-conll-yago-dataset/'

# process_input() takes path location where you've stored AIDA-YAGO2-DATASET.tsv file
# Transforms tsv file into train, test split
# x are words, y are indices
train_x, train_y, test_x, test_y = process_input(acy_path, train = .8)

In [5]:
# Display shape of output train data
display((train_x.shape, train_y.shape))
display((test_x.shape, test_y.shape))

((17805, 2), (17805,))

((4452, 2), (4452,))

In [6]:
# Display x preview
train_x.head(10)

Unnamed: 0,token,in_between_word_count
0,german,2
1,british,3
2,brussels,6
3,european,3
4,commission,0
5,german,6
6,british,5
7,germany,15
8,european,4
9,union,0


In [7]:
# Display y preview
train_y[0:3]

array(['11867', '31717', '3708'], dtype='<U8')

In [8]:
# Save train x/y into numpy arrays
x_np = train_x.token.values
y_np = np.array(train_y, dtype = 'int64')

### Prepare normalized KWNLP

In [10]:
# Provide directory path for KWNLP data
# Should be same for all users of `entity-disambiguation` repository
kwnlp_path = '../../data/kwnlp'

# Load article data
article_df = pd.read_csv(os.path.join(kwnlp_path, 'kwnlp-enwiki-20200920-article.csv'))

# Load anchor target counts data
anchor_df = pd.read_csv(os.path.join(kwnlp_path, 'kwnlp-enwiki-20200920-anchor-target-counts.csv'))

In [11]:
# Display article preview
article_df.head()

Unnamed: 0,page_id,item_id,page_title,views,len_article_chars,len_intro_chars,in_link_count,out_link_count,tmpl_good_article,tmpl_featured_article,tmpl_pseudoscience,tmpl_conspiracy_theories,isa_Q17442446,isa_Q14795564,isa_Q18340514
0,12,6199,Anarchism,35558,40449,409,3826,371,1,0,0,0,0,0,0
1,25,38404,Autism,40081,47659,419,2313,309,0,1,0,0,0,0,0
2,39,101038,Albedo,10770,18766,293,3090,115,0,0,0,0,0,0,0
3,290,9659,A,29398,9538,609,173,149,0,0,0,0,0,0,0
4,303,173,Alabama,46680,74276,369,11864,744,0,0,0,0,0,0,0


In [12]:
# Display anchor preview
anchor_df.head()

Unnamed: 0,anchor_text,target_page_id,count
0,United States,3434750,152451
1,World War II,32927,133668
2,India,14533,112069
3,France,5843419,109669
4,footballer,10568,101027


In [13]:
# Copy anchor_df to new dataframe
at_count_df = anchor_df.copy()

# Normalize anchor_text (lower-case, strip whitespace)
at_count_df["normalized_anchor_text"] = at_count_df["anchor_text"].apply(normalize_text)

# Return all anchor_texts that are non-zero, non-null
at_count_df = at_count_df.loc[at_count_df['normalized_anchor_text'].str.len() > 0, :]

print(len(at_count_df))
at_count_df.head(3)

15269229


Unnamed: 0,anchor_text,target_page_id,count,normalized_anchor_text
0,United States,3434750,152451,united states
1,World War II,32927,133668,world war ii
2,India,14533,112069,india


Inner join anchor-target data (mention to linked entity) with Wikipedia page article data. This lets us collate stats like page views with the target (entity) of mentions. Page views serves as another baseline model by selecting the page/entity that is most viewed for that anchor text.

In [14]:
# Merge at_count and article stats dataframes
at_count_df = pd.merge(
    at_count_df,
    article_df,
    how="inner",
    left_on="target_page_id",
    right_on="page_id")

# Rename columns for clarity
at_count_df = at_count_df.rename(columns={
    'title': 'target_page_title',
    'item_id': 'target_item_id',
    'views': 'target_page_views',
    'count': 'anchor_target_count',
    'page_title': 'target_page_title'})

# Specify column ordering
at_count_df = at_count_df[[
    "normalized_anchor_text",
    "target_page_id",
    "target_item_id",
    "target_page_title",
    "target_page_views",
    "anchor_target_count"]]

# Display preview
at_count_df.head(3)

Unnamed: 0,normalized_anchor_text,target_page_id,target_item_id,target_page_title,target_page_views,anchor_target_count
0,united states,3434750,30,United_States,460156,152451
1,american,3434750,30,United_States,460156,65722
2,usa,3434750,30,United_States,460156,8559


Drop NaNs. These have been encoded as `string` through the text normalisation previously.

In [15]:
# Drop NaNs
len_orig = len(at_count_df)
at_count_df = at_count_df.loc[at_count_df['normalized_anchor_text'] != 'nan']
print('Dropped rows:', len_orig - len(at_count_df))

Dropped rows: 3596


## Develop Wikipedia2Vec Model

We will create a normalized anchor text dataset from KWNLP. We will then use the pre-trained word embeddings provided via [Wikipedia2Vec](https://wikipedia2vec.github.io/wikipedia2vec/) to calculate a similarity score between a mention in ACY and a normalized anchor text in KWNLP.

In [16]:
# Import package
from wikipedia2vec import Wikipedia2Vec

In [17]:
# Load unzipped pkl file
wiki2vec = Wikipedia2Vec.load("../../embeddings/enwiki_20180420_100d.pkl")

In [29]:
# Experiment with returning words from anchor texts
for word in at_count_df['normalized_anchor_text'][:5]:
    print("Anchor Text: ", word)
    found_word = wiki2vec.get_word(word)
    print("Found Word: ", found_word)
    if found_word is None:
        continue
    else:
        similar_word = wiki2vec.most_similar(wiki2vec.get_word(word), 5)
        print("Similar Word: ", similar_word)
    print("**************************")
    

Anchor Text:  united states
Found Word:  None
Anchor Text:  american
Found Word:  <Word american>
Similar Word:  [(<Word american>, 1.0), (<Word african>, 0.6946235), (<Word canadian>, 0.6769873), (<Word africanamerican>, 0.67509526), (<Word caribbeanist>, 0.6693679)]
**************************
Anchor Text:  usa
Found Word:  <Word usa>
Similar Word:  [(<Word usa>, 1.0000001), (<Word finnfest>, 0.7673373), (<Word priceton>, 0.74322855), (<Word mddtusa>, 0.72683185), (<Word hanahou>, 0.7265224)]
**************************
Anchor Text:  u.s.
Found Word:  None
Anchor Text:  us
Found Word:  <Word us>
Similar Word:  [(<Word us>, 1.0), (<Word 20px25px>, 0.72674584), (<Word truckstops>, 0.6859466), (<Word 39billion>, 0.68373144), (<Word 2004dollars>, 0.67024976)]
**************************


In [36]:
# Experiment with returning entities from anchor texts
for word in at_count_df['normalized_anchor_text'][:5]:
    print("Anchor Text: ", word)
    found_entity = wiki2vec.get_entity(word)
    print("Found Entity: ", found_entity)
#     if found_entity is None:
#         continue
#     else:
#         similar_word = wiki2vec.most_similar(wiki2vec.get_word(word), 5)
#         print("Similar Word: ", similar_word)
    print("**************************")
    

Anchor Text:  united states
Found Entity:  None
**************************
Anchor Text:  american
Found Entity:  None
**************************
Anchor Text:  usa
Found Entity:  None
**************************
Anchor Text:  u.s.
Found Entity:  None
**************************
Anchor Text:  us
Found Entity:  None
**************************


In [37]:
# Our normalizing function has actually prevented wiki2vec from correctly identifying entities
# Below, capitalized 'United States' returns correct entity whereas lowercased 'united states' does not
print(wiki2vec.get_entity('United States'))
print(wiki2vec.get_entity('united states'))

<Entity United States>
None


#### Challenges & Pivot

It appears unlikely to be able to use Wikipedia2Vec to automatically link anchor texts to Entities. Instead, let's try to use Gensim and the word similarity score or distance metric functionality.

In [39]:
# Import gensim
from gensim.models import KeyedVectors

In [41]:
%%time

# Use KeyedVectors to load txt file
w2v = KeyedVectors.load_word2vec_format("../../embeddings/enwiki_20180420_100d.txt")

CPU times: user 5min 45s, sys: 15.6 s, total: 6min
Wall time: 6min 13s


In [46]:
# How many unique tokens are in our ACY dataset
print("Token Count: ", len(x_np), ", Unique Tokens Count: ", len(np.unique(x_np)))

Token Count:  17805 , Unique Tokens Count:  4142


In [60]:
# How many page titles do we have in our KWNLP dataset
print("Page Count: ", len(article_df['page_title']))

Page Count:  6189965


In [73]:
%%time
# Create single token similarity matrix to assess computation time

# Prepare blank matrix
w2v_similarity_matrix = []
page_titles_not_found = 0
for token in x_np[:1]:
    print("Token: ", token)
    for page_title in article_df['page_title'][:20]:
        print("Page Title: ", page_title)
        try:
            token_title_similarity = w2v.similarity(token, page_title)
            print("Token-Title Similarity: ", round(token_title_similarity, 5))
        except KeyError:
            page_titles_not_found += 1
print("******************************************************")
print("Page Titles Not In Wikipedia2Vec Vocabulary: ", page_titles_not_found)
print("******************************************************")

Token:  german
Page Title:  Anarchism
Page Title:  Autism
Page Title:  Albedo
Page Title:  A
Page Title:  Alabama
Page Title:  Achilles
Page Title:  Abraham_Lincoln
Page Title:  Aristotle
Page Title:  An_American_in_Paris
Page Title:  Academy_Award_for_Best_Production_Design
Page Title:  Academy_Awards
Page Title:  Actrius
Page Title:  Animalia_(book)
Page Title:  International_Atomic_Time
Page Title:  Altruism
Page Title:  Ayn_Rand
Page Title:  Alain_Connes
Page Title:  Allan_Dwan
Page Title:  Algeria
Page Title:  List_of_Atlas_Shrugged_characters
******************************************************
Page Titles Not In Wikipedia2Vec Vocabulary:  20
******************************************************
CPU times: user 3.59 ms, sys: 8.42 ms, total: 12 ms
Wall time: 20.6 ms


#### It appears impossible to match page titles to words in the Wikipedia2Vec vocabulary.

In [127]:
%%time
# Create single token similarity matrix to assess computation time
match_pct = 1

# Prepare blank matrix
w2v_similarity_dict = {}
anchor_texts_not_found = 0

# Swap between tqdm depending on if you want progress bar
for token in tqdm(x_np[:int(np.ceil(len(x_np)*0.05))]):
# for token in x_np[:int(np.ceil(len(x_np)*0.01))]:

#     print("Token: ", token)
    anchor_text_dict = {}
    
# Swap between tqdm depending on if you want progress bar
#     for anchor_text in tqdm(at_count_df['normalized_anchor_text'][:int(np.ceil(len(at_count_df)*(match_pct/100)))]):
    for anchor_text in at_count_df['normalized_anchor_text'][:int(np.ceil(len(at_count_df)*(match_pct/100)))]:
#         print("Anchor Text: ", anchor_text)
        try:
            token_title_similarity = w2v.similarity(token, anchor_text)
#             print("Token-Title Similarity: ", round(token_title_similarity, 5))
            anchor_text_dict[anchor_text] = token_title_similarity
        except KeyError:
            anchor_texts_not_found += 1
#             print(KeyError, "AT Not in Vocabulary")
    try:
        matched_value = max(anchor_text_dict, key=anchor_text_dict.get)
        matched_score = anchor_text_dict[matched_value]
    except ValueError:
        matched_value = None
        matched_score = 0.
    print("Token: ", token)
    print("Token Matched Value: ", matched_value, round(matched_score,5))
    w2v_similarity_dict[token] = (matched_value, matched_score)
print("******************************************************")
print("Anchor Texts Not In Wikipedia2Vec Vocabulary: ", anchor_texts_not_found)
print("******************************************************")

  0%|          | 1/891 [00:04<1:02:50,  4.24s/it]

Token:  german
Token Matched Value:  german 1.0


  0%|          | 2/891 [00:06<52:04,  3.51s/it]  

Token:  british
Token Matched Value:  british 1.0


  0%|          | 3/891 [00:07<42:53,  2.90s/it]

Token:  brussels
Token Matched Value:  brussels 1.0


  0%|          | 4/891 [00:09<36:44,  2.49s/it]

Token:  european
Token Matched Value:  european 1.0


  1%|          | 5/891 [00:10<32:14,  2.18s/it]

Token:  commission
Token Matched Value:  commission 1.0


  1%|          | 6/891 [00:11<28:51,  1.96s/it]

Token:  german
Token Matched Value:  german 1.0


  1%|          | 7/891 [00:13<26:05,  1.77s/it]

Token:  british
Token Matched Value:  british 1.0


  1%|          | 8/891 [00:14<25:09,  1.71s/it]

Token:  germany
Token Matched Value:  germany 1.0


  1%|          | 9/891 [00:16<23:28,  1.60s/it]

Token:  european
Token Matched Value:  european 1.0


  1%|          | 10/891 [00:17<22:39,  1.54s/it]

Token:  union
Token Matched Value:  union 1.0


  1%|          | 11/891 [00:19<22:01,  1.50s/it]

Token:  britain
Token Matched Value:  britain 1.0


  1%|▏         | 12/891 [00:20<21:09,  1.44s/it]

Token:  commission
Token Matched Value:  commission 1.0


  1%|▏         | 13/891 [00:21<20:39,  1.41s/it]

Token:  european
Token Matched Value:  european 1.0


  2%|▏         | 14/891 [00:23<20:55,  1.43s/it]

Token:  union
Token Matched Value:  union 1.0


  2%|▏         | 15/891 [00:24<20:36,  1.41s/it]

Token:  franz
Token Matched Value:  ludwig 0.83038


  2%|▏         | 16/891 [00:25<20:31,  1.41s/it]

Token:  fischler
Token Matched Value:  bender 0.6349


  2%|▏         | 17/891 [00:27<20:18,  1.39s/it]

Token:  britain
Token Matched Value:  britain 1.0


  2%|▏         | 18/891 [00:28<20:00,  1.38s/it]

Token:  france
Token Matched Value:  france 1.0


  2%|▏         | 19/891 [00:29<19:34,  1.35s/it]

Token:  bse
Token Matched Value:  bse 1.0


  2%|▏         | 20/891 [00:31<19:25,  1.34s/it]

Token:  spanish
Token Matched Value:  spanish 1.0


  2%|▏         | 21/891 [00:32<19:23,  1.34s/it]

Token:  loyola
Token Matched Value:  college 0.71429


  2%|▏         | 22/891 [00:33<19:05,  1.32s/it]

Token:  de
Token Matched Value:  de 1.0


  3%|▎         | 23/891 [00:35<18:53,  1.31s/it]

Token:  palacio
Token Matched Value:  madrid 0.76879


  3%|▎         | 24/891 [00:36<19:02,  1.32s/it]

Token:  france
Token Matched Value:  france 1.0


  3%|▎         | 25/891 [00:37<19:02,  1.32s/it]

Token:  britain
Token Matched Value:  britain 1.0


  3%|▎         | 26/891 [00:39<18:51,  1.31s/it]

Token:  bse
Token Matched Value:  bse 1.0


  3%|▎         | 27/891 [00:40<19:33,  1.36s/it]

Token:  british
Token Matched Value:  british 1.0


  3%|▎         | 28/891 [00:41<19:27,  1.35s/it]

Token:  german
Token Matched Value:  german 1.0


  3%|▎         | 29/891 [00:43<19:18,  1.34s/it]

Token:  british
Token Matched Value:  british 1.0


  3%|▎         | 30/891 [00:44<19:16,  1.34s/it]

Token:  europe
Token Matched Value:  europe 1.0


  3%|▎         | 31/891 [00:45<19:06,  1.33s/it]

Token:  bonn
Token Matched Value:  bonn 1.0


  4%|▎         | 32/891 [00:47<19:05,  1.33s/it]

Token:  british
Token Matched Value:  british 1.0


  4%|▎         | 33/891 [00:48<18:54,  1.32s/it]

Token:  germany
Token Matched Value:  germany 1.0


  4%|▍         | 34/891 [00:49<18:39,  1.31s/it]

Token:  britain
Token Matched Value:  britain 1.0


  4%|▍         | 35/891 [00:51<18:34,  1.30s/it]

Token:  british
Token Matched Value:  british 1.0


  4%|▍         | 36/891 [00:52<18:36,  1.31s/it]

Token:  hendrix
Token Matched Value:  hendrix 1.0


  4%|▍         | 37/891 [00:53<19:53,  1.40s/it]

Token:  london
Token Matched Value:  london 1.0


  4%|▍         | 38/891 [00:54<14:59,  1.05s/it]

Token:  u.s.
Token Matched Value:  None 0.0


  4%|▍         | 39/891 [00:55<17:21,  1.22s/it]

Token:  jimi
Token Matched Value:  jimi 1.0


  4%|▍         | 40/891 [00:57<17:52,  1.26s/it]

Token:  hendrix
Token Matched Value:  hendrix 1.0


  5%|▍         | 41/891 [00:58<18:12,  1.29s/it]

Token:  hendrix
Token Matched Value:  hendrix 1.0


  5%|▍         | 42/891 [00:59<19:02,  1.35s/it]

Token:  london
Token Matched Value:  london 1.0


  5%|▍         | 43/891 [01:01<19:50,  1.40s/it]

Token:  english
Token Matched Value:  english 1.0


  5%|▍         | 44/891 [01:02<19:42,  1.40s/it]

Token:  nottingham
Token Matched Value:  nottingham 1.0


  5%|▌         | 45/891 [01:04<19:23,  1.37s/it]

Token:  hendrix
Token Matched Value:  hendrix 1.0


  5%|▌         | 46/891 [01:05<19:24,  1.38s/it]

Token:  hendrix
Token Matched Value:  hendrix 1.0


  5%|▌         | 47/891 [01:06<19:03,  1.35s/it]

Token:  australian
Token Matched Value:  australian 1.0


  5%|▌         | 48/891 [01:08<19:09,  1.36s/it]

Token:  china
Token Matched Value:  china 1.0


  5%|▌         | 49/891 [01:09<20:13,  1.44s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  6%|▌         | 50/891 [01:11<20:06,  1.43s/it]

Token:  beijing
Token Matched Value:  beijing 1.0


  6%|▌         | 51/891 [01:12<19:54,  1.42s/it]

Token:  china
Token Matched Value:  china 1.0


  6%|▌         | 52/891 [01:14<19:58,  1.43s/it]

Token:  taipei
Token Matched Value:  taipei 1.0


  6%|▌         | 53/891 [01:15<19:57,  1.43s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  6%|▌         | 54/891 [01:16<19:18,  1.38s/it]

Token:  strait
Token Matched Value:  straits 0.7295


  6%|▌         | 55/891 [01:18<18:57,  1.36s/it]

Token:  ukraine
Token Matched Value:  ukraine 1.0


  6%|▋         | 56/891 [01:19<18:34,  1.34s/it]

Token:  taiwanese
Token Matched Value:  taiwanese 1.0


  6%|▋         | 57/891 [01:20<18:17,  1.32s/it]

Token:  lien
Token Matched Value:  ching 0.6176


  7%|▋         | 58/891 [01:22<18:19,  1.32s/it]

Token:  chan
Token Matched Value:  ching 0.84038


  7%|▋         | 59/891 [01:23<18:11,  1.31s/it]

Token:  beijing
Token Matched Value:  beijing 1.0


  7%|▋         | 60/891 [01:24<18:01,  1.30s/it]

Token:  chinese
Token Matched Value:  chinese 1.0


  7%|▋         | 61/891 [01:25<17:54,  1.29s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  7%|▋         | 62/891 [01:27<17:49,  1.29s/it]

Token:  foreign
Token Matched Value:  diplomats 0.65518


  7%|▋         | 63/891 [01:28<18:02,  1.31s/it]

Token:  ministry
Token Matched Value:  ministry 1.0


  7%|▋         | 64/891 [01:29<17:52,  1.30s/it]

Token:  china
Token Matched Value:  china 1.0


  7%|▋         | 65/891 [01:31<17:44,  1.29s/it]

Token:  taipei
Token Matched Value:  taipei 1.0


  7%|▋         | 66/891 [01:32<17:38,  1.28s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  8%|▊         | 67/891 [01:33<17:36,  1.28s/it]

Token:  reuters
Token Matched Value:  reuters 1.0


  8%|▊         | 68/891 [01:34<17:34,  1.28s/it]

Token:  television
Token Matched Value:  television 1.0


  8%|▊         | 69/891 [01:36<17:41,  1.29s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  8%|▊         | 70/891 [01:37<17:34,  1.28s/it]

Token:  beijing
Token Matched Value:  beijing 1.0


  8%|▊         | 71/891 [01:38<17:31,  1.28s/it]

Token:  china
Token Matched Value:  china 1.0


  8%|▊         | 72/891 [01:40<17:31,  1.28s/it]

Token:  taipei
Token Matched Value:  taipei 1.0


  8%|▊         | 73/891 [01:41<17:48,  1.31s/it]

Token:  ukraine
Token Matched Value:  ukraine 1.0


  8%|▊         | 74/891 [01:42<17:44,  1.30s/it]

Token:  taiwanese
Token Matched Value:  taiwanese 1.0


  8%|▊         | 75/891 [01:44<18:04,  1.33s/it]

Token:  china
Token Matched Value:  china 1.0


  9%|▊         | 76/891 [01:45<17:57,  1.32s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  9%|▊         | 77/891 [01:46<17:51,  1.32s/it]

Token:  beijing
Token Matched Value:  beijing 1.0


  9%|▉         | 78/891 [01:48<17:45,  1.31s/it]

Token:  china
Token Matched Value:  china 1.0


  9%|▉         | 79/891 [01:49<17:48,  1.32s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


  9%|▉         | 80/891 [01:50<17:39,  1.31s/it]

Token:  xinhua
Token Matched Value:  beijinger 0.6833


  9%|▉         | 81/891 [01:51<17:32,  1.30s/it]

Token:  association
Token Matched Value:  association 1.0


  9%|▉         | 82/891 [01:53<17:28,  1.30s/it]

Token:  for
Token Matched Value:  and 0.75731


  9%|▉         | 83/891 [01:54<17:26,  1.29s/it]

Token:  relations
Token Matched Value:  relations 1.0


  9%|▉         | 84/891 [01:55<17:17,  1.29s/it]

Token:  across
Token Matched Value:  outside 0.72401


 10%|▉         | 85/891 [01:57<17:14,  1.28s/it]

Token:  the
Token Matched Value:  the 1.0


 10%|▉         | 86/891 [01:58<17:14,  1.29s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


 10%|▉         | 87/891 [01:59<17:41,  1.32s/it]

Token:  straits
Token Matched Value:  straits 1.0


 10%|▉         | 88/891 [02:01<17:56,  1.34s/it]

Token:  german
Token Matched Value:  german 1.0


 10%|▉         | 89/891 [02:02<17:42,  1.32s/it]

Token:  frankfurt
Token Matched Value:  frankfurt 1.0


 10%|█         | 90/891 [02:03<17:35,  1.32s/it]

Token:  german
Token Matched Value:  german 1.0


 10%|█         | 91/891 [02:05<17:42,  1.33s/it]

Token:  germans
Token Matched Value:  germans 1.0


 10%|█         | 92/891 [02:06<18:07,  1.36s/it]

Token:  german
Token Matched Value:  german 1.0


 10%|█         | 93/891 [02:07<18:02,  1.36s/it]

Token:  german
Token Matched Value:  german 1.0


 11%|█         | 94/891 [02:09<17:43,  1.33s/it]

Token:  volkswagen
Token Matched Value:  bmw 0.80866


 11%|█         | 95/891 [02:10<17:27,  1.32s/it]

Token:  ag
Token Matched Value:  ag 1.0


 11%|█         | 96/891 [02:11<17:33,  1.32s/it]

Token:  opel
Token Matched Value:  bmw 0.80178


 11%|█         | 97/891 [02:13<17:43,  1.34s/it]

Token:  ag
Token Matched Value:  ag 1.0


 11%|█         | 98/891 [02:14<17:31,  1.33s/it]

Token:  general
Token Matched Value:  general 1.0


 11%|█         | 99/891 [02:15<17:59,  1.36s/it]

Token:  motors
Token Matched Value:  electric 0.72697


 11%|█         | 100/891 [02:17<17:55,  1.36s/it]

Token:  ford
Token Matched Value:  ford 1.0


 11%|█▏        | 101/891 [02:18<17:38,  1.34s/it]

Token:  porsche
Token Matched Value:  bmw 0.85439


 11%|█▏        | 102/891 [02:19<17:26,  1.33s/it]

Token:  porsche
Token Matched Value:  bmw 0.85439


 12%|█▏        | 103/891 [02:21<17:12,  1.31s/it]

Token:  greek
Token Matched Value:  greek 1.0


 12%|█▏        | 104/891 [02:22<17:01,  1.30s/it]

Token:  athens
Token Matched Value:  athens 1.0


 12%|█▏        | 105/891 [02:23<16:53,  1.29s/it]

Token:  greek
Token Matched Value:  greek 1.0


 12%|█▏        | 106/891 [02:24<16:49,  1.29s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 12%|█▏        | 107/891 [02:26<16:59,  1.30s/it]

Token:  simitis
Token Matched Value:  omirou 0.69991


 12%|█▏        | 108/891 [02:27<16:54,  1.30s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 12%|█▏        | 109/891 [02:28<16:57,  1.30s/it]

Token:  skandalidis
Token Matched Value:  omirou 0.81893


 12%|█▏        | 110/891 [02:30<16:57,  1.30s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 12%|█▏        | 111/891 [02:31<16:56,  1.30s/it]

Token:  simitis
Token Matched Value:  omirou 0.69991


 13%|█▎        | 112/891 [02:32<16:54,  1.30s/it]

Token:  london
Token Matched Value:  london 1.0


 13%|█▎        | 113/891 [02:34<16:51,  1.30s/it]

Token:  bayerische
Token Matched Value:  bayerischer 0.78391


 13%|█▎        | 114/891 [02:35<16:51,  1.30s/it]

Token:  vereinsbank
Token Matched Value:  ag 0.76389


 13%|█▎        | 115/891 [02:36<16:56,  1.31s/it]

Token:  german
Token Matched Value:  german 1.0


 13%|█▎        | 116/891 [02:38<17:14,  1.33s/it]

Token:  bayerische
Token Matched Value:  bayerischer 0.78391


 13%|█▎        | 117/891 [02:39<17:04,  1.32s/it]

Token:  vereinsbank
Token Matched Value:  ag 0.76389


 13%|█▎        | 118/891 [02:40<16:53,  1.31s/it]

Token:  london
Token Matched Value:  london 1.0


 13%|█▎        | 119/891 [02:41<16:46,  1.30s/it]

Token:  swedish
Token Matched Value:  swedish 1.0


 13%|█▎        | 120/891 [02:43<16:43,  1.30s/it]

Token:  london
Token Matched Value:  london 1.0


 14%|█▎        | 121/891 [02:44<17:09,  1.34s/it]

Token:  english
Token Matched Value:  english 1.0


 14%|█▎        | 122/891 [02:45<17:13,  1.34s/it]

Token:  sweden
Token Matched Value:  sweden 1.0


 14%|█▍        | 123/891 [02:47<17:09,  1.34s/it]

Token:  syria
Token Matched Value:  syria 1.0


 14%|█▍        | 124/891 [02:48<16:51,  1.32s/it]

Token:  tartous
Token Matched Value:  dimashq 0.76547


 14%|█▍        | 125/891 [02:49<16:37,  1.30s/it]

Token:  israel
Token Matched Value:  israel 1.0


 14%|█▍        | 126/891 [02:51<16:28,  1.29s/it]

Token:  syria
Token Matched Value:  syria 1.0


 14%|█▍        | 127/891 [02:52<16:38,  1.31s/it]

Token:  jerusalem
Token Matched Value:  jerusalem 1.0


 14%|█▍        | 128/891 [02:53<16:33,  1.30s/it]

Token:  israel
Token Matched Value:  israel 1.0


 14%|█▍        | 129/891 [02:55<16:24,  1.29s/it]

Token:  syria
Token Matched Value:  syria 1.0


 15%|█▍        | 130/891 [02:56<16:16,  1.28s/it]

Token:  itamar
Token Matched Value:  israeli 0.67548


 15%|█▍        | 131/891 [02:57<16:13,  1.28s/it]

Token:  rabinovich
Token Matched Value:  varshava 0.65515


 15%|█▍        | 132/891 [02:58<16:10,  1.28s/it]

Token:  israel
Token Matched Value:  israel 1.0


 15%|█▍        | 133/891 [03:00<16:08,  1.28s/it]

Token:  washington
Token Matched Value:  washington 1.0


 15%|█▌        | 134/891 [03:01<16:05,  1.28s/it]

Token:  syria
Token Matched Value:  syria 1.0


 15%|█▌        | 135/891 [03:02<16:03,  1.28s/it]

Token:  israel
Token Matched Value:  israel 1.0


 15%|█▌        | 136/891 [03:03<16:02,  1.27s/it]

Token:  radio
Token Matched Value:  radio 1.0


 15%|█▌        | 137/891 [03:05<16:28,  1.31s/it]

Token:  damascus
Token Matched Value:  damascus 1.0


 15%|█▌        | 138/891 [03:06<16:35,  1.32s/it]

Token:  rabinovich
Token Matched Value:  varshava 0.65515


 16%|█▌        | 139/891 [03:07<16:28,  1.31s/it]

Token:  eliahu
Token Matched Value:  harav 0.67537


 16%|█▌        | 140/891 [03:08<12:25,  1.01it/s]

Token:  ben-elissar
Token Matched Value:  None 0.0


 16%|█▌        | 141/891 [03:09<13:45,  1.10s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 16%|█▌        | 142/891 [03:10<14:38,  1.17s/it]

Token:  egypt
Token Matched Value:  egypt 1.0


 16%|█▌        | 143/891 [03:12<15:15,  1.22s/it]

Token:  likud
Token Matched Value:  shas 0.82822


 16%|█▌        | 144/891 [03:13<15:27,  1.24s/it]

Token:  israel
Token Matched Value:  israel 1.0


 16%|█▋        | 145/891 [03:14<15:57,  1.28s/it]

Token:  syria
Token Matched Value:  syria 1.0


 16%|█▋        | 146/891 [03:16<16:27,  1.33s/it]

Token:  washington
Token Matched Value:  washington 1.0


 16%|█▋        | 147/891 [03:18<17:48,  1.44s/it]

Token:  damascus
Token Matched Value:  damascus 1.0


 17%|█▋        | 148/891 [03:19<18:09,  1.47s/it]

Token:  syria
Token Matched Value:  syria 1.0


 17%|█▋        | 149/891 [03:20<17:47,  1.44s/it]

Token:  israel
Token Matched Value:  israel 1.0


 17%|█▋        | 150/891 [03:22<17:30,  1.42s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 17%|█▋        | 151/891 [03:23<17:18,  1.40s/it]

Token:  damascus
Token Matched Value:  damascus 1.0


 17%|█▋        | 152/891 [03:25<17:04,  1.39s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 17%|█▋        | 153/891 [03:26<16:53,  1.37s/it]

Token:  david
Token Matched Value:  david 1.0


 17%|█▋        | 154/891 [03:27<16:39,  1.36s/it]

Token:  levy
Token Matched Value:  goldberg 0.67473


 17%|█▋        | 155/891 [03:28<16:24,  1.34s/it]

Token:  israel
Token Matched Value:  israel 1.0


 18%|█▊        | 156/891 [03:30<16:12,  1.32s/it]

Token:  radio
Token Matched Value:  radio 1.0


 18%|█▊        | 157/891 [03:31<15:58,  1.31s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 18%|█▊        | 158/891 [03:32<15:52,  1.30s/it]

Token:  benjamin
Token Matched Value:  benjamin 1.0


 18%|█▊        | 159/891 [03:34<15:45,  1.29s/it]

Token:  netanyahu
Token Matched Value:  israelis 0.72976


 18%|█▊        | 160/891 [03:35<16:03,  1.32s/it]

Token:  golan
Token Matched Value:  israeli 0.72882


 18%|█▊        | 161/891 [03:36<15:50,  1.30s/it]

Token:  heights
Token Matched Value:  heights 1.0


 18%|█▊        | 162/891 [03:38<15:42,  1.29s/it]

Token:  israel
Token Matched Value:  israel 1.0


 18%|█▊        | 163/891 [03:39<15:37,  1.29s/it]

Token:  syria
Token Matched Value:  syria 1.0


 18%|█▊        | 164/891 [03:40<15:31,  1.28s/it]

Token:  middle
Token Matched Value:  middle 1.0


 19%|█▊        | 165/891 [03:41<15:25,  1.28s/it]

Token:  east
Token Matched Value:  east 1.0


 19%|█▊        | 166/891 [03:43<15:25,  1.28s/it]

Token:  golan
Token Matched Value:  israeli 0.72882


 19%|█▊        | 167/891 [03:44<15:23,  1.28s/it]

Token:  golan
Token Matched Value:  israeli 0.72882


 19%|█▉        | 168/891 [03:45<15:20,  1.27s/it]

Token:  israel
Token Matched Value:  israel 1.0


 19%|█▉        | 169/891 [03:46<15:38,  1.30s/it]

Token:  channel
Token Matched Value:  channel 1.0


 19%|█▉        | 170/891 [03:48<16:04,  1.34s/it]

Token:  two
Token Matched Value:  two 1.0


 19%|█▉        | 171/891 [03:49<16:08,  1.35s/it]

Token:  damascus
Token Matched Value:  damascus 1.0


 19%|█▉        | 172/891 [03:51<15:59,  1.33s/it]

Token:  israel
Token Matched Value:  israel 1.0


 19%|█▉        | 173/891 [03:52<15:46,  1.32s/it]

Token:  netanyahu
Token Matched Value:  israelis 0.72976


 20%|█▉        | 174/891 [03:53<15:36,  1.31s/it]

Token:  netanyahu
Token Matched Value:  israelis 0.72976


 20%|█▉        | 175/891 [03:54<15:28,  1.30s/it]

Token:  syria
Token Matched Value:  syria 1.0


 20%|█▉        | 176/891 [03:56<15:20,  1.29s/it]

Token:  united
Token Matched Value:  united 1.0


 20%|█▉        | 177/891 [03:57<15:15,  1.28s/it]

Token:  states
Token Matched Value:  states 1.0


 20%|█▉        | 178/891 [03:58<15:10,  1.28s/it]

Token:  moscow
Token Matched Value:  moscow 1.0


 20%|██        | 179/891 [04:00<15:08,  1.28s/it]

Token:  polish
Token Matched Value:  polish 1.0


 20%|██        | 180/891 [04:01<15:05,  1.27s/it]

Token:  libya
Token Matched Value:  libya 1.0


 20%|██        | 181/891 [04:02<15:06,  1.28s/it]

Token:  tunis
Token Matched Value:  tunis 1.0


 20%|██        | 182/891 [04:03<15:02,  1.27s/it]

Token:  polish
Token Matched Value:  polish 1.0


 21%|██        | 183/891 [04:05<15:05,  1.28s/it]

Token:  polish
Token Matched Value:  polish 1.0


 21%|██        | 184/891 [04:06<15:16,  1.30s/it]

Token:  libya
Token Matched Value:  libya 1.0


 21%|██        | 185/891 [04:07<15:16,  1.30s/it]

Token:  polish
Token Matched Value:  polish 1.0


 21%|██        | 186/891 [04:09<15:22,  1.31s/it]

Token:  polish
Token Matched Value:  polish 1.0


 21%|██        | 187/891 [04:10<15:18,  1.30s/it]

Token:  reuters
Token Matched Value:  reuters 1.0


 21%|██        | 188/891 [04:11<15:23,  1.31s/it]

Token:  poland
Token Matched Value:  poland 1.0


 21%|██        | 189/891 [04:13<15:31,  1.33s/it]

Token:  libya
Token Matched Value:  libya 1.0


 21%|██▏       | 190/891 [04:14<15:25,  1.32s/it]

Token:  polish
Token Matched Value:  polish 1.0


 21%|██▏       | 191/891 [04:15<15:15,  1.31s/it]

Token:  libya
Token Matched Value:  libya 1.0


 22%|██▏       | 192/891 [04:17<15:28,  1.33s/it]

Token:  iranian
Token Matched Value:  iranian 1.0


 22%|██▏       | 193/891 [04:18<15:43,  1.35s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 22%|██▏       | 194/891 [04:19<15:53,  1.37s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 22%|██▏       | 195/891 [04:21<15:53,  1.37s/it]

Token:  iranian
Token Matched Value:  iranian 1.0


 22%|██▏       | 196/891 [04:22<15:46,  1.36s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 22%|██▏       | 197/891 [04:23<15:26,  1.33s/it]

Token:  iran
Token Matched Value:  iran 1.0


 22%|██▏       | 198/891 [04:25<15:11,  1.32s/it]

Token:  kurdish
Token Matched Value:  turkmen 0.85568


 22%|██▏       | 199/891 [04:26<14:59,  1.30s/it]

Token:  iranian
Token Matched Value:  iranian 1.0


 22%|██▏       | 200/891 [04:27<14:53,  1.29s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 23%|██▎       | 201/891 [04:28<14:48,  1.29s/it]

Token:  massoud
Token Matched Value:  taliban 0.82362


 23%|██▎       | 202/891 [04:30<14:45,  1.29s/it]

Token:  rajavi
Token Matched Value:  saddam 0.74716


 23%|██▎       | 203/891 [04:31<14:44,  1.29s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 23%|██▎       | 204/891 [04:32<14:40,  1.28s/it]

Token:  kurdistan
Token Matched Value:  sunnistan 0.81715


 23%|██▎       | 205/891 [04:34<14:44,  1.29s/it]

Token:  democratic
Token Matched Value:  democratic 1.0


 23%|██▎       | 206/891 [04:35<14:55,  1.31s/it]

Token:  party
Token Matched Value:  party 1.0


 23%|██▎       | 207/891 [04:36<14:50,  1.30s/it]

Token:  of
Token Matched Value:  of 1.0


 23%|██▎       | 208/891 [04:37<14:41,  1.29s/it]

Token:  iran
Token Matched Value:  iran 1.0


 23%|██▎       | 209/891 [04:39<14:36,  1.28s/it]

Token:  kdpi
Token Matched Value:  islamist 0.75717


 24%|██▎       | 210/891 [04:40<14:30,  1.28s/it]

Token:  iran
Token Matched Value:  iran 1.0


 24%|██▎       | 211/891 [04:41<14:28,  1.28s/it]

Token:  kurds
Token Matched Value:  turkmens 0.88895


 24%|██▍       | 212/891 [04:43<14:25,  1.27s/it]

Token:  iran
Token Matched Value:  iran 1.0


 24%|██▍       | 213/891 [04:44<14:23,  1.27s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 24%|██▍       | 214/891 [04:45<14:20,  1.27s/it]

Token:  kdpi
Token Matched Value:  islamist 0.75717


 24%|██▍       | 215/891 [04:46<14:34,  1.29s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 24%|██▍       | 216/891 [04:48<14:28,  1.29s/it]

Token:  kurdish
Token Matched Value:  turkmen 0.85568


 24%|██▍       | 217/891 [04:49<14:30,  1.29s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 24%|██▍       | 218/891 [04:50<14:39,  1.31s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 25%|██▍       | 219/891 [04:52<15:01,  1.34s/it]

Token:  kurdish
Token Matched Value:  turkmen 0.85568


 25%|██▍       | 220/891 [04:53<15:12,  1.36s/it]

Token:  iran
Token Matched Value:  iran 1.0


 25%|██▍       | 221/891 [04:55<15:20,  1.37s/it]

Token:  puk
Token Matched Value:  taliban 0.60024


 25%|██▍       | 222/891 [04:56<15:02,  1.35s/it]

Token:  puk
Token Matched Value:  taliban 0.60024


 25%|██▌       | 223/891 [04:57<14:47,  1.33s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 25%|██▌       | 224/891 [04:58<14:37,  1.32s/it]

Token:  kurdistan
Token Matched Value:  sunnistan 0.81715


 25%|██▌       | 225/891 [05:00<14:33,  1.31s/it]

Token:  democratic
Token Matched Value:  democratic 1.0


 25%|██▌       | 226/891 [05:01<14:23,  1.30s/it]

Token:  party
Token Matched Value:  party 1.0


 25%|██▌       | 227/891 [05:02<14:16,  1.29s/it]

Token:  kdp
Token Matched Value:  sunnistan 0.66881


 26%|██▌       | 228/891 [05:04<14:20,  1.30s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 26%|██▌       | 229/891 [05:05<14:13,  1.29s/it]

Token:  kurdish
Token Matched Value:  turkmen 0.85568


 26%|██▌       | 230/891 [05:06<14:12,  1.29s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 26%|██▌       | 231/891 [05:07<14:07,  1.28s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 26%|██▌       | 232/891 [05:09<14:13,  1.30s/it]

Token:  kuwait
Token Matched Value:  kuwait 1.0


 26%|██▌       | 233/891 [05:10<14:16,  1.30s/it]

Token:  gulf
Token Matched Value:  gulf 1.0


 26%|██▋       | 234/891 [05:11<14:16,  1.30s/it]

Token:  war
Token Matched Value:  war 1.0


 26%|██▋       | 235/891 [05:13<14:06,  1.29s/it]

Token:  iranian
Token Matched Value:  iranian 1.0


 26%|██▋       | 236/891 [05:14<14:03,  1.29s/it]

Token:  kdp
Token Matched Value:  sunnistan 0.66881


 27%|██▋       | 237/891 [05:15<13:57,  1.28s/it]

Token:  qasri
Token Matched Value:  ibn 0.8152


 27%|██▋       | 238/891 [05:16<13:59,  1.29s/it]

Token:  suleimaniya
Token Matched Value:  basra 0.74647


 27%|██▋       | 239/891 [05:18<14:23,  1.32s/it]

Token:  iranian
Token Matched Value:  iranian 1.0


 27%|██▋       | 240/891 [05:19<14:19,  1.32s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 27%|██▋       | 241/891 [05:20<14:09,  1.31s/it]

Token:  kurds
Token Matched Value:  turkmens 0.88895


 27%|██▋       | 242/891 [05:22<14:13,  1.31s/it]

Token:  iran
Token Matched Value:  iran 1.0


 27%|██▋       | 243/891 [05:23<14:33,  1.35s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 27%|██▋       | 244/891 [05:25<14:49,  1.38s/it]

Token:  kurdish
Token Matched Value:  turkmen 0.85568


 27%|██▋       | 245/891 [05:26<15:01,  1.40s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 28%|██▊       | 246/891 [05:27<14:50,  1.38s/it]

Token:  kurds
Token Matched Value:  turkmens 0.88895


 28%|██▊       | 247/891 [05:29<14:27,  1.35s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 28%|██▊       | 248/891 [05:30<14:14,  1.33s/it]

Token:  saudi
Token Matched Value:  saudi 1.0


 28%|██▊       | 249/891 [05:31<14:07,  1.32s/it]

Token:  manama
Token Matched Value:  manama 1.0


 28%|██▊       | 250/891 [05:33<13:58,  1.31s/it]

Token:  saudi
Token Matched Value:  saudi 1.0


 28%|██▊       | 251/891 [05:34<13:53,  1.30s/it]

Token:  israel
Token Matched Value:  israel 1.0


 28%|██▊       | 252/891 [05:35<13:52,  1.30s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 28%|██▊       | 253/891 [05:37<13:56,  1.31s/it]

Token:  jerusalem
Token Matched Value:  jerusalem 1.0


 29%|██▊       | 254/891 [05:38<13:51,  1.31s/it]

Token:  israel
Token Matched Value:  israel 1.0


 29%|██▊       | 255/891 [05:39<13:47,  1.30s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 29%|██▊       | 256/891 [05:40<13:42,  1.30s/it]

Token:  yasser
Token Matched Value:  hussein 0.74832


 29%|██▉       | 257/891 [05:42<13:42,  1.30s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 29%|██▉       | 258/891 [05:43<13:37,  1.29s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 29%|██▉       | 259/891 [05:44<13:35,  1.29s/it]

Token:  reuters
Token Matched Value:  reuters 1.0


 29%|██▉       | 260/891 [05:46<13:31,  1.29s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 29%|██▉       | 261/891 [05:47<13:28,  1.28s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 29%|██▉       | 262/891 [05:48<13:24,  1.28s/it]

Token:  shimon
Token Matched Value:  harav 0.76315


 30%|██▉       | 263/891 [05:49<13:22,  1.28s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 30%|██▉       | 264/891 [05:51<13:23,  1.28s/it]

Token:  ramallah
Token Matched Value:  palestinian 0.8067


 30%|██▉       | 265/891 [05:52<13:25,  1.29s/it]

Token:  gaza
Token Matched Value:  palestinians 0.80492


 30%|██▉       | 266/891 [05:53<13:21,  1.28s/it]

Token:  israel
Token Matched Value:  israel 1.0


 30%|██▉       | 267/891 [05:55<13:33,  1.30s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 30%|███       | 268/891 [05:56<13:47,  1.33s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 30%|███       | 269/891 [05:57<14:00,  1.35s/it]

Token:  benjamin
Token Matched Value:  benjamin 1.0


 30%|███       | 270/891 [05:59<13:44,  1.33s/it]

Token:  netanyahu
Token Matched Value:  israelis 0.72976


 30%|███       | 271/891 [06:00<13:33,  1.31s/it]

Token:  ramallah
Token Matched Value:  palestinian 0.8067


 31%|███       | 272/891 [06:01<13:24,  1.30s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 31%|███       | 273/891 [06:02<13:14,  1.29s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 31%|███       | 274/891 [06:04<13:16,  1.29s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 31%|███       | 275/891 [06:05<13:11,  1.28s/it]

Token:  allenby
Token Matched Value:  transjordan 0.61434


 31%|███       | 276/891 [06:06<13:08,  1.28s/it]

Token:  bridge
Token Matched Value:  bridge 1.0


 31%|███       | 277/891 [06:08<13:04,  1.28s/it]

Token:  jordan
Token Matched Value:  jordan 1.0


 31%|███       | 278/891 [06:09<13:01,  1.28s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 31%|███▏      | 279/891 [06:10<13:05,  1.28s/it]

Token:  israel
Token Matched Value:  israel 1.0


 31%|███▏      | 280/891 [06:11<12:59,  1.28s/it]

Token:  gaza
Token Matched Value:  palestinians 0.80492


 32%|███▏      | 281/891 [06:13<12:57,  1.27s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 32%|███▏      | 282/891 [06:14<12:57,  1.28s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 32%|███▏      | 283/891 [06:15<12:53,  1.27s/it]

Token:  gaza
Token Matched Value:  palestinians 0.80492


 32%|███▏      | 284/891 [06:16<12:53,  1.27s/it]

Token:  jerusalem
Token Matched Value:  jerusalem 1.0


 32%|███▏      | 285/891 [06:18<12:52,  1.28s/it]

Token:  yasser
Token Matched Value:  hussein 0.74832


 32%|███▏      | 286/891 [06:19<12:57,  1.29s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 32%|███▏      | 287/891 [06:20<12:58,  1.29s/it]

Token:  shimon
Token Matched Value:  harav 0.76315


 32%|███▏      | 288/891 [06:22<12:58,  1.29s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 32%|███▏      | 289/891 [06:23<13:27,  1.34s/it]

Token:  gaza
Token Matched Value:  palestinians 0.80492


 33%|███▎      | 290/891 [06:24<13:19,  1.33s/it]

Token:  palestinians
Token Matched Value:  palestinians 1.0


 33%|███▎      | 291/891 [06:26<13:10,  1.32s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 33%|███▎      | 292/891 [06:27<13:28,  1.35s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 33%|███▎      | 293/891 [06:29<13:49,  1.39s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 33%|███▎      | 294/891 [06:30<13:53,  1.40s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 33%|███▎      | 295/891 [06:32<14:15,  1.44s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 33%|███▎      | 296/891 [06:33<13:55,  1.40s/it]

Token:  arafat
Token Matched Value:  hussein 0.74708


 33%|███▎      | 297/891 [06:34<13:30,  1.36s/it]

Token:  israel
Token Matched Value:  israel 1.0


 33%|███▎      | 298/891 [06:35<13:13,  1.34s/it]

Token:  palestinian
Token Matched Value:  palestinian 1.0


 34%|███▎      | 299/891 [06:37<13:01,  1.32s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 34%|███▎      | 300/891 [06:38<12:51,  1.31s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 34%|███▍      | 301/891 [06:39<12:44,  1.30s/it]

Token:  benjamin
Token Matched Value:  benjamin 1.0


 34%|███▍      | 302/891 [06:41<12:40,  1.29s/it]

Token:  netanyahu
Token Matched Value:  israelis 0.72976


 34%|███▍      | 303/891 [06:42<12:35,  1.28s/it]

Token:  peres
Token Matched Value:  graça 0.62215


 34%|███▍      | 304/891 [06:43<12:37,  1.29s/it]

Token:  likud
Token Matched Value:  shas 0.82822


 34%|███▍      | 305/891 [06:44<12:31,  1.28s/it]

Token:  afghan
Token Matched Value:  afghan 1.0


 34%|███▍      | 306/891 [06:46<12:28,  1.28s/it]

Token:  uae
Token Matched Value:  uae 1.0


 34%|███▍      | 307/891 [06:47<12:25,  1.28s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 35%|███▍      | 308/891 [06:48<12:21,  1.27s/it]

Token:  dubai
Token Matched Value:  dubai 1.0


 35%|███▍      | 309/891 [06:49<12:22,  1.28s/it]

Token:  afghan
Token Matched Value:  afghan 1.0


 35%|███▍      | 310/891 [06:51<12:21,  1.28s/it]

Token:  united
Token Matched Value:  united 1.0


 35%|███▍      | 311/891 [06:52<12:19,  1.27s/it]

Token:  arab
Token Matched Value:  arab 1.0


 35%|███▌      | 312/891 [06:53<12:29,  1.29s/it]

Token:  emirates
Token Matched Value:  emirates 1.0


 35%|███▌      | 313/891 [06:55<12:23,  1.29s/it]

Token:  russian
Token Matched Value:  russian 1.0


 35%|███▌      | 314/891 [06:56<12:19,  1.28s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 35%|███▌      | 315/891 [06:57<12:17,  1.28s/it]

Token:  afghanistan
Token Matched Value:  afghanistan 1.0


 35%|███▌      | 316/891 [06:59<12:28,  1.30s/it]

Token:  afghan
Token Matched Value:  afghan 1.0


 36%|███▌      | 317/891 [07:00<12:32,  1.31s/it]

Token:  abu
Token Matched Value:  abd 0.82083


 36%|███▌      | 318/891 [07:01<12:49,  1.34s/it]

Token:  dhabi
Token Matched Value:  dubai 0.91116


 36%|███▌      | 319/891 [07:03<13:11,  1.38s/it]

Token:  russian
Token Matched Value:  russian 1.0


 36%|███▌      | 320/891 [07:04<13:07,  1.38s/it]

Token:  uae
Token Matched Value:  uae 1.0


 36%|███▌      | 321/891 [07:05<12:49,  1.35s/it]

Token:  sharjah
Token Matched Value:  dubai 0.84325


 36%|███▌      | 322/891 [07:07<12:38,  1.33s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 36%|███▋      | 323/891 [07:08<12:28,  1.32s/it]

Token:  kandahar
Token Matched Value:  kabul 0.8774


 36%|███▋      | 324/891 [07:09<12:30,  1.32s/it]

Token:  afghanistan
Token Matched Value:  afghanistan 1.0


 36%|███▋      | 325/891 [07:11<12:27,  1.32s/it]

Token:  uae
Token Matched Value:  uae 1.0


 37%|███▋      | 326/891 [07:12<12:20,  1.31s/it]

Token:  kandahar
Token Matched Value:  kabul 0.8774


 37%|███▋      | 327/891 [07:13<12:13,  1.30s/it]

Token:  afghan
Token Matched Value:  afghan 1.0


 37%|███▋      | 328/891 [07:14<12:09,  1.30s/it]

Token:  kabul
Token Matched Value:  kabul 1.0


 37%|███▋      | 329/891 [07:16<12:06,  1.29s/it]

Token:  kandahar
Token Matched Value:  kabul 0.8774


 37%|███▋      | 330/891 [07:17<12:03,  1.29s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 37%|███▋      | 331/891 [07:18<12:00,  1.29s/it]

Token:  kabul
Token Matched Value:  kabul 1.0


 37%|███▋      | 332/891 [07:20<12:04,  1.30s/it]

Token:  burhanuddin
Token Matched Value:  mohammad 0.80354


 37%|███▋      | 333/891 [07:21<11:57,  1.29s/it]

Token:  rabbani
Token Matched Value:  mohammad 0.82688


 37%|███▋      | 334/891 [07:22<12:06,  1.30s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 38%|███▊      | 335/891 [07:24<12:16,  1.32s/it]

Token:  abu
Token Matched Value:  abd 0.82083


 38%|███▊      | 336/891 [07:25<12:16,  1.33s/it]

Token:  dhabi
Token Matched Value:  dubai 0.91116


 38%|███▊      | 337/891 [07:26<12:16,  1.33s/it]

Token:  russians
Token Matched Value:  russians 1.0


 38%|███▊      | 338/891 [07:28<12:09,  1.32s/it]

Token:  russian
Token Matched Value:  russian 1.0


 38%|███▊      | 339/891 [07:29<11:59,  1.30s/it]

Token:  tatarstan
Token Matched Value:  bashkortostan 0.92794


 38%|███▊      | 340/891 [07:35<24:50,  2.71s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 38%|███▊      | 341/891 [07:39<28:38,  3.12s/it]

Token:  taleban
Token Matched Value:  taleban 1.0


 38%|███▊      | 342/891 [07:40<24:09,  2.64s/it]

Token:  albania
Token Matched Value:  albania 1.0


 38%|███▊      | 343/891 [07:42<20:29,  2.24s/it]

Token:  russian
Token Matched Value:  russian 1.0


 39%|███▊      | 344/891 [07:43<17:46,  1.95s/it]

Token:  rabbani
Token Matched Value:  mohammad 0.82688


 39%|███▊      | 345/891 [07:44<16:20,  1.80s/it]

Token:  moscow
Token Matched Value:  moscow 1.0


 39%|███▉      | 346/891 [07:46<15:16,  1.68s/it]

Token:  russians
Token Matched Value:  russians 1.0


 39%|███▉      | 347/891 [07:47<14:14,  1.57s/it]

Token:  uae
Token Matched Value:  uae 1.0


 39%|███▉      | 348/891 [07:48<13:24,  1.48s/it]

Token:  abu
Token Matched Value:  abd 0.82083


 39%|███▉      | 349/891 [07:50<12:53,  1.43s/it]

Token:  dhabi
Token Matched Value:  dubai 0.91116


 39%|███▉      | 350/891 [07:51<12:41,  1.41s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 39%|███▉      | 351/891 [07:52<12:22,  1.37s/it]

Token:  saddam
Token Matched Value:  saddam 1.0


 40%|███▉      | 352/891 [07:54<12:09,  1.35s/it]

Token:  russia
Token Matched Value:  russia 1.0


 40%|███▉      | 353/891 [07:55<12:01,  1.34s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 40%|███▉      | 354/891 [07:56<11:52,  1.33s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 40%|███▉      | 355/891 [07:58<11:45,  1.32s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 40%|███▉      | 356/891 [07:59<11:41,  1.31s/it]

Token:  saddam
Token Matched Value:  saddam 1.0


 40%|████      | 357/891 [08:00<11:35,  1.30s/it]

Token:  hussein
Token Matched Value:  hussein 1.0


 40%|████      | 358/891 [08:01<11:33,  1.30s/it]

Token:  russian
Token Matched Value:  russian 1.0


 40%|████      | 359/891 [08:03<11:32,  1.30s/it]

Token:  vladimir
Token Matched Value:  vladimir 1.0


 40%|████      | 360/891 [08:04<11:30,  1.30s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 41%|████      | 361/891 [08:05<11:25,  1.29s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 41%|████      | 362/891 [08:07<11:35,  1.32s/it]

Token:  moscow
Token Matched Value:  moscow 1.0


 41%|████      | 363/891 [08:08<11:37,  1.32s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 41%|████      | 364/891 [08:09<11:37,  1.32s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 41%|████      | 365/891 [08:11<11:41,  1.33s/it]

Token:  saddam
Token Matched Value:  saddam 1.0


 41%|████      | 366/891 [08:12<11:48,  1.35s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 41%|████      | 367/891 [08:13<11:36,  1.33s/it]

Token:  russian
Token Matched Value:  russian 1.0


 41%|████▏     | 368/891 [08:15<11:26,  1.31s/it]

Token:  duma
Token Matched Value:  putin 0.64322


 41%|████▏     | 369/891 [08:16<11:19,  1.30s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 42%|████▏     | 370/891 [08:17<11:12,  1.29s/it]

Token:  kuwait
Token Matched Value:  kuwait 1.0


 42%|████▏     | 371/891 [08:19<11:08,  1.29s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 42%|████▏     | 372/891 [08:20<11:04,  1.28s/it]

Token:  russian
Token Matched Value:  russian 1.0


 42%|████▏     | 373/891 [08:20<08:22,  1.03it/s]

Token:  u.n.
Token Matched Value:  None 0.0


 42%|████▏     | 374/891 [08:21<09:05,  1.06s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 42%|████▏     | 375/891 [08:23<09:37,  1.12s/it]

Token:  moscow
Token Matched Value:  moscow 1.0


 42%|████▏     | 376/891 [08:24<09:58,  1.16s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 42%|████▏     | 377/891 [08:25<10:14,  1.20s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 42%|████▏     | 378/891 [08:26<10:25,  1.22s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 43%|████▎     | 379/891 [08:28<10:31,  1.23s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 43%|████▎     | 380/891 [08:29<10:34,  1.24s/it]

Token:  saddam
Token Matched Value:  saddam 1.0


 43%|████▎     | 381/891 [08:30<10:35,  1.25s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 43%|████▎     | 382/891 [08:31<10:41,  1.26s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 43%|████▎     | 383/891 [08:33<11:01,  1.30s/it]

Token:  iraqi
Token Matched Value:  iraqi 1.0


 43%|████▎     | 384/891 [08:34<11:00,  1.30s/it]

Token:  reuters
Token Matched Value:  reuters 1.0


 43%|████▎     | 385/891 [08:35<10:57,  1.30s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 43%|████▎     | 386/891 [08:37<10:51,  1.29s/it]

Token:  saddam
Token Matched Value:  saddam 1.0


 43%|████▎     | 387/891 [08:38<10:51,  1.29s/it]

Token:  hussein
Token Matched Value:  hussein 1.0


 44%|████▎     | 388/891 [08:39<11:03,  1.32s/it]

Token:  russian
Token Matched Value:  russian 1.0


 44%|████▎     | 389/891 [08:41<11:02,  1.32s/it]

Token:  vladimir
Token Matched Value:  vladimir 1.0


 44%|████▍     | 390/891 [08:42<10:59,  1.32s/it]

Token:  zhirinovsky
Token Matched Value:  putin 0.83392


 44%|████▍     | 391/891 [08:43<10:58,  1.32s/it]

Token:  baghdad
Token Matched Value:  baghdad 1.0


 44%|████▍     | 392/891 [08:45<10:50,  1.30s/it]

Token:  iraq
Token Matched Value:  iraq 1.0


 44%|████▍     | 393/891 [08:46<10:45,  1.30s/it]

Token:  umm
Token Matched Value:  wadi 0.80416


 44%|████▍     | 394/891 [08:47<11:00,  1.33s/it]

Token:  qasr
Token Matched Value:  masjed 0.69886


 44%|████▍     | 395/891 [08:49<10:48,  1.31s/it]

Token:  gulf
Token Matched Value:  gulf 1.0


 44%|████▍     | 396/891 [08:50<10:40,  1.29s/it]

Token:  lebanon
Token Matched Value:  lebanon 1.0


 45%|████▍     | 397/891 [08:51<10:42,  1.30s/it]

Token:  beirut
Token Matched Value:  beirut 1.0


 45%|████▍     | 398/891 [08:52<10:43,  1.31s/it]

Token:  beirut
Token Matched Value:  beirut 1.0


 45%|████▍     | 399/891 [08:54<10:41,  1.30s/it]

Token:  reuters
Token Matched Value:  reuters 1.0


 45%|████▍     | 400/891 [08:54<08:04,  1.01it/s]

Token:  an-nahar
Token Matched Value:  None 0.0


 45%|████▌     | 401/891 [08:55<08:47,  1.08s/it]

Token:  hizbollah
Token Matched Value:  islamist 0.79308


 45%|████▌     | 402/891 [08:57<09:15,  1.14s/it]

Token:  israeli
Token Matched Value:  israeli 1.0


 45%|████▌     | 403/891 [08:57<07:03,  1.15it/s]

Token:  as-safir
Token Matched Value:  None 0.0


 45%|████▌     | 404/891 [08:58<08:02,  1.01it/s]

Token:  israel
Token Matched Value:  israel 1.0


 45%|████▌     | 405/891 [08:59<08:43,  1.08s/it]

Token:  syria
Token Matched Value:  syria 1.0


 46%|████▌     | 406/891 [09:01<09:11,  1.14s/it]

Token:  lebanon
Token Matched Value:  lebanon 1.0


 46%|████▌     | 407/891 [09:02<09:31,  1.18s/it]

Token:  beirut
Token Matched Value:  beirut 1.0


 46%|████▌     | 408/891 [09:02<07:15,  1.11it/s]

Token:  ad-diyar
Token Matched Value:  None 0.0


 46%|████▌     | 409/891 [09:03<08:10,  1.02s/it]

Token:  lebanon
Token Matched Value:  lebanon 1.0


 46%|████▌     | 410/891 [09:05<08:47,  1.10s/it]

Token:  pakistan
Token Matched Value:  pakistan 1.0


 46%|████▌     | 411/891 [09:06<09:12,  1.15s/it]

Token:  maronite
Token Matched Value:  syro 0.77064


 46%|████▌     | 412/891 [09:07<09:29,  1.19s/it]

Token:  cme
Token Matched Value:  rse 0.60583


 46%|████▋     | 413/891 [09:09<09:57,  1.25s/it]

Token:  chicago
Token Matched Value:  chicago 1.0


 46%|████▋     | 414/891 [09:10<11:11,  1.41s/it]

Token:  cme
Token Matched Value:  rse 0.60583


 47%|████▋     | 415/891 [09:12<11:03,  1.39s/it]

Token:  montgomery
Token Matched Value:  jackson 0.78752


 47%|████▋     | 416/891 [09:13<10:52,  1.37s/it]

Token:  ala
Token Matched Value:  ala 1.0


 47%|████▋     | 417/891 [09:14<10:46,  1.36s/it]

Token:  kindercare
Token Matched Value:  inc 0.69552


 47%|████▋     | 418/891 [09:16<10:32,  1.34s/it]

Token:  learning
Token Matched Value:  learning 1.0


 47%|████▋     | 419/891 [09:17<10:34,  1.34s/it]

Token:  centers
Token Matched Value:  centres 0.85614


 47%|████▋     | 420/891 [09:18<10:29,  1.34s/it]

Token:  inc
Token Matched Value:  inc 1.0


 47%|████▋     | 421/891 [09:20<10:18,  1.32s/it]

Token:  lehman
Token Matched Value:  fisher 0.68766


 47%|████▋     | 422/891 [09:21<10:10,  1.30s/it]

Token:  snet
Token Matched Value:  telecommunications 0.63666


 47%|████▋     | 423/891 [09:22<10:08,  1.30s/it]

Token:  lehman
Token Matched Value:  fisher 0.68766


 48%|████▊     | 424/891 [09:24<10:01,  1.29s/it]

Token:  phoenix
Token Matched Value:  phoenix 1.0


 48%|████▊     | 425/891 [09:25<09:57,  1.28s/it]

Token:  greek
Token Matched Value:  greek 1.0


 48%|████▊     | 426/891 [09:26<09:54,  1.28s/it]

Token:  athens
Token Matched Value:  athens 1.0


 48%|████▊     | 427/891 [09:27<09:51,  1.27s/it]

Token:  greek
Token Matched Value:  greek 1.0


 48%|████▊     | 428/891 [09:29<09:48,  1.27s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 48%|████▊     | 429/891 [09:30<09:46,  1.27s/it]

Token:  simitis
Token Matched Value:  omirou 0.69991


 48%|████▊     | 430/891 [09:31<09:44,  1.27s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 48%|████▊     | 431/891 [09:32<09:42,  1.27s/it]

Token:  skandalidis
Token Matched Value:  omirou 0.81893


 48%|████▊     | 432/891 [09:34<09:41,  1.27s/it]

Token:  costas
Token Matched Value:  alexandros 0.58465


 49%|████▊     | 433/891 [09:35<09:39,  1.27s/it]

Token:  simitis
Token Matched Value:  omirou 0.69991


 49%|████▊     | 434/891 [09:36<09:38,  1.27s/it]

Token:  france
Token Matched Value:  france 1.0


 49%|████▉     | 435/891 [09:37<09:38,  1.27s/it]

Token:  le
Token Matched Value:  du 0.78944


 49%|████▉     | 436/891 [09:39<09:44,  1.28s/it]

Token:  monde
Token Matched Value:  autre 0.81736


 49%|████▉     | 437/891 [09:40<09:40,  1.28s/it]

Token:  paris
Token Matched Value:  paris 1.0


 49%|████▉     | 438/891 [09:41<09:39,  1.28s/it]

Token:  le
Token Matched Value:  du 0.78944


 49%|████▉     | 439/891 [09:43<09:49,  1.31s/it]

Token:  monde
Token Matched Value:  autre 0.81736


 49%|████▉     | 440/891 [09:44<09:48,  1.30s/it]

Token:  africans
Token Matched Value:  africans 1.0


 49%|████▉     | 441/891 [09:45<09:56,  1.33s/it]

Token:  alain
Token Matched Value:  maurice 0.71239


 50%|████▉     | 442/891 [09:47<10:02,  1.34s/it]

Token:  juppe
Token Matched Value:  hollande 0.69228


 50%|████▉     | 443/891 [09:48<09:55,  1.33s/it]

Token:  paris
Token Matched Value:  paris 1.0


 50%|████▉     | 444/891 [09:49<09:47,  1.31s/it]

Token:  flnc
Token Matched Value:  nlf 0.66329


 50%|████▉     | 445/891 [09:51<09:44,  1.31s/it]

Token:  corsican
Token Matched Value:  corsican 1.0


 50%|█████     | 446/891 [09:52<09:49,  1.32s/it]

Token:  bally
Token Matched Value:  bally 1.0


 50%|█████     | 447/891 [09:53<09:58,  1.35s/it]

Token:  french
Token Matched Value:  french 1.0


 50%|█████     | 448/891 [09:55<09:49,  1.33s/it]

Token:  french
Token Matched Value:  french 1.0


 50%|█████     | 449/891 [09:55<07:24,  1.01s/it]

Token:  sud-ptt
Token Matched Value:  None 0.0


 51%|█████     | 450/891 [09:56<07:58,  1.08s/it]

Token:  france
Token Matched Value:  france 1.0


 51%|█████     | 451/891 [09:58<08:31,  1.16s/it]

Token:  telecom
Token Matched Value:  telecommunications 0.84306


 51%|█████     | 452/891 [09:59<08:49,  1.21s/it]

Token:  heidrun
Token Matched Value:  buch 0.59826


 51%|█████     | 453/891 [10:00<08:56,  1.23s/it]

Token:  statoil
Token Matched Value:  offshore 0.65575


 51%|█████     | 454/891 [10:01<09:02,  1.24s/it]

Token:  oslo
Token Matched Value:  oslo 1.0


 51%|█████     | 455/891 [10:03<09:12,  1.27s/it]

Token:  heidrun
Token Matched Value:  buch 0.59826


 51%|█████     | 456/891 [10:04<09:14,  1.27s/it]

Token:  statoil
Token Matched Value:  offshore 0.65575


 51%|█████▏    | 457/891 [10:05<09:17,  1.28s/it]

Token:  heidrun
Token Matched Value:  buch 0.59826


 51%|█████▏    | 458/891 [10:07<09:14,  1.28s/it]

Token:  oslo
Token Matched Value:  oslo 1.0


 52%|█████▏    | 459/891 [10:08<09:11,  1.28s/it]

Token:  finnish
Token Matched Value:  finnish 1.0


 52%|█████▏    | 460/891 [10:09<09:10,  1.28s/it]

Token:  helsinki
Token Matched Value:  helsinki 1.0


 52%|█████▏    | 461/891 [10:10<09:06,  1.27s/it]

Token:  finland
Token Matched Value:  finland 1.0


 52%|█████▏    | 462/891 [10:12<09:03,  1.27s/it]

Token:  bank
Token Matched Value:  bank 1.0


 52%|█████▏    | 463/891 [10:13<09:11,  1.29s/it]

Token:  of
Token Matched Value:  of 1.0


 52%|█████▏    | 464/891 [10:14<09:18,  1.31s/it]

Token:  finland
Token Matched Value:  finland 1.0


 52%|█████▏    | 465/891 [10:16<09:21,  1.32s/it]

Token:  finland
Token Matched Value:  finland 1.0


 52%|█████▏    | 466/891 [10:17<09:24,  1.33s/it]

Token:  european
Token Matched Value:  european 1.0


 52%|█████▏    | 467/891 [10:18<09:14,  1.31s/it]

Token:  union
Token Matched Value:  union 1.0


 53%|█████▎    | 468/891 [10:20<09:07,  1.29s/it]

Token:  dutch
Token Matched Value:  dutch 1.0


 53%|█████▎    | 469/891 [10:21<09:03,  1.29s/it]

Token:  amsterdam
Token Matched Value:  amsterdam 1.0


 53%|█████▎    | 470/891 [10:22<08:57,  1.28s/it]

Token:  dutch
Token Matched Value:  dutch 1.0


 53%|█████▎    | 471/891 [10:23<08:53,  1.27s/it]

Token:  gmt
Token Matched Value:  utc 0.75906


 53%|█████▎    | 472/891 [10:25<08:50,  1.27s/it]

Token:  gmt
Token Matched Value:  utc 0.75906


 53%|█████▎    | 473/891 [10:26<08:48,  1.26s/it]

Token:  amsterdam
Token Matched Value:  amsterdam 1.0


 53%|█████▎    | 474/891 [10:27<08:47,  1.27s/it]

Token:  german
Token Matched Value:  german 1.0


 53%|█████▎    | 475/891 [10:28<08:46,  1.27s/it]

Token:  british
Token Matched Value:  british 1.0


 53%|█████▎    | 476/891 [10:30<08:45,  1.27s/it]

Token:  bonn
Token Matched Value:  bonn 1.0


 54%|█████▎    | 477/891 [10:31<08:43,  1.26s/it]

Token:  germany
Token Matched Value:  germany 1.0


 54%|█████▎    | 478/891 [10:32<08:40,  1.26s/it]

Token:  british
Token Matched Value:  british 1.0


 54%|█████▍    | 479/891 [10:33<08:44,  1.27s/it]

Token:  zdf
Token Matched Value:  fernsehen 0.8379


 54%|█████▍    | 480/891 [10:35<08:43,  1.27s/it]

Token:  britain
Token Matched Value:  britain 1.0


 54%|█████▍    | 481/891 [10:36<08:40,  1.27s/it]

Token:  european
Token Matched Value:  european 1.0


 54%|█████▍    | 482/891 [10:37<08:39,  1.27s/it]

Token:  commission
Token Matched Value:  commission 1.0


 54%|█████▍    | 483/891 [10:39<08:40,  1.28s/it]

Token:  franz
Token Matched Value:  ludwig 0.83038


 54%|█████▍    | 484/891 [10:40<08:38,  1.27s/it]

Token:  fischler
Token Matched Value:  bender 0.6349


 54%|█████▍    | 485/891 [10:41<08:37,  1.27s/it]

Token:  britain
Token Matched Value:  britain 1.0


 55%|█████▍    | 486/891 [10:42<08:34,  1.27s/it]

Token:  france
Token Matched Value:  france 1.0


 55%|█████▍    | 487/891 [10:44<08:32,  1.27s/it]

Token:  bse
Token Matched Value:  bse 1.0


 55%|█████▍    | 488/891 [10:45<08:38,  1.29s/it]

Token:  bse
Token Matched Value:  bse 1.0


 55%|█████▍    | 489/891 [10:46<08:47,  1.31s/it]

Token:  british
Token Matched Value:  british 1.0


 55%|█████▍    | 490/891 [10:48<08:55,  1.33s/it]

Token:  zdf
Token Matched Value:  fernsehen 0.8379


 55%|█████▌    | 491/891 [10:49<08:49,  1.32s/it]

Token:  germany
Token Matched Value:  germany 1.0


 55%|█████▌    | 492/891 [10:50<08:41,  1.31s/it]

Token:  britain
Token Matched Value:  britain 1.0


 55%|█████▌    | 493/891 [10:52<08:37,  1.30s/it]

Token:  british
Token Matched Value:  british 1.0


 55%|█████▌    | 494/891 [10:53<08:36,  1.30s/it]

Token:  british
Token Matched Value:  british 1.0


 56%|█████▌    | 495/891 [10:54<08:34,  1.30s/it]

Token:  british
Token Matched Value:  british 1.0


 56%|█████▌    | 496/891 [10:55<08:31,  1.30s/it]

Token:  britain
Token Matched Value:  britain 1.0


 56%|█████▌    | 497/891 [10:57<08:34,  1.31s/it]

Token:  world
Token Matched Value:  world 1.0


 56%|█████▌    | 498/891 [10:58<08:34,  1.31s/it]

Token:  series
Token Matched Value:  series 1.0


 56%|█████▌    | 499/891 [10:59<08:37,  1.32s/it]

Token:  of
Token Matched Value:  of 1.0


 56%|█████▌    | 500/891 [11:01<08:32,  1.31s/it]

Token:  golf
Token Matched Value:  golf 1.0


 56%|█████▌    | 501/891 [11:02<08:27,  1.30s/it]

Token:  akron
Token Matched Value:  cleveland 0.83152


 56%|█████▋    | 502/891 [11:03<08:32,  1.32s/it]

Token:  ohio
Token Matched Value:  ohio 1.0


 56%|█████▋    | 503/891 [11:05<08:32,  1.32s/it]

Token:  nec
Token Matched Value:  ibm 0.61816


 57%|█████▋    | 504/891 [11:06<08:26,  1.31s/it]

Token:  world
Token Matched Value:  world 1.0


 57%|█████▋    | 505/891 [11:07<08:20,  1.30s/it]

Token:  series
Token Matched Value:  series 1.0


 57%|█████▋    | 506/891 [11:09<08:22,  1.30s/it]

Token:  of
Token Matched Value:  of 1.0


 57%|█████▋    | 507/891 [11:10<09:20,  1.46s/it]

Token:  golf
Token Matched Value:  golf 1.0


 57%|█████▋    | 508/891 [11:11<06:59,  1.10s/it]

Token:  u.s.
Token Matched Value:  None 0.0


 57%|█████▋    | 509/891 [11:12<07:20,  1.15s/it]

Token:  paul
Token Matched Value:  paul 1.0


 57%|█████▋    | 510/891 [11:13<07:31,  1.19s/it]

Token:  goydos
Token Matched Value:  stricker 0.71982


 57%|█████▋    | 511/891 [11:15<07:50,  1.24s/it]

Token:  billy
Token Matched Value:  johnny 0.864


 57%|█████▋    | 512/891 [11:16<07:56,  1.26s/it]

Token:  mayfair
Token Matched Value:  islington 0.662


 58%|█████▊    | 513/891 [11:17<08:10,  1.30s/it]

Token:  hidemichi
Token Matched Value:  miako 0.61812


 58%|█████▊    | 514/891 [11:19<08:19,  1.32s/it]

Token:  tanaka
Token Matched Value:  okada 0.89195


 58%|█████▊    | 515/891 [11:20<08:17,  1.32s/it]

Token:  japan
Token Matched Value:  japan 1.0


 58%|█████▊    | 516/891 [11:21<08:09,  1.31s/it]

Token:  steve
Token Matched Value:  mike 0.90153


 58%|█████▊    | 517/891 [11:22<08:04,  1.30s/it]

Token:  stricker
Token Matched Value:  stricker 1.0


 58%|█████▊    | 518/891 [11:24<08:03,  1.30s/it]

Token:  justin
Token Matched Value:  shawn 0.80517


 58%|█████▊    | 519/891 [11:25<07:58,  1.29s/it]

Token:  leonard
Token Matched Value:  harold 0.81044


 58%|█████▊    | 520/891 [11:26<07:54,  1.28s/it]

Token:  mark
Token Matched Value:  mike 0.74831


 58%|█████▊    | 521/891 [11:28<07:51,  1.27s/it]

Token:  brooks
Token Matched Value:  brooks 1.0


 59%|█████▊    | 522/891 [11:29<07:50,  1.28s/it]

Token:  tim
Token Matched Value:  mike 0.89261


 59%|█████▊    | 523/891 [11:30<07:48,  1.27s/it]

Token:  herron
Token Matched Value:  clark 0.79857


 59%|█████▉    | 524/891 [11:31<07:46,  1.27s/it]

Token:  duffy
Token Matched Value:  mcdonald 0.84008


 59%|█████▉    | 525/891 [11:33<07:43,  1.27s/it]

Token:  waldorf
Token Matched Value:  bhs 0.60131


 59%|█████▉    | 526/891 [11:34<07:44,  1.27s/it]

Token:  davis
Token Matched Value:  davis 1.0


 59%|█████▉    | 527/891 [11:35<07:45,  1.28s/it]

Token:  love
Token Matched Value:  dream 0.75681


 59%|█████▉    | 528/891 [11:36<07:42,  1.27s/it]

Token:  anders
Token Matched Value:  carl 0.7507


 59%|█████▉    | 529/891 [11:38<07:39,  1.27s/it]

Token:  forsbrand
Token Matched Value:  swede 0.6843


 59%|█████▉    | 530/891 [11:39<07:39,  1.27s/it]

Token:  sweden
Token Matched Value:  sweden 1.0


 60%|█████▉    | 531/891 [11:40<07:38,  1.27s/it]

Token:  nick
Token Matched Value:  nick 1.0


 60%|█████▉    | 532/891 [11:42<07:35,  1.27s/it]

Token:  faldo
Token Matched Value:  pga 0.70264


 60%|█████▉    | 533/891 [11:43<07:33,  1.27s/it]

Token:  britain
Token Matched Value:  britain 1.0


 60%|█████▉    | 534/891 [11:44<07:33,  1.27s/it]

Token:  john
Token Matched Value:  john 1.0


 60%|██████    | 535/891 [11:45<07:35,  1.28s/it]

Token:  cook
Token Matched Value:  mcdonald 0.78127


 60%|██████    | 536/891 [11:47<07:35,  1.28s/it]

Token:  steve
Token Matched Value:  mike 0.90153


 60%|██████    | 537/891 [11:48<07:37,  1.29s/it]

Token:  jones
Token Matched Value:  parker 0.8645


 60%|██████    | 538/891 [11:49<07:44,  1.32s/it]

Token:  phil
Token Matched Value:  mike 0.88833


 60%|██████    | 539/891 [11:51<07:51,  1.34s/it]

Token:  mickelson
Token Matched Value:  stricker 0.75057


 61%|██████    | 540/891 [11:52<07:49,  1.34s/it]

Token:  greg
Token Matched Value:  mike 0.90521


 61%|██████    | 541/891 [11:53<07:43,  1.32s/it]

Token:  norman
Token Matched Value:  norman 1.0


 61%|██████    | 542/891 [11:55<07:41,  1.32s/it]

Token:  australia
Token Matched Value:  australia 1.0


 61%|██████    | 543/891 [11:56<07:35,  1.31s/it]

Token:  ernie
Token Matched Value:  willie 0.81669


 61%|██████    | 544/891 [11:57<07:30,  1.30s/it]

Token:  els
Token Matched Value:  català 0.61098


 61%|██████    | 545/891 [11:59<07:26,  1.29s/it]

Token:  south
Token Matched Value:  south 1.0


 61%|██████▏   | 546/891 [12:00<07:22,  1.28s/it]

Token:  africa
Token Matched Value:  africa 1.0


 61%|██████▏   | 547/891 [12:01<07:20,  1.28s/it]

Token:  scott
Token Matched Value:  james 0.85129


 62%|██████▏   | 548/891 [12:02<07:18,  1.28s/it]

Token:  hoch
Token Matched Value:  mann 0.72746


 62%|██████▏   | 549/891 [12:04<07:26,  1.30s/it]

Token:  clarence
Token Matched Value:  william 0.7571


 62%|██████▏   | 550/891 [12:05<07:29,  1.32s/it]

Token:  rose
Token Matched Value:  bloom 0.64331


 62%|██████▏   | 551/891 [12:06<07:23,  1.30s/it]

Token:  loren
Token Matched Value:  russ 0.70814


 62%|██████▏   | 552/891 [12:08<07:18,  1.29s/it]

Token:  roberts
Token Matched Value:  matthews 0.89042


 62%|██████▏   | 553/891 [12:09<07:18,  1.30s/it]

Token:  fred
Token Matched Value:  frank 0.87236


 62%|██████▏   | 554/891 [12:10<07:20,  1.31s/it]

Token:  funk
Token Matched Value:  funk 1.0


 62%|██████▏   | 555/891 [12:12<07:15,  1.30s/it]

Token:  sven
Token Matched Value:  swede 0.723


 62%|██████▏   | 556/891 [12:12<05:28,  1.02it/s]

Token:  struver
Token Matched Value:  None 0.0


 63%|██████▎   | 557/891 [12:13<05:55,  1.06s/it]

Token:  germany
Token Matched Value:  germany 1.0


 63%|██████▎   | 558/891 [12:14<06:16,  1.13s/it]

Token:  alexander
Token Matched Value:  alexander 1.0


 63%|██████▎   | 559/891 [12:16<06:27,  1.17s/it]

Token:  cejka
Token Matched Value:  broadcaster 0.67406


 63%|██████▎   | 560/891 [12:17<06:35,  1.20s/it]

Token:  germany
Token Matched Value:  germany 1.0


 63%|██████▎   | 561/891 [12:18<06:41,  1.22s/it]

Token:  hal
Token Matched Value:  jack 0.73939


 63%|██████▎   | 562/891 [12:19<06:50,  1.25s/it]

Token:  sutton
Token Matched Value:  middleton 0.84142


 63%|██████▎   | 563/891 [12:21<07:03,  1.29s/it]

Token:  tom
Token Matched Value:  mike 0.86554


 63%|██████▎   | 564/891 [12:22<07:06,  1.31s/it]

Token:  lehman
Token Matched Value:  fisher 0.68766


 63%|██████▎   | 565/891 [12:22<05:22,  1.01it/s]

Token:  d.a.
Token Matched Value:  None 0.0


 64%|██████▎   | 566/891 [12:24<05:57,  1.10s/it]

Token:  weibring
Token Matched Value:  golfer 0.75251


 64%|██████▎   | 567/891 [12:25<06:13,  1.15s/it]

Token:  brad
Token Matched Value:  mike 0.86701


 64%|██████▎   | 568/891 [12:26<06:25,  1.19s/it]

Token:  bryant
Token Matched Value:  johnson 0.82748


 64%|██████▍   | 569/891 [12:28<06:33,  1.22s/it]

Token:  craig
Token Matched Value:  ian 0.85142


 64%|██████▍   | 570/891 [12:29<06:37,  1.24s/it]

Token:  parry
Token Matched Value:  middleton 0.76412


 64%|██████▍   | 571/891 [12:30<06:42,  1.26s/it]

Token:  australia
Token Matched Value:  australia 1.0


 64%|██████▍   | 572/891 [12:31<06:41,  1.26s/it]

Token:  stewart
Token Matched Value:  clark 0.83785


 64%|██████▍   | 573/891 [12:33<06:41,  1.26s/it]

Token:  ginn
Token Matched Value:  packer 0.68019


 64%|██████▍   | 574/891 [12:34<06:40,  1.26s/it]

Token:  australia
Token Matched Value:  australia 1.0


 65%|██████▍   | 575/891 [12:35<06:41,  1.27s/it]

Token:  corey
Token Matched Value:  shawn 0.80622


 65%|██████▍   | 576/891 [12:37<06:38,  1.26s/it]

Token:  pavin
Token Matched Value:  pga 0.63318


 65%|██████▍   | 577/891 [12:38<06:38,  1.27s/it]

Token:  craig
Token Matched Value:  ian 0.85142


 65%|██████▍   | 578/891 [12:39<06:39,  1.28s/it]

Token:  stadler
Token Matched Value:  wagner 0.61652


 65%|██████▍   | 579/891 [12:40<06:39,  1.28s/it]

Token:  fred
Token Matched Value:  frank 0.87236


 65%|██████▌   | 580/891 [12:42<06:39,  1.29s/it]

Token:  couples
Token Matched Value:  married 0.62763


 65%|██████▌   | 581/891 [12:43<06:37,  1.28s/it]

Token:  paul
Token Matched Value:  paul 1.0


 65%|██████▌   | 582/891 [12:44<06:36,  1.28s/it]

Token:  stankowski
Token Matched Value:  polak 0.6015


 65%|██████▌   | 583/891 [12:46<06:35,  1.28s/it]

Token:  costantino
Token Matched Value:  francesco 0.81408


 66%|██████▌   | 584/891 [12:47<06:33,  1.28s/it]

Token:  rocca
Token Matched Value:  castello 0.82515


 66%|██████▌   | 585/891 [12:48<06:31,  1.28s/it]

Token:  italy
Token Matched Value:  italy 1.0


 66%|██████▌   | 586/891 [12:49<06:29,  1.28s/it]

Token:  jim
Token Matched Value:  mike 0.90582


 66%|██████▌   | 587/891 [12:51<06:35,  1.30s/it]

Token:  furyk
Token Matched Value:  stricker 0.72928


 66%|██████▌   | 588/891 [12:52<06:40,  1.32s/it]

Token:  satoshi
Token Matched Value:  okada 0.84792


 66%|██████▌   | 589/891 [12:53<06:41,  1.33s/it]

Token:  higashi
Token Matched Value:  ōsaka 0.83609


 66%|██████▌   | 590/891 [12:55<06:42,  1.34s/it]

Token:  japan
Token Matched Value:  japan 1.0


 66%|██████▋   | 591/891 [12:56<06:35,  1.32s/it]

Token:  willie
Token Matched Value:  willie 1.0


 66%|██████▋   | 592/891 [12:57<06:31,  1.31s/it]

Token:  wood
Token Matched Value:  wood 1.0


 67%|██████▋   | 593/891 [12:59<06:30,  1.31s/it]

Token:  shigeki
Token Matched Value:  sato 0.78772


 67%|██████▋   | 594/891 [13:00<06:27,  1.31s/it]

Token:  maruyama
Token Matched Value:  sato 0.78004


 67%|██████▋   | 595/891 [13:01<06:23,  1.30s/it]

Token:  japan
Token Matched Value:  japan 1.0


 67%|██████▋   | 596/891 [13:02<06:19,  1.29s/it]

Token:  scott
Token Matched Value:  james 0.85129


 67%|██████▋   | 597/891 [13:04<06:18,  1.29s/it]

Token:  mccarron
Token Matched Value:  matthews 0.69224


 67%|██████▋   | 598/891 [13:05<06:16,  1.28s/it]

Token:  wayne
Token Matched Value:  wayne 1.0


 67%|██████▋   | 599/891 [13:06<06:12,  1.28s/it]

Token:  westner
Token Matched Value:  stricker 0.68109


 67%|██████▋   | 600/891 [13:08<06:10,  1.27s/it]

Token:  south
Token Matched Value:  south 1.0


 67%|██████▋   | 601/891 [13:09<06:20,  1.31s/it]

Token:  africa
Token Matched Value:  africa 1.0


 68%|██████▊   | 602/891 [13:12<08:53,  1.85s/it]

Token:  tom
Token Matched Value:  mike 0.86554


 68%|██████▊   | 603/891 [13:13<08:14,  1.72s/it]

Token:  watson
Token Matched Value:  watson 1.0


 68%|██████▊   | 604/891 [13:15<07:45,  1.62s/it]

Token:  japan
Token Matched Value:  japan 1.0


 68%|██████▊   | 605/891 [13:16<07:35,  1.59s/it]

Token:  gloria
Token Matched Value:  miriam 0.71262


 68%|██████▊   | 606/891 [13:18<07:12,  1.52s/it]

Token:  bistrita
Token Matched Value:  dunărea 0.73817


 68%|██████▊   | 607/891 [13:19<06:50,  1.45s/it]

Token:  bistrita
Token Matched Value:  dunărea 0.73817


 68%|██████▊   | 608/891 [13:20<06:45,  1.43s/it]

Token:  gloria
Token Matched Value:  miriam 0.71262


 68%|██████▊   | 609/891 [13:22<06:38,  1.41s/it]

Token:  bistrita
Token Matched Value:  dunărea 0.73817


 68%|██████▊   | 610/891 [13:23<06:30,  1.39s/it]

Token:  romania
Token Matched Value:  romania 1.0


 69%|██████▊   | 611/891 [13:25<06:28,  1.39s/it]

Token:  malta
Token Matched Value:  malta 1.0


 69%|██████▊   | 612/891 [13:26<06:27,  1.39s/it]

Token:  cup
Token Matched Value:  cup 1.0


 69%|██████▉   | 613/891 [13:27<06:19,  1.37s/it]

Token:  winners
Token Matched Value:  winners 1.0


 69%|██████▉   | 614/891 [13:28<06:10,  1.34s/it]

Token:  cup
Token Matched Value:  cup 1.0


 69%|██████▉   | 615/891 [13:30<06:02,  1.31s/it]

Token:  gloria
Token Matched Value:  miriam 0.71262


 69%|██████▉   | 616/891 [13:31<05:56,  1.30s/it]

Token:  bistrita
Token Matched Value:  dunărea 0.73817


 69%|██████▉   | 617/891 [13:32<05:54,  1.29s/it]

Token:  gilbert
Token Matched Value:  richard 0.76929


 69%|██████▉   | 618/891 [13:34<05:53,  1.29s/it]

Token:  agius
Token Matched Value:  natale 0.6205


 69%|██████▉   | 619/891 [13:35<05:52,  1.30s/it]

Token:  gloria
Token Matched Value:  miriam 0.71262


 70%|██████▉   | 620/891 [13:36<05:49,  1.29s/it]

Token:  bistrita
Token Matched Value:  dunărea 0.73817


 70%|██████▉   | 621/891 [13:37<05:48,  1.29s/it]

Token:  cup
Token Matched Value:  cup 1.0


 70%|██████▉   | 622/891 [13:39<05:47,  1.29s/it]

Token:  winners
Token Matched Value:  winners 1.0


 70%|██████▉   | 623/891 [13:40<05:45,  1.29s/it]

Token:  cup
Token Matched Value:  cup 1.0


 70%|███████   | 624/891 [13:41<05:43,  1.29s/it]

Token:  york
Token Matched Value:  york 1.0


 70%|███████   | 625/891 [13:43<05:41,  1.29s/it]

Token:  england
Token Matched Value:  england 1.0


 70%|███████   | 626/891 [13:44<05:39,  1.28s/it]

Token:  mark
Token Matched Value:  mike 0.74831


 70%|███████   | 627/891 [13:45<05:37,  1.28s/it]

Token:  prescott
Token Matched Value:  fisher 0.71146


 70%|███████   | 628/891 [13:46<05:35,  1.28s/it]

Token:  nunthorpe
Token Matched Value:  tipperkevin 0.6762


 71%|███████   | 629/891 [13:48<05:34,  1.28s/it]

Token:  stakes
Token Matched Value:  handicap 0.83502


 71%|███████   | 630/891 [13:49<05:32,  1.27s/it]

Token:  george
Token Matched Value:  george 1.0


 71%|███████   | 631/891 [13:50<05:30,  1.27s/it]

Token:  duffield
Token Matched Value:  thornton 0.81958


 71%|███████   | 632/891 [13:51<05:28,  1.27s/it]

Token:  longchamp
Token Matched Value:  tipperkevin 0.60929


 71%|███████   | 633/891 [13:53<05:26,  1.27s/it]

Token:  royal
Token Matched Value:  royal 1.0


 71%|███████   | 634/891 [13:54<05:27,  1.28s/it]

Token:  ascot
Token Matched Value:  pimlico 0.69517


 71%|███████▏  | 635/891 [13:55<05:30,  1.29s/it]

Token:  europe
Token Matched Value:  europe 1.0


 71%|███████▏  | 636/891 [13:57<05:34,  1.31s/it]

Token:  york
Token Matched Value:  york 1.0


 71%|███████▏  | 637/891 [13:58<05:44,  1.36s/it]

Token:  england
Token Matched Value:  england 1.0


 72%|███████▏  | 638/891 [13:59<05:39,  1.34s/it]

Token:  nunthorpe
Token Matched Value:  tipperkevin 0.6762


 72%|███████▏  | 639/891 [14:01<05:33,  1.32s/it]

Token:  stakes
Token Matched Value:  handicap 0.83502


 72%|███████▏  | 640/891 [14:02<05:27,  1.30s/it]

Token:  george
Token Matched Value:  george 1.0


 72%|███████▏  | 641/891 [14:03<05:24,  1.30s/it]

Token:  duffield
Token Matched Value:  thornton 0.81958


 72%|███████▏  | 642/891 [14:05<05:22,  1.30s/it]

Token:  jason
Token Matched Value:  alex 0.86268


 72%|███████▏  | 643/891 [14:06<05:20,  1.29s/it]

Token:  weaver
Token Matched Value:  walker 0.75415


 72%|███████▏  | 644/891 [14:07<05:16,  1.28s/it]

Token:  mark
Token Matched Value:  mike 0.74831


 72%|███████▏  | 645/891 [14:08<05:14,  1.28s/it]

Token:  prescott
Token Matched Value:  fisher 0.71146


 73%|███████▎  | 646/891 [14:10<05:13,  1.28s/it]

Token:  newmarket
Token Matched Value:  derbys 0.66378


 73%|███████▎  | 647/891 [14:11<05:11,  1.28s/it]

Token:  toshiba
Token Matched Value:  sony 0.7086


 73%|███████▎  | 648/891 [14:12<05:08,  1.27s/it]

Token:  classic
Token Matched Value:  classic 1.0


 73%|███████▎  | 649/891 [14:13<05:08,  1.27s/it]

Token:  carlsbad
Token Matched Value:  altadena 0.67771


 73%|███████▎  | 650/891 [14:15<05:06,  1.27s/it]

Token:  california
Token Matched Value:  california 1.0


 73%|███████▎  | 651/891 [14:16<05:16,  1.32s/it]

Token:  toshiba
Token Matched Value:  sony 0.7086


 73%|███████▎  | 652/891 [14:17<05:14,  1.31s/it]

Token:  classic
Token Matched Value:  classic 1.0


 73%|███████▎  | 653/891 [14:19<05:09,  1.30s/it]

Token:  arantxa
Token Matched Value:  halep 0.72524


 73%|███████▎  | 654/891 [14:20<05:05,  1.29s/it]

Token:  sanchez
Token Matched Value:  gallego 0.80893


 74%|███████▎  | 655/891 [14:21<05:05,  1.30s/it]

Token:  vicario
Token Matched Value:  florencia 0.67823


 74%|███████▎  | 656/891 [14:23<05:06,  1.30s/it]

Token:  spain
Token Matched Value:  spain 1.0


 74%|███████▎  | 657/891 [14:24<05:03,  1.30s/it]

Token:  japan
Token Matched Value:  japan 1.0


 74%|███████▍  | 658/891 [14:25<05:00,  1.29s/it]

Token:  kimiko
Token Matched Value:  yuko 0.71462


 74%|███████▍  | 659/891 [14:27<04:59,  1.29s/it]

Token:  date
Token Matched Value:  prior 0.65923


 74%|███████▍  | 660/891 [14:28<05:03,  1.32s/it]

Token:  japan
Token Matched Value:  japan 1.0


 74%|███████▍  | 661/891 [14:29<05:07,  1.34s/it]

Token:  japan
Token Matched Value:  japan 1.0


 74%|███████▍  | 662/891 [14:31<05:06,  1.34s/it]

Token:  sandrine
Token Matched Value:  virginie 0.81766


 74%|███████▍  | 663/891 [14:32<05:02,  1.32s/it]

Token:  testud
Token Matched Value:  virginie 0.74956


 75%|███████▍  | 664/891 [14:33<04:58,  1.32s/it]

Token:  france
Token Matched Value:  france 1.0


 75%|███████▍  | 665/891 [14:34<04:54,  1.30s/it]

Token:  ai
Token Matched Value:  ai 1.0


 75%|███████▍  | 666/891 [14:36<04:54,  1.31s/it]

Token:  sugiyama
Token Matched Value:  yuko 0.72591


 75%|███████▍  | 667/891 [14:37<04:51,  1.30s/it]

Token:  japan
Token Matched Value:  japan 1.0


 75%|███████▍  | 668/891 [14:38<04:48,  1.29s/it]

Token:  nathalie
Token Matched Value:  virginie 0.84134


 75%|███████▌  | 669/891 [14:40<04:45,  1.29s/it]

Token:  tauziat
Token Matched Value:  halep 0.72455


 75%|███████▌  | 670/891 [14:41<04:43,  1.28s/it]

Token:  france
Token Matched Value:  france 1.0


 75%|███████▌  | 671/891 [14:41<03:34,  1.03it/s]

Token:  shi-ting
Token Matched Value:  None 0.0


 75%|███████▌  | 672/891 [14:42<03:51,  1.06s/it]

Token:  wang
Token Matched Value:  wu 0.89741


 76%|███████▌  | 673/891 [14:44<04:04,  1.12s/it]

Token:  taiwan
Token Matched Value:  taiwan 1.0


 76%|███████▌  | 674/891 [14:45<04:13,  1.17s/it]

Token:  commack
Token Matched Value:  bronx 0.69158


 76%|███████▌  | 675/891 [14:46<04:20,  1.21s/it]

Token:  new
Token Matched Value:  new 1.0


 76%|███████▌  | 676/891 [14:48<04:23,  1.23s/it]

Token:  york
Token Matched Value:  york 1.0


 76%|███████▌  | 677/891 [14:49<04:25,  1.24s/it]

Token:  michael
Token Matched Value:  michael 1.0


 76%|███████▌  | 678/891 [14:50<04:26,  1.25s/it]

Token:  chang
Token Matched Value:  lin 0.86664


 76%|███████▌  | 679/891 [14:50<03:20,  1.06it/s]

Token:  u.s.
Token Matched Value:  None 0.0


 76%|███████▋  | 680/891 [14:52<03:39,  1.04s/it]

Token:  sergi
Token Matched Value:  spaniard 0.66377


 76%|███████▋  | 681/891 [14:53<03:54,  1.11s/it]

Token:  bruguera
Token Matched Value:  madrileña 0.67068


 77%|███████▋  | 682/891 [14:54<04:03,  1.16s/it]

Token:  spain
Token Matched Value:  spain 1.0


 77%|███████▋  | 683/891 [14:55<04:08,  1.20s/it]

Token:  michael
Token Matched Value:  michael 1.0


 77%|███████▋  | 684/891 [14:57<04:11,  1.22s/it]

Token:  joyce
Token Matched Value:  sylvia 0.81887


 77%|███████▋  | 685/891 [14:57<03:10,  1.08it/s]

Token:  u.s.
Token Matched Value:  None 0.0


 77%|███████▋  | 686/891 [14:58<03:29,  1.02s/it]

Token:  richey
Token Matched Value:  mcdonald 0.70022


 77%|███████▋  | 687/891 [15:00<03:50,  1.13s/it]

Token:  reneberg
Token Matched Value:  doubles 0.66733


 77%|███████▋  | 688/891 [15:00<02:55,  1.16it/s]

Token:  u.s.
Token Matched Value:  None 0.0


 77%|███████▋  | 689/891 [15:01<03:24,  1.01s/it]

Token:  martin
Token Matched Value:  martin 1.0


 77%|███████▋  | 690/891 [15:03<03:45,  1.12s/it]

Token:  damm
Token Matched Value:  holsten 0.57542


 78%|███████▊  | 691/891 [15:04<03:54,  1.17s/it]

Token:  czech
Token Matched Value:  czech 1.0


 78%|███████▊  | 692/891 [15:05<03:59,  1.20s/it]

Token:  republic
Token Matched Value:  republic 1.0


 78%|███████▊  | 693/891 [15:06<04:02,  1.22s/it]

Token:  younes
Token Matched Value:  mohamed 0.662


 78%|███████▊  | 694/891 [15:08<04:03,  1.23s/it]

Token:  el
Token Matched Value:  el 1.0


 78%|███████▊  | 695/891 [15:09<04:06,  1.26s/it]

Token:  aynaoui
Token Matched Value:  halep 0.71377


 78%|███████▊  | 696/891 [15:10<04:19,  1.33s/it]

Token:  morocco
Token Matched Value:  morocco 1.0


 78%|███████▊  | 697/891 [15:12<04:15,  1.31s/it]

Token:  karol
Token Matched Value:  karol 1.0


 78%|███████▊  | 698/891 [15:13<04:10,  1.30s/it]

Token:  kucera
Token Matched Value:  spokane 0.56173


 78%|███████▊  | 699/891 [15:14<04:08,  1.29s/it]

Token:  slovakia
Token Matched Value:  slovakia 1.0


 79%|███████▊  | 700/891 [15:16<04:06,  1.29s/it]

Token:  hicham
Token Matched Value:  mohamed 0.75736


 79%|███████▊  | 701/891 [15:17<04:19,  1.36s/it]

Token:  arazi
Token Matched Value:  lashkari 0.56484


 79%|███████▉  | 702/891 [15:18<04:14,  1.35s/it]

Token:  morocco
Token Matched Value:  morocco 1.0


 79%|███████▉  | 703/891 [15:20<04:08,  1.32s/it]

Token:  dalglish
Token Matched Value:  everton 0.73314


 79%|███████▉  | 704/891 [15:21<04:03,  1.30s/it]

Token:  blackburn
Token Matched Value:  blackburn 1.0


 79%|███████▉  | 705/891 [15:22<04:06,  1.32s/it]

Token:  london
Token Matched Value:  london 1.0


 79%|███████▉  | 706/891 [15:24<04:06,  1.33s/it]

Token:  kenny
Token Matched Value:  jimmy 0.82211


 79%|███████▉  | 707/891 [15:25<04:05,  1.33s/it]

Token:  dalglish
Token Matched Value:  everton 0.73314


 79%|███████▉  | 708/891 [15:26<04:02,  1.33s/it]

Token:  blackburn
Token Matched Value:  blackburn 1.0


 80%|███████▉  | 709/891 [15:28<04:03,  1.34s/it]

Token:  english
Token Matched Value:  english 1.0


 80%|███████▉  | 710/891 [15:29<04:03,  1.35s/it]

Token:  blackburn
Token Matched Value:  blackburn 1.0


 80%|███████▉  | 711/891 [15:30<03:59,  1.33s/it]

Token:  dalglish
Token Matched Value:  everton 0.73314


 80%|███████▉  | 712/891 [15:32<03:58,  1.33s/it]

Token:  blackburn
Token Matched Value:  blackburn 1.0


 80%|████████  | 713/891 [15:33<04:00,  1.35s/it]

Token:  spain
Token Matched Value:  spain 1.0


 80%|████████  | 714/891 [15:34<04:00,  1.36s/it]

Token:  dalglish
Token Matched Value:  everton 0.73314


 80%|████████  | 715/891 [15:36<03:55,  1.34s/it]

Token:  dalglish
Token Matched Value:  everton 0.73314


 80%|████████  | 716/891 [15:37<03:51,  1.32s/it]

Token:  blackburn
Token Matched Value:  blackburn 1.0


 80%|████████  | 717/891 [15:38<03:47,  1.31s/it]

Token:  english
Token Matched Value:  english 1.0


 81%|████████  | 718/891 [15:40<03:44,  1.30s/it]

Token:  county
Token Matched Value:  county 1.0


 81%|████████  | 719/891 [15:41<03:41,  1.29s/it]

Token:  championship
Token Matched Value:  championship 1.0


 81%|████████  | 720/891 [15:42<03:39,  1.28s/it]

Token:  london
Token Matched Value:  london 1.0


 81%|████████  | 721/891 [15:43<03:37,  1.28s/it]

Token:  english
Token Matched Value:  english 1.0


 81%|████████  | 722/891 [15:45<03:37,  1.29s/it]

Token:  county
Token Matched Value:  county 1.0


 81%|████████  | 723/891 [15:46<03:34,  1.28s/it]

Token:  championship
Token Matched Value:  championship 1.0


 81%|████████▏ | 724/891 [15:46<02:41,  1.04it/s]

Token:  weston-super-mare
Token Matched Value:  None 0.0


 81%|████████▏ | 725/891 [15:47<02:56,  1.06s/it]

Token:  durham
Token Matched Value:  durham 1.0


 81%|████████▏ | 726/891 [15:49<03:05,  1.12s/it]

Token:  somerset
Token Matched Value:  somerset 1.0


 82%|████████▏ | 727/891 [15:50<03:11,  1.17s/it]

Token:  colchester
Token Matched Value:  norwich 0.80647


 82%|████████▏ | 728/891 [15:51<03:15,  1.20s/it]

Token:  gloucestershire
Token Matched Value:  gloucestershire 1.0


 82%|████████▏ | 729/891 [15:51<02:27,  1.10it/s]

Token:  j.
Token Matched Value:  None 0.0


 82%|████████▏ | 730/891 [15:53<02:43,  1.02s/it]

Token:  russell
Token Matched Value:  parker 0.87036


 82%|████████▏ | 731/891 [15:54<02:54,  1.09s/it]

Token:  essex
Token Matched Value:  essex 1.0


 82%|████████▏ | 732/891 [15:55<03:02,  1.15s/it]

Token:  cardiff
Token Matched Value:  cardiff 1.0


 82%|████████▏ | 733/891 [15:57<03:06,  1.18s/it]

Token:  kent
Token Matched Value:  kent 1.0


 82%|████████▏ | 734/891 [15:58<03:09,  1.21s/it]

Token:  glamorgan
Token Matched Value:  gloucestershire 0.79991


 82%|████████▏ | 735/891 [15:59<03:11,  1.23s/it]

Token:  leicester
Token Matched Value:  leicester 1.0


 83%|████████▎ | 736/891 [16:00<03:11,  1.24s/it]

Token:  leicestershire
Token Matched Value:  leicestershire 1.0


 83%|████████▎ | 737/891 [16:02<03:11,  1.25s/it]

Token:  hampshire
Token Matched Value:  hampshire 1.0


 83%|████████▎ | 738/891 [16:03<03:12,  1.26s/it]

Token:  northampton
Token Matched Value:  northampton 1.0


 83%|████████▎ | 739/891 [16:04<03:19,  1.32s/it]

Token:  sussex
Token Matched Value:  sussex 1.0


 83%|████████▎ | 740/891 [16:06<03:25,  1.36s/it]

Token:  northamptonshire
Token Matched Value:  northamptonshire 1.0


 83%|████████▎ | 741/891 [16:07<03:24,  1.36s/it]

Token:  trent
Token Matched Value:  staffordshire 0.67476


 83%|████████▎ | 742/891 [16:08<03:20,  1.34s/it]

Token:  bridge
Token Matched Value:  bridge 1.0


 83%|████████▎ | 743/891 [16:10<03:16,  1.33s/it]

Token:  nottinghamshire
Token Matched Value:  nottinghamshire 1.0


 84%|████████▎ | 744/891 [16:11<03:13,  1.32s/it]

Token:  surrey
Token Matched Value:  surrey 1.0


 84%|████████▎ | 745/891 [16:12<03:10,  1.30s/it]

Token:  worcester
Token Matched Value:  worcester 1.0


 84%|████████▎ | 746/891 [16:14<03:08,  1.30s/it]

Token:  warwickshire
Token Matched Value:  warwickshire 1.0


 84%|████████▍ | 747/891 [16:15<03:07,  1.30s/it]

Token:  worcestershire
Token Matched Value:  worcestershire 1.0


 84%|████████▍ | 748/891 [16:16<03:06,  1.31s/it]

Token:  headingley
Token Matched Value:  leeds 0.71632


 84%|████████▍ | 749/891 [16:18<03:06,  1.32s/it]

Token:  yorkshire
Token Matched Value:  yorkshire 1.0


 84%|████████▍ | 750/891 [16:19<03:05,  1.32s/it]

Token:  lancashire
Token Matched Value:  lancashire 1.0


 84%|████████▍ | 751/891 [16:20<03:02,  1.31s/it]

Token:  england
Token Matched Value:  england 1.0


 84%|████████▍ | 752/891 [16:21<03:00,  1.30s/it]

Token:  pakistan
Token Matched Value:  pakistan 1.0


 85%|████████▍ | 753/891 [16:23<02:58,  1.29s/it]

Token:  london
Token Matched Value:  london 1.0


 85%|████████▍ | 754/891 [16:24<02:56,  1.29s/it]

Token:  england
Token Matched Value:  england 1.0


 85%|████████▍ | 755/891 [16:25<02:55,  1.29s/it]

Token:  pakistan
Token Matched Value:  pakistan 1.0


 85%|████████▍ | 756/891 [16:27<02:53,  1.28s/it]

Token:  the
Token Matched Value:  the 1.0


 85%|████████▍ | 757/891 [16:28<02:51,  1.28s/it]

Token:  oval
Token Matched Value:  oval 1.0


 85%|████████▌ | 758/891 [16:29<02:55,  1.32s/it]

Token:  england
Token Matched Value:  england 1.0


 85%|████████▌ | 759/891 [16:31<02:53,  1.31s/it]

Token:  waqar
Token Matched Value:  mohammad 0.79655


 85%|████████▌ | 760/891 [16:32<02:50,  1.30s/it]

Token:  younis
Token Matched Value:  ahmed 0.79593


 85%|████████▌ | 761/891 [16:33<02:48,  1.29s/it]

Token:  mushtaq
Token Matched Value:  rasool 0.82116


 86%|████████▌ | 762/891 [16:34<02:48,  1.31s/it]

Token:  ahmed
Token Matched Value:  ahmed 1.0


 86%|████████▌ | 763/891 [16:36<02:50,  1.33s/it]

Token:  saeed
Token Matched Value:  ahmed 0.86024


 86%|████████▌ | 764/891 [16:37<02:52,  1.35s/it]

Token:  anwar
Token Matched Value:  ahmed 0.86776


 86%|████████▌ | 765/891 [16:39<02:49,  1.35s/it]

Token:  waqar
Token Matched Value:  mohammad 0.79655


 86%|████████▌ | 766/891 [16:40<02:48,  1.34s/it]

Token:  younis
Token Matched Value:  ahmed 0.79593


 86%|████████▌ | 767/891 [16:41<02:44,  1.32s/it]

Token:  mohammad
Token Matched Value:  mohammad 1.0


 86%|████████▌ | 768/891 [16:42<02:40,  1.30s/it]

Token:  akram
Token Matched Value:  mohammad 0.82902


 86%|████████▋ | 769/891 [16:44<02:37,  1.29s/it]

Token:  mushtaq
Token Matched Value:  rasool 0.82116


 86%|████████▋ | 770/891 [16:45<02:36,  1.29s/it]

Token:  ahmed
Token Matched Value:  ahmed 1.0


 87%|████████▋ | 771/891 [16:46<02:33,  1.28s/it]

Token:  wasim
Token Matched Value:  pathan 0.79


 87%|████████▋ | 772/891 [16:48<02:32,  1.28s/it]

Token:  akram
Token Matched Value:  mohammad 0.82902


 87%|████████▋ | 773/891 [16:49<02:31,  1.28s/it]

Token:  wasim
Token Matched Value:  pathan 0.79


 87%|████████▋ | 774/891 [16:50<02:29,  1.27s/it]

Token:  akram
Token Matched Value:  mohammad 0.82902


 87%|████████▋ | 775/891 [16:51<02:27,  1.27s/it]

Token:  waqar
Token Matched Value:  mohammad 0.79655


 87%|████████▋ | 776/891 [16:53<02:26,  1.27s/it]

Token:  younis
Token Matched Value:  ahmed 0.79593


 87%|████████▋ | 777/891 [16:54<02:25,  1.28s/it]

Token:  mohammad
Token Matched Value:  mohammad 1.0


 87%|████████▋ | 778/891 [16:55<02:25,  1.29s/it]

Token:  akram
Token Matched Value:  mohammad 0.82902


 87%|████████▋ | 779/891 [16:57<02:24,  1.29s/it]

Token:  mushtaq
Token Matched Value:  rasool 0.82116


 88%|████████▊ | 780/891 [16:58<02:22,  1.29s/it]

Token:  ahmed
Token Matched Value:  ahmed 1.0


 88%|████████▊ | 781/891 [16:59<02:21,  1.29s/it]

Token:  pakistan
Token Matched Value:  pakistan 1.0


 88%|████████▊ | 782/891 [17:00<02:19,  1.28s/it]

Token:  saeed
Token Matched Value:  ahmed 0.86024


 88%|████████▊ | 783/891 [17:02<02:19,  1.29s/it]

Token:  anwar
Token Matched Value:  ahmed 0.86776


 88%|████████▊ | 784/891 [17:03<02:18,  1.29s/it]

Token:  ijaz
Token Matched Value:  ahmed 0.77284


 88%|████████▊ | 785/891 [17:04<02:16,  1.29s/it]

Token:  ahmed
Token Matched Value:  ahmed 1.0


 88%|████████▊ | 786/891 [17:05<01:42,  1.02it/s]

Token:  inzamam-ul-haq
Token Matched Value:  None 0.0


 88%|████████▊ | 787/891 [17:06<01:51,  1.07s/it]

Token:  salim
Token Matched Value:  mohammed 0.85611


 88%|████████▊ | 788/891 [17:07<01:57,  1.14s/it]

Token:  malik
Token Matched Value:  ahmed 0.84072


 89%|████████▊ | 789/891 [17:08<02:04,  1.22s/it]

Token:  asif
Token Matched Value:  ahmed 0.83874


 89%|████████▊ | 790/891 [17:10<02:21,  1.40s/it]

Token:  mujtaba
Token Matched Value:  mohammad 0.82213


 89%|████████▉ | 791/891 [17:12<02:20,  1.40s/it]

Token:  wasim
Token Matched Value:  pathan 0.79


 89%|████████▉ | 792/891 [17:13<02:16,  1.38s/it]

Token:  akram
Token Matched Value:  mohammad 0.82902


 89%|████████▉ | 793/891 [17:14<02:12,  1.35s/it]

Token:  moin
Token Matched Value:  mohammad 0.75226


 89%|████████▉ | 794/891 [17:16<02:08,  1.33s/it]

Token:  khan
Token Matched Value:  mohammad 0.79369


 89%|████████▉ | 795/891 [17:17<02:05,  1.31s/it]

Token:  mushtaq
Token Matched Value:  rasool 0.82116


 89%|████████▉ | 796/891 [17:18<02:03,  1.30s/it]

Token:  ahmed
Token Matched Value:  ahmed 1.0


 89%|████████▉ | 797/891 [17:19<02:02,  1.30s/it]

Token:  waqar
Token Matched Value:  mohammad 0.79655


 90%|████████▉ | 798/891 [17:21<02:00,  1.29s/it]

Token:  younis
Token Matched Value:  ahmed 0.79593


 90%|████████▉ | 799/891 [17:22<01:58,  1.28s/it]

Token:  ferguson
Token Matched Value:  mcdonald 0.87053


 90%|████████▉ | 800/891 [17:23<01:56,  1.28s/it]

Token:  scottish
Token Matched Value:  scottish 1.0


 90%|████████▉ | 801/891 [17:25<01:54,  1.27s/it]

Token:  glasgow
Token Matched Value:  glasgow 1.0


 90%|█████████ | 802/891 [17:26<01:53,  1.28s/it]

Token:  everton
Token Matched Value:  everton 1.0


 90%|█████████ | 803/891 [17:27<01:51,  1.27s/it]

Token:  duncan
Token Matched Value:  cameron 0.90553


 90%|█████████ | 804/891 [17:28<01:53,  1.30s/it]

Token:  ferguson
Token Matched Value:  mcdonald 0.87053


 90%|█████████ | 805/891 [17:30<01:51,  1.30s/it]

Token:  manchester
Token Matched Value:  manchester 1.0


 90%|█████████ | 806/891 [17:31<01:49,  1.29s/it]

Token:  united
Token Matched Value:  united 1.0


 91%|█████████ | 807/891 [17:32<01:48,  1.29s/it]

Token:  scottish
Token Matched Value:  scottish 1.0


 91%|█████████ | 808/891 [17:35<02:25,  1.76s/it]

Token:  glasgow
Token Matched Value:  glasgow 1.0


 91%|█████████ | 809/891 [17:53<08:49,  6.46s/it]

Token:  rangers
Token Matched Value:  rangers 1.0


 91%|█████████ | 810/891 [17:57<07:43,  5.72s/it]

Token:  ally
Token Matched Value:  ally 1.0


 91%|█████████ | 811/891 [18:04<08:21,  6.26s/it]

Token:  mccoist
Token Matched Value:  qpr 0.72279


 91%|█████████ | 812/891 [19:57<50:16, 38.18s/it]

Token:  world
Token Matched Value:  world 1.0


 91%|█████████ | 813/891 [20:30<47:35, 36.61s/it]

Token:  cup
Token Matched Value:  cup 1.0


 91%|█████████▏| 814/891 [20:36<35:11, 27.42s/it]

Token:  austria
Token Matched Value:  austria 1.0


 91%|█████████▏| 815/891 [20:37<24:52, 19.64s/it]

Token:  vienna
Token Matched Value:  vienna 1.0


 92%|█████████▏| 816/891 [20:38<17:40, 14.13s/it]

Token:  ferguson
Token Matched Value:  mcdonald 0.87053


 92%|█████████▏| 817/891 [21:44<36:29, 29.58s/it]

Token:  scotland
Token Matched Value:  scotland 1.0


 92%|█████████▏| 818/891 [21:49<26:50, 22.06s/it]

Token:  scotland
Token Matched Value:  scotland 1.0


 92%|█████████▏| 819/891 [21:50<19:02, 15.87s/it]

Token:  craig
Token Matched Value:  ian 0.85142


 92%|█████████▏| 820/891 [21:51<13:37, 11.51s/it]

Token:  brown
Token Matched Value:  brown 1.0


 92%|█████████▏| 821/891 [21:53<09:50,  8.43s/it]

Token:  celtic
Token Matched Value:  celtic 1.0


 92%|█████████▏| 822/891 [21:54<07:12,  6.27s/it]

Token:  jackie
Token Matched Value:  frankie 0.82353


 92%|█████████▏| 823/891 [22:04<08:32,  7.53s/it]

Token:  mcnamara
Token Matched Value:  mcdonald 0.76631


 92%|█████████▏| 824/891 [22:16<09:46,  8.75s/it]

Token:  england
Token Matched Value:  england 1.0


 93%|█████████▎| 825/891 [22:29<10:54,  9.92s/it]

Token:  london
Token Matched Value:  london 1.0


 93%|█████████▎| 826/891 [23:40<30:43, 28.36s/it]

Token:  england
Token Matched Value:  england 1.0


 93%|█████████▎| 827/891 [23:42<21:47, 20.42s/it]

Token:  pakistan
Token Matched Value:  pakistan 1.0


 93%|█████████▎| 828/891 [23:43<15:26, 14.71s/it]

Token:  the
Token Matched Value:  the 1.0


 93%|█████████▎| 829/891 [23:44<11:02, 10.68s/it]

Token:  oval
Token Matched Value:  oval 1.0


 93%|█████████▎| 830/891 [23:46<07:57,  7.83s/it]

Token:  keane
Token Matched Value:  mcdonald 0.75011


 93%|█████████▎| 831/891 [23:47<05:50,  5.84s/it]

Token:  manchester
Token Matched Value:  manchester 1.0


 93%|█████████▎| 832/891 [23:48<04:21,  4.44s/it]

Token:  united
Token Matched Value:  united 1.0


 93%|█████████▎| 833/891 [23:49<03:20,  3.46s/it]

Token:  london
Token Matched Value:  london 1.0


 94%|█████████▎| 834/891 [23:54<03:36,  3.79s/it]

Token:  ireland
Token Matched Value:  ireland 1.0


 94%|█████████▎| 835/891 [24:05<05:37,  6.04s/it]

Token:  roy
Token Matched Value:  willie 0.72261


 94%|█████████▍| 836/891 [24:17<07:07,  7.77s/it]

Token:  keane
Token Matched Value:  mcdonald 0.75011


 94%|█████████▍| 837/891 [24:28<07:55,  8.81s/it]

Token:  english
Token Matched Value:  english 1.0


 94%|█████████▍| 838/891 [24:30<05:59,  6.78s/it]

Token:  f.a.
Token Matched Value:  None 0.0


 94%|█████████▍| 839/891 [25:36<21:21, 24.64s/it]

Token:  cup
Token Matched Value:  cup 1.0


 94%|█████████▍| 840/891 [25:38<15:00, 17.65s/it]

Token:  manchester
Token Matched Value:  manchester 1.0


 94%|█████████▍| 841/891 [25:39<10:36, 12.74s/it]

Token:  united
Token Matched Value:  united 1.0


 95%|█████████▍| 842/891 [25:40<07:34,  9.27s/it]

Token:  united
Token Matched Value:  united 1.0


 95%|█████████▍| 843/891 [25:41<05:28,  6.84s/it]

Token:  alex
Token Matched Value:  alex 1.0


 95%|█████████▍| 844/891 [25:43<04:01,  5.14s/it]

Token:  ferguson
Token Matched Value:  mcdonald 0.87053


 95%|█████████▍| 845/891 [25:44<03:01,  3.95s/it]

Token:  canadian
Token Matched Value:  canadian 1.0


 95%|█████████▍| 846/891 [25:45<02:20,  3.12s/it]

Token:  open
Token Matched Value:  closed 0.58186


 95%|█████████▌| 847/891 [25:47<02:05,  2.85s/it]

Token:  toronto
Token Matched Value:  toronto 1.0


 95%|█████████▌| 848/891 [25:58<03:48,  5.32s/it]

Token:  canadian
Token Matched Value:  canadian 1.0


 95%|█████████▌| 849/891 [26:10<04:59,  7.14s/it]

Token:  open
Token Matched Value:  closed 0.58186


 95%|█████████▌| 850/891 [26:21<05:41,  8.33s/it]

Token:  daniel
Token Matched Value:  benjamin 0.82638


 96%|█████████▌| 851/891 [27:32<18:04, 27.11s/it]

Token:  nestor
Token Matched Value:  milos 0.65591


 96%|█████████▌| 852/891 [27:33<12:35, 19.37s/it]

Token:  canada
Token Matched Value:  canada 1.0


 96%|█████████▌| 853/891 [27:34<08:48, 13.92s/it]

Token:  thomas
Token Matched Value:  thomas 1.0


 96%|█████████▌| 854/891 [27:35<06:13, 10.10s/it]

Token:  muster
Token Matched Value:  confederates 0.60801


 96%|█████████▌| 855/891 [27:37<04:27,  7.43s/it]

Token:  austria
Token Matched Value:  austria 1.0


 96%|█████████▌| 856/891 [27:38<03:15,  5.58s/it]

Token:  mikael
Token Matched Value:  swede 0.60571


 96%|█████████▌| 857/891 [27:39<02:25,  4.27s/it]

Token:  tillstrom
Token Matched Value:  kurina 0.58824


 96%|█████████▋| 858/891 [27:40<01:50,  3.34s/it]

Token:  sweden
Token Matched Value:  sweden 1.0


 96%|█████████▋| 859/891 [27:41<01:25,  2.69s/it]

Token:  goran
Token Matched Value:  canadi 0.74372


 97%|█████████▋| 860/891 [27:47<01:54,  3.70s/it]

Token:  ivanisevic
Token Matched Value:  canadi 0.69291


 97%|█████████▋| 861/891 [27:59<02:58,  5.94s/it]

Token:  croatia
Token Matched Value:  croatia 1.0


 97%|█████████▋| 862/891 [28:10<03:39,  7.55s/it]

Token:  wayne
Token Matched Value:  wayne 1.0


 97%|█████████▋| 863/891 [28:21<04:01,  8.64s/it]

Token:  ferreira
Token Matched Value:  carvalho 0.8961


 97%|█████████▋| 864/891 [29:28<11:46, 26.17s/it]

Token:  south
Token Matched Value:  south 1.0


 97%|█████████▋| 865/891 [29:29<08:06, 18.70s/it]

Token:  africa
Token Matched Value:  africa 1.0


 97%|█████████▋| 866/891 [29:31<05:36, 13.44s/it]

Token:  jiri
Token Matched Value:  vratislav 0.6221


 97%|█████████▋| 867/891 [29:32<03:54,  9.76s/it]

Token:  novak
Token Matched Value:  milos 0.61137


 97%|█████████▋| 868/891 [29:33<02:45,  7.18s/it]

Token:  czech
Token Matched Value:  czech 1.0


 98%|█████████▊| 869/891 [29:34<01:58,  5.38s/it]

Token:  republic
Token Matched Value:  republic 1.0


 98%|█████████▊| 870/891 [29:35<01:26,  4.12s/it]

Token:  marcelo
Token Matched Value:  carvalho 0.79135


 98%|█████████▊| 871/891 [29:36<01:04,  3.24s/it]

Token:  rios
Token Matched Value:  santos 0.76116


 98%|█████████▊| 872/891 [29:38<00:49,  2.61s/it]

Token:  chile
Token Matched Value:  chile 1.0


 98%|█████████▊| 873/891 [29:46<01:19,  4.41s/it]

Token:  kenneth
Token Matched Value:  ronald 0.85383


 98%|█████████▊| 874/891 [29:57<01:48,  6.40s/it]

Token:  carlsen
Token Matched Value:  chess 0.57582


 98%|█████████▊| 875/891 [30:09<02:06,  7.89s/it]

Token:  denmark
Token Matched Value:  denmark 1.0


 98%|█████████▊| 876/891 [30:20<02:13,  8.88s/it]

Token:  malivai
Token Matched Value:  halep 0.63493


 98%|█████████▊| 877/891 [31:06<04:42, 20.21s/it]

Token:  washington
Token Matched Value:  washington 1.0


 99%|█████████▊| 878/891 [31:07<03:04, 14.22s/it]

Token:  u.s.
Token Matched Value:  None 0.0


 99%|█████████▊| 879/891 [31:08<02:04, 10.34s/it]

Token:  alex
Token Matched Value:  alex 1.0


 99%|█████████▉| 880/891 [31:09<01:23,  7.59s/it]

Token:  corretja
Token Matched Value:  compatriot 0.71141


 99%|█████████▉| 881/891 [31:10<00:56,  5.67s/it]

Token:  spain
Token Matched Value:  spain 1.0


 99%|█████████▉| 882/891 [31:12<00:38,  4.32s/it]

Token:  todd
Token Matched Value:  mike 0.81807


 99%|█████████▉| 883/891 [31:13<00:27,  3.38s/it]

Token:  martin
Token Matched Value:  martin 1.0


 99%|█████████▉| 884/891 [31:13<00:17,  2.44s/it]

Token:  u.s.
Token Matched Value:  None 0.0


 99%|█████████▉| 885/891 [31:14<00:12,  2.05s/it]

Token:  renzo
Token Matched Value:  francesco 0.72873


 99%|█████████▉| 886/891 [31:15<00:08,  1.79s/it]

Token:  furlan
Token Matched Value:  dino 0.62233


100%|█████████▉| 887/891 [31:21<00:12,  3.09s/it]

Token:  italy
Token Matched Value:  italy 1.0


100%|█████████▉| 888/891 [31:33<00:17,  5.72s/it]

Token:  mark
Token Matched Value:  mike 0.74831


100%|█████████▉| 889/891 [31:46<00:15,  7.76s/it]

Token:  philippoussis
Token Matched Value:  halep 0.69997


100%|█████████▉| 890/891 [33:10<00:30, 30.65s/it]

Token:  australia
Token Matched Value:  australia 1.0


100%|██████████| 891/891 [33:12<00:00,  2.24s/it]

Token:  marc
Token Matched Value:  alex 0.74432
******************************************************
Anchor Texts Not In Wikipedia2Vec Vocabulary:  110861165
******************************************************
CPU times: user 22min 41s, sys: 46.6 s, total: 23min 28s
Wall time: 33min 12s





In [128]:
# Display generated dict
w2v_similarity_dict

{'german': ('german', 1.0),
 'british': ('british', 0.99999994),
 'brussels': ('brussels', 1.0),
 'european': ('european', 1.0),
 'commission': ('commission', 1.0),
 'germany': ('germany', 1.0),
 'union': ('union', 1.0),
 'britain': ('britain', 1.0),
 'franz': ('ludwig', 0.83037525),
 'fischler': ('bender', 0.6348997),
 'france': ('france', 1.0),
 'bse': ('bse', 1.0),
 'spanish': ('spanish', 1.0),
 'loyola': ('college', 0.71429205),
 'de': ('de', 1.0),
 'palacio': ('madrid', 0.7687936),
 'europe': ('europe', 1.0),
 'bonn': ('bonn', 1.0),
 'hendrix': ('hendrix', 1.0),
 'london': ('london', 1.0),
 'u.s.': (None, 0.0),
 'jimi': ('jimi', 1.0),
 'english': ('english', 1.0),
 'nottingham': ('nottingham', 1.0),
 'australian': ('australian', 1.0),
 'china': ('china', 1.0),
 'taiwan': ('taiwan', 1.0),
 'beijing': ('beijing', 1.0),
 'taipei': ('taipei', 1.0),
 'strait': ('straits', 0.72950417),
 'ukraine': ('ukraine', 1.0),
 'taiwanese': ('taiwanese', 1.0),
 'lien': ('ching', 0.6176003),
 'chan'

In [118]:
# What is the word it matched?
max_key = max(w2v_similarity_dict, key=w2v_similarity_dict.get)
print(max_key, w2v_similarity_dict[max_key])

german ('german', 1.0)


We see better success at matching to anchor text but matching this to an entity is still reliant on previously made connections.

In [None]:
# Create text-to-page-to-title dataframe (as predictions)
preds_wiki2vec = pd.DataFrame({'mention': x_np, 
                             'entity_page_id': None,
                             'target_page_title': None
                            })

# tqdm is a progress bar package
# For all anchor texts, update with predictions from wiki2vec
for i in tqdm(range(len(preds_wiki2vec))):
    try:
        preds_wiki2vec.iloc[i, 1:3] = max_anchor_links.loc[preds_wiki2vec.iloc[i, 0], ['target_page_id', 'target_page_title']].values
    except KeyError:
        # Leave at default None values
        continue