# Named Entity Recognition

Two different Natural Language Processing libraries are applied to identify the entities in our dataset. The dataset consists of the questions extracted from the LC-QuAD dataset, and the corresponding entities of each question are used as the ground-truth labels.

## Flair

In [1]:
#import sys
#!{sys.executable} -m pip install flair

from flair.data import Sentence
from flair.models import SequenceTagger

# Load the pretrained Flair sequence taggers by name.
# `tagger` ('ner') and `tagger2` ('ner-ontonotes') are the two NER models used
# by the cells below.
tagger = SequenceTagger.load('ner')
tagger2 = SequenceTagger.load('ner-ontonotes')
# NOTE(review): 'por' is not a model name I recognise -- possibly a typo for
# 'pos'; please confirm. tagger3 and tagger4 are not used by any visible cell.
tagger3 = SequenceTagger.load('por')
tagger4 = SequenceTagger.load('frame')

sentence = Sentence('George went to Washington .')

# predict NER tags
tagger.predict(sentence)

# print sentence with predicted tags
print(sentence.to_tagged_string())

# Tag the same Sentence object again with the second model and print it, so the
# label sets of the two models can be compared on one example.
tagger2.predict(sentence)
print(sentence.to_tagged_string())



George <S-PER> went to Washington <S-LOC> .
George <S-PERSON> went to Washington <S-GPE> .


In [None]:
import re

# Patterns for the begin / end / inside markers of a tagged string.
# They are defined here but not used by this cell.
reg1 = re.compile(r' <B-\w+> ')
reg2 = re.compile(r' <E-\w+>')
reg3 = re.compile(r' <I-\w+> ')

# Writing the entities to disk is currently disabled:
#ofile = open('results/NER.txt', 'wt', encoding='utf-8')

# Distinct entity types produced by `tagger`, in first-seen order.
tags = []

with open('sqa_questions_index.txt', 'rt', encoding='utf-8') as f:
    questions = f.read().splitlines()

for line in questions:
    # Each line is "<index>,<question text>"; split on the first comma only.
    idx, text = line.split(',', 1)
    sentence = Sentence(text.replace('?', '').strip())
    tagger.predict(sentence)

    for span in sentence.get_spans('ner'):
        span_repr = str(span)
        # NOTE(review): assumes str(span) looks like `TAG-... ]: "entity text"`;
        # confirm against the installed flair version.
        tag, _ = span_repr.split('-', 1)
        if tag not in tags:
            tags.append(tag)
        _, ent = span_repr.split(']: \"')
        ent = ent.replace('\"', '')
        #ofile.writelines(idx + ',' + ent +'\n')

print(tags)
#ofile.close()

In [None]:
### S - Single,   B - Begin,   E - End,   I - Inside (intermediate)
### PER: Person,    LOC: Location,    ORG: Organization,    MISC: Miscellaneous

In [None]:
import re

# Patterns for the begin / end / inside markers of a tagged string.
# They are defined here but not used by this cell.
reg1 = re.compile(r' <B-\w+> ')
reg2 = re.compile(r' <E-\w+>')
reg3 = re.compile(r' <I-\w+> ')

# Writing the entities to disk is currently disabled:
#ofile = open('results/NER12.txt', 'wt', encoding='utf-8')

# Distinct entity types produced by `tagger2`, in first-seen order.
tags = []

with open('sqa_questions_index.txt', 'rt', encoding='utf-8') as f:
    questions = f.read().splitlines()

for line in questions:
    # Each line is "<index>,<question text>"; split on the first comma only.
    idx, text = line.split(',', 1)
    sentence = Sentence(text.replace('?', '').strip())
    tagger2.predict(sentence)

    for span in sentence.get_spans('ner'):
        span_repr = str(span)
        # NOTE(review): assumes str(span) looks like `TAG-... ]: "entity text"`;
        # confirm against the installed flair version.
        tag, _ = span_repr.split('-', 1)
        if tag not in tags:
            tags.append(tag)
        _, ent = span_repr.split(']: \"')
        ent = ent.replace('\"', '')
        #ofile.writelines(idx + ',' + ent +'\n')

print(tags)
#ofile.close()

In [3]:
### both tags together
# Runs both Flair NER models over every question and writes one
# "<question index>,<entity text>" line per detected entity:
#   results/NER_tags_2.txt    <- `tagger`  ('ner')
#   results/NER12_tags_2.txt  <- `tagger2` ('ner-ontonotes'), minus `rem_tags`

import re

# Tagged-string marker patterns kept from the earlier cells; unused here.
reg1 = re.compile(r' <B-\w+> ')
reg2 = re.compile(r' <E-\w+>')
reg3 = re.compile(r' <I-\w+> ')

rem_tags = ['CARDINAL','ORDINAL','DATE']
# We exclude these types to avoid wrong identification of entities in the questions.
# Example: 514,What is the agency name of the Ministry of Information and Communication Technology (Thailand)
#          and distribution region  of the Channel 9 MCOT HD ?
# In this question, '9' would be identified as a cardinal entity, but the real entity in this case is 'Channel 9 MCOT HD',
# so excluding the cardinal type we allow the model to identify the full compound entity.

with open('sqa_questions_index.txt', 'rt', encoding='utf-8') as f:
    questions = f.read().splitlines()

# Question words / auxiliaries stripped before tagging. The replacement is a
# plain substring removal applied to the whole "<index>,<question>" line.
remove = ['What is ', 'What are ', 'Which ', 'Who is ', 'Who were ', 'Who ', 'Was ', 'How many ', 'How much ',
              'has been ', 'Do ', 'Does ', 'How ', 'Where ', 'go ']

questions_cleaned = []

for question in questions:
    for rem in remove:
        question = question.replace(rem, '')
    questions_cleaned.append(question)

# Context managers guarantee both result files are flushed and closed even if
# tagging fails part-way through.
with open('results/NER_tags_2.txt', 'wt', encoding='utf-8') as ofile1, \
        open('results/NER12_tags_2.txt', 'wt', encoding='utf-8') as ofile2:
    for question in questions_cleaned:
        idx, question = question.split(',', 1)
        text = question.replace('?', '').replace('Name ', '').strip()
        try:
            # One Sentence per model so their predictions do not overwrite each other.
            sentence = Sentence(text)
            sentence2 = Sentence(text)
        except Exception:
            # Report the offending question and skip it; previously execution
            # fell through and tagged the sentence left over from the prior
            # iteration (or raised NameError on the first one).
            print(question)
            continue

        # predict NER tags
        tagger.predict(sentence)
        tagger2.predict(sentence2)

        for entity in sentence.get_spans('ner'):
            # NOTE(review): assumes str(span) ends with `]: "entity text"`;
            # confirm against the installed flair version.
            _, ent = str(entity).split(']: \"')
            ent = ent.replace('\"', '')
            ofile1.write(idx + ',' + ent + '\n')

        for entity in sentence2.get_spans('ner'):
            tag, _ = str(entity).split('-', 1)
            if tag not in rem_tags:
                _, ent = str(entity).split(']: \"')
                ent = ent.replace('\"', '')
                ofile2.write(idx + ',' + ent + '\n')




In [47]:
# Read back the "<index>,<entity>" lines written by the Flair extraction cell.
with open('results/NER_tags_2.txt', 'rt', encoding='utf-8') as ner_file:
    entities_ner = ner_file.read().splitlines()

with open('results/NER12_tags_2.txt', 'rt', encoding='utf-8') as ner12_file:
    entities_ner12 = ner12_file.read().splitlines()

In [48]:
# First ten raw result lines of each Flair model.
for sample in (entities_ner, entities_ner12):
    print(sample[:10])

['0,Bill Finger', '1,winston churchill', '1,Selwyn Lloyd', '2,Gestapo', '3,Mumbai North', '4,Roberto Clemente Bridge', '5,Theos Philopator', '5,Cleopatra', '6,Li Si', '7,PAvel Moroz']
['0,Bill Finger', '1,winston churchill', '1,Selwyn Lloyd', '2,Gestapo', '3,Mumbai North', '4,Roberto Clemente Bridge', '6,Li Si', '7,PAvel Moroz', '7,Yakov Estrin', '8,Broadmeadows, Victoria']


In [52]:
n = 5000

# One slot per question; [''] marks "no entity found".
# Every slot initially aliases the same placeholder list. That is safe here
# because a placeholder is always replaced by a new list, never appended to.
ner = [['']] * n
ner12 = [['']] * n

# store the entities identified by each Flair NER model
for entity in entities_ner:
    idx, ent = entity.split(',', 1)
    slot = int(idx)
    if ner[slot] != ['']:
        ner[slot].append(entity)
    else:
        ner[slot] = [entity]

for entity in entities_ner12:
    idx, ent = entity.split(',', 1)
    slot = int(idx)
    # Drop every 'the ' substring from the stored "<index>,<entity>" string.
    entity = entity.replace('the ', '')
    if ner12[slot] != ['']:
        ner12[slot].append(entity)
    else:
        ner12[slot] = [entity]

In [53]:
# Entities found by the 'ner' model for the first 20 questions.
ner[:20]

[['0,Bill Finger'],
 ['1,winston churchill', '1,Selwyn Lloyd'],
 ['2,Gestapo'],
 ['3,Mumbai North'],
 ['4,Roberto Clemente Bridge'],
 ['5,Theos Philopator', '5,Cleopatra'],
 ['6,Li Si'],
 ['7,PAvel Moroz', '7,Yakov Estrin'],
 ['8,Broadmeadows, Victoria'],
 ['9,ASC Creative Services'],
 ['10,Ernest Rutherford', '10,Charles Drummond Ellis'],
 ['11,REP Parasol'],
 ["12,Monroe Carell Jr. Children's Hospital",
  '12,Vanderbilt',
  '12,Duncan U. Fletcher'],
 ['13,Nader Guirat,', '13,Josef Johansson'],
 [''],
 ['15,HBO'],
 ['16,Nikolai Morozov', '16,Stanislav Morozov'],
 ['17,Abhijit Kunte', '17,Kasparov'],
 ['18,MSX Basics'],
 ['19,Ronaldo']]

In [54]:
# Entities found by the 'ner-ontonotes' model for the first 20 questions.
ner12[:20]

[['0,Bill Finger'],
 ['1,winston churchill', '1,Selwyn Lloyd'],
 ['2,Gestapo'],
 ['3,Mumbai North'],
 ['4,Roberto Clemente Bridge'],
 [''],
 ['6,Li Si'],
 ['7,PAvel Moroz', '7,Yakov Estrin'],
 ['8,Broadmeadows, Victoria'],
 ['9,ASC Creative Services'],
 ['10,Ernest Rutherford', '10,Charles Drummond Ellis'],
 ['11,REP Parasol'],
 ["12,Monroe Carell Jr. Children's Hospital",
  '12,Vanderbilt',
  '12,Duncan U. Fletcher'],
 ['13,Nader Guirat,', '13,Josef Johansson'],
 ['14,sarah jane'],
 ['15,HBO'],
 ['16,Nikolai Morozov', '16,Stanislav Morozov'],
 ['17,Abhijit Kunte', '17,Kasparov'],
 [''],
 ['19,Ronaldo']]

In [57]:
# Combined predictions for the first 20 questions.
# `ner_ner12` is built by the "combine" cell that follows (In [56]),
# so that cell has to be executed before this one.
ner_ner12[:20]

[['0,Bill Finger'],
 ['1,winston churchill', '1,Selwyn Lloyd'],
 ['2,Gestapo'],
 ['3,Mumbai North'],
 ['4,Roberto Clemente Bridge'],
 ['5,Theos Philopator', '5,Cleopatra'],
 ['6,Li Si'],
 ['7,PAvel Moroz', '7,Yakov Estrin'],
 ['8,Broadmeadows, Victoria'],
 ['9,ASC Creative Services'],
 ['10,Ernest Rutherford', '10,Charles Drummond Ellis'],
 ['11,REP Parasol'],
 ["12,Monroe Carell Jr. Children's Hospital",
  '12,Vanderbilt',
  '12,Duncan U. Fletcher'],
 ['13,Nader Guirat,', '13,Josef Johansson'],
 ['14,sarah jane'],
 ['15,HBO'],
 ['16,Nikolai Morozov', '16,Stanislav Morozov'],
 ['17,Abhijit Kunte', '17,Kasparov'],
 [''],
 ['19,Ronaldo']]

In [56]:
# Combine, question by question, the entities identified by the two Flair models.
ner_ner12 = [''] * n

for i in range(n):
    first, second = ner[i], ner12[i]

    if len(first) == len(second):
        if all(elem in second for elem in first):
            # both models identify the same entities
            ner_ner12[i] = first
        elif first == ['']:
            # the first model finds nothing but the second one does
            ner_ner12[i] = second
        else:
            ner_ner12[i] = list(set(first) & set(second)) + list(set(second) - set(first))

    elif len(first) < len(second):
        if all(elem in second for elem in first) or first == ['']:
            # the second model finds more entities and includes those of the
            # first one (or the first model found nothing at all)
            ner_ner12[i] = second
        else:
            # When an entity is made of more than one word, one of the models
            # may split it into separate entities, so only the longer one,
            # which is the actual entity, is kept.
            # Example: 45,Which shareholder of Dagenham wind turbines is also the parent company of the Ford Falcon Cobra?
            # Here the ner model identifies "Dagenham" and "Ford Falcon Cobra", but ner12 identifies
            # "Dagenham", "Ford" and "Falcon Cobra", which is not correct in this case. This way
            # "Ford" and "Falcon Cobra" are excluded, because they are included in "Ford Falcon Cobra".
            ner_ner12[i] = list(set(first) & set(second)) + list(set(first) - set(second))

    else:
        # len(first) > len(second)
        if all(elem in first for elem in second) or second == ['']:
            ner_ner12[i] = first
        else:
            ner_ner12[i] = list(set(first) & set(second)) + list(set(second) - set(first))

In [13]:
# Ground-truth labels: one "<index>,<entity>" line per entity; a BOM, if any, is stripped.
with open('sqa_entities_labels_index.txt', 'rt', encoding='utf-8') as f:
    real_entities = f.read().replace('\ufeff', '').splitlines()

# One slot per question; [''] marks a question without labelled entities.
real = [['']] * n

for entity in real_entities:
    idx, ent = entity.split(',', 1)
    slot = int(idx)
    if real[slot] != ['']:
        real[slot].append(entity)
    else:
        real[slot] = [entity]

In [14]:
# Ground-truth entities of the first 10 questions.
real[:10]

[['0,Bill Finger'],
 ['1,Selwyn Lloyd', '1,Winston Churchill'],
 ['2,Gestapo'],
 ['3,Mumbai North'],
 ['4,Roberto Clemente Bridge'],
 ['5,Ptolemy XIII Theos Philopator', '5,Cleopatra V'],
 ['6,Li Si'],
 ['7,Pavel Moroz', '7,Yakov Estrin'],
 ['8,Broadmeadows, Victoria'],
 ['9,ASC Creative Services']]

In [15]:
# Sanity check: all three per-question lists should have the same length.
for per_question in (ner, ner12, real):
    print(len(per_question))

5000
5000
5000


In [16]:
def compare_predictions(real, ner_pred):
    """Compare predicted entities against the true entities, question by question.

    Both arguments are sequences with one entry per question; each entry is a
    list of strings (in this notebook "<index>,<entity>" strings, with ['']
    standing for "no entity"). Strings are compared case-insensitively.
    Questions are paired positionally and the comparison stops at the end of
    the shorter sequence.

    Returns the tuple
    (match, partial, less, more, none_match, none_less, none_more, predicted):

    * match      -- same number of entities and every prediction is a true entity
    * partial    -- same number of entities, only some predictions are true entities
    * none_match -- same number of entities, no prediction is a true entity
    * less       -- fewer predictions than true entities, at least one is correct
    * none_less  -- fewer predictions than true entities, none is correct
    * more       -- more predictions than true entities, at least one is correct
    * none_more  -- more predictions than true entities, none is correct
    * predicted  -- running total: len(pred) is added for match / partial / less,
                    len(true) for more
    """
    match = partial = less = more = 0
    none_match = none_less = none_more = 0
    predicted = 0

    # Iterate over the inputs themselves rather than over a notebook-level
    # counter, so the function works on any pair of sequences.
    for true_items, pred_items in zip(real, ner_pred):
        true = [x.lower() for x in true_items]
        pred = [x.lower() for x in pred_items]
        matching = [p for p in pred if p in true]

        if len(true) == len(pred):
            if matching == pred or matching == true:
                match += 1
                predicted += len(pred)
            elif matching:
                partial += 1
                predicted += len(pred)
            else:
                none_match += 1
        elif len(true) > len(pred):
            if matching:
                less += 1
                predicted += len(pred)
            else:
                none_less += 1
        else:
            if matching:
                more += 1
                # Cap the credit at the number of true entities.
                predicted += len(true)
            else:
                none_more += 1

    return match, partial, less, more, none_match, none_less, none_more, predicted

In [17]:
### compare only ner predictions
match, partial, less, more, none_match, none_less, none_more, predicted = compare_predictions(real, ner)
# One counter per line, in the order returned by compare_predictions.
for count in (match, partial, less, more, none_match, none_less, none_more, predicted):
    print(count)
# Share of the labelled entities that were credited as predicted.
print(predicted/len(real_entities))

3199
337
158
132
948
116
110
5122
0.7711532670882264


In [18]:
### compare only ner12 predictions
match, partial, less, more, none_match, none_less, none_more, predicted = compare_predictions(real, ner12)
# One counter per line, in the order returned by compare_predictions.
for count in (match, partial, less, more, none_match, none_less, none_more, predicted):
    print(count)
# Share of the labelled entities that were credited as predicted.
print(predicted/len(real_entities))

3034
262
191
151
1043
150
169
4886
0.7356218006624511


In [19]:
### compare ner+ner12 combined predictions

match, partial, less, more, none_match, none_less, none_more, predicted = compare_predictions(real, ner_ner12)
# One counter per line, in the order returned by compare_predictions.
for count in (match, partial, less, more, none_match, none_less, none_more, predicted):
    print(count)
# Share of the labelled entities that were credited as predicted.
print(predicted/len(real_entities))

3172
315
116
130
1047
133
87
5075
0.7640770852152966


## spaCy

In [46]:
# To install spaCy and the two models used below from inside the notebook:
#   !{sys.executable} -m pip install spacy
#   !{sys.executable} -m spacy download en_core_web_sm
#   !{sys.executable} -m spacy download en_core_web_md

import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm, en_core_web_md
# Small and medium English pipelines; the extraction cell below uses `nlp2`.
nlp = en_core_web_sm.load()
nlp2 = en_core_web_md.load()

In [51]:
# Run the medium spaCy model (`nlp2`) over every question and write one
# "<question index>,<entity text>" line per detected entity to
# results/NER_spacy_2.txt. Questions without entities produce no line.
with open('sqa_questions_index.txt', 'rt', encoding='utf-8') as f:
    questions = f.read().splitlines()

# Question words / auxiliaries stripped before tagging. The replacement is a
# plain substring removal applied to the whole "<index>,<question>" line.
# NOTE(review): 'Who were' has no trailing space here, unlike in the Flair
# cell; left untouched so the results stay the same -- confirm whether intended.
remove = ['What is ', 'What are ', 'Which ', 'Who is ', 'Who were', 'Who ', 'Was ', 'How many ', 'How much ',
          'has been ', 'Do ', 'Does ', 'How ', 'Where ', 'Name ']

questions_cleaned = []

for question in questions:
    for rem in remove:
        question = question.replace(rem, '')
    questions_cleaned.append(question)

# The context manager guarantees the result file is flushed and closed even
# if the model raises part-way through.
with open('results/NER_spacy_2.txt', 'wt', encoding='utf-8') as ofile:
    for question in questions_cleaned:
        idx, question = question.split(',', 1)
        doc = nlp2(question.replace('?', '').strip())
        entities = [ent.text for ent in doc.ents]
        # The former try/except around this loop was dead code: iterating an
        # empty list does not raise, so its fallback line was never written.
        for entity in entities:
            ofile.write(idx + ',' + entity + '\n')

0 ['Bill Finger']
1 ['Selwyn Lloyd']
2 ['Gestapo']
3 ['Mumbai']
4 ['Roberto Clemente Bridge']
5 ['ptolemy XIII Theos Philopator', 'Cleopatra V']
6 ['Li Si']
7 ['PAvel Moroz', 'Yakov Estrin']
8 ['Broadmeadows, Victoria']
9 ['ASC Creative Services']
10 ['Ernest Rutherford', 'Charles Drummond Ellis']
11 ['REP']
12 ["the Monroe Carell Jr. Children's Hospital", 'Vanderbilt', 'the Duncan U. Fletcher']
13 ['Nader Guirat', 'Josef Johansson']
14 []
15 ['HBO']
16 ['Nikolai Morozov', 'Stanislav Morozov']
17 ['Abhijit Kunte', 'Kasparov']
18 []
19 ['seasons', 'Ronaldo']
20 ['SamurAbsheron', 'Anar Salmanov']
21 ['2nd Foreign Infantry Regiment']
22 ['3Sub']
23 ['Kevin Jonas', 'Joe Jonas']
24 ['Timm Gunn', 'Sunrise', 'HIMYM']
25 ['O.co Coliseum']
26 ['Rishkiesh']
27 []
28 ['PhDs', 'National Medal of Science']
29 ['Aston Villa 2000-02 season', 'Middlesbrough F.C.', '2009']
30 ['William Harper']
31 ['PCL']
32 ['Kevin Jonas', 'Jonas']
33 ['Sagan']
34 ['Rosemont', 'Illinois']
35 ['Greenup County High Scho

626 ['Rodrigues']
627 ['Waxiang']
628 ['Bogdanua', 'Upper Neretva']
629 ['Charles Drummond Ellis', 'PhD']
630 ['Chevrolet']
631 ['Martin Pugh', 'Alana Stewert']
632 ['the Pepsi Center']
633 []
634 ['US', 'Alfred Phillips', 'William Smith']
635 ['Michael Jeffery', 'Williuam Deane']
636 ['Richard Winn']
637 ['Neil Brown']
638 []
639 ['Mount Hoffmann']
640 ['Agalga']
641 ['LG Optimus Vu', 'Manu Cornet']
642 ['Edsel']
643 ['Menora']
644 ['Cape Town', 'the Royal Astronomical Society']
645 ['John McEwen', 'Phm Vn']
646 []
647 []
648 ['the Caspian Sea']
649 ['Congress of Industrial Organizations']
650 ['Catarina Lindgren']
651 ['John of Damascus']
652 ['the West Berkshire Council']
653 ['Hank Williams Jr.']
654 ['Pixar Canada', 'Pixar']
655 ['Indian']
656 ['NYC FC']
657 ['Tony Bennett']
658 ['English']
659 ['Sarah Jane', 'Ron Grainer']
660 ['David Isaacs']
661 []
662 ['Brazil', '2']
663 ['Tirana']
664 ['Jon', 'Vangelis']
665 []
666 ['Marika Gombitov']
667 ['9TV']
668 ['the Army Air Corps', 'I

1256 ['South']
1257 ['Battle of Fort stephenson']
1258 ['the American Mediterranean Sea', 'Entronque de Herradura']
1259 ["the Brown's Corners", 'Toronto', 'the George Karrys']
1260 ['Alcal de Henares']
1261 ['Guatemala']
1262 []
1263 ['Provisional Government of Saskatchewan']
1264 ['The Farmer Wants a Wife']
1265 ['Kentucky']
1266 ['US']
1267 []
1268 ['Jon Curran']
1269 ['Vietnam Airlines']
1270 ['Dartington College of Arts']
1271 ['New Yorker']
1272 ['Comcast SportsNet']
1273 []
1274 []
1275 ['PopCon', 'Louis Le Cocqq']
1276 ['Cyrus Mann']
1277 ['William Cushing']
1278 ["Filbert's Old Time Root Beer"]
1279 ['Primus', 'Comedy Central']
1280 ['Stars']
1281 []
1282 ['International Union of Anarchists']
1283 ['Hugh Hefner']
1284 ['WGBC']
1285 []
1286 ['Lancy', 'Geneva']
1287 []
1288 ['Ringgold High School', 'Pennsylvania']
1289 []
1290 ['Miguel de Cervantes', 'Francisco de Robles']
1291 ['Sino French War', 'Raoul Magrin-Vernerey']
1292 ['Honeybuns']
1293 ['Willis Sharpe Kilmer']
1294 ['L

1853 ['John Byrne', 'Emma Frost']
1854 ['Chinook', 'Dennis Muilenburg']
1855 ['National League']
1856 []
1857 ['Miluk', 'Cayuse']
1858 ['AMD']
1859 ['David Charles']
1860 ['Steven Palazao']
1861 ['Stephen E. Ambrose', 'HBO']
1862 ['Volos']
1863 ['Huey, Dewey', 'Louie', 'American', 'Pekin']
1864 ['Bobby Skinstad']
1865 ['marquette Golden Eagles']
1866 ['Hiroyuki Takei']
1867 ['Jon Speelman', 'Eric Schiller']
1868 ['the Croix de Guerre']
1869 ['Flats Painted Caves']
1870 ['Islam']
1871 ['Stoke City F.C.']
1872 []
1873 ['Jimmy Wales']
1874 ['Pennsylvania']
1875 ['Arkansas']
1876 ['Ted Osborne']
1877 ['Jon Voight']
1878 ['england', '20']
1879 ['RENFE', 'Renaissance Center']
1880 ['Jim Kiick', 'New Jersey']
1881 ['the United States']
1882 ['Maple Leaf S&E']
1883 ['The Boy is Mine', 'Atlanta']
1884 ['Snake Island']
1885 ['UEM']
1886 []
1887 ['Mount Raimer']
1888 ['Tambourine Man']
1889 ['Peter Thorneycroft']
1890 []
1891 ['the Marina Pankova', 'Viktor Ulyanich']
1892 ['Michigan', 'Baltimore'

2449 ['Sierra del Merendin']
2450 []
2451 ['Hynief']
2452 ['Jared Bernstein']
2453 ['France']
2454 ['Populous']
2455 ['Ludovic Boi', 'Fernando Augustin']
2456 ['one', 'Ankh-Morpork City Watch']
2457 ['Bryan Mantia']
2458 ['Cancun']
2459 ['Seguin', 'Texas']
2460 ['Mississippi']
2461 ['Jimmy Diggs']
2462 ['Washington, D.C.']
2463 ['Siege of Fort Recovery', 'Indian']
2464 ['Stanford', '1906-1917']
2465 ['Polytech', 'Bataan', 'Taguig']
2466 ['Kelly Osbourne', 'Ozzy Osbourne']
2467 []
2468 []
2469 ['Clinton', 'Winston Bryant']
2470 []
2471 ['Boeing', 'F/A-18E', 'Dennis Muilenburg']
2472 ['cyril hume']
2473 []
2474 ['Texas', 'Dallas']
2475 []
2476 ['Nandurbar']
2477 ['Tim Burton', 'Danny Elfman']
2478 ['Jason Maas']
2479 ['William Clark']
2480 ['LATV']
2481 ['Houston Astros']
2482 ['Islamic', 'Craig Van Tilbury']
2483 ['Solidus']
2484 []
2485 ['VMF-218', 'VMF-155']
2486 ['Michael springer']
2487 ['Thud']
2488 ['Steve Nash', 'Vancouver Whitecaps FC']
2489 ['A Todo Romantic Style']
2490 ['Chri

3046 ['James Roberts', 'US']
3047 []
3048 ['Arlington Heights', 'Illinois']
3049 ['Gymnastics', '2008']
3050 ['the American Magazine', 'New York Post']
3051 ['the The Conglomerate (American group', 'Blackstreet & Ma']
3052 ['SAGE Publications']
3053 ['Demis Roussos']
3054 ['Frampton', 'Camel']
3055 ['Pittsburgh Line']
3056 ['Gerard Marino']
3057 ['260']
3058 ['Raa', 'Adriatic']
3059 ['the Clinton Foundation']
3060 []
3061 []
3062 ['the Harry Parker', 'the Carl Sitton']
3063 ['Valaichchenai']
3064 ['McClain', 'one']
3065 ['Roger Barton']
3066 ['Kala Pani']
3067 ['Timothy Morton', 'Buddhism']
3068 ['Ptolemy XII Auletes', 'Ptolemy XIII Theos Philopator']
3069 ['Kalpan', 'Inc.', 'Iowa']
3070 ['Englewood', 'Colorado', 'one']
3071 ['Shelby County', 'Tennessee']
3072 ['the South Side Elevated Railroad', 'Sakari Suzuki']
3073 ['John Roberts']
3074 []
3075 ['one', 'Andrea']
3076 ['the Connecticut Lakes']
3077 ['Blayse']
3078 ['BBC HD']
3079 ['Bacillales', 'Firmicutes']
3080 ['the National Comme

3644 ['Tampa', 'Florida']
3645 ['GetTV']
3646 ['Novair International Airways']
3647 ['the Mediterranean Basketball Association']
3648 ['Philippines']
3649 ['the US Department']
3650 ['Frasier']
3651 ['John of Damascus', 'Edwin', 'Northumbria']
3652 ['Josh Bell']
3653 ['Fort Gadsden', 'Fort Barrancas']
3654 ['Royal Thai Army']
3655 ['Haqqi al-Azm']
3656 ['sweden']
3657 ['Dolley Madison', 'Montpelier']
3658 ['Moscow']
3659 ['Latania']
3660 ['8.1', 'OS', 'Lumia', '830']
3661 ['2005']
3662 []
3663 ['Mac OS', 'C++']
3664 ['moscow']
3665 ['Trevor Peacock', 'Fred Claus']
3666 ['Jan Paulsen']
3667 []
3668 ['Saint Petersburg']
3669 []
3670 ['HBO']
3671 ['Warren Magnuson']
3672 ['Andrew Lincoln', 'Million Dollar']
3673 ['Blytheville High School']
3674 ['Budapest']
3675 ['2012', 'Campeonato Brasileiro Srie A.']
3676 ['Eddie Bravo', 'Alan Jouban']
3677 ['MSX BASIC']
3678 ['Swedish']
3679 ['Harry', 'Potters']
3680 ['Lake Uniamsi']
3681 ['Peter Cosgrove']
3682 ['Indianapolis', 'Colts', 'Carolina Pan

4245 ['Douglas Netter']
4246 ['Upper Neratva']
4247 ['Johnny Cash', 'Kris Kristofferson']
4248 ['the Journal of Cerebral Blood Flow and Metabolism']
4249 ['Elie Wiesel']
4250 ['Kenya']
4251 []
4252 ['the National Museum of Racing and Hall of Fame']
4253 ['Charing Cross Bridge', 'Dorothea Sharp']
4254 ['Israel', 'Jaber Al-Ahmad Al-Sabah']
4255 ['the Martin Ragaway', 'Chuck Connors']
4256 ['Ashanti']
4257 ['Ricky Grevais']
4258 ['03', 'Bonnie & Clyde']
4259 ['Playtone']
4260 ['Malaysia']
4261 ['Galician', 'German']
4262 []
4263 ['Wikitionary']
4264 ['Thames Estuary', 'Reading Berkshire']
4265 ['Samuel Moreno Rojas']
4266 ['Nintendo eShop', 'Nintendo Network']
4267 ["Monty Python's", 'Cinema International Corporation']
4268 ['Trinity School', 'Brentwood']
4269 ['Abiye', 'Abebe', 'the Amsale Aberra']
4270 []
4271 ['American', 'Mediterranean Sea']
4272 ['Uni of California']
4273 ['PrivateCore']
4274 ['Season 11']
4275 ['Waddy']
4276 ['Baker', 'Dozen']
4277 []
4278 ['Mike', 'Toronto']
4279 [

4817 ['Terminator', 'Terminator 2:', 'Judgment Day']
4818 ['Christen-Democratisch en Vlaams']
4819 ['Juno']
4820 ['PlayN']
4821 ['Steven Moffat']
4822 ['BBC Two']
4823 ['Tupelo', 'Mississippi']
4824 ['Walt Disney Records']
4825 ['cohoes', 'NY']
4826 ['the Columbia University', 'William Campbell']
4827 ['Maharashtran']
4828 ['Cinema International Corporation']
4829 ['Ashot Grashi']
4830 ['Ae Fond Kiss', 'Halloween']
4831 ['Steve Buyer']
4832 ['Steven Moffat']
4833 ['Michael Schumacher', 'Pole', '1994', 'Spanish']
4834 ['Shaheed Minar', 'Kolkata']
4835 ['Canada']
4836 ['Batman Live']
4837 ['San Salvador Island', 'Bahamas']
4838 ['Audi India Correct Question']
4839 ['NASA', 'Gemini']
4840 ['South Side', '400']
4841 ['Michelle D. Johnson', 'United States Air Force Academy']
4842 ['Pittsburgh']
4843 ['Stanlee', 'the Tales of Suspense']
4844 ['Middlesbrough FC']
4845 ['National Basketball Association']
4846 ['Cubic Hermite']
4847 ['Dallas']
4848 ['bloomington']
4849 ['Paradise']
4850 ['Cople

In [14]:
# Read the "<index>,<entity>" result lines of the two spaCy models.
# NOTE(review): no visible cell writes results/NER_spacy.txt; presumably it
# comes from an earlier run with the small model -- confirm.
with open('results/NER_spacy.txt', 'rt', encoding='utf-8') as sm_file:
    entities_spacy_sm = sm_file.read().splitlines()

with open('results/NER_spacy_2.txt', 'rt', encoding='utf-8') as md_file:
    entities_spacy_md = md_file.read().splitlines()

In [15]:
n = 5000

# One slot per question; [''] marks "no entity found".
# Every slot initially aliases the same placeholder list. That is safe here
# because a placeholder is always replaced by a new list, never appended to.
ner_sm = [['']] * n
ner_md = [['']] * n

for entity in entities_spacy_sm:
    idx, ent = entity.split(',', 1)
    slot = int(idx)
    if ner_sm[slot] != ['']:
        ner_sm[slot].append(entity)
    else:
        ner_sm[slot] = [entity]

for entity in entities_spacy_md:
    idx, ent = entity.split(',', 1)
    slot = int(idx)
    if ner_md[slot] != ['']:
        ner_md[slot].append(entity)
    else:
        ner_md[slot] = [entity]

In [16]:
# Entities of the first ten questions for each spaCy model.
for sample in (ner_sm, ner_md):
    print(sample[:10])

[['0,Bill Finger'], ['1,Selwyn Lloyd'], [''], ['3,Mumbai'], ['4,Roberto Clemente Bridge'], ['5,XIII Theos Philopator', '5,Cleopatra V'], ['6,Li Si'], ['7,PAvel Moroz', '7,the Yakov Estrin'], ['8,Broadmeadows', '8,Victoria'], ['9,ASC Creative Services']]
[['0,Bill Finger'], ['1,Selwyn Lloyd'], ['2,Gestapo'], ['3,Mumbai'], ['4,Roberto Clemente Bridge'], ['5,ptolemy XIII Theos Philopator', '5,Cleopatra V'], ['6,Li Si'], ['7,PAvel Moroz', '7,Yakov Estrin'], ['8,Broadmeadows, Victoria'], ['9,ASC Creative Services']]


In [18]:
# Combine, question by question, the entities identified by the two spaCy models.
ner_sm_md = [''] * n

for i in range(n):
    small, medium = ner_sm[i], ner_md[i]

    if len(small) == len(medium):
        # Keep the small model's list unless it disagrees with the medium
        # model and is just the empty placeholder.
        if not all(elem in medium for elem in small) and small == ['']:
            ner_sm_md[i] = medium
        else:
            ner_sm_md[i] = small

    elif len(small) < len(medium):
        if all(elem in medium for elem in small) or small == ['']:
            ner_sm_md[i] = medium
        else:
            ner_sm_md[i] = list(set(small) & set(medium)) + list(set(small) - set(medium))

    else:
        # len(small) > len(medium)
        if all(elem in small for elem in medium):
            ner_sm_md[i] = medium
        elif medium == ['']:
            ner_sm_md[i] = small
        else:
            ner_sm_md[i] = list(set(small) & set(medium)) + list(set(medium) - set(small))

[['0,Bill Finger'], ['1,Selwyn Lloyd'], ['2,Gestapo'], ['3,Mumbai'], ['4,Roberto Clemente Bridge'], ['5,XIII Theos Philopator', '5,Cleopatra V'], ['6,Li Si'], ['7,PAvel Moroz', '7,the Yakov Estrin'], ['8,Broadmeadows, Victoria'], ['9,ASC Creative Services'], ['10,Ernest Rutherford', '10,Charles Drummond Ellis'], ['11,REP Parasol'], ["12,the Monroe Carell Jr. Children's Hospital", '12,Vanderbilt', '12,the Duncan U. Fletcher'], ['13,Nader Guirat', '13,Josef Johansson'], [''], ['15,HBO'], ['16,Nikolai Morozov', '16,Stanislav Morozov'], ['17,Abhijit Kunte', '17,Kasparov'], ['18,the MSX Basics'], ['19,seasons', '19,Ronaldo'], ['20,SamurAbsheron', '20,Anar Salmanov'], ['21,2nd Foreign Infantry Regiment'], ['22,3Sub'], ['23,Kevin Jonas', '23,Joe Jonas'], ['24,Timm Gunn', '24,Sunrise (HIMYM'], ['25,O.co Coliseum'], ['26,Rishkiesh'], ['27,one'], ['28,PhDs', '28,National Medal of Science'], ['29,Aston Villa 2000-02 season', '29,Middlesbrough F.C.', '29,2009-10'], ['30,William Harper'], ['31,PCL'

In [22]:
### compare only sm predictions

match, partial, less, more, none_match, none_less, none_more, predicted = compare_predictions(real, ner_sm)
# One counter per line, in the order returned by compare_predictions.
for count in (match, partial, less, more, none_match, none_less, none_more, predicted):
    print(count)
# Share of the labelled entities that were credited as predicted.
print(predicted/len(real_entities))

TRUE:  ['0,bill finger']
PRED:  ['0,bill finger']
SAME:  ['0,bill finger']
TRUE:  ['1,selwyn lloyd', '1,winston churchill']
PRED:  ['1,selwyn lloyd']
LESS:  ['1,selwyn lloyd']
TRUE:  ['2,gestapo']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2,gestapo']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3,mumbai north']
PRED:  ['3,mumbai']
SAME:  []
NONE_TRUE_EQUAL:  ['3,mumbai north']
NONE_PRED_EQUAL:  ['3,mumbai']
TRUE:  ['4,roberto clemente bridge']
PRED:  ['4,roberto clemente bridge']
SAME:  ['4,roberto clemente bridge']
TRUE:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
PRED:  ['5,xiii theos philopator', '5,cleopatra v']
SAME:  ['5,cleopatra v']
PARTIAL:  ['5,cleopatra v']
TRUE:  ['6,li si']
PRED:  ['6,li si']
SAME:  ['6,li si']
TRUE:  ['7,pavel moroz', '7,yakov estrin']
PRED:  ['7,pavel moroz', '7,the yakov estrin']
SAME:  ['7,pavel moroz']
PARTIAL:  ['7,pavel moroz']
TRUE:  ['8,broadmeadows, victoria']
PRED:  ['8,broadmeadows', '8,victoria']
NONE_TRUE_MORE:  ['8,broadmeadows, victoria']
NONE

NONE_PRED_EQUAL:  ['']
TRUE:  ['179,screen actors guild life achievement award', '179,henry e. catto, jr.']
PRED:  ['179,henry e. catto', '179,jr.', '179,screen actors']
NONE_TRUE_MORE:  ['179,screen actors guild life achievement award', '179,henry e. catto, jr.']
NONE_PRED_MORE:  ['179,henry e. catto', '179,jr.', '179,screen actors']
TRUE:  ['180,pope gregory i']
PRED:  ['180,pope gregory']
SAME:  []
NONE_TRUE_EQUAL:  ['180,pope gregory i']
NONE_PRED_EQUAL:  ['180,pope gregory']
TRUE:  ['181,human rights party']
PRED:  ['181,cambodian']
SAME:  []
NONE_TRUE_EQUAL:  ['181,human rights party']
NONE_PRED_EQUAL:  ['181,cambodian']
TRUE:  ['182,czech extraliga']
PRED:  ['182,the czech extraliga']
SAME:  []
NONE_TRUE_EQUAL:  ['182,czech extraliga']
NONE_PRED_EQUAL:  ['182,the czech extraliga']
TRUE:  ['183,croix de guerre']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['183,croix de guerre']
NONE_PRED_EQUAL:  ['']
TRUE:  ['184,azam f.c.']
PRED:  ['184,azam f.c.']
SAME:  ['184,azam f.c.']
TRUE:  [

PRED:  ['309,papeete']
SAME:  ['309,papeete']
TRUE:  ['310,lake victoria']
PRED:  ['310, lake victoria']
SAME:  []
NONE_TRUE_EQUAL:  ['310,lake victoria']
NONE_PRED_EQUAL:  ['310, lake victoria']
TRUE:  ['311,domenico modugno']
PRED:  ['311,domenico modugno']
SAME:  ['311,domenico modugno']
TRUE:  ['312,grameen bank']
PRED:  ['312,grameen bank']
SAME:  ['312,grameen bank']
TRUE:  ['313,benjamin franklin terry', '313,us route 59']
PRED:  ['313,benjamin franklin terry', '313,us route 59']
SAME:  ['313,benjamin franklin terry', '313,us route 59']
TRUE:  ['314,james langer']
PRED:  ['314,james langer']
SAME:  ['314,james langer']
TRUE:  ['315,john trumbull', '315,a foreign policy']
PRED:  ['315,john trumbull']
LESS:  ['315,john trumbull']
TRUE:  ['316,peter-john vettese']
PRED:  ['316,peter-john vettese']
SAME:  ['316,peter-john vettese']
TRUE:  ['317,edwin catmull']
PRED:  ['317,edwin catmull']
SAME:  ['317,edwin catmull']
TRUE:  ['318,eric roth']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  [

PRED:  ['378,novelas']
SAME:  []
NONE_TRUE_EQUAL:  ['378,novelas ejemplares']
NONE_PRED_EQUAL:  ['378,novelas']
TRUE:  ['379,anne spielberg']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['379,anne spielberg']
NONE_PRED_EQUAL:  ['']
TRUE:  ['380,nbc', '380,frasier']
PRED:  ['380,nbc', '380,fraiser']
SAME:  ['380,nbc']
PARTIAL:  ['380,nbc']
TRUE:  ['381,washington station', '381,cta blue line']
PRED:  ['381,the cta blue line']
NONE_TRUE_LESS:  ['381,washington station', '381,cta blue line']
NONE_PRED_LESS:  ['381,the cta blue line']
TRUE:  ['382,janata party']
PRED:  ['382,one', '382,janata party']
MORE:  ['382,janata party']
TRUE:  ['383,ford theatre']
PRED:  ['383,ford theatre']
SAME:  ['383,ford theatre']
TRUE:  ['384,the adventures of mimi']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['384,the adventures of mimi']
NONE_PRED_EQUAL:  ['']
TRUE:  ['385,arlington county, virginia']
PRED:  ['385,arlington county', '385,virginia']
NONE_TRUE_MORE:  ['385,arlington county, virginia']
NONE_PRED_MORE

SAME:  []
NONE_TRUE_EQUAL:  ['510,kingston, ontario', '510,st lawrence river']
NONE_PRED_EQUAL:  ['510,kingston', '510,ontario']
TRUE:  ['511,mtr']
PRED:  ['511,mtr']
SAME:  ['511,mtr']
TRUE:  ['512,vestigial peter']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['512,vestigial peter']
NONE_PRED_EQUAL:  ['']
TRUE:  ['513,buccinidae']
PRED:  ['513,buccinidae']
SAME:  ['513,buccinidae']
TRUE:  ['514,channel 9 mcot hd', '514,ministry of information and communication technology']
PRED:  ['514,the ministry of information and communication technology (thailand', '514, of the channel 9 mcot hd']
SAME:  []
NONE_TRUE_EQUAL:  ['514,channel 9 mcot hd', '514,ministry of information and communication technology']
NONE_PRED_EQUAL:  ['514,the ministry of information and communication technology (thailand', '514, of the channel 9 mcot hd']
TRUE:  ['515,kumta']
PRED:  ['515,kumta']
SAME:  ['515,kumta']
TRUE:  ['516,mac os 9', '516,tiny tower']
PRED:  ['516,mac os 9', '516,tiny tower']
SAME:  ['516,mac os 9',

PRED:  ['640,agalga']
SAME:  ['640,agalga']
TRUE:  ['641,lg optimus vu', '641,manu cornet']
PRED:  ['641,lg optimus vu', '641,manu cornet']
SAME:  ['641,lg optimus vu', '641,manu cornet']
TRUE:  ['642,edsel ranger']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['642,edsel ranger']
NONE_PRED_EQUAL:  ['']
TRUE:  ['643,menora tunnel']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['643,menora tunnel']
NONE_PRED_EQUAL:  ['']
TRUE:  ['644,cape town', '644,gold medal']
PRED:  ['644,cape town', '644,the royal astronomical society']
SAME:  ['644,cape town']
PARTIAL:  ['644,cape town']
TRUE:  ['645,john mcewen', '645,phm vn ng']
PRED:  ['645,john mcewen', '645,phm vn']
SAME:  ['645,john mcewen']
PARTIAL:  ['645,john mcewen']
TRUE:  ['646,hanged']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['646,hanged']
NONE_PRED_EQUAL:  ['']
TRUE:  ['647,vehicles']
PRED:  ['647,bridges']
SAME:  []
NONE_TRUE_EQUAL:  ['647,vehicles']
NONE_PRED_EQUAL:  ['647,bridges']
TRUE:  ['648,caspian sea']
PRED:  ['648,the caspian sea']
S

MORE:  ['844,rishikesh']
TRUE:  ['845,the comedy central']
PRED:  ['845,the comedy central']
SAME:  ['845,the comedy central']
TRUE:  ['846,ali habib mahmud', '846,stephen urban']
PRED:  ['846,stephen urban', '846,ali habib mahmud']
SAME:  ['846,stephen urban', '846,ali habib mahmud']
TRUE:  ['847,doctor mirabilis']
PRED:  ['847,mirabilis']
SAME:  []
NONE_TRUE_EQUAL:  ['847,doctor mirabilis']
NONE_PRED_EQUAL:  ['847,mirabilis']
TRUE:  ['848,whitey wistert']
PRED:  ['848,whitney wistert']
SAME:  []
NONE_TRUE_EQUAL:  ['848,whitey wistert']
NONE_PRED_EQUAL:  ['848,whitney wistert']
TRUE:  ['849,kirk hammett', '849,gibson flying v']
PRED:  ['849,kirk hammett']
LESS:  ['849,kirk hammett']
TRUE:  ['850,hampton roads rhinos']
PRED:  ['850,hampton roads rhinos']
SAME:  ['850,hampton roads rhinos']
TRUE:  ['851,utopian and dystopian fiction']
PRED:  ['851,utopian']
SAME:  []
NONE_TRUE_EQUAL:  ['851,utopian and dystopian fiction']
NONE_PRED_EQUAL:  ['851,utopian']
TRUE:  ['852,sturm college of l

SAME:  []
NONE_TRUE_EQUAL:  ['1047,chapelle saint-louis de carthage']
NONE_PRED_EQUAL:  ['1047,the chapelle saint-louis de carthage']
TRUE:  ['1048,franz brentano', '1048,royal society']
PRED:  ['1048,royal society', '1048,franz brentano']
SAME:  ['1048,royal society', '1048,franz brentano']
TRUE:  ['1049,norwalk trainband']
PRED:  ['1049,the norwalk trainband']
SAME:  []
NONE_TRUE_EQUAL:  ['1049,norwalk trainband']
NONE_PRED_EQUAL:  ['1049,the norwalk trainband']
TRUE:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
PRED:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
SAME:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
TRUE:  ['1051,bafta']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1051,bafta']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1052,microsoft office', '1052,c__']
PRED:  ['1052,microsoft', '1052,c++']
SAME:  []
NONE_TRUE_EQUAL:  ['1052,microsoft office', '1052,c__']
NONE_PRED_EQUAL:  ['1052,microsoft', '1052,c++']
TRUE:  ['1053,altazor', '1053,picasso']
PRED:  ['1053,picas

NONE_TRUE_EQUAL:  ['1248,lennon legend: the very best of john lennon']
NONE_PRED_EQUAL:  ['1248,john lennon']
TRUE:  ['1249,benjamin spock']
PRED:  ['1249,benjamin spock']
SAME:  ['1249,benjamin spock']
TRUE:  ['1250,atlant moscow oblast']
PRED:  ['1250,atlant moscow oblast']
SAME:  ['1250,atlant moscow oblast']
TRUE:  ['1251,bbc']
PRED:  ['1251,bbc']
SAME:  ['1251,bbc']
TRUE:  ['1252,janaka thissakuttiarachchi', '1252,atisa srijnana']
PRED:  ['1252,janaka thissakuttiarachchi', '1252,atisa srijnana']
SAME:  ['1252,janaka thissakuttiarachchi', '1252,atisa srijnana']
TRUE:  ['1253,maharashtran']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1253,maharashtran']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1254,trade association']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1254,trade association']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1255,e w kemble']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1255,e w kemble']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1256,the south end']
PRED:  ['1256,south']
SAME:  []
NONE_TRUE_EQUAL:  

NONE_PRED_LESS:  ['1445,martin']
TRUE:  ['1446,shannon cave']
PRED:  ['1446,shannon']
SAME:  []
NONE_TRUE_EQUAL:  ['1446,shannon cave']
NONE_PRED_EQUAL:  ['1446,shannon']
TRUE:  ['1447,fergus mcmaster']
PRED:  ['1447,fergus mcmaster']
SAME:  ['1447,fergus mcmaster']
TRUE:  ['1448,nicholas s. zeppos']
PRED:  ['1448,nicholas s. zeppos']
SAME:  ['1448,nicholas s. zeppos']
TRUE:  ['1449,united states']
PRED:  ['1449, ', '1449,united states']
MORE:  ['1449,united states']
TRUE:  ['1450,excelsior stakes']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1450,excelsior stakes']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1451,al bano and romina power']
PRED:  ['1451,al bano', '1451,romina power']
NONE_TRUE_MORE:  ['1451,al bano and romina power']
NONE_PRED_MORE:  ['1451,al bano', '1451,romina power']
TRUE:  ['1452,hanging']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1452,hanging']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1453,ben dreith', '1453,john mcgeever']
PRED:  ['1453,john mcgeever', '1453,ben dreith']
SAME:  ['145

TRUE:  ['1642,john g schmitz', '1642,us president']
PRED:  ['1642,us', '1642,john g schmitz']
SAME:  ['1642,john g schmitz']
PARTIAL:  ['1642,john g schmitz']
TRUE:  ['1643,moondog matinee', '1643,theme time radio hour']
PRED:  ['1643,theme time radio hour', '1643,moondoc matinee']
SAME:  ['1643,theme time radio hour']
PARTIAL:  ['1643,theme time radio hour']
TRUE:  ['1644,duncan u. fletcher', '1644,vanderbilt commodores']
PRED:  ['1644,duncan u. fletcher', '1644,vanderbilt commodores']
SAME:  ['1644,duncan u. fletcher', '1644,vanderbilt commodores']
TRUE:  ['1645,language integrated query', '1645,msx basic']
PRED:  ['1645,msx', '1645,language integrated query']
SAME:  ['1645,language integrated query']
PARTIAL:  ['1645,language integrated query']
TRUE:  ['1646,indiana territory']
PRED:  ['1646,the united states', '1646,indiana']
NONE_TRUE_MORE:  ['1646,indiana territory']
NONE_PRED_MORE:  ['1646,the united states', '1646,indiana']
TRUE:  ['1647,ford y-block engine']
PRED:  ['1647,ford

NONE_PRED_EQUAL:  ['1915,the ncaa division i football bowl subdivision']
TRUE:  ['1916,anaheim']
PRED:  ['1916,anaheim']
SAME:  ['1916,anaheim']
TRUE:  ['1917,swedish language']
PRED:  ['1917,swedish']
SAME:  []
NONE_TRUE_EQUAL:  ['1917,swedish language']
NONE_PRED_EQUAL:  ['1917,swedish']
TRUE:  ["1918,your ice cream's dirty"]
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ["1918,your ice cream's dirty"]
NONE_PRED_EQUAL:  ['']
TRUE:  ['1919,the snaman', '1919,neverwher']
PRED:  ['1919,snaman', '1919,neverwher']
SAME:  ['1919,neverwher']
PARTIAL:  ['1919,neverwher']
TRUE:  ['1920,malaysia']
PRED:  ['1920,malaysia']
SAME:  ['1920,malaysia']
TRUE:  ['1921,riverside stadium']
PRED:  ['1921,the riverside stadium']
SAME:  []
NONE_TRUE_EQUAL:  ['1921,riverside stadium']
NONE_PRED_EQUAL:  ['1921,the riverside stadium']
TRUE:  ['1922,baikal mountains', '1922,lena delta wildlife reserve']
PRED:  ['1922,baikal mountains', '1922,lena delta wildlife reserve']
SAME:  ['1922,baikal mountains', '1922,lena d

PRED:  ['2123,paul weatherwax']
LESS:  ['2123,paul weatherwax']
TRUE:  ['2124,ragout fin']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2124,ragout fin']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2125,neungin high school', '2125,wickramabahu centeral college']
PRED:  ['2125,the wickramabahu centeral college', '2125,neungin high school']
SAME:  ['2125,neungin high school']
PARTIAL:  ['2125,neungin high school']
TRUE:  ['2126,fox broadcasting company']
PRED:  ['2126,fox broadcasting company']
SAME:  ['2126,fox broadcasting company']
TRUE:  ['2127,darth vader', '2127,padm amidala']
PRED:  ['2127,darth vader', '2127,padm amidala']
SAME:  ['2127,darth vader', '2127,padm amidala']
TRUE:  ['2128,where everybody knows you']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2128,where everybody knows you']
NONE_PRED_EQUAL:  ['']
TRUE:  ["2129,brazil's under 23 team", '2129,ronaldinho']
PRED:  ['2129,23']
NONE_TRUE_LESS:  ["2129,brazil's under 23 team", '2129,ronaldinho']
NONE_PRED_LESS:  ['2129,23']
TRUE:  ['2130,z

MORE:  ['2324,mediterranean sea']
TRUE:  ['2325,regional radio sports network']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2325,regional radio sports network']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2326,shawn fanning']
PRED:  ['2326,shawn fanning']
SAME:  ['2326,shawn fanning']
TRUE:  ['2327,linguistics']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2327,linguistics']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2328,j bennett johnston']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2328,j bennett johnston']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2329,michael m. sears']
PRED:  ['2329,michael m. sears']
SAME:  ['2329,michael m. sears']
TRUE:  ['2330,mumbai', '2330,daund railway junction']
PRED:  ['']
NONE_TRUE_LESS:  ['2330,mumbai', '2330,daund railway junction']
NONE_PRED_LESS:  ['']
TRUE:  ['2331,union state']
PRED:  ['2331,whihc', '2331,the union state', '2331,union staete']
NONE_TRUE_MORE:  ['2331,union state']
NONE_PRED_MORE:  ['2331,whihc', '2331,the union state', '2331,union staete']
TRUE:  ['2332,massachusetts d

PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2524,moses']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2525,association']
PRED:  ['2525,the association of southeast asian institutes of higher learning']
SAME:  []
NONE_TRUE_EQUAL:  ['2525,association']
NONE_PRED_EQUAL:  ['2525,the association of southeast asian institutes of higher learning']
TRUE:  ['2526,ins viraat']
PRED:  ['2526,ins viraat (', '2526,r22']
NONE_TRUE_MORE:  ['2526,ins viraat']
NONE_PRED_MORE:  ['2526,ins viraat (', '2526,r22']
TRUE:  ['2527,lagos preparatory school', '2527,willian garne']
PRED:  ['2527,lagos preparatory school', '2527,willian garne']
SAME:  ['2527,lagos preparatory school', '2527,willian garne']
TRUE:  ['2528,england']
PRED:  ['2528,england']
SAME:  ['2528,england']
TRUE:  ['2529,groove thang']
PRED:  ['2529,groove thang']
SAME:  ['2529,groove thang']
TRUE:  ['2530,salad']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2530,salad']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2531,gary portnoy', '2531,paramount television']
PRED:  ['2

PRED:  ['2730,angels toru']
SAME:  ['2730,angels toru']
TRUE:  ['2731,my favorite girl']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2731,my favorite girl']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2732,subaru legacy']
PRED:  ['2732,subaru', '2732,legacy', '2732,second']
NONE_TRUE_MORE:  ['2732,subaru legacy']
NONE_PRED_MORE:  ['2732,subaru', '2732,legacy', '2732,second']
TRUE:  ['2733,jadavpur']
PRED:  ['2733,jadavpur']
SAME:  ['2733,jadavpur']
TRUE:  ['2734,1998 baltimore ravens season', '2734,chicago bears']
PRED:  ['2734,american', '2734,chicago', '2734,1998', '2734,baltimore ravens']
NONE_TRUE_MORE:  ['2734,1998 baltimore ravens season', '2734,chicago bears']
NONE_PRED_MORE:  ['2734,american', '2734,chicago', '2734,1998', '2734,baltimore ravens']
TRUE:  ['2735,office 365']
PRED:  ['2735,365']
SAME:  []
NONE_TRUE_EQUAL:  ['2735,office 365']
NONE_PRED_EQUAL:  ['2735,365']
TRUE:  ['2736,lyudmila buldakova', '2736,anatoly kharlampiyev']
PRED:  ['2736,kharlampiyev', '2736, ', '2736,lyudmila bulda

TRUE:  ['3004,ella t. grasso']
PRED:  ['3004,ella t. grasso']
SAME:  ['3004,ella t. grasso']
TRUE:  ['3005,john franklin miller', '3005,blanche bruce']
PRED:  ['3005,blanche bruce', '3005,john franklin miller']
SAME:  ['3005,blanche bruce', '3005,john franklin miller']
TRUE:  ['3006,samuel of waldebba', '3006,marcus samuelsson']
PRED:  ['3006,marcus samuelsson', '3006,samuel', '3006,waldebba']
MORE:  ['3006,marcus samuelsson']
TRUE:  ['3007,steven peterman']
PRED:  ['3007,steven peterman']
SAME:  ['3007,steven peterman']
TRUE:  ['3008,gloucestershire', '3008,north sea']
PRED:  ['3008,gloucestershire', '3008,north sea']
SAME:  ['3008,gloucestershire', '3008,north sea']
TRUE:  ['3009,gloria schaffer', '3009,newington']
PRED:  ['3009,gloria schaffer', '3009,newington']
SAME:  ['3009,gloria schaffer', '3009,newington']
TRUE:  ['3010,claudia moro']
PRED:  ['3010,claudia moro']
SAME:  ['3010,claudia moro']
TRUE:  ['3011,bison']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3011,bison']
NONE_PRED

PRED:  ['3275,alexander lippisch']
SAME:  ['3275,alexander lippisch']
TRUE:  ['3276,willian langer', '3276,george f hoar']
PRED:  ['']
NONE_TRUE_LESS:  ['3276,willian langer', '3276,george f hoar']
NONE_PRED_LESS:  ['']
TRUE:  ['3277,marcus floyd']
PRED:  ['3277,marcus floyd']
SAME:  ['3277,marcus floyd']
TRUE:  ['3278,union state', '3278,svetlana gounkina']
PRED:  ['3278,whihc', '3278,svetlana gounkina', '3278,union state']
MORE:  ['3278,svetlana gounkina', '3278,union state']
TRUE:  ['3279,pandikar amin mulia']
PRED:  ['3279,the dewan rakyat', '3279,amin mulia']
NONE_TRUE_MORE:  ['3279,pandikar amin mulia']
NONE_PRED_MORE:  ['3279,the dewan rakyat', '3279,amin mulia']
TRUE:  ['3280,1997 canadian grand prix']
PRED:  ['3280,pole', '3280,1997', '3280,canadian grand prix']
NONE_TRUE_MORE:  ['3280,1997 canadian grand prix']
NONE_PRED_MORE:  ['3280,pole', '3280,1997', '3280,canadian grand prix']
TRUE:  ['3281,blade runner 2']
PRED:  ['3281,the blade runner 2']
SAME:  []
NONE_TRUE_EQUAL:  [

PRED:  ['3474,buwat', '3474,fatima']
SAME:  []
NONE_TRUE_EQUAL:  ['3474,invasion of buwat', "3474,fatima bint sa'd"]
NONE_PRED_EQUAL:  ['3474,buwat', '3474,fatima']
TRUE:  ['3475,ted falon']
PRED:  ['3475,ted falon']
SAME:  ['3475,ted falon']
TRUE:  ['3476,nasa']
PRED:  ['3476,nasa']
SAME:  ['3476,nasa']
TRUE:  ['3477,cedella booker', '3477,sharon marley']
PRED:  ['3477,cedella booker', '3477,sharon marley']
SAME:  ['3477,cedella booker', '3477,sharon marley']
TRUE:  ['3478,england']
PRED:  ['3478,england']
SAME:  ['3478,england']
TRUE:  ['3479,moscow governorate']
PRED:  ['3479,moscow']
SAME:  []
NONE_TRUE_EQUAL:  ['3479,moscow governorate']
NONE_PRED_EQUAL:  ['3479,moscow']
TRUE:  ['3480,arm architecture']
PRED:  ['3480,arm architecture']
SAME:  ['3480,arm architecture']
TRUE:  ['3481,jerry siegel']
PRED:  ['3481,jerry siegel']
SAME:  ['3481,jerry siegel']
TRUE:  ['3482,nick clegg', '3482,david cameron']
PRED:  ['3482,david cameron', '3482,nick clegg']
SAME:  ['3482,david cameron', '

PRED:  ['3672,andrew lincoln']
LESS:  ['3672,andrew lincoln']
TRUE:  ['3673,blytheville high school']
PRED:  ['3673,blytheville high school']
SAME:  ['3673,blytheville high school']
TRUE:  ['3674,budapest']
PRED:  ['3674,budapest']
SAME:  ['3674,budapest']
TRUE:  ['3675,2012 campeonato brasileiro srie a.']
PRED:  ['3675,2012', '3675,campeonato brasileiro srie a.']
NONE_TRUE_MORE:  ['3675,2012 campeonato brasileiro srie a.']
NONE_PRED_MORE:  ['3675,2012', '3675,campeonato brasileiro srie a.']
TRUE:  ['3676,eddie bravo', '3676,alan jouban']
PRED:  ['3676,is eddie bravo', '3676,alan jouban']
SAME:  ['3676,alan jouban']
PARTIAL:  ['3676,alan jouban']
TRUE:  ['3677,msx basic']
PRED:  ['3677,msx']
SAME:  []
NONE_TRUE_EQUAL:  ['3677,msx basic']
NONE_PRED_EQUAL:  ['3677,msx']
TRUE:  ['3678,swedish']
PRED:  ['3678,swedish']
SAME:  ['3678,swedish']
TRUE:  ['3679,harry and the potters']
PRED:  ['3679,harry', '3679,potters']
NONE_TRUE_MORE:  ['3679,harry and the potters']
NONE_PRED_MORE:  ['3679,h

PRED:  ['3948,rhodes-haverty building']
SAME:  ['3948,rhodes-haverty building']
TRUE:  ['3949,ptolemy xiv of egypt']
PRED:  ['3949,egypt']
SAME:  []
NONE_TRUE_EQUAL:  ['3949,ptolemy xiv of egypt']
NONE_PRED_EQUAL:  ['3949,egypt']
TRUE:  ['3950,martin molony', '3950,best mate']
PRED:  ['3950,the  best mate', '3950,the martin molony']
SAME:  []
NONE_TRUE_EQUAL:  ['3950,martin molony', '3950,best mate']
NONE_PRED_EQUAL:  ['3950,the  best mate', '3950,the martin molony']
TRUE:  ['3951,greater san antonio']
PRED:  ['3951,greater san antonio']
SAME:  ['3951,greater san antonio']
TRUE:  ["3952,butter's very own episode"]
PRED:  ['3952,butter']
SAME:  []
NONE_TRUE_EQUAL:  ["3952,butter's very own episode"]
NONE_PRED_EQUAL:  ['3952,butter']
TRUE:  ['3953,chung il kwon']
PRED:  ['3953,chung il kwon']
SAME:  ['3953,chung il kwon']
TRUE:  ['3954,catcher']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3954,catcher']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3955,elsie paroubek', '3955,chicago']
PRED:  ['3955,els

TRUE:  ['4082,zachary taylor', '4082,sixth infantry regiment of us']
PRED:  ['4082,zachary taylor', '4082,sixth', '4082,us']
MORE:  ['4082,zachary taylor']
TRUE:  ['4083,chicago bulls', '4083,phoenix suns']
PRED:  ['4083,chicago bulls', '4083,phoenix suns']
SAME:  ['4083,chicago bulls', '4083,phoenix suns']
TRUE:  ['4084,islam']
PRED:  ['4084,islam']
SAME:  ['4084,islam']
TRUE:  ['4085,timothy j. roemer']
PRED:  ['4085,timothy j. roemer']
SAME:  ['4085,timothy j. roemer']
TRUE:  ['4086,becket fund for religious liberty']
PRED:  ['4086,becket']
SAME:  []
NONE_TRUE_EQUAL:  ['4086,becket fund for religious liberty']
NONE_PRED_EQUAL:  ['4086,becket']
TRUE:  ['4087,tornado']
PRED:  ['4087,tornado']
SAME:  ['4087,tornado']
TRUE:  ['4088,henri maspero']
PRED:  ['4088,henri maspero']
SAME:  ['4088,henri maspero']
TRUE:  ['4089,software as a service']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4089,software as a service']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4090,remote desktop protocol']
PRED:  ['']

TRUE:  ['4283,pork']
PRED:  ['4283,one']
SAME:  []
NONE_TRUE_EQUAL:  ['4283,pork']
NONE_PRED_EQUAL:  ['4283,one']
TRUE:  ['4284,penn state nittany lions football']
PRED:  ['4284,penn state nittany lions']
SAME:  []
NONE_TRUE_EQUAL:  ['4284,penn state nittany lions football']
NONE_PRED_EQUAL:  ['4284,penn state nittany lions']
TRUE:  ['4285,sony corpn shareholders and subsidies']
PRED:  ['4285,sony corp']
SAME:  []
NONE_TRUE_EQUAL:  ['4285,sony corpn shareholders and subsidies']
NONE_PRED_EQUAL:  ['4285,sony corp']
TRUE:  ['4286,dan mica']
PRED:  ['4286,dan mica']
SAME:  ['4286,dan mica']
TRUE:  ['4287,gary portnoy', '4287,where everybody knows your name']
PRED:  ['4287,gary portnoy']
LESS:  ['4287,gary portnoy']
TRUE:  ['4288,rachel rasmussen']
PRED:  ['4288,rachel rasmussen']
SAME:  ['4288,rachel rasmussen']
TRUE:  ['4289,liliya lobanova', '4289,dofinivka estuary']
PRED:  ['4289, ', '4289,the liliya lobanova']
SAME:  []
NONE_TRUE_EQUAL:  ['4289,liliya lobanova', '4289,dofinivka estuar

PRED:  ['4490,massachusetts']
SAME:  []
NONE_TRUE_EQUAL:  ['4490,massachusetts house of representatives']
NONE_PRED_EQUAL:  ['4490,massachusetts']
TRUE:  ['4491,naval support activity naples']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4491,naval support activity naples']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4492,microsoft azure']
PRED:  ['4492,microsoft', '4492,one']
NONE_TRUE_MORE:  ['4492,microsoft azure']
NONE_PRED_MORE:  ['4492,microsoft', '4492,one']
TRUE:  ['4493,trinity house', '4493,royal society']
PRED:  ['4493,royal society', '4493,trinity house']
SAME:  ['4493,royal society', '4493,trinity house']
TRUE:  ['4494,williamsburg, kansas']
PRED:  ['4494,williamsburg', '4494,kansas']
NONE_TRUE_MORE:  ['4494,williamsburg, kansas']
NONE_PRED_MORE:  ['4494,williamsburg', '4494,kansas']
TRUE:  ['4495,ins viraat', '4495,daund junction railway station']
PRED:  ['4495,daund junction', '4495,ins']
SAME:  []
NONE_TRUE_EQUAL:  ['4495,ins viraat', '4495,daund junction railway station']
NONE_PRED_E

TRUE:  ['4699,cbs']
PRED:  ['4699,cbs']
SAME:  ['4699,cbs']
TRUE:  ['4700,massimo liverani', '4700,guido guerrini']
PRED:  ['4700,guido guerrini', '4700,massimo liverani']
SAME:  ['4700,guido guerrini', '4700,massimo liverani']
TRUE:  ['4701,california']
PRED:  ['4701,california']
SAME:  ['4701,california']
TRUE:  ['4702,world war ii']
PRED:  ['4702,one', '4702,world war ii']
MORE:  ['4702,world war ii']
TRUE:  ['4703,rev']
PRED:  ['4703,rev']
SAME:  ['4703,rev']
TRUE:  ['4704,erich bagge']
PRED:  ['4704,erich bagge']
SAME:  ['4704,erich bagge']
TRUE:  ['4705,japan']
PRED:  ['4705,japan']
SAME:  ['4705,japan']
TRUE:  ['4706,trn vit hng']
PRED:  ['4706,trn vit hng']
SAME:  ['4706,trn vit hng']
TRUE:  ['4707,albert kwesi ocran']
PRED:  ['4707,albert kwesi ocran']
SAME:  ['4707,albert kwesi ocran']
TRUE:  ['4708,sony financial']
PRED:  ['4708,sony financial']
SAME:  ['4708,sony financial']
TRUE:  ['4709,william l. dayton']
PRED:  ['4709,william l. dayton']
SAME:  ['4709,william l. dayton'

SAME:  ['4965,laurie johnson']
PARTIAL:  ['4965,laurie johnson']
TRUE:  ['4966,uss kittiwake', '4966,haight-ashbury']
PRED:  ['4966,haight-ashbury', '4966,uss kittiwake']
SAME:  ['4966,haight-ashbury', '4966,uss kittiwake']
TRUE:  ['4967,motorpsycho nitemare']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4967,motorpsycho nitemare']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4968,hbo', '4968,erik jendresen']
PRED:  ['4968,erik jendresen', '4968,hbo']
SAME:  ['4968,erik jendresen', '4968,hbo']
TRUE:  ['4969,battle']
PRED:  ['4969,battle of the bulge']
SAME:  []
NONE_TRUE_EQUAL:  ['4969,battle']
NONE_PRED_EQUAL:  ['4969,battle of the bulge']
TRUE:  ['4970,israel', '4970,colin powell']
PRED:  ['4970,colin powell']
LESS:  ['4970,colin powell']
TRUE:  ['4971,the ultimate fighter: team rousey vs. team tate']
PRED:  ['4971,the ultimate fighter', '4971,team tate']
NONE_TRUE_MORE:  ['4971,the ultimate fighter: team rousey vs. team tate']
NONE_PRED_MORE:  ['4971,the ultimate fighter', '4971,team tate']
TRUE:  

In [23]:
# Report the eight comparison counters, one value per line, then the value of
# `predicted` divided by the number of entries in `real_entities`.
print(
    match, partial, less, more,
    none_match, none_less, none_more, predicted,
    sep='\n',
)
print(predicted / len(real_entities))

2238
299
210
320
1322
198
413
4102
0.6175850647395363


In [24]:
### Compare only the `ner_md` predictions against `real`

# compare_predictions hands back eight values, unpacked here in order.
(match, partial, less, more,
 none_match, none_less, none_more, predicted) = compare_predictions(real, ner_md)

# One counter per line, then `predicted` divided by the number of entries in
# `real_entities`.
print(
    match, partial, less, more,
    none_match, none_less, none_more, predicted,
    sep='\n',
)
print(predicted / len(real_entities))

TRUE:  ['0,bill finger']
PRED:  ['0,bill finger']
SAME:  ['0,bill finger']
TRUE:  ['1,selwyn lloyd', '1,winston churchill']
PRED:  ['1,selwyn lloyd']
LESS:  ['1,selwyn lloyd']
TRUE:  ['2,gestapo']
PRED:  ['2,gestapo']
SAME:  ['2,gestapo']
TRUE:  ['3,mumbai north']
PRED:  ['3,mumbai']
SAME:  []
NONE_TRUE_EQUAL:  ['3,mumbai north']
NONE_PRED_EQUAL:  ['3,mumbai']
TRUE:  ['4,roberto clemente bridge']
PRED:  ['4,roberto clemente bridge']
SAME:  ['4,roberto clemente bridge']
TRUE:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
PRED:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
SAME:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
TRUE:  ['6,li si']
PRED:  ['6,li si']
SAME:  ['6,li si']
TRUE:  ['7,pavel moroz', '7,yakov estrin']
PRED:  ['7,pavel moroz', '7,yakov estrin']
SAME:  ['7,pavel moroz', '7,yakov estrin']
TRUE:  ['8,broadmeadows, victoria']
PRED:  ['8,broadmeadows, victoria']
SAME:  ['8,broadmeadows, victoria']
TRUE:  ['9,asc creative services']
PRED:  ['9,asc cr

PRED:  ['164,josiah idowu-fearon', '164,damasucs']
SAME:  ['164,josiah idowu-fearon']
PARTIAL:  ['164,josiah idowu-fearon']
TRUE:  ['165,paramount television', '165,judy hart angelo']
PRED:  ['165,judy hart angelo', '165,paramount television']
SAME:  ['165,judy hart angelo', '165,paramount television']
TRUE:  ['166,caspian']
PRED:  ['166,caspian']
SAME:  ['166,caspian']
TRUE:  ['167,united states department of defense', '167,arlington county, virginia']
PRED:  ['167,arlington county', '167,virginia', '167,united states department of defense']
MORE:  ['167,united states department of defense']
TRUE:  ['168,thongsuk samdaengpan']
PRED:  ['168,thongsuk samdaengpan']
SAME:  ['168,thongsuk samdaengpan']
TRUE:  ['169,asus']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['169,asus']
NONE_PRED_EQUAL:  ['']
TRUE:  ['170,ncaa']
PRED:  ['170,ncaa']
SAME:  ['170,ncaa']
TRUE:  ['171,afrika baby bam', '171,queen latifah']
PRED:  ['171,queen latifah', '171,afrika', '171,baby bam']
MORE:  ['171,queen latifa

PRED:  ['298,bloomberg markets']
SAME:  ['298,bloomberg markets']
TRUE:  ['299,motor vehicles']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['299,motor vehicles']
NONE_PRED_EQUAL:  ['']
TRUE:  ['300,holmes colbert', '300,linda hogan']
PRED:  ['300,linda hogan', '300,holmes colbert']
SAME:  ['300,linda hogan', '300,holmes colbert']
TRUE:  ['301,jon speelman']
PRED:  ['301,jon speelman']
SAME:  ['301,jon speelman']
TRUE:  ['302,ella loves cole']
PRED:  ['302,ella loves cole']
SAME:  ['302,ella loves cole']
TRUE:  ['303,prime time entertainment network']
PRED:  ['303,prime time entertainment network']
SAME:  ['303,prime time entertainment network']
TRUE:  ['304,auto shankar']
PRED:  ['304,auto shankar']
SAME:  ['304,auto shankar']
TRUE:  ['305,burr truss']
PRED:  ['305,burr truss']
SAME:  ['305,burr truss']
TRUE:  ['306,tonle sap']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['306,tonle sap']
NONE_PRED_EQUAL:  ['']
TRUE:  ['307,cheryl teigs']
PRED:  ['307,cheryl teigs']
SAME:  ['307,cheryl teigs'

NONE_PRED_EQUAL:  ['371,gm']
TRUE:  ['372,texas']
PRED:  ['372,texas']
SAME:  ['372,texas']
TRUE:  ['373,counterpoint']
PRED:  ['373,counterpoint']
SAME:  ['373,counterpoint']
TRUE:  ['374,pc 9800']
PRED:  ['374,9800']
SAME:  []
NONE_TRUE_EQUAL:  ['374,pc 9800']
NONE_PRED_EQUAL:  ['374,9800']
TRUE:  ['375,anglo-frisian']
PRED:  ['375,anglo']
SAME:  []
NONE_TRUE_EQUAL:  ['375,anglo-frisian']
NONE_PRED_EQUAL:  ['375,anglo']
TRUE:  ['376,tony bennett']
PRED:  ['376,tony bennett']
SAME:  ['376,tony bennett']
TRUE:  ['377,charles willing byrd', '377,north bend, ohio']
PRED:  ['377,charles willing byrd', '377,north bend', '377,ohio']
MORE:  ['377,charles willing byrd']
TRUE:  ['378,novelas ejemplares']
PRED:  ['378,novelas']
SAME:  []
NONE_TRUE_EQUAL:  ['378,novelas ejemplares']
NONE_PRED_EQUAL:  ['378,novelas']
TRUE:  ['379,anne spielberg']
PRED:  ['379,anne spielberg']
SAME:  ['379,anne spielberg']
TRUE:  ['380,nbc', '380,frasier']
PRED:  ['380,nbc', '380,fraiser']
SAME:  ['380,nbc']
PARTI

TRUE:  ['439,us']
PRED:  ['439,us']
SAME:  ['439,us']
TRUE:  ['440,indian ocean', '440,zambia']
PRED:  ['440,indian ocean', '440,zambia']
SAME:  ['440,indian ocean', '440,zambia']
TRUE:  ['441,panathinaikos fc']
PRED:  ['441,panathinaikos fc']
SAME:  ['441,panathinaikos fc']
TRUE:  ['442,clamp']
PRED:  ['442,clamp']
SAME:  ['442,clamp']
TRUE:  ['443,raymond conner']
PRED:  ['443,raymond conner']
SAME:  ['443,raymond conner']
TRUE:  ['444,dominican republic']
PRED:  ['444,dominican republic']
SAME:  ['444,dominican republic']
TRUE:  ['445,charles journet', '445,elbieta czartoryska']
PRED:  ['445,the elbieta czartoryska', '445,the charles journet']
SAME:  []
NONE_TRUE_EQUAL:  ['445,charles journet', '445,elbieta czartoryska']
NONE_PRED_EQUAL:  ['445,the elbieta czartoryska', '445,the charles journet']
TRUE:  ['446,olin corporation']
PRED:  ['446,olin corporation']
SAME:  ['446,olin corporation']
TRUE:  ['447,judson huss', '447,philippe tesnire']
PRED:  ['447,philippe']
NONE_TRUE_LESS:  [

SAME:  []
NONE_TRUE_EQUAL:  ['574,aegean sea']
NONE_PRED_EQUAL:  ['']
TRUE:  ['575,john tory']
PRED:  ['575,john tory']
SAME:  ['575,john tory']
TRUE:  ['576,north caucasus railway']
PRED:  ['576,north caucasus']
SAME:  []
NONE_TRUE_EQUAL:  ['576,north caucasus railway']
NONE_PRED_EQUAL:  ['576,north caucasus']
TRUE:  ['577,etienne bieler']
PRED:  ['577,etienne bieler\\xc9tienne_bi\\xe9ler']
SAME:  []
NONE_TRUE_EQUAL:  ['577,etienne bieler']
NONE_PRED_EQUAL:  ['577,etienne bieler\\xc9tienne_bi\\xe9ler']
TRUE:  ['578,london']
PRED:  ['578,london']
SAME:  ['578,london']
TRUE:  ['579,muhammad yunus']
PRED:  ['579,muhammad yunus']
SAME:  ['579,muhammad yunus']
TRUE:  ['580,hayden, stone & co.']
PRED:  ['580,hayden, stone & co.']
SAME:  ['580,hayden, stone & co.']
TRUE:  ['581,creatures']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['581,creatures']
NONE_PRED_EQUAL:  ['']
TRUE:  ['582,itv']
PRED:  ['582,itv(tv']
SAME:  []
NONE_TRUE_EQUAL:  ['582,itv']
NONE_PRED_EQUAL:  ['582,itv(tv']
TRUE:  ['5

TRUE:  ['641,lg optimus vu', '641,manu cornet']
PRED:  ['641,lg optimus vu', '641,manu cornet']
SAME:  ['641,lg optimus vu', '641,manu cornet']
TRUE:  ['642,edsel ranger']
PRED:  ['642,edsel']
SAME:  []
NONE_TRUE_EQUAL:  ['642,edsel ranger']
NONE_PRED_EQUAL:  ['642,edsel']
TRUE:  ['643,menora tunnel']
PRED:  ['643,menora']
SAME:  []
NONE_TRUE_EQUAL:  ['643,menora tunnel']
NONE_PRED_EQUAL:  ['643,menora']
TRUE:  ['644,cape town', '644,gold medal']
PRED:  ['644,cape town', '644,the royal astronomical society']
SAME:  ['644,cape town']
PARTIAL:  ['644,cape town']
TRUE:  ['645,john mcewen', '645,phm vn ng']
PRED:  ['645,john mcewen', '645,phm vn']
SAME:  ['645,john mcewen']
PARTIAL:  ['645,john mcewen']
TRUE:  ['646,hanged']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['646,hanged']
NONE_PRED_EQUAL:  ['']
TRUE:  ['647,vehicles']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['647,vehicles']
NONE_PRED_EQUAL:  ['']
TRUE:  ['648,caspian sea']
PRED:  ['648,the caspian sea']
SAME:  []
NONE_TRUE_EQUAL:  [

PRED:  ['777,nathaniel p. hill']
SAME:  ['777,nathaniel p. hill']
TRUE:  ['778,tom maniatis', '778,james still']
PRED:  ['778,james', '778,tom maniatis']
SAME:  ['778,tom maniatis']
PARTIAL:  ['778,tom maniatis']
TRUE:  ['779,becket fund for religious liberty']
PRED:  ['779,becket']
SAME:  []
NONE_TRUE_EQUAL:  ['779,becket fund for religious liberty']
NONE_PRED_EQUAL:  ['779,becket']
TRUE:  ['780,joe purcell', '780,winston bryant']
PRED:  ['780,winston bryant', '780,joe purcell']
SAME:  ['780,winston bryant', '780,joe purcell']
TRUE:  ['781,scuderia ferrari']
PRED:  ['781,first', '781,scuderia ferrari']
MORE:  ['781,scuderia ferrari']
TRUE:  ['782,olivier boulay']
PRED:  ['782,olivier boulay']
SAME:  ['782,olivier boulay']
TRUE:  ['783,mary poppins (musical)']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['783,mary poppins (musical)']
NONE_PRED_EQUAL:  ['']
TRUE:  ['784,cable stayed bridges']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['784,cable stayed bridges']
NONE_PRED_EQUAL:  ['']
TRUE:  

SAME:  ['916,turin']
TRUE:  ['917,douglas, isle of man']
PRED:  ['917,douglas']
SAME:  []
NONE_TRUE_EQUAL:  ['917,douglas, isle of man']
NONE_PRED_EQUAL:  ['917,douglas']
TRUE:  ['918,tampa']
PRED:  ['918,tampa']
SAME:  ['918,tampa']
TRUE:  ['919,phish']
PRED:  ['919,phish']
SAME:  ['919,phish']
TRUE:  ["920,musee d'orsay"]
PRED:  ['920,musee']
SAME:  []
NONE_TRUE_EQUAL:  ["920,musee d'orsay"]
NONE_PRED_EQUAL:  ['920,musee']
TRUE:  ['921,bill clinton']
PRED:  ['921,bill clinton']
SAME:  ['921,bill clinton']
TRUE:  ['922,gnowsys', '922,gnu texmacs']
PRED:  ['922,gnowsys', '922,gnu texmacs']
SAME:  ['922,gnowsys', '922,gnu texmacs']
TRUE:  ['923,liberty bell']
PRED:  ['923,liberty bell']
SAME:  ['923,liberty bell']
TRUE:  ['924,gibson guitar corporation']
PRED:  ['924,the gibson guitar corporation']
SAME:  []
NONE_TRUE_EQUAL:  ['924,gibson guitar corporation']
NONE_PRED_EQUAL:  ['924,the gibson guitar corporation']
TRUE:  ['925,the skeleton dance', "925,mickey's mellerdrammer"]
PRED:  ['

NONE_PRED_EQUAL:  ['1049,the norwalk trainband']
TRUE:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
PRED:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
SAME:  ['1050,newin chidchob', '1050,ajahn sao kantaslo']
TRUE:  ['1051,bafta']
PRED:  ['1051,bafta']
SAME:  ['1051,bafta']
TRUE:  ['1052,microsoft office', '1052,c__']
PRED:  ['1052,microsoft', '1052,c++']
SAME:  []
NONE_TRUE_EQUAL:  ['1052,microsoft office', '1052,c__']
NONE_PRED_EQUAL:  ['1052,microsoft', '1052,c++']
TRUE:  ['1053,altazor', '1053,picasso']
PRED:  ['1053,picasso']
LESS:  ['1053,picasso']
TRUE:  ['1054,toronto argonauts']
PRED:  ['1054,toronto argonauts']
SAME:  ['1054,toronto argonauts']
TRUE:  ['1055,john kotelawala']
PRED:  ['1055,john kotelawala']
SAME:  ['1055,john kotelawala']
TRUE:  ['1056,marika gombitova']
PRED:  ['1056,marika gombitova']
SAME:  ['1056,marika gombitova']
TRUE:  ['1057,govinda', '1057,chandra shekhar']
PRED:  ['1057,chandra shekhar', '1057,govinda']
SAME:  ['1057,chandra shekhar', 

PRED:  ['1118,azerbaijan']
SAME:  ['1118,azerbaijan']
TRUE:  ['1119,tabriz']
PRED:  ['1119,tabriz']
SAME:  ['1119,tabriz']
TRUE:  ['1120,isaac hayes', '1120,primus']
PRED:  ['1120,primus', '1120,isaac hayes']
SAME:  ['1120,primus', '1120,isaac hayes']
TRUE:  ['1121,bruce harrell']
PRED:  ['1121,bruce harrell']
SAME:  ['1121,bruce harrell']
TRUE:  ['1122,lpga', '1122,toll global express']
PRED:  ['1122,toll global express', '1122,lpga']
SAME:  ['1122,toll global express', '1122,lpga']
TRUE:  ['1123,glasgow']
PRED:  ['1123,glasgow']
SAME:  ['1123,glasgow']
TRUE:  ['1124,moskovsky station']
PRED:  ['1124,moskovsky']
SAME:  []
NONE_TRUE_EQUAL:  ['1124,moskovsky station']
NONE_PRED_EQUAL:  ['1124,moskovsky']
TRUE:  ['1125,band of brothers']
PRED:  ['1125,band of brothers']
SAME:  ['1125,band of brothers']
TRUE:  ['1126,12th field artillery regiment', '1126,colorado']
PRED:  ['1126,12th field artillery regiment', '1126,colorado']
SAME:  ['1126,12th field artillery regiment', '1126,colorado']

TRUE:  ['1258,american mediterranean sea', '1258,entronque de herradura']
PRED:  ['1258,the american mediterranean sea', '1258,entronque de herradura']
SAME:  ['1258,entronque de herradura']
PARTIAL:  ['1258,entronque de herradura']
TRUE:  ["1259,brown's corners, toronto", '1259,george karrys']
PRED:  ["1259,the brown's corners", '1259,toronto', '1259,the george karrys']
NONE_TRUE_MORE:  ["1259,brown's corners, toronto", '1259,george karrys']
NONE_PRED_MORE:  ["1259,the brown's corners", '1259,toronto', '1259,the george karrys']
TRUE:  ['1260,alcal de henares']
PRED:  ['1260,alcal de henares']
SAME:  ['1260,alcal de henares']
TRUE:  ['1261,unitary state', '1261,guatemala']
PRED:  ['1261,guatemala']
LESS:  ['1261,guatemala']
TRUE:  ['1262,philosophy']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1262,philosophy']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1263,provisional government of saskatchewan']
PRED:  ['1263,provisional government of saskatchewan']
SAME:  ['1263,provisional government of saskat

SAME:  []
NONE_TRUE_EQUAL:  ["1394,l'enfant a la tasse", '1394,women in the garden']
NONE_PRED_EQUAL:  ['1394,garden', "1394,l'enfant"]
TRUE:  ['1395,charles willing byrd']
PRED:  ['1395,charles willing byrd']
SAME:  ['1395,charles willing byrd']
TRUE:  ['1396,ford f-series first generation']
PRED:  ['1396,ford f-series', '1396,first']
NONE_TRUE_MORE:  ['1396,ford f-series first generation']
NONE_PRED_MORE:  ['1396,ford f-series', '1396,first']
TRUE:  ['1397,omaha']
PRED:  ['1397,omaha']
SAME:  ['1397,omaha']
TRUE:  ['1398,steven palazzo']
PRED:  ['1398,steven palazzo']
SAME:  ['1398,steven palazzo']
TRUE:  ['1399,ctv network']
PRED:  ["1399,ctv network's"]
SAME:  []
NONE_TRUE_EQUAL:  ['1399,ctv network']
NONE_PRED_EQUAL:  ["1399,ctv network's"]
TRUE:  ['1400,william anthony', '1400,philip w pillsbury']
PRED:  ['1400,william anthony', '1400,philip w pillsbury']
SAME:  ['1400,william anthony', '1400,philip w pillsbury']
TRUE:  ['1401,old saybrook, connecticut']
PRED:  ['1401,old saybroo

TRUE:  ['1597,fredis refunjol', '1597,willem-alexander']
PRED:  ['1597,willem-alexander', '1597,fredis refunjol']
SAME:  ['1597,willem-alexander', '1597,fredis refunjol']
TRUE:  ['1598,broadcast syndication']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1598,broadcast syndication']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1599,gettv', '1599,hypnotize mind']
PRED:  ['1599,gettv']
LESS:  ['1599,gettv']
TRUE:  ['1600,austin college']
PRED:  ['1600,austin college']
SAME:  ['1600,austin college']
TRUE:  ['1601,leallah']
PRED:  ['1601,leallah']
SAME:  ['1601,leallah']
TRUE:  ['1602,tom mclaury', '1602,ike clanton']
PRED:  ['1602,ike clanton', '1602,tom mclaury']
SAME:  ['1602,ike clanton', '1602,tom mclaury']
TRUE:  ['1603,los angeles world airports']
PRED:  ['1603,the los angeles world airports']
SAME:  []
NONE_TRUE_EQUAL:  ['1603,los angeles world airports']
NONE_PRED_EQUAL:  ['1603,the los angeles world airports']
TRUE:  ['1604,karafarin bank', '1604,french southern and antarctic lands']
PRED:  ['160

PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1802,google videos']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1803,warner bros']
PRED:  ['1803,warner bros']
SAME:  ['1803,warner bros']
TRUE:  ['1804,elizabeth ann seton', '1804,pope paul vi']
PRED:  ['1804,elizabeth ann seton', '1804,pope paul vi']
SAME:  ['1804,elizabeth ann seton', '1804,pope paul vi']
TRUE:  ['1805,brazil', '1805,sao paulo']
PRED:  ['1805,sao paulo', '1805,brazil']
SAME:  ['1805,sao paulo', '1805,brazil']
TRUE:  ['1806,ford', '1806,ford kansas assembly plant']
PRED:  ['1806,ford', '1806,ford kansas assembly']
SAME:  ['1806,ford']
PARTIAL:  ['1806,ford']
TRUE:  ['1807,royal blue']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1807,royal blue']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1808,david gilmour', '1808,pink floyd']
PRED:  ['1808,david gilmour', '1808,pink floyd']
SAME:  ['1808,david gilmour', '1808,pink floyd']
TRUE:  ['1809,lori black', '1809,harvard westlake school']
PRED:  ['1809,lori black', '1809,harvard westlake', '1809,one']
MORE:

PRED:  ['2079,enrique jos varona']
SAME:  ['2079,enrique jos varona']
TRUE:  ['2080,first national bank and trust building']
PRED:  ['2080,first national bank and trust building', '2080,lima', '2080,ohio']
MORE:  ['2080,first national bank and trust building']
TRUE:  ['2081,ronda']
PRED:  ['2081,ronda']
SAME:  ['2081,ronda']
TRUE:  ['2082,toms creek']
PRED:  ['2082,toms creek']
SAME:  ['2082,toms creek']
TRUE:  ['2083,the skeleton dance', '2083,the haunted house']
PRED:  ['2083,the the skeleton dance', '2083,the haunted house', '2083,1929']
MORE:  ['2083,the haunted house']
TRUE:  ['2084,golden state warriors']
PRED:  ['2084,golden state warriors']
SAME:  ['2084,golden state warriors']
TRUE:  ['2085,vangelis', '2085,jordan']
PRED:  ['2085,vangelis', '2085,jordan']
SAME:  ['2085,vangelis', '2085,jordan']
TRUE:  ['2086,jerry tagge', '2086,temptation waits']
PRED:  ['2086,jerry tagge', '2086,temptation waits']
SAME:  ['2086,jerry tagge', '2086,temptation waits']
TRUE:  ['2087,gold medal']

TRUE:  ['2284,qun thnh temple']
PRED:  ['2284,qun thnh temple']
SAME:  ['2284,qun thnh temple']
TRUE:  ['2285,shakespeare', '2285,stanley cavell']
PRED:  ['2285,stanley cavell', '2285,shakespeare']
SAME:  ['2285,stanley cavell', '2285,shakespeare']
TRUE:  ['2286,chelsea f.c.', '2286,evraz']
PRED:  ['2286,chelsea f.c', '2286,evraz']
SAME:  ['2286,evraz']
PARTIAL:  ['2286,evraz']
TRUE:  ['2287,rev', '2287,bombay sapphire']
PRED:  ['2287,bombay sapphire', '2287,rev']
SAME:  ['2287,bombay sapphire', '2287,rev']
TRUE:  ['2288,amit saigal']
PRED:  ['2288,amit saigal']
SAME:  ['2288,amit saigal']
TRUE:  ['2289,a bartlett giamatti']
PRED:  ['2289,bartlett giamatti']
SAME:  []
NONE_TRUE_EQUAL:  ['2289,a bartlett giamatti']
NONE_PRED_EQUAL:  ['2289,bartlett giamatti']
TRUE:  ['2290,tekle haymanot', '2290,haile selassie gugsa']
PRED:  ['2290,tekle', '2290,haile selassie gugsa']
SAME:  ['2290,haile selassie gugsa']
PARTIAL:  ['2290,haile selassie gugsa']
TRUE:  ['2291,riley reid']
PRED:  ['2291,ri

SAME:  ['2421,vanderbilt university medical center']
PARTIAL:  ['2421,vanderbilt university medical center']
TRUE:  ['2422,south shore plaza']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2422,south shore plaza']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2423,north sea', '2423,thames estuary']
PRED:  ['2423,north sea', '2423,thames estuary']
SAME:  ['2423,north sea', '2423,thames estuary']
TRUE:  ['2424,albertin montoya']
PRED:  ['2424,albertin montoya']
SAME:  ['2424,albertin montoya']
TRUE:  ['2425,schibsted', '2425,stockholm']
PRED:  ['2425,schibsted', '2425,stockholm']
SAME:  ['2425,schibsted', '2425,stockholm']
TRUE:  ['2426,gnome project']
PRED:  ['2426,gnome project']
SAME:  ['2426,gnome project']
TRUE:  ['2427,stateless people']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2427,stateless people']
NONE_PRED_EQUAL:  ['']
TRUE:  ["2428,people's bank"]
PRED:  ["2428,people's bank (sri lanka"]
SAME:  []
NONE_TRUE_EQUAL:  ["2428,people's bank"]
NONE_PRED_EQUAL:  ["2428,people's bank (sri lanka"]
TRU

PRED:  ['2635,swahili']
SAME:  ['2635,swahili']
TRUE:  ['2636,immunologists']
PRED:  ['2636,immunologists']
SAME:  ['2636,immunologists']
TRUE:  ['2637,fergie sutherland', '2637,war of attrition']
PRED:  ['2637,fergie', '2637,war of attrition']
SAME:  ['2637,war of attrition']
PARTIAL:  ['2637,war of attrition']
TRUE:  ['2638,liang chow']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2638,liang chow']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2639,iowa']
PRED:  ['2639,iowa']
SAME:  ['2639,iowa']
TRUE:  ['2640,seven spring mountain resort']
PRED:  ['2640,seven', '2640,spring mountain resort']
NONE_TRUE_MORE:  ['2640,seven spring mountain resort']
NONE_PRED_MORE:  ['2640,seven', '2640,spring mountain resort']
TRUE:  ['2641,surayud chulanont']
PRED:  ['2641,surayud chulanont']
SAME:  ['2641,surayud chulanont']
TRUE:  ['2642,blue jacket']
PRED:  ['2642,blue jacket']
SAME:  ['2642,blue jacket']
TRUE:  ['2643,ny']
PRED:  ['2643,ny']
SAME:  ['2643,ny']
TRUE:  ['2644,toluca']
PRED:  ['2644,toluca']
SAME:  [

PRED:  ['']
NONE_TRUE_LESS:  ['2764,owyhee river', '2764,oregon']
NONE_PRED_LESS:  ['']
TRUE:  ['2765,james needs', '2765,x the unknown']
PRED:  ['2765,james']
NONE_TRUE_LESS:  ['2765,james needs', '2765,x the unknown']
NONE_PRED_LESS:  ['2765,james']
TRUE:  ['2766,sony bank']
PRED:  ['2766,sony']
SAME:  []
NONE_TRUE_EQUAL:  ['2766,sony bank']
NONE_PRED_EQUAL:  ['2766,sony']
TRUE:  ['2767,martin an/gsg-5']
PRED:  ['2767,martin an/gsg-5']
SAME:  ['2767,martin an/gsg-5']
TRUE:  ['2768,united world college of costa rica']
PRED:  ['2768,united world college', '2768,costa rica']
NONE_TRUE_MORE:  ['2768,united world college of costa rica']
NONE_PRED_MORE:  ['2768,united world college', '2768,costa rica']
TRUE:  ['2769,fourth legislative assembly of delhi']
PRED:  ['2769,fourth legislative assembly', '2769,delhi']
NONE_TRUE_MORE:  ['2769,fourth legislative assembly of delhi']
NONE_PRED_MORE:  ['2769,fourth legislative assembly', '2769,delhi']
TRUE:  ['2770,indy popcon']
PRED:  ['']
SAME:  []


TRUE:  ['2913,avangard omsk']
PRED:  ["2913,avangard omsk's"]
SAME:  []
NONE_TRUE_EQUAL:  ['2913,avangard omsk']
NONE_PRED_EQUAL:  ["2913,avangard omsk's"]
TRUE:  ['2914,teperberg 1870']
PRED:  ['2914,1870']
SAME:  []
NONE_TRUE_EQUAL:  ['2914,teperberg 1870']
NONE_PRED_EQUAL:  ['2914,1870']
TRUE:  ['2915,hudson highlands state parts']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2915,hudson highlands state parts']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2916,ingo steuer', '2916,stanislav morozov']
PRED:  ['2916,stanislav morozov']
LESS:  ['2916,stanislav morozov']
TRUE:  ['2917,v8 engine']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2917,v8 engine']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2918,falken tire', '2918,air 2000']
PRED:  ['2918,the air 2000']
NONE_TRUE_LESS:  ['2918,falken tire', '2918,air 2000']
NONE_PRED_LESS:  ['2918,the air 2000']
TRUE:  ['2919,lodaiska']
PRED:  ['2919,lodaiska']
SAME:  ['2919,lodaiska']
TRUE:  ['2920,letita mctavish hargrave', '2920,sextus barbour']
PRED:  ['2920,letita mcta

PRED:  ['3122,the european go federation']
SAME:  []
NONE_TRUE_EQUAL:  ['3122,european go federation']
NONE_PRED_EQUAL:  ['3122,the european go federation']
TRUE:  ['3123,momoko kchi']
PRED:  ['3123,momoko kchi']
SAME:  ['3123,momoko kchi']
TRUE:  ['3124,russell t davies']
PRED:  ['3124,russell', '3124,davies']
NONE_TRUE_MORE:  ['3124,russell t davies']
NONE_PRED_MORE:  ['3124,russell', '3124,davies']
TRUE:  ['3125,winston bryant', '3125,jimmie fisher']
PRED:  ['3125,jimmie fisher', '3125,winston bryant']
SAME:  ['3125,jimmie fisher', '3125,winston bryant']
TRUE:  ['3126,phil collinson']
PRED:  ['3126,phil collinson']
SAME:  ['3126,phil collinson']
TRUE:  ['3127,michael deeley']
PRED:  ['3127,michael deeley']
SAME:  ['3127,michael deeley']
TRUE:  ['3128,luke list', '3128,park trammell']
PRED:  ['3128,the park trammell']
NONE_TRUE_LESS:  ['3128,luke list', '3128,park trammell']
NONE_PRED_LESS:  ['3128,the park trammell']
TRUE:  ['3129,spanish harlem incident']
PRED:  ['3129,spanish']
SA

SAME:  ['3330,juan dalmau ramerez']
TRUE:  ['3331,bogdanua']
PRED:  ['3331,bogdanua']
SAME:  ['3331,bogdanua']
TRUE:  ['3332,raymond conner']
PRED:  ['3332,raymond conner']
SAME:  ['3332,raymond conner']
TRUE:  ['3333,mumbai suburban']
PRED:  ['3333,mumbai']
SAME:  []
NONE_TRUE_EQUAL:  ['3333,mumbai suburban']
NONE_PRED_EQUAL:  ['3333,mumbai']
TRUE:  ['3334,union']
PRED:  ['3334,union (american civil war']
SAME:  []
NONE_TRUE_EQUAL:  ['3334,union']
NONE_PRED_EQUAL:  ['3334,union (american civil war']
TRUE:  ['3335,the five']
PRED:  ['3335,five']
SAME:  []
NONE_TRUE_EQUAL:  ['3335,the five']
NONE_PRED_EQUAL:  ['3335,five']
TRUE:  ['3336,national academy museum and school']
PRED:  ['3336,the national academy museum and school']
SAME:  []
NONE_TRUE_EQUAL:  ['3336,national academy museum and school']
NONE_PRED_EQUAL:  ['3336,the national academy museum and school']
TRUE:  ['3337,anthony harvey']
PRED:  ['3337,anthony harvey']
SAME:  ['3337,anthony harvey']
TRUE:  ['3338,animal']
PRED:  [''

SAME:  ['3460,orange virginia']
TRUE:  ['3461,gdask', '3461,oulu']
PRED:  ['3461,gdask', '3461,oulu']
SAME:  ['3461,gdask', '3461,oulu']
TRUE:  ['3462,ford air transport service', '3462,ford y-block engine']
PRED:  ['3462,ford', '3462,y-', '3462,ford air transport service']
MORE:  ['3462,ford air transport service']
TRUE:  ['3463,creatures']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3463,creatures']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3464,lawrence realization stakes']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3464,lawrence realization stakes']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3465,detroit pistons']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3465,detroit pistons']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3466,paul jones']
PRED:  ['3466,paul jones']
SAME:  ['3466,paul jones']
TRUE:  ['3467,ann monroe gilchrist strong']
PRED:  ['3467,ann monroe gilchrist strong']
SAME:  ['3467,ann monroe gilchrist strong']
TRUE:  ['3468,bud adams']
PRED:  ['3468,bud adams']
SAME:  ['3468,bud adams']
TRUE:  ['3469,exxv-

PRED:  ['3602,texas', '3602,jim harris']
SAME:  ['3602,texas', '3602,jim harris']
TRUE:  ['3603,american indian wars']
PRED:  ['3603,american', '3603,indian']
NONE_TRUE_MORE:  ['3603,american indian wars']
NONE_PRED_MORE:  ['3603,american', '3603,indian']
TRUE:  ['3604,conowingo dam']
PRED:  ['3604,conowingo']
SAME:  []
NONE_TRUE_EQUAL:  ['3604,conowingo dam']
NONE_PRED_EQUAL:  ['3604,conowingo']
TRUE:  ['3605,great missenden']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3605,great missenden']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3606,b j crombeen', '3606,brideun school for exception children']
PRED:  ['3606,brideun school for exception children', '3606,b j crombeen']
SAME:  ['3606,brideun school for exception children', '3606,b j crombeen']
TRUE:  ['3607,jenny macklin']
PRED:  ['3607,jenny macklin']
SAME:  ['3607,jenny macklin']
TRUE:  ['3608,oysterhead']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3608,oysterhead']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3609,ingmar bergman']
PRED:  ['3609,ingmar ber

NONE_PRED_EQUAL:  ['']
TRUE:  ['3741,history of penny arcade expo']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3741,history of penny arcade expo']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3742,european broadcasting union', '3742,te deum']
PRED:  ["3742,european broadcasting union's", '3742,te deum']
SAME:  ['3742,te deum']
PARTIAL:  ['3742,te deum']
TRUE:  ['3743,karachi']
PRED:  ['3743,karachi', '3743,pakistan']
MORE:  ['3743,karachi']
TRUE:  ['3744,wyche fowler', '3744,carl stokes']
PRED:  ['3744,carl stokes', '3744,wyche fowler']
SAME:  ['3744,carl stokes', '3744,wyche fowler']
TRUE:  ['3745,geneweb', '3745,gpl license']
PRED:  ['3745,geneweb', '3745,gpl']
SAME:  ['3745,geneweb']
PARTIAL:  ['3745,geneweb']
TRUE:  ['3746,josef bühler']
PRED:  ['3746,josef bhler']
SAME:  []
NONE_TRUE_EQUAL:  ['3746,josef bühler']
NONE_PRED_EQUAL:  ['3746,josef bhler']
TRUE:  ['3747,open society foundations']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3747,open society foundations']
NONE_PRED_EQUAL:  ['']
TRUE:  

SAME:  ['3879,mazabuka']
TRUE:  ['3880,ridgewood, new jersey']
PRED:  ['3880,ridgewood', '3880,new jersey']
NONE_TRUE_MORE:  ['3880,ridgewood, new jersey']
NONE_PRED_MORE:  ['3880,ridgewood', '3880,new jersey']
TRUE:  ['3881,california', '3881,nevado de toluca']
PRED:  ['3881,california', '3881,nevado de toluca']
SAME:  ['3881,california', '3881,nevado de toluca']
TRUE:  ['3882,walt disney', '3882,roy walker']
PRED:  ['3882,roy walker', '3882,walt disney']
SAME:  ['3882,roy walker', '3882,walt disney']
TRUE:  ['3883,carroll e. lanier']
PRED:  ['3883,carroll e. lanier']
SAME:  ['3883,carroll e. lanier']
TRUE:  ['3884,metropolitan city of venice']
PRED:  ['3884,metropolitan city of venice']
SAME:  ['3884,metropolitan city of venice']
TRUE:  ['3885,paramount television', '3885,boston']
PRED:  ['3885,paramount television', '3885,boston']
SAME:  ['3885,paramount television', '3885,boston']
TRUE:  ['3886,ann wenner']
PRED:  ['3886,ann wenner']
SAME:  ['3886,ann wenner']
TRUE:  ['3887,itv']
P

PRED:  ['3948,rhodes-haverty building']
SAME:  ['3948,rhodes-haverty building']
TRUE:  ['3949,ptolemy xiv of egypt']
PRED:  ['3949,ptolemy xiv', '3949,egypt']
NONE_TRUE_MORE:  ['3949,ptolemy xiv of egypt']
NONE_PRED_MORE:  ['3949,ptolemy xiv', '3949,egypt']
TRUE:  ['3950,martin molony', '3950,best mate']
PRED:  ['3950,martin molony']
LESS:  ['3950,martin molony']
TRUE:  ['3951,greater san antonio']
PRED:  ['3951,san antonio']
SAME:  []
NONE_TRUE_EQUAL:  ['3951,greater san antonio']
NONE_PRED_EQUAL:  ['3951,san antonio']
TRUE:  ["3952,butter's very own episode"]
PRED:  ["3952,butter's very own episode"]
SAME:  ["3952,butter's very own episode"]
TRUE:  ['3953,chung il kwon']
PRED:  ['3953,chung il kwon']
SAME:  ['3953,chung il kwon']
TRUE:  ['3954,catcher']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3954,catcher']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3955,elsie paroubek', '3955,chicago']
PRED:  ['3955,elsie paroubek', '3955,chicago']
SAME:  ['3955,elsie paroubek', '3955,chicago']
TRUE:  ['3956

SAME:  []
NONE_TRUE_EQUAL:  ['4087,tornado']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4088,henri maspero']
PRED:  ['4088,henri maspero']
SAME:  ['4088,henri maspero']
TRUE:  ['4089,software as a service']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4089,software as a service']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4090,remote desktop protocol']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4090,remote desktop protocol']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4091,plymouth, vermont', '4091,massachusetts house of representatives']
PRED:  ['4091,plymouth', '4091,vermont', '4091,massachusetts house of representatives']
MORE:  ['4091,massachusetts house of representatives']
TRUE:  ['4092,marcus calpurnius bibulus', '4092,lucius cornelius']
PRED:  ['4092,cornelius', '4092,marcus calpurnius bibulus']
SAME:  ['4092,marcus calpurnius bibulus']
PARTIAL:  ['4092,marcus calpurnius bibulus']
TRUE:  ["4093,dad's root beer"]
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ["4093,dad's root beer"]
NONE_PRED_EQUAL:  ['']
TRUE:  ['4094,al

SAME:  ['4227,duboce triangle', '4227,haight-ashbury']
TRUE:  ['4228,douglas netter', '4228,j. michael straczynski']
PRED:  ['4228,j. michael straczynski', '4228,douglas netter']
SAME:  ['4228,j. michael straczynski', '4228,douglas netter']
TRUE:  ['4229,1999 san marino grand prix']
PRED:  ['4229,first', '4229,1999', '4229,san marino grand prix']
NONE_TRUE_MORE:  ['4229,1999 san marino grand prix']
NONE_PRED_MORE:  ['4229,first', '4229,1999', '4229,san marino grand prix']
TRUE:  ['4230,marvin bush']
PRED:  ['4230,marvin bush']
SAME:  ['4230,marvin bush']
TRUE:  ['4231,daniel waters', "4231,tim birton's"]
PRED:  ["4231,tim birton's", '4231,daniel waters']
SAME:  ["4231,tim birton's", '4231,daniel waters']
TRUE:  ['4232,peter grauer']
PRED:  ['4232,peter grauer']
SAME:  ['4232,peter grauer']
TRUE:  ['4233,sylvain levi']
PRED:  ['4233,sylvain levi']
SAME:  ['4233,sylvain levi']
TRUE:  ['4234,puerto ricons']
PRED:  ['4234,puerto ricons']
SAME:  ['4234,puerto ricons']
TRUE:  ['4235,selangor

TRUE:  ['4293,prime time entertainment network']
PRED:  ['4293,prime time entertainment network']
SAME:  ['4293,prime time entertainment network']
TRUE:  ['4294,theodor hillenhinrichs', '4294,maike evers']
PRED:  ['4294,maike evers', '4294,the theodor hillenhinrichs']
SAME:  ['4294,maike evers']
PARTIAL:  ['4294,maike evers']
TRUE:  ['4295,the mysterious stranger', '4295,n. c. wyeth']
PRED:  ['4295,n. c. wyeth', '4295,the mysterious stranger']
SAME:  ['4295,n. c. wyeth', '4295,the mysterious stranger']
TRUE:  ['4296,pixar']
PRED:  ['4296,pixar']
SAME:  ['4296,pixar']
TRUE:  ['4297,terminator 2: judgment day', '4297,josh friedman']
PRED:  ['4297,judgment day', '4297,josh friedman']
SAME:  ['4297,josh friedman']
PARTIAL:  ['4297,josh friedman']
TRUE:  ['4298,maike evers', '4298,francisco lachowski']
PRED:  ['4298,maike evers', '4298,francisco lachowski']
SAME:  ['4298,maike evers', '4298,francisco lachowski']
TRUE:  ['4299,second lieutenants']
PRED:  ['4299,second lieutenants']
SAME:  ['

PRED:  ['4435,gemini', '4435,apollo 11']
SAME:  ['4435,apollo 11']
PARTIAL:  ['4435,apollo 11']
TRUE:  ['4436,joe mckeehen']
PRED:  ['4436,joe mckeehen']
SAME:  ['4436,joe mckeehen']
TRUE:  ['4437,us']
PRED:  ['4437,us']
SAME:  ['4437,us']
TRUE:  ['4438,lohan ratwatte']
PRED:  ['4438,lohan ratwatte']
SAME:  ['4438,lohan ratwatte']
TRUE:  ['4439,tillingdale']
PRED:  ['4439,tillingdale']
SAME:  ['4439,tillingdale']
TRUE:  ['4440,truth prevails']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4440,truth prevails']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4441,andrew w. lewis']
PRED:  ['4441,andrew w. lewis']
SAME:  ['4441,andrew w. lewis']
TRUE:  ['4442,erik bork', '4442,stephen ambrose']
PRED:  ['4442,stephen ambrose', '4442,erik bork']
SAME:  ['4442,stephen ambrose', '4442,erik bork']
TRUE:  ['4443,mark steven']
PRED:  ['4443,mark steven']
SAME:  ['4443,mark steven']
TRUE:  ['4444,chicago and northwestern transportation company']
PRED:  ['4444,chicago', '4444,northwestern transportation']
NONE_TRUE_M

SAME:  ['4573,england']
TRUE:  ['4574,massachusetts']
PRED:  ['4574,massachusetts']
SAME:  ['4574,massachusetts']
TRUE:  ['4575,charles willing', '4575,john scott']
PRED:  ['4575,charles willing', '4575,john scott']
SAME:  ['4575,charles willing', '4575,john scott']
TRUE:  ['4576,warner bros. television']
PRED:  ['4576,warner bros. television']
SAME:  ['4576,warner bros. television']
TRUE:  ['4577,egypt nanotechnology center']
PRED:  ['4577,egypt nanotechnology center']
SAME:  ['4577,egypt nanotechnology center']
TRUE:  ['4578,akio morita']
PRED:  ['4578,akio morita']
SAME:  ['4578,akio morita']
TRUE:  ['4579,blue lagoon island', '4579,bahamas']
PRED:  ['']
NONE_TRUE_LESS:  ['4579,blue lagoon island', '4579,bahamas']
NONE_PRED_LESS:  ['']
TRUE:  ['4580,rheinmetall mg 60']
PRED:  ['4580,rheinmetall mg 60']
SAME:  ['4580,rheinmetall mg 60']
TRUE:  ['4581,p fitzwallace', '4581,k fordice']
PRED:  ['4581,k fordice', '4581,fitzwallace']
SAME:  ['4581,k fordice']
PARTIAL:  ['4581,k fordice']


TRUE:  ['4718,denver']
PRED:  ['4718,denver']
SAME:  ['4718,denver']
TRUE:  ['4719,american english']
PRED:  ['4719,american', '4719,english']
NONE_TRUE_MORE:  ['4719,american english']
NONE_PRED_MORE:  ['4719,american', '4719,english']
TRUE:  ['4720,teriitaria ii']
PRED:  ['4720,teriitaria ii']
SAME:  ['4720,teriitaria ii']
TRUE:  ['4721,andrew schally', '4721,wtul']
PRED:  ['4721,schally', '4721,wtul']
SAME:  ['4721,wtul']
PARTIAL:  ['4721,wtul']
TRUE:  ['4722,exxv-tv']
PRED:  ['4722,exxv-tv']
SAME:  ['4722,exxv-tv']
TRUE:  ['4723,republican river']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4723,republican river']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4724,all i need is your sweet lovin']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4724,all i need is your sweet lovin']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4725,trinity school, brentwood', '4725,buckhurst hill county high school']
PRED:  ['4725,trinity school', '4725,brentwood', '4725,buckhurst hill county high school']
MORE:  ['4725,buckhurst hill

SAME:  ['4850,copley medal']
TRUE:  ['4851,south korean legislative election of 1992']
PRED:  ['4851,south korean', '4851,1992']
NONE_TRUE_MORE:  ['4851,south korean legislative election of 1992']
NONE_PRED_MORE:  ['4851,south korean', '4851,1992']
TRUE:  ['4852,gerald reaven', '4852,stanford cardinal']
PRED:  ['4852,stanford', '4852,gerald reaven']
SAME:  ['4852,gerald reaven']
PARTIAL:  ['4852,gerald reaven']
TRUE:  ['4853,mark davis', '4853,o.co colliseum']
PRED:  ['4853,mark davis', '4853,o.co colliseum']
SAME:  ['4853,mark davis', '4853,o.co colliseum']
TRUE:  ["4854,bbc's tv", '4854,the sarah jane adventures']
PRED:  ['4854,bbc', '4854,sarah jane']
SAME:  []
NONE_TRUE_EQUAL:  ["4854,bbc's tv", '4854,the sarah jane adventures']
NONE_PRED_EQUAL:  ['4854,bbc', '4854,sarah jane']
TRUE:  ['4855,autodromo enzo e dino ferrari']
PRED:  ['4855,first', '4855,autodromo enzo e dino ferrari']
MORE:  ['4855,autodromo enzo e dino ferrari']
TRUE:  ['4856,canada']
PRED:  ['4856,canada']
SAME:  ['

NONE_PRED_EQUAL:  ['4986,kerguelen', '4986,french']
TRUE:  ['4987,weber thompson']
PRED:  ['4987,weber thompson']
SAME:  ['4987,weber thompson']
TRUE:  ['4988,edith vonnegut']
PRED:  ['4988,edith vonnegut']
SAME:  ['4988,edith vonnegut']
TRUE:  ['4989,iyoas i']
PRED:  ['4989,iyoas']
SAME:  []
NONE_TRUE_EQUAL:  ['4989,iyoas i']
NONE_PRED_EQUAL:  ['4989,iyoas']
TRUE:  ['4990,wcw mayhem']
PRED:  ['4990,wcw mayhem']
SAME:  ['4990,wcw mayhem']
TRUE:  ['4991,rca records']
PRED:  ['4991,rca']
SAME:  []
NONE_TRUE_EQUAL:  ['4991,rca records']
NONE_PRED_EQUAL:  ['4991,rca']
TRUE:  ['4992,nick diaz', '4992,brazilian jiu-jitsu']
PRED:  ['4992,brazilian', '4992,nick diaz']
SAME:  ['4992,nick diaz']
PARTIAL:  ['4992,nick diaz']
TRUE:  ['4993,amstelveen']
PRED:  ['4993,amstelveen']
SAME:  ['4993,amstelveen']
TRUE:  ['4994,choctaw']
PRED:  ['4994,choctaw']
SAME:  ['4994,choctaw']
TRUE:  ['4995,larry j. franco']
PRED:  ['4995,larry j. franco']
SAME:  ['4995,larry j. franco']
TRUE:  ['4996,tnt', '4996,j

In [25]:
# Report the comparison tallies left in the kernel by an earlier cell, one
# value per line, in the order: match, partial, less, more, none_match,
# none_less, none_more, predicted.
# NOTE(review): these names are presumably the result of a previous
# compare_predictions(...) call -- confirm the cell order on a fresh kernel.
print(
    match,
    partial,
    less,
    more,
    none_match,
    none_less,
    none_more,
    predicted,
    sep='\n',
)
# Ratio of the `predicted` tally to the number of gold-standard entities.
print(predicted / len(real_entities))

2528
328
181
298
1144
137
384
4501
0.6776573321288768


In [26]:
### Compare the combined "sm+md" predictions against the real labels.
### NOTE(review): "sm+md" presumably means the merged output of the small and
### medium models -- confirm where `ner_sm_md` is built.

# compare_predictions returns eight tallies; they are unpacked into the same
# global names used by the other comparison cells, so later cells see the
# refreshed values.
(
    match,
    partial,
    less,
    more,
    none_match,
    none_less,
    none_more,
    predicted,
) = compare_predictions(real, ner_sm_md)

# One tally per line, in the unpacking order above.
print(
    match,
    partial,
    less,
    more,
    none_match,
    none_less,
    none_more,
    predicted,
    sep='\n',
)
# Ratio of the `predicted` tally to the number of gold-standard entities.
print(predicted / len(real_entities))

TRUE:  ['0,bill finger']
PRED:  ['0,bill finger']
SAME:  ['0,bill finger']
TRUE:  ['1,selwyn lloyd', '1,winston churchill']
PRED:  ['1,selwyn lloyd']
LESS:  ['1,selwyn lloyd']
TRUE:  ['2,gestapo']
PRED:  ['2,gestapo']
SAME:  ['2,gestapo']
TRUE:  ['3,mumbai north']
PRED:  ['3,mumbai']
SAME:  []
NONE_TRUE_EQUAL:  ['3,mumbai north']
NONE_PRED_EQUAL:  ['3,mumbai']
TRUE:  ['4,roberto clemente bridge']
PRED:  ['4,roberto clemente bridge']
SAME:  ['4,roberto clemente bridge']
TRUE:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
PRED:  ['5,xiii theos philopator', '5,cleopatra v']
SAME:  ['5,cleopatra v']
PARTIAL:  ['5,cleopatra v']
TRUE:  ['6,li si']
PRED:  ['6,li si']
SAME:  ['6,li si']
TRUE:  ['7,pavel moroz', '7,yakov estrin']
PRED:  ['7,pavel moroz', '7,the yakov estrin']
SAME:  ['7,pavel moroz']
PARTIAL:  ['7,pavel moroz']
TRUE:  ['8,broadmeadows, victoria']
PRED:  ['8,broadmeadows, victoria']
SAME:  ['8,broadmeadows, victoria']
TRUE:  ['9,asc creative services']
PRED:  ['9,asc cre

PARTIAL:  ['186,mausoleum of augustus']
TRUE:  ['187,1st free french division']
PRED:  ['187,1st free french division']
SAME:  ['187,1st free french division']
TRUE:  ['188,mediterranean sea']
PRED:  ['188,the mediterranean sea']
SAME:  []
NONE_TRUE_EQUAL:  ['188,mediterranean sea']
NONE_PRED_EQUAL:  ['188,the mediterranean sea']
TRUE:  ['189,zambia', '189,ikelenge district']
PRED:  ['189,zambia', '189,ikelenge']
SAME:  ['189,zambia']
PARTIAL:  ['189,zambia']
TRUE:  ['190,royal society']
PRED:  ['190,royal society']
SAME:  ['190,royal society']
TRUE:  ['191,acer nigrum']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['191,acer nigrum']
NONE_PRED_EQUAL:  ['']
TRUE:  ['192,filipinos']
PRED:  ['192,filipinos']
SAME:  ['192,filipinos']
TRUE:  ['193,neil brown', '193,andrew peacock']
PRED:  ['193,neil brown', '193,andrew peacock']
SAME:  ['193,neil brown', '193,andrew peacock']
TRUE:  ['194,wizards vs aliens', '194,into the dalek']
PRED:  ['194,into the dalek', '194,wizards']
SAME:  ['194,into th

PRED:  ['392,us']
SAME:  ['392,us']
TRUE:  ['393,random house']
PRED:  ['393,random house']
SAME:  ['393,random house']
TRUE:  ['394,lori black', '394,henry e. catto jr']
PRED:  ['394,lori black', '394,henry e. catto jr']
SAME:  ['394,lori black', '394,henry e. catto jr']
TRUE:  ['395,arizona state sun devils']
PRED:  ['395,arizona', '395,sun devils']
NONE_TRUE_MORE:  ['395,arizona state sun devils']
NONE_PRED_MORE:  ['395,arizona', '395,sun devils']
TRUE:  ['396,wyoming']
PRED:  ['396,wyoming']
SAME:  ['396,wyoming']
TRUE:  ['397,claiborne pell', '397,william luther hill']
PRED:  ['397,william luther hill', '397,claiborne pell']
SAME:  ['397,william luther hill', '397,claiborne pell']
TRUE:  ['398,ridley scott', '398,vangelis']
PRED:  ['398,ridley scott', '398,vangelis']
SAME:  ['398,ridley scott', '398,vangelis']
TRUE:  ['399,jeffrey werleman', '399,american mediterranean sea']
PRED:  ['399,jeffrey werleman', '399,american mediterranean sea']
SAME:  ['399,jeffrey werleman', '399,amer

NONE_PRED_EQUAL:  ['527,eo']
TRUE:  ['528,nairobi']
PRED:  ['528,nairobi']
SAME:  ['528,nairobi']
TRUE:  ['529,utopian and dystopian fiction']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['529,utopian and dystopian fiction']
NONE_PRED_EQUAL:  ['']
TRUE:  ['530,luke list', '530,k. terry dornbush']
PRED:  ['530,luke list', '530,k. terry dornbush']
SAME:  ['530,luke list', '530,k. terry dornbush']
TRUE:  ['531,philip novak']
PRED:  ['531,philip novak']
SAME:  ['531,philip novak']
TRUE:  ['532,westchester avenue', '532,the bronx']
PRED:  ['532,westchester avenue', '532,bronx']
SAME:  ['532,westchester avenue']
PARTIAL:  ['532,westchester avenue']
TRUE:  ['533,c__', '533,microsoft windows']
PRED:  ['533,c++', '533,microsoft', '533,windows']
NONE_TRUE_MORE:  ['533,c__', '533,microsoft windows']
NONE_PRED_MORE:  ['533,c++', '533,microsoft', '533,windows']
TRUE:  ['534,victor entertainment']
PRED:  ['534,the victor entertainment']
SAME:  []
NONE_TRUE_EQUAL:  ['534,victor entertainment']
NONE_PRED_

NONE_TRUE_EQUAL:  ['661,air interdiction']
NONE_PRED_EQUAL:  ['661,air']
TRUE:  ['662,the ultimate fighter: brazil 2']
PRED:  ['662,brazil', '662,2']
NONE_TRUE_MORE:  ['662,the ultimate fighter: brazil 2']
NONE_PRED_MORE:  ['662,brazil', '662,2']
TRUE:  ['663,tirana']
PRED:  ['663,tirana']
SAME:  ['663,tirana']
TRUE:  ['664,jon and vangelis']
PRED:  ['664,jon', '664,vangelis']
NONE_TRUE_MORE:  ['664,jon and vangelis']
NONE_PRED_MORE:  ['664,jon', '664,vangelis']
TRUE:  ['665,wcw mayhem']
PRED:  ['665,wcw']
SAME:  []
NONE_TRUE_EQUAL:  ['665,wcw mayhem']
NONE_PRED_EQUAL:  ['665,wcw']
TRUE:  ['666,marika gombitov']
PRED:  ['666,marika gombitov']
SAME:  ['666,marika gombitov']
TRUE:  ['667,9tv']
PRED:  ['667,9tv']
SAME:  ['667,9tv']
TRUE:  ['668,iraq', '668,army air corps']
PRED:  ['668,army air corps', '668,iraq']
SAME:  ['668,army air corps', '668,iraq']
TRUE:  ['669,north fort myers high school']
PRED:  ['669,north fort myers high school']
SAME:  ['669,north fort myers high school']
TRU

PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['801,union']
NONE_PRED_EQUAL:  ['']
TRUE:  ['802,oscar and lucinda']
PRED:  ['802,oscar', '802,lucinda']
NONE_TRUE_MORE:  ['802,oscar and lucinda']
NONE_PRED_MORE:  ['802,oscar', '802,lucinda']
TRUE:  ['803,daund junction railway station']
PRED:  ['803,daund junction']
SAME:  []
NONE_TRUE_EQUAL:  ['803,daund junction railway station']
NONE_PRED_EQUAL:  ['803,daund junction']
TRUE:  ["804,broca's brain"]
PRED:  ['804,broca', '804,brain']
NONE_TRUE_MORE:  ["804,broca's brain"]
NONE_PRED_MORE:  ['804,broca', '804,brain']
TRUE:  ['805,abhijit kunte', '805,maharashtra chess association']
PRED:  ['805,the maharashtra chess association', '805,abhijit']
SAME:  []
NONE_TRUE_EQUAL:  ['805,abhijit kunte', '805,maharashtra chess association']
NONE_PRED_EQUAL:  ['805,the maharashtra chess association', '805,abhijit']
TRUE:  ['806,paramount television', '806,cheers beacon hill']
PRED:  ['806,paramount television', '806,cheers beacon']
SAME:  ['806,paramount t

TRUE:  ['940,aarnoud van heemstra']
PRED:  ['940,aaround van heemstra']
SAME:  []
NONE_TRUE_EQUAL:  ['940,aarnoud van heemstra']
NONE_PRED_EQUAL:  ['940,aaround van heemstra']
TRUE:  ['941,igor shpillband']
PRED:  ['941,igor shpillband']
SAME:  ['941,igor shpillband']
TRUE:  ['942,burbank, california']
PRED:  ['942,burbank', '942,california']
NONE_TRUE_MORE:  ['942,burbank, california']
NONE_PRED_MORE:  ['942,burbank', '942,california']
TRUE:  ['943,kakae']
PRED:  ['943,kakae']
SAME:  ['943,kakae']
TRUE:  ['944,vadodara junction railway station']
PRED:  ['944,vadodara junction']
SAME:  []
NONE_TRUE_EQUAL:  ['944,vadodara junction railway station']
NONE_PRED_EQUAL:  ['944,vadodara junction']
TRUE:  ['945,salford city f.c.']
PRED:  ['945,salford city f.c']
SAME:  []
NONE_TRUE_EQUAL:  ['945,salford city f.c.']
NONE_PRED_EQUAL:  ['945,salford city f.c']
TRUE:  ['946,doug acomb']
PRED:  ['946,doug acomb']
SAME:  ['946,doug acomb']
TRUE:  ['947,east whiteland township', '947,pennsylvania']
P

SAME:  ['1075,boga']
TRUE:  ['1076,henry clay', '1076,william luther hill']
PRED:  ['1076,henry clay', '1076,the william luther hill']
SAME:  ['1076,henry clay']
PARTIAL:  ['1076,henry clay']
TRUE:  ['1077,chris dubois']
PRED:  ['1077,one', '1077,chris dubois']
MORE:  ['1077,chris dubois']
TRUE:  ['1078,xianren cave', '1078,estadio nacional de costa rica']
PRED:  ['1078,estadio nacional de costa rica', '1078,xianren cave']
SAME:  ['1078,estadio nacional de costa rica', '1078,xianren cave']
TRUE:  ['1079,cahora bassa']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1079,cahora bassa']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1080,lawrence realization stakes']
PRED:  ['1080,the lawrence realization stakes']
SAME:  []
NONE_TRUE_EQUAL:  ['1080,lawrence realization stakes']
NONE_PRED_EQUAL:  ['1080,the lawrence realization stakes']
TRUE:  ['1081,merritt paulson']
PRED:  ['1081,merritt paulson']
SAME:  ['1081,merritt paulson']
TRUE:  ['1082,georg meissner', '1082,karl ewald hasse']
PRED:  ['1082,karl ewal

PRED:  ['1213,chidambaram subramaniam']
SAME:  ['1213,chidambaram subramaniam']
TRUE:  ['1214,bacilli']
PRED:  ['1214,bacilli']
SAME:  ['1214,bacilli']
TRUE:  ['1215,british gazette', '1215,victor hope, 2nd marquess of linlithgow']
PRED:  ['1215,victor hope', '1215,2nd marquess of linlithgow', '1215,british']
NONE_TRUE_MORE:  ['1215,british gazette', '1215,victor hope, 2nd marquess of linlithgow']
NONE_PRED_MORE:  ['1215,victor hope', '1215,2nd marquess of linlithgow', '1215,british']
TRUE:  ['1216,magnolia place', '1216,greek revival architectural style']
PRED:  ['']
NONE_TRUE_LESS:  ['1216,magnolia place', '1216,greek revival architectural style']
NONE_PRED_LESS:  ['']
TRUE:  ['1217,clan mcduck', '1217,duck family']
PRED:  ['1217,clan mcduck']
LESS:  ['1217,clan mcduck']
TRUE:  ['1218,scotland']
PRED:  ['1218,scotland']
SAME:  ['1218,scotland']
TRUE:  ['1219,ridley scott', '1219,warner bros.']
PRED:  ['1219,warner bros.', '1219,ridley scott']
SAME:  ['1219,warner bros.', '1219,ridley

PRED:  ['1414,urbain']
SAME:  []
NONE_TRUE_EQUAL:  ['1414,urbain le verrier']
NONE_PRED_EQUAL:  ['1414,urbain']
TRUE:  ['1415,west papus football team']
PRED:  ['1415,papus']
SAME:  []
NONE_TRUE_EQUAL:  ['1415,west papus football team']
NONE_PRED_EQUAL:  ['1415,papus']
TRUE:  ['1416,megabalanus concinnus']
PRED:  ['1416,megabalanus']
SAME:  []
NONE_TRUE_EQUAL:  ['1416,megabalanus concinnus']
NONE_PRED_EQUAL:  ['1416,megabalanus']
TRUE:  ['1417,barbara ann crancer']
PRED:  ['1417,barbara ann crancer']
SAME:  ['1417,barbara ann crancer']
TRUE:  ['1418,sony music entertainment']
PRED:  ['1418,sony music entertainment']
SAME:  ['1418,sony music entertainment']
TRUE:  ['1419,males']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1419,males']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1420,marlboro township, new jersey']
PRED:  ['1420,marlboro township', '1420,new jersey']
NONE_TRUE_MORE:  ['1420,marlboro township, new jersey']
NONE_PRED_MORE:  ['1420,marlboro township', '1420,new jersey']
TRUE:  ['1421,for

NONE_TRUE_EQUAL:  ['1546,toronto argonauts']
NONE_PRED_EQUAL:  ['1546,toronto']
TRUE:  ['1547,microsoft surface', '1547,microsoft azure']
PRED:  ['1547,microsoft azure', '1547,microsoft']
SAME:  ['1547,microsoft azure']
PARTIAL:  ['1547,microsoft azure']
TRUE:  ['1548,google cloud connect', '1548,microsoft office picture manager']
PRED:  ['1548,microsoft office picture', '1548,google clud connect']
SAME:  []
NONE_TRUE_EQUAL:  ['1548,google cloud connect', '1548,microsoft office picture manager']
NONE_PRED_EQUAL:  ['1548,microsoft office picture', '1548,google clud connect']
TRUE:  ['1549,iceman (comics)', '1549,stan lee']
PRED:  ['1549,stan lee']
LESS:  ['1549,stan lee']
TRUE:  ['1550,luke scott']
PRED:  ['1550,luke scott']
SAME:  ['1550,luke scott']
TRUE:  ['1551,kumta', '1551,mizoram legislative assembly']
PRED:  ['1551,whihc', '1551,the mizoram legislative assembly']
SAME:  []
NONE_TRUE_EQUAL:  ['1551,kumta', '1551,mizoram legislative assembly']
NONE_PRED_EQUAL:  ['1551,whihc', '155

PRED:  ['1683,red willow creek']
SAME:  ['1683,red willow creek']
TRUE:  ['1684,manu cornet']
PRED:  ['1684,manu']
SAME:  []
NONE_TRUE_EQUAL:  ['1684,manu cornet']
NONE_PRED_EQUAL:  ['1684,manu']
TRUE:  ['1685,fazlur rahman khan']
PRED:  ['1685,fazlur rahman khan']
SAME:  ['1685,fazlur rahman khan']
TRUE:  ['1686,defender']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1686,defender']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1687,barbara bestor']
PRED:  ['1687,barbara']
SAME:  []
NONE_TRUE_EQUAL:  ['1687,barbara bestor']
NONE_PRED_EQUAL:  ['1687,barbara']
TRUE:  ['1688,ontario']
PRED:  ['1688,ontario']
SAME:  ['1688,ontario']
TRUE:  ['1689,mrunalini kunte']
PRED:  ['1689,mrunalini kunte']
SAME:  ['1689,mrunalini kunte']
TRUE:  ['1690,james w. hyatt']
PRED:  ['1690,james w. hyatt']
SAME:  ['1690,james w. hyatt']
TRUE:  ['1691,diana taylor']
PRED:  ['1691,diana taylor']
SAME:  ['1691,diana taylor']
TRUE:  ['1692,bradenton']
PRED:  ['1692,brandenton']
SAME:  []
NONE_TRUE_EQUAL:  ['1692,bradenton']
NON

PRED:  ['1822,colorado', '1822,162', '1822,the utah state route']
MORE:  ['1822,colorado']
TRUE:  ['1823,houston']
PRED:  ['1823,houston']
SAME:  ['1823,houston']
TRUE:  ['1824,michigan wolverines', '1824,jim harbaugh']
PRED:  ['1824,jim harbaugh', '1824,michigan']
SAME:  ['1824,jim harbaugh']
PARTIAL:  ['1824,jim harbaugh']
TRUE:  ['1825,university']
PRED:  ['1825,university of melbourne, faculty of vca & mcm']
SAME:  []
NONE_TRUE_EQUAL:  ['1825,university']
NONE_PRED_EQUAL:  ['1825,university of melbourne, faculty of vca & mcm']
TRUE:  ['1826,thomas egerton']
PRED:  ['1826,thomas egerton']
SAME:  ['1826,thomas egerton']
TRUE:  ['1827,samuel w. mccall', '1827,plymouth, vermont']
PRED:  ['1827,plymouth', '1827,vermont', '1827,samuel w. mccall']
MORE:  ['1827,samuel w. mccall']
TRUE:  ['1828,christina crawford', '1828,booker t']
PRED:  ['1828,booker']
NONE_TRUE_LESS:  ['1828,christina crawford', '1828,booker t']
NONE_PRED_LESS:  ['1828,booker']
TRUE:  ['1829,garbage']
PRED:  ['1829,garb

PRED:  ['1961,nyc fc']
SAME:  ['1961,nyc fc']
TRUE:  ['1962,roh tae-woo']
PRED:  ['1962,roh tae-woo']
SAME:  ['1962,roh tae-woo']
TRUE:  ['1963,madiswil', '1963,gutenberg']
PRED:  ['1963,gutenberg', '1963,madiswil']
SAME:  ['1963,gutenberg', '1963,madiswil']
TRUE:  ['1964,kaplan, inc.']
PRED:  ['1964,kaplan', '1964,inc']
NONE_TRUE_MORE:  ['1964,kaplan, inc.']
NONE_PRED_MORE:  ['1964,kaplan', '1964,inc']
TRUE:  ['1965,wwe']
PRED:  ['1965,wwe']
SAME:  ['1965,wwe']
TRUE:  ['1966,concor holdings']
PRED:  ['1966,concor']
SAME:  []
NONE_TRUE_EQUAL:  ['1966,concor holdings']
NONE_PRED_EQUAL:  ['1966,concor']
TRUE:  ['1967,sony bank']
PRED:  ['1967,sony bank']
SAME:  ['1967,sony bank']
TRUE:  ['1968,miami']
PRED:  ['1968,miami']
SAME:  ['1968,miami']
TRUE:  ['1969,job durfee', '1969,daniel l d granger']
PRED:  ['1969,daniel l d granger', '1969,job durfee', '1969,us']
MORE:  ['1969,daniel l d granger', '1969,job durfee']
TRUE:  ['1970,viveka babajee', '1970,ronny vencatachellum']
PRED:  ['1970,

TRUE:  ['2102,san macros river', '2102,texas']
PRED:  ['2102,texas']
LESS:  ['2102,texas']
TRUE:  ['2103,quartermaster corps']
PRED:  ['2103,quartermaster']
SAME:  []
NONE_TRUE_EQUAL:  ['2103,quartermaster corps']
NONE_PRED_EQUAL:  ['2103,quartermaster']
TRUE:  ['2104,david prowse']
PRED:  ['2104,david prowse']
SAME:  ['2104,david prowse']
TRUE:  ['2105,take me there', '2105,tedd riley']
PRED:  ['2105,tedd riley']
LESS:  ['2105,tedd riley']
TRUE:  ['2106,mouse morris']
PRED:  ["2106,mouse morris'"]
SAME:  []
NONE_TRUE_EQUAL:  ['2106,mouse morris']
NONE_PRED_EQUAL:  ["2106,mouse morris'"]
TRUE:  ['2107,washington dulles international airport', '2107,silver airways']
PRED:  ['2107,silver airways', '2107,the washington dulles international airport']
SAME:  ['2107,silver airways']
PARTIAL:  ['2107,silver airways']
TRUE:  ['2108,william luther hill']
PRED:  ['2108,william luther hill']
SAME:  ['2108,william luther hill']
TRUE:  ['2109,kabir suman']
PRED:  ['2109,kabir suman']
SAME:  ['2109,

TRUE:  ['2234,baku puppet theatre', '2234,state academic opera and ballet theater']
PRED:  ['2234,baku puppet theatre']
LESS:  ['2234,baku puppet theatre']
TRUE:  ['2235,q tip', '2235,afrika baby bam']
PRED:  ['2235,afrika baby bam']
LESS:  ['2235,afrika baby bam']
TRUE:  ['2236,jimmie lou fisher']
PRED:  ['2236,jimmie lou fisher']
SAME:  ['2236,jimmie lou fisher']
TRUE:  ['2237,north sea']
PRED:  ['2237,the north sea']
SAME:  []
NONE_TRUE_EQUAL:  ['2237,north sea']
NONE_PRED_EQUAL:  ['2237,the north sea']
TRUE:  ['2238,garage']
PRED:  ['2238,garage']
SAME:  ['2238,garage']
TRUE:  ['2239,manu cornet', '2239,google.by']
PRED:  ['2239,manu cornet']
LESS:  ['2239,manu cornet']
TRUE:  ['2240,primus']
PRED:  ['2240,primus']
SAME:  ['2240,primus']
TRUE:  ['2241,paul morphy']
PRED:  ['2241,paul morphy']
SAME:  ['2241,paul morphy']
TRUE:  ['2242,ne daj se, nina', '2242,croatia']
PRED:  ['2242,daj se', '2242,nina', '2242,first', '2242,croatia']
MORE:  ['2242,croatia']
TRUE:  ['2243,patts colleg

TRUE:  ['2306,creature comforts']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2306,creature comforts']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2307,scott haran']
PRED:  ['2307,scott haran']
SAME:  ['2307,scott haran']
TRUE:  ['2308,british columbian']
PRED:  ['2308,british', '2308,columbian']
NONE_TRUE_MORE:  ['2308,british columbian']
NONE_PRED_MORE:  ['2308,british', '2308,columbian']
TRUE:  ['2309,habib boromand dashghapu']
PRED:  ['2309,habib boromand dashghapu']
SAME:  ['2309,habib boromand dashghapu']
TRUE:  ['2310,turner broadcasting system']
PRED:  ['2310,turner broadcasting system']
SAME:  ['2310,turner broadcasting system']
TRUE:  ['2311,johnny cash']
PRED:  ['2311,johnny cash']
SAME:  ['2311,johnny cash']
TRUE:  ['2312,newin chidchob', '2312,seni pramoj']
PRED:  ['2312,newin chidchob', '2312,seni pramoj']
SAME:  ['2312,newin chidchob', '2312,seni pramoj']
TRUE:  ['2313,taiwanese']
PRED:  ['2313,taiwanese']
SAME:  ['2313,taiwanese']
TRUE:  ['2314,salthill']
PRED:  ['']
SAME:  []
NONE_T

PRED:  ['2441,california']
SAME:  ['2441,california']
TRUE:  ['2442,christopher s. stewart']
PRED:  ['2442,christopher s. stewart']
SAME:  ['2442,christopher s. stewart']
TRUE:  ['2443,jawaharlal nehru']
PRED:  ['2443,jawaharlal nehru']
SAME:  ['2443,jawaharlal nehru']
TRUE:  ['2444,david scherman']
PRED:  ['2444,scherman']
SAME:  []
NONE_TRUE_EQUAL:  ['2444,david scherman']
NONE_PRED_EQUAL:  ['2444,scherman']
TRUE:  ['2445,arlington county, virginia']
PRED:  ['2445,arlington county', '2445,virginia']
NONE_TRUE_MORE:  ['2445,arlington county, virginia']
NONE_PRED_MORE:  ['2445,arlington county', '2445,virginia']
TRUE:  ['2446,sequoia national park']
PRED:  ['2446,sequoia national park']
SAME:  ['2446,sequoia national park']
TRUE:  ['2447,duwamish river']
PRED:  ['2447,duwamish river']
SAME:  ['2447,duwamish river']
TRUE:  ['2448,pulau ujong']
PRED:  ['2448,pulau']
SAME:  []
NONE_TRUE_EQUAL:  ['2448,pulau ujong']
NONE_PRED_EQUAL:  ['2448,pulau']
TRUE:  ['2449,sierra del merendin']
PRED:

TRUE:  ['2580,vc lokomotiv novosibirsk']
PRED:  ['2580,vc lokomotiv novosibirsk']
SAME:  ['2580,vc lokomotiv novosibirsk']
TRUE:  ['2581,marshall arisman']
PRED:  ['2581,marshall arisman']
SAME:  ['2581,marshall arisman']
TRUE:  ['2582,mahbuba islam rakhi']
PRED:  ['2582,mahbuba', '2582,islam rakhi']
NONE_TRUE_MORE:  ['2582,mahbuba islam rakhi']
NONE_PRED_MORE:  ['2582,mahbuba', '2582,islam rakhi']
TRUE:  ['2583,luis enrique']
PRED:  ['2583,luis enrique']
SAME:  ['2583,luis enrique']
TRUE:  ['2584,oscoda, michigan']
PRED:  ['2584,oscoda', '2584,michigan']
NONE_TRUE_MORE:  ['2584,oscoda, michigan']
NONE_PRED_MORE:  ['2584,oscoda', '2584,michigan']
TRUE:  ['2585,alton ochsner', '2585,michael hahn']
PRED:  ['2585,the alton ochsner', '2585,michael hahn']
SAME:  ['2585,michael hahn']
PARTIAL:  ['2585,michael hahn']
TRUE:  ['2586,john forbes']
PRED:  ['2586,john forbes']
SAME:  ['2586,john forbes']
TRUE:  ['2587,jeff conaway']
PRED:  ['2587,jeff conaway']
SAME:  ['2587,jeff conaway']
TRUE:  

PRED:  ['2652,glendale', '2652,california']
NONE_TRUE_MORE:  ['2652,glendale, california']
NONE_PRED_MORE:  ['2652,glendale', '2652,california']
TRUE:  ['2653,deadsea']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2653,deadsea']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2654,united states']
PRED:  ['2654,united states']
SAME:  ['2654,united states']
TRUE:  ['2655,argentina primera division']
PRED:  ['2655,the argentine primera division']
SAME:  []
NONE_TRUE_EQUAL:  ['2655,argentina primera division']
NONE_PRED_EQUAL:  ['2655,the argentine primera division']
TRUE:  ['2656,laurie johnson', '2656,stanley kubrick']
PRED:  ["2656,stanley kubrick's", '2656,laurie johnson']
SAME:  ['2656,laurie johnson']
PARTIAL:  ['2656,laurie johnson']
TRUE:  ['2657,gredelj']
PRED:  ['2657,gredelj']
SAME:  ['2657,gredelj']
TRUE:  ['2658,chris lebenzon', '2658,batman returns']
PRED:  ['2658,batman']
NONE_TRUE_LESS:  ['2658,chris lebenzon', '2658,batman returns']
NONE_PRED_LESS:  ['2658,batman']
TRUE:  ['2659,primus', '26

TRUE:  ['2789,sho v8 engine', '2789,y block engine']
PRED:  ['2789,sho v8']
NONE_TRUE_LESS:  ['2789,sho v8 engine', '2789,y block engine']
NONE_PRED_LESS:  ['2789,sho v8']
TRUE:  ['2790,golden globe']
PRED:  ['2790,golden globe']
SAME:  ['2790,golden globe']
TRUE:  ["2791,mind blowin'"]
PRED:  ["2791,mind blowin'"]
SAME:  ["2791,mind blowin'"]
TRUE:  ['2792,fantasy']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2792,fantasy']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2793,ncaa division i football bowl subdivision']
PRED:  ['2793,ncaa']
SAME:  []
NONE_TRUE_EQUAL:  ['2793,ncaa division i football bowl subdivision']
NONE_PRED_EQUAL:  ['2793,ncaa']
TRUE:  ['2794,coneygree']
PRED:  ['2794,coneygree']
SAME:  ['2794,coneygree']
TRUE:  ["2795,john madden football '92"]
PRED:  ['2795,92']
SAME:  []
NONE_TRUE_EQUAL:  ["2795,john madden football '92"]
NONE_PRED_EQUAL:  ['2795,92']
TRUE:  ['2796,history of trier', '2796,nicomedia']
PRED:  ['2796,nicomedia']
LESS:  ['2796,nicomedia']
TRUE:  ['2797,antoine bibea

SAME:  ['2928,ep r.e.p. 1']
TRUE:  ['2929,john roeslein']
PRED:  ['2929,john roeslein']
SAME:  ['2929,john roeslein']
TRUE:  ['2930,columbia records']
PRED:  ['2930,columbia records']
SAME:  ['2930,columbia records']
TRUE:  ['2931,jon speelman', '2931,polytechnic university']
PRED:  ['2931,jon', '2931,polytechnic university of the philippines taguig']
SAME:  []
NONE_TRUE_EQUAL:  ['2931,jon speelman', '2931,polytechnic university']
NONE_PRED_EQUAL:  ['2931,jon', '2931,polytechnic university of the philippines taguig']
TRUE:  ['2932,taekwondo']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2932,taekwondo']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2933,vmf-155', '2933,vmf-218']
PRED:  ['2933,vmf-155', '2933,vmf-218']
SAME:  ['2933,vmf-155', '2933,vmf-218']
TRUE:  ['2934,amstelveen']
PRED:  ['2934,amstelveen']
SAME:  ['2934,amstelveen']
TRUE:  ['2935,ontario international airport', '2935,inland empire']
PRED:  ['2935,ontario international airport', '2935,the inland empire']
SAME:  ['2935,ontario intern

PRED:  ['3071,shelby county', '3071,tennessee']
SAME:  ['3071,shelby county', '3071,tennessee']
TRUE:  ['3072,south side elevated railroad', '3072,sakari suzuki']
PRED:  ['3072,the south side elevated railroad', '3072,the sakari suzuki']
SAME:  []
NONE_TRUE_EQUAL:  ['3072,south side elevated railroad', '3072,sakari suzuki']
NONE_PRED_EQUAL:  ['3072,the south side elevated railroad', '3072,the sakari suzuki']
TRUE:  ['3073,john roberts']
PRED:  ['3073,john roberts']
SAME:  ['3073,john roberts']
TRUE:  ['3074,continental army']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3074,continental army']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3075,andrea poapst']
PRED:  ['3075,one', '3075,andrea']
NONE_TRUE_MORE:  ['3075,andrea poapst']
NONE_PRED_MORE:  ['3075,one', '3075,andrea']
TRUE:  ['3076,connecticut lakes']
PRED:  ['3076,the connecticut lakes']
SAME:  []
NONE_TRUE_EQUAL:  ['3076,connecticut lakes']
NONE_PRED_EQUAL:  ['3076,the connecticut lakes']
TRUE:  ['3077,blayse', '3077,take me there']
PRED:  [

LESS:  ['3208,west germany']
TRUE:  ['3209,dee dee warwick']
PRED:  ['3209,dee dee warwick']
SAME:  ['3209,dee dee warwick']
TRUE:  ['3210,miss michigan usa']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3210,miss michigan usa']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3211,colorectal cancer']
PRED:  ['3211,one']
SAME:  []
NONE_TRUE_EQUAL:  ['3211,colorectal cancer']
NONE_PRED_EQUAL:  ['3211,one']
TRUE:  ['3212,england national under-20 football team', '3212,england national under-21 football team']
PRED:  ['3212,england', '3212,england']
SAME:  []
NONE_TRUE_EQUAL:  ['3212,england national under-20 football team', '3212,england national under-21 football team']
NONE_PRED_EQUAL:  ['3212,england', '3212,england']
TRUE:  ['3213,centre']
PRED:  ['3213,centre']
SAME:  ['3213,centre']
TRUE:  ['3214,dick spalding', '3214,harry huston']
PRED:  ['3214,harry huston', '3214,dick spalding', '3214,first']
MORE:  ['3214,harry huston', '3214,dick spalding']
TRUE:  ['3215,vishwajyot high school', '3215,battle che

PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3344,gridiron football position']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3345,putri raemawasti']
PRED:  ['3345,putri raemawasti']
SAME:  ['3345,putri raemawasti']
TRUE:  ['3346,isa', '3346,sacred heart convent school']
PRED:  ['3346,isa', '3346,bangkok', '3346,sacred heart convent school (']
MORE:  ['3346,isa']
TRUE:  ['3347,2014-2015 football league championship']
PRED:  ['3347,2014-2015', '3347,football league championship']
NONE_TRUE_MORE:  ['3347,2014-2015 football league championship']
NONE_PRED_MORE:  ['3347,2014-2015', '3347,football league championship']
TRUE:  ['3348,upper neretva']
PRED:  ['3348,upper neretva']
SAME:  ['3348,upper neretva']
TRUE:  ['3349,nikos pateras', '3349,anionios g.s.s. season']
PRED:  ['3349,nikos pateras', '3349,2014-15']
SAME:  ['3349,nikos pateras']
PARTIAL:  ['3349,nikos pateras']
TRUE:  ['3350,joe shuster 2014-15']
PRED:  ['3350,joe shuster']
SAME:  []
NONE_TRUE_EQUAL:  ['3350,joe shuster 2014-15']
NONE_PRED_EQUAL

NONE_TRUE_EQUAL:  ['3408,wild risk']
NONE_PRED_EQUAL:  ['']
TRUE:  ["3409,mickey's mellerdrammer"]
PRED:  ['3409,mickey', '3409,mellerdrammer']
NONE_TRUE_MORE:  ["3409,mickey's mellerdrammer"]
NONE_PRED_MORE:  ['3409,mickey', '3409,mellerdrammer']
TRUE:  ['3410,western naval command']
PRED:  ['3410,western naval command']
SAME:  ['3410,western naval command']
TRUE:  ['3411,alcal de henares']
PRED:  ['3411,alcal de henares']
SAME:  ['3411,alcal de henares']
TRUE:  ['3412,harvey rosenstock']
PRED:  ['3412,harvey  rosenstock']
SAME:  []
NONE_TRUE_EQUAL:  ['3412,harvey rosenstock']
NONE_PRED_EQUAL:  ['3412,harvey  rosenstock']
TRUE:  ['3413,dionne warwick', '3413,gary garland']
PRED:  ['3413,dionne warwick', '3413,gary garland']
SAME:  ['3413,dionne warwick', '3413,gary garland']
TRUE:  ['3414,israel']
PRED:  ['3414,israel']
SAME:  ['3414,israel']
TRUE:  ['3415,cheasty boulevard south']
PRED:  ['3415,boulevard south']
SAME:  []
NONE_TRUE_EQUAL:  ['3415,cheasty boulevard south']
NONE_PRED_E

SAME:  ['3545,ferrero spa']
TRUE:  ['3546,nick castle']
PRED:  ['3546,nick castle']
SAME:  ['3546,nick castle']
TRUE:  ['3547,simon fuller', '3547,fox']
PRED:  ['3547,simon fuller', '3547,fox']
SAME:  ['3547,simon fuller', '3547,fox']
TRUE:  ['3548,birmingham']
PRED:  ['3548,birmingham']
SAME:  ['3548,birmingham']
TRUE:  ['3549,private universities']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['3549,private universities']
NONE_PRED_EQUAL:  ['']
TRUE:  ['3550,wrvu', '3550,todd j. campbell']
PRED:  ['3550,the todd j. campbell']
NONE_TRUE_LESS:  ['3550,wrvu', '3550,todd j. campbell']
NONE_PRED_LESS:  ['3550,the todd j. campbell']
TRUE:  ['3551,french southern and antarctic lands', '3551,republic of montenegro']
PRED:  ['3551,the french southern and antarctic lands']
NONE_TRUE_LESS:  ['3551,french southern and antarctic lands', '3551,republic of montenegro']
NONE_PRED_LESS:  ['3551,the french southern and antarctic lands']
TRUE:  ['3552,chris marve']
PRED:  ["3552,the chris marve's"]
SAME:  [

TRUE:  ["3751,soccer bowl '76", '3751,boundary dam']
PRED:  ['3751,one', '3751,boundary dam', "3751,soccer bowl '", '3751,76']
MORE:  ['3751,boundary dam']
TRUE:  ['3752,england']
PRED:  ['3752,england']
SAME:  ['3752,england']
TRUE:  ['3753,frank stilwell']
PRED:  ['3753,frank stilwell']
SAME:  ['3753,frank stilwell']
TRUE:  ['3754,i pray on christmas']
PRED:  ['3754,christmas']
SAME:  []
NONE_TRUE_EQUAL:  ['3754,i pray on christmas']
NONE_PRED_EQUAL:  ['3754,christmas']
TRUE:  ['3755,gettv', '3755,hypnotize minds']
PRED:  ['3755,gettv', '3755,hypnotize']
SAME:  ['3755,gettv']
PARTIAL:  ['3755,gettv']
TRUE:  ['3756,doubleday']
PRED:  ['3756,doubleday']
SAME:  ['3756,doubleday']
TRUE:  ['3757,miguel de cervantes']
PRED:  ['3757,miguel de cervantes']
SAME:  ['3757,miguel de cervantes']
TRUE:  ['3758,olyokma river', '3758,lena river']
PRED:  ['3758,olyokma river', '3758,lena river']
SAME:  ['3758,olyokma river', '3758,lena river']
TRUE:  ['3759,ucla bruins football']
PRED:  ['3759,ucla b

SAME:  ['3960,malik ausean', '3960,kyle fuller']
TRUE:  ['3961,indiana', '3961,graham holdings company']
PRED:  ['3961,graham holdings company', '3961,indiana']
SAME:  ['3961,graham holdings company', '3961,indiana']
TRUE:  ['3962,patricia amorim']
PRED:  ['3962,patricia amorim']
SAME:  ['3962,patricia amorim']
TRUE:  ['3963,hyundai lavita', '3963,hyundai elantra']
PRED:  ['3963,hyundai', '3963,hyundai']
SAME:  []
NONE_TRUE_EQUAL:  ['3963,hyundai lavita', '3963,hyundai elantra']
NONE_PRED_EQUAL:  ['3963,hyundai', '3963,hyundai']
TRUE:  ['3964,oakland athletics']
PRED:  ['3964,oakland']
SAME:  []
NONE_TRUE_EQUAL:  ['3964,oakland athletics']
NONE_PRED_EQUAL:  ['3964,oakland']
TRUE:  ['3965,ryan powell', '3965,michael']
PRED:  ['3965,michael', '3965,ryan powell']
SAME:  ['3965,michael', '3965,ryan powell']
TRUE:  ['3966,indian standard timezone']
PRED:  ['3966,indian']
SAME:  []
NONE_TRUE_EQUAL:  ['3966,indian standard timezone']
NONE_PRED_EQUAL:  ['3966,indian']
TRUE:  ['3967,space shutt

TRUE:  ['4165,bangladesh']
PRED:  ['4165,one', '4165,bangladesh']
MORE:  ['4165,bangladesh']
TRUE:  ['4166,torrey pines gliderport']
PRED:  ['4166,torrey pines gliderport']
SAME:  ['4166,torrey pines gliderport']
TRUE:  ['4167,deion sanders']
PRED:  ['4167,deion sanders']
SAME:  ['4167,deion sanders']
TRUE:  ['4168,lori black', '4168,adrian a. basora']
PRED:  ['4168,adrian a. basora', '4168,lori black']
SAME:  ['4168,adrian a. basora', '4168,lori black']
TRUE:  ['4169,michael house']
PRED:  ['4169,one', '4169,michael house']
MORE:  ['4169,michael house']
TRUE:  ['4170,jennifer lopez', '4170,randy jackson']
PRED:  ['4170,randy jackson', '4170,jennifer lopez']
SAME:  ['4170,randy jackson', '4170,jennifer lopez']
TRUE:  ['']
PRED:  ['']
SAME:  ['']
TRUE:  ['4172,waitakere united']
PRED:  ['4172,waitakere united']
SAME:  ['4172,waitakere united']
TRUE:  ['4173,operation barbarossa']
PRED:  ['4173,barbarossa']
SAME:  []
NONE_TRUE_EQUAL:  ['4173,operation barbarossa']
NONE_PRED_EQUAL:  ['417

SAME:  ['4369,best mate']
TRUE:  ['4370,ozzy osbourne', '4370,kelly osbourne']
PRED:  ['4370,kelly osbourne & ozzy osbourne']
NONE_TRUE_LESS:  ['4370,ozzy osbourne', '4370,kelly osbourne']
NONE_PRED_LESS:  ['4370,kelly osbourne & ozzy osbourne']
TRUE:  ['4371,ontario']
PRED:  ['4371,ontario']
SAME:  ['4371,ontario']
TRUE:  ['4372,william anthony']
PRED:  ['4372,william anthony  ']
SAME:  []
NONE_TRUE_EQUAL:  ['4372,william anthony']
NONE_PRED_EQUAL:  ['4372,william anthony  ']
TRUE:  ['4373,us']
PRED:  ['4373,us']
SAME:  ['4373,us']
TRUE:  ['4374,city of miami cemetery']
PRED:  ['4374,miami cemetery']
SAME:  []
NONE_TRUE_EQUAL:  ['4374,city of miami cemetery']
NONE_PRED_EQUAL:  ['4374,miami cemetery']
TRUE:  ['4375,louisiana']
PRED:  ['4375,louisiana']
SAME:  ['4375,louisiana']
TRUE:  ['4376,neungin high school', '4376,buddhist school']
PRED:  ['4376,neungin high']
NONE_TRUE_LESS:  ['4376,neungin high school', '4376,buddhist school']
NONE_PRED_LESS:  ['4376,neungin high']
TRUE:  ['4377

SAME:  ['4508,grameen bank']
TRUE:  ['4509,earl b. ruth']
PRED:  ['4509,earl b. ruth']
SAME:  ['4509,earl b. ruth']
TRUE:  ['4510,mike kelly', '4510,tim mathieson']
PRED:  ['4510,the tim mathieson', '4510,the mike kelly', '4510,australian']
NONE_TRUE_MORE:  ['4510,mike kelly', '4510,tim mathieson']
NONE_PRED_MORE:  ['4510,the tim mathieson', '4510,the mike kelly', '4510,australian']
TRUE:  ['4511,katima mulilo bridge']
PRED:  ['4511,katima mulilo bridge']
SAME:  ['4511,katima mulilo bridge']
TRUE:  ['4512,black grape global']
PRED:  ['4512,black grape global']
SAME:  ['4512,black grape global']
TRUE:  ['4513,nintendo eshop']
PRED:  ['4513,nintendo eshop']
SAME:  ['4513,nintendo eshop']
TRUE:  ['4514,phil-ellena']
PRED:  ['4514,phil-ellena']
SAME:  ['4514,phil-ellena']
TRUE:  ['4515,lucius crus']
PRED:  ['4515,lucius crus']
SAME:  ['4515,lucius crus']
TRUE:  ['4516,david animle hanses']
PRED:  ['4516,david animle hanses']
SAME:  ['4516,david animle hanses']
TRUE:  ['4517,mastercard cent

TRUE:  ["4651,a christian turn'd turk"]
PRED:  ['4651,christian', '4651,turk']
NONE_TRUE_MORE:  ["4651,a christian turn'd turk"]
NONE_PRED_MORE:  ['4651,christian', '4651,turk']
TRUE:  ['4652,persona']
PRED:  ['4652,persona']
SAME:  ['4652,persona']
TRUE:  ['4653,mark stevens']
PRED:  ['4653,mark stevens']
SAME:  ['4653,mark stevens']
TRUE:  ['4654,josh groban']
PRED:  ['4654,josh groban']
SAME:  ['4654,josh groban']
TRUE:  ['4655,simon fuller', '4655,julian gingell']
PRED:  ['4655,julian gingell', '4655, simon fuller']
SAME:  ['4655,julian gingell']
PARTIAL:  ['4655,julian gingell']
TRUE:  ['4656,wbcq-fm']
PRED:  ['4656,wbcq-fm']
SAME:  ['4656,wbcq-fm']
TRUE:  ['4657,british columbia']
PRED:  ['4657,british columbia']
SAME:  ['4657,british columbia']
TRUE:  ['4658,latin', '4658,sovereign military order of malta']
PRED:  ['4658,latin', '4658,sovereign military order of malta']
SAME:  ['4658,latin', '4658,sovereign military order of malta']
TRUE:  ['4659,lake uniamsi']
PRED:  ['4659,lak

TRUE:  ['4788,into the dalek']
PRED:  ['4788,into the dalek']
SAME:  ['4788,into the dalek']
TRUE:  ['4789,citizen kane', '4789,gregg toland']
PRED:  ['4789,citizen kane', '4789,gregg toland']
SAME:  ['4789,citizen kane', '4789,gregg toland']
TRUE:  ['4790,bruce beutler']
PRED:  ['4790,bruce beutler']
SAME:  ['4790,bruce beutler']
TRUE:  ['4791,trump entertainment resorts', '4791,maryanne barry']
PRED:  ['4791,trump entertainment resorts', '4791,maryanne barry']
SAME:  ['4791,trump entertainment resorts', '4791,maryanne barry']
TRUE:  ['4792,barasat']
PRED:  ['4792,barasat']
SAME:  ['4792,barasat']
TRUE:  ['4793,alan freeman days', '4793,come on over']
PRED:  ['4793,alan freeman days']
LESS:  ['4793,alan freeman days']
TRUE:  ['4794,janice soprano']
PRED:  ['4794,janice soprano', '4794,soprano']
MORE:  ['4794,janice soprano']
TRUE:  ['4795,john muir', '4795,mt hoffmann']
PRED:  ['4795,first', '4795,john']
SAME:  []
NONE_TRUE_EQUAL:  ['4795,john muir', '4795,mt hoffmann']
NONE_PRED_EQUA

PARTIAL:  ['4926,krista kelly']
TRUE:  ['4927,judge advocate general corps']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4927,judge advocate general corps']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4928,pope pius xi']
PRED:  ['4928,pope']
SAME:  []
NONE_TRUE_EQUAL:  ['4928,pope pius xi']
NONE_PRED_EQUAL:  ['4928,pope']
TRUE:  ['4929,brown']
PRED:  ['4929,brown']
SAME:  ['4929,brown']
TRUE:  ['4930,solon spencer beman']
PRED:  ['4930,solon spencer beman']
SAME:  ['4930,solon spencer beman']
TRUE:  ['4931,1967 mexican grand prix', '4931,1971 us']
PRED:  ['4931,first', '4931,the 1967 mexican grand prix', '4931,1971', '4931,us']
NONE_TRUE_MORE:  ['4931,1967 mexican grand prix', '4931,1971 us']
NONE_PRED_MORE:  ['4931,first', '4931,the 1967 mexican grand prix', '4931,1971', '4931,us']
TRUE:  ['4932,sam sen railway station']
PRED:  ['4932,sam sen railway station']
SAME:  ['4932,sam sen railway station']
TRUE:  ['4933,newcastle']
PRED:  ['4933,newcastle']
SAME:  ['4933,newcastle']
TRUE:  ['4934,tehran']

In [27]:
# Summary of the comparison counters computed by an earlier cell, one value
# per line and in the same order as the tuple below.
tallies = (match, partial, less, more, none_match, none_less, none_more, predicted)
print(*tallies, sep='\n')
# Ratio of the `predicted` counter to the number of reference entities
# (presumably the share of gold entities that were recovered -- TODO confirm).
print(predicted/len(real_entities))

2511
352
187
274
1175
156
345
4446
0.6693766937669376


In [None]:
# flair + spaCy

In [32]:
def _merge_entity_lists(flair_entities, spacy_entities):
    """Choose the entity list to keep for one question from two NER outputs.

    Both arguments are lists of entity strings; ``['']`` is treated as
    "nothing predicted". The selection rule is:

    * If the shorter list is fully contained in the longer one, keep the
      longer list. On equal length with full containment, keep the flair list.
    * If one side is ``['']`` and the other is not, keep the other side.
    * Equal length but different content: keep the flair list.
    * Otherwise (different lengths, different content): keep the *shorter*
      list with duplicates removed.

    Duplicates are removed with ``dict.fromkeys`` so the original order of the
    entities is preserved. Building the result from ``set`` operations would
    make the order depend on string hash randomization, i.e. change between
    kernel restarts.

    Parameters
    ----------
    flair_entities : list of str
        Entities predicted by flair for the question.
    spacy_entities : list of str
        Entities predicted by spaCy for the same question.

    Returns
    -------
    list of str
        One of the input lists (same object), or a de-duplicated copy of the
        shorter one.
    """
    n_flair, n_spacy = len(flair_entities), len(spacy_entities)

    if n_flair > n_spacy:
        spacy_in_flair = all(ent in flair_entities for ent in spacy_entities)
        if spacy_in_flair or spacy_entities == ['']:
            return flair_entities
        # spaCy found fewer, partly different entities: keep spaCy's, unique.
        return list(dict.fromkeys(spacy_entities))

    flair_in_spacy = all(ent in spacy_entities for ent in flair_entities)

    if n_flair == n_spacy:
        # Fall back to spaCy only when flair predicted nothing and spaCy did.
        if flair_entities == [''] and not flair_in_spacy:
            return spacy_entities
        return flair_entities

    # n_flair < n_spacy
    if flair_in_spacy or flair_entities == ['']:
        return spacy_entities
    # flair found fewer, partly different entities: keep flair's, unique.
    return list(dict.fromkeys(flair_entities))


# One merged prediction list per question. ner_ner12 holds the flair
# (ner + ner12) predictions and ner_md the spaCy (md) predictions, both
# indexed by question position 0..n-1.
flair_spacy = [_merge_entity_lists(ner_ner12[i], ner_md[i]) for i in range(n)]

[['0,Bill Finger'], ['1,winston churchill', '1,Selwyn Lloyd'], ['2,Gestapo'], ['3,Mumbai North'], ['4,Roberto Clemente Bridge'], ['5,Theos Philopator', '5,Cleopatra'], ['6,Li Si'], ['7,PAvel Moroz', '7,Yakov Estrin'], ['8,Broadmeadows, Victoria'], ['9,ASC Creative Services'], ['10,Ernest Rutherford', '10,Charles Drummond Ellis'], ['11,REP Parasol'], ["12,Monroe Carell Jr. Children's Hospital", '12,Vanderbilt', '12,Duncan U. Fletcher'], ['13,Nader Guirat,', '13,Josef Johansson'], ['14,sarah jane'], ['15,HBO'], ['16,Nikolai Morozov', '16,Stanislav Morozov'], ['17,Abhijit Kunte', '17,Kasparov'], ['18,MSX Basics'], ['19,seasons', '19,Ronaldo'], ['20,SamurAbsheron', '20,Anar Salmanov'], ['21,2nd Foreign Infantry Regiment'], ['22,SR'], ['23,Kevin Jonas', '23,Joe Jonas'], ['24,Timm Gunn', '24,Sunrise', '24,HIMYM'], ['25,O.co Coliseum'], ['26,Rishkiesh'], ['27,america'], ['28,PhDs', '28,National Medal of Science'], ['29,Middlesbrough F.C.', '29,Aston Villa'], ['30,William Harper.'], ['31,PCL']

In [33]:
### Compare the merged Flair (ner + ner12) + spaCy (md) predictions with `real`.
# compare_predictions is defined outside this cell; its eight return values are
# bound to module-level names so that following cells can print them again.
# NOTE(review): judging by the captured output, the call itself appears to emit
# the per-question TRUE/PRED/SAME trace -- confirm against its definition.
(match, partial, less, more,
 none_match, none_less, none_more, predicted) = compare_predictions(real, flair_spacy)

# One value per line, in the same order in which they were unpacked.
print(match, partial, less, more, none_match, none_less, none_more, predicted, sep='\n')

# `predicted` divided by the number of items in real_entities.
print(predicted / len(real_entities))

TRUE:  ['0,bill finger']
PRED:  ['0,bill finger']
SAME:  ['0,bill finger']
TRUE:  ['1,selwyn lloyd', '1,winston churchill']
PRED:  ['1,winston churchill', '1,selwyn lloyd']
SAME:  ['1,winston churchill', '1,selwyn lloyd']
TRUE:  ['2,gestapo']
PRED:  ['2,gestapo']
SAME:  ['2,gestapo']
TRUE:  ['3,mumbai north']
PRED:  ['3,mumbai north']
SAME:  ['3,mumbai north']
TRUE:  ['4,roberto clemente bridge']
PRED:  ['4,roberto clemente bridge']
SAME:  ['4,roberto clemente bridge']
TRUE:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
PRED:  ['5,theos philopator', '5,cleopatra']
SAME:  []
NONE_TRUE_EQUAL:  ['5,ptolemy xiii theos philopator', '5,cleopatra v']
NONE_PRED_EQUAL:  ['5,theos philopator', '5,cleopatra']
TRUE:  ['6,li si']
PRED:  ['6,li si']
SAME:  ['6,li si']
TRUE:  ['7,pavel moroz', '7,yakov estrin']
PRED:  ['7,pavel moroz', '7,yakov estrin']
SAME:  ['7,pavel moroz', '7,yakov estrin']
TRUE:  ['8,broadmeadows, victoria']
PRED:  ['8,broadmeadows, victoria']
SAME:  ['8,broadmeadows, v

NONE_PRED_MORE:  ['268,alpine', "268,winter olympics men's"]
TRUE:  ['269,virgin radio']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['269,virgin radio']
NONE_PRED_EQUAL:  ['']
TRUE:  ['270,tani university']
PRED:  ['270,tani university']
SAME:  ['270,tani university']
TRUE:  ['271,dowra']
PRED:  ['271,dowra']
SAME:  ['271,dowra']
TRUE:  ['272,pittsburgh pirates']
PRED:  ['272,pittsburgh pirates']
SAME:  ['272,pittsburgh pirates']
TRUE:  ['273,simon fuller']
PRED:  ['273,simon fuller']
SAME:  ['273,simon fuller']
TRUE:  ['274,the producers', '274,next thing']
PRED:  ['274,next thing', '274,the producers']
SAME:  ['274,next thing', '274,the producers']
TRUE:  ['275,henry ford', '275,ford model c']
PRED:  ['275,henry ford', '275,model c']
SAME:  ['275,henry ford']
PARTIAL:  ['275,henry ford']
TRUE:  ["276,dragons' den"]
PRED:  ["276,dragons' den"]
SAME:  ["276,dragons' den"]
TRUE:  ['277,joseph case high school', '277,providence chapel, charlwood']
PRED:  ['277,the providence chapel, charlwo

TRUE:  ['428,raptors 905']
PRED:  ['428,raptors 905']
SAME:  ['428,raptors 905']
TRUE:  ['429,carmel winery']
PRED:  ['429,carmel winery']
SAME:  ['429,carmel winery']
TRUE:  ['430,irishmen']
PRED:  ['430,irishmen']
SAME:  ['430,irishmen']
TRUE:  ['431,nea smyrni']
PRED:  ['431,nea smyrni']
SAME:  ['431,nea smyrni']
TRUE:  ['432,ingmar bergman', "432,james o'brien"]
PRED:  ['432,ingmar bergman', "432,james o'brien"]
SAME:  ['432,ingmar bergman', "432,james o'brien"]
TRUE:  ['433,colonel', '433,militia']
PRED:  ['433,militia']
LESS:  ['433,militia']
TRUE:  ['434,toronto']
PRED:  ['434,toronto']
SAME:  ['434,toronto']
TRUE:  ['435,mike salmon']
PRED:  ['435,mike salmon']
SAME:  ['435,mike salmon']
TRUE:  ['436,julius caesar']
PRED:  ['436,kingship', '436,julius caesar']
MORE:  ['436,julius caesar']
TRUE:  ['437,mary poppins']
PRED:  ['437,mary poppins']
SAME:  ['437,mary poppins']
TRUE:  ['438,bob macmillan', '438,mikhail pashnin']
PRED:  ['438,bob macmillan', '438,mikhail pashnin']
SAME

PRED:  ["571,president's", '571,brockport golden eagles']
MORE:  ['571,brockport golden eagles']
TRUE:  ['572,bruce bochy', '572,ramiro pea']
PRED:  ['572,ramiro pea', '572,bruce bochy']
SAME:  ['572,ramiro pea', '572,bruce bochy']
TRUE:  ['573,tnt', '573,jeff conaway']
PRED:  ['573,jeff conaway', '573,tnt']
SAME:  ['573,jeff conaway', '573,tnt']
TRUE:  ['574,aegean sea']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['574,aegean sea']
NONE_PRED_EQUAL:  ['']
TRUE:  ['575,john tory']
PRED:  ['575,john tory']
SAME:  ['575,john tory']
TRUE:  ['576,north caucasus railway']
PRED:  ['576,north caucasus']
SAME:  []
NONE_TRUE_EQUAL:  ['576,north caucasus railway']
NONE_PRED_EQUAL:  ['576,north caucasus']
TRUE:  ['577,etienne bieler']
PRED:  ['577,etienne bieler\\xc9tienne_bi\\xe9ler']
SAME:  []
NONE_TRUE_EQUAL:  ['577,etienne bieler']
NONE_PRED_EQUAL:  ['577,etienne bieler\\xc9tienne_bi\\xe9ler']
TRUE:  ['578,london']
PRED:  ['578,london']
SAME:  ['578,london']
TRUE:  ['579,muhammad yunus']
PRED:  [

SAME:  ['775,winston bryant', '775,joseph stiglitz']
TRUE:  ['776,jeevan mrityu']
PRED:  ['776,jeevan mrityu']
SAME:  ['776,jeevan mrityu']
TRUE:  ['777,nathaniel p. hill']
PRED:  ['777,nathaniel p. hill']
SAME:  ['777,nathaniel p. hill']
TRUE:  ['778,tom maniatis', '778,james still']
PRED:  ['778,james still', '778,tom maniatis']
SAME:  ['778,james still', '778,tom maniatis']
TRUE:  ['779,becket fund for religious liberty']
PRED:  ['779,becket']
SAME:  []
NONE_TRUE_EQUAL:  ['779,becket fund for religious liberty']
NONE_PRED_EQUAL:  ['779,becket']
TRUE:  ['780,joe purcell', '780,winston bryant']
PRED:  ['780,winston bryant', '780,joe purcell']
SAME:  ['780,winston bryant', '780,joe purcell']
TRUE:  ['781,scuderia ferrari']
PRED:  ['781,scuderia ferrari,']
SAME:  []
NONE_TRUE_EQUAL:  ['781,scuderia ferrari']
NONE_PRED_EQUAL:  ['781,scuderia ferrari,']
TRUE:  ['782,olivier boulay']
PRED:  ['782,olivier boulay']
SAME:  ['782,olivier boulay']
TRUE:  ['783,mary poppins (musical)']
PRED:  ['

PRED:  ['918,tampa', '918,florida']
MORE:  ['918,tampa']
TRUE:  ['919,phish']
PRED:  ['919,phish']
SAME:  ['919,phish']
TRUE:  ["920,musee d'orsay"]
PRED:  ["920,musee d'orsay"]
SAME:  ["920,musee d'orsay"]
TRUE:  ['921,bill clinton']
PRED:  ['921,bill clinton']
SAME:  ['921,bill clinton']
TRUE:  ['922,gnowsys', '922,gnu texmacs']
PRED:  ['922,gnowsys', '922,gnu texmacs']
SAME:  ['922,gnowsys', '922,gnu texmacs']
TRUE:  ['923,liberty bell']
PRED:  ['923,liberty bell']
SAME:  ['923,liberty bell']
TRUE:  ['924,gibson guitar corporation']
PRED:  ['924,gibson guitar corporation']
SAME:  ['924,gibson guitar corporation']
TRUE:  ['925,the skeleton dance', "925,mickey's mellerdrammer"]
PRED:  ["925,mickey's mellerdrammer", '925,the skeleton dance']
SAME:  ["925,mickey's mellerdrammer", '925,the skeleton dance']
TRUE:  ['926,2013 copa centroamericana']
PRED:  ['926,2013', '926,copa centroamericana']
NONE_TRUE_MORE:  ['926,2013 copa centroamericana']
NONE_PRED_MORE:  ['926,2013', '926,copa cent

SAME:  ['1061,alexandre tichonov', '1061,valentin muratov']
TRUE:  ['1062,kansas legislature', '1062,us congress']
PRED:  ['1062,kansas legislature', '1062,us congress']
SAME:  ['1062,kansas legislature', '1062,us congress']
TRUE:  ['1063,nicole lai', '1063,pulau ubin']
PRED:  ['1063,pulau ubin', '1063,nicole lai']
SAME:  ['1063,pulau ubin', '1063,nicole lai']
TRUE:  ['1064,edwin f. hunter', '1064,thomas darden']
PRED:  ['1064,thomas darden', '1064,edwin f. hunter']
SAME:  ['1064,thomas darden', '1064,edwin f. hunter']
TRUE:  ['1065,vanderbilt commodores']
PRED:  ['1065,vanderbilt commodores']
SAME:  ['1065,vanderbilt commodores']
TRUE:  ['1066,switzerland', '1066,summer olympics']
PRED:  ['1066,olympics', '1066,switzerland', '1066,summer']
MORE:  ['1066,switzerland']
TRUE:  ['1067,roger barton']
PRED:  ['1067,roger barton']
SAME:  ['1067,roger barton']
TRUE:  ['1068,darren mcnamara']
PRED:  ['1068,darren mcnamara']
SAME:  ['1068,darren mcnamara']
TRUE:  ['1069,national science medal']

PARTIAL:  ['1274,the catastrophe of success']
TRUE:  ['1275,indy popcon', '1275,louis le cocqq']
PRED:  ['1275,indy popcon', '1275,louis le cocqq']
SAME:  ['1275,indy popcon', '1275,louis le cocqq']
TRUE:  ['1276,downriver', '1276,cyrus mann']
PRED:  ['1276,cyrus mann', '1276,downriver']
SAME:  ['1276,cyrus mann', '1276,downriver']
TRUE:  ['1277,william cushing']
PRED:  ['1277,william cushing']
SAME:  ['1277,william cushing']
TRUE:  ["1278,filbert's old time root beer"]
PRED:  ["1278,filbert's old time"]
SAME:  []
NONE_TRUE_EQUAL:  ["1278,filbert's old time root beer"]
NONE_PRED_EQUAL:  ["1278,filbert's old time"]
TRUE:  ['1279,primus', '1279,comedy central']
PRED:  ['1279,primus', '1279,comedy central']
SAME:  ['1279,primus', '1279,comedy central']
TRUE:  ['1280,written in the stars']
PRED:  ['1280,written in the stars']
SAME:  ['1280,written in the stars']
TRUE:  ['1281,in a word or 2']
PRED:  ['1281,in a word']
SAME:  []
NONE_TRUE_EQUAL:  ['1281,in a word or 2']
NONE_PRED_EQUAL:  ['

TRUE:  ['1486,kazan']
PRED:  ['1486,kazan']
SAME:  ['1486,kazan']
TRUE:  ['1487,john tortorella']
PRED:  ['1487,john tortorella']
SAME:  ['1487,john tortorella']
TRUE:  ['1488,donna shalala']
PRED:  ['1488,ngos', '1488,donna shalala']
MORE:  ['1488,donna shalala']
TRUE:  ['1489,san diego convention']
PRED:  ['1489,san diego convention']
SAME:  ['1489,san diego convention']
TRUE:  ['1490,king county, washington']
PRED:  ['1490,county, washington']
SAME:  []
NONE_TRUE_EQUAL:  ['1490,king county, washington']
NONE_PRED_EQUAL:  ['1490,county, washington']
TRUE:  ['1491,germany']
PRED:  ['1491,germany']
SAME:  ['1491,germany']
TRUE:  ['1492,arctic ocean', '1492,laptev sea']
PRED:  ['1492,arctic ocean', '1492,laptev sea']
SAME:  ['1492,arctic ocean', '1492,laptev sea']
TRUE:  ["1493,lorenzo de' medici", '1493,lucrezia tornabuoni']
PRED:  ['1493,lucrezia tornabuoni', "1493,lorenzo de' medici"]
SAME:  ['1493,lucrezia tornabuoni', "1493,lorenzo de' medici"]
TRUE:  ['1494,kevin jonas', '1494,nic

TRUE:  ['1629,pat kirkwood']
PRED:  ['1629,pat kirkwood']
SAME:  ['1629,pat kirkwood']
TRUE:  ['1630,yes']
PRED:  ['1630,yes']
SAME:  ['1630,yes']
TRUE:  ['1631,pulau ubin']
PRED:  ['1631,pulau ubin']
SAME:  ['1631,pulau ubin']
TRUE:  ['1632,athens']
PRED:  ['1632,athens']
SAME:  ['1632,athens']
TRUE:  ['1633,the elders', '1633,united world college']
PRED:  ['1633,elders', '1633,world college']
SAME:  []
NONE_TRUE_EQUAL:  ['1633,the elders', '1633,united world college']
NONE_PRED_EQUAL:  ['1633,elders', '1633,world college']
TRUE:  ['1634,uganda']
PRED:  ['1634,uganda']
SAME:  ['1634,uganda']
TRUE:  ['1635,simon fuller', '1635,jennifier lopez']
PRED:  ['1635,simon fuller', '1635,jennifier lopez']
SAME:  ['1635,simon fuller', '1635,jennifier lopez']
TRUE:  ['1636,bbc one', '1636,bbc hd']
PRED:  ['1636,bbc', '1636,bbc hd']
SAME:  ['1636,bbc hd']
PARTIAL:  ['1636,bbc hd']
TRUE:  ['1637,jay lane']
PRED:  ['1637,jay lane']
SAME:  ['1637,jay lane']
TRUE:  ['1638,greater london']
PRED:  ['163

TRUE:  ['1844,siberia']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1844,siberia']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1845,orient news', '1845,feras saied']
PRED:  ['1845,orient news', '1845,feras saied']
SAME:  ['1845,orient news', '1845,feras saied']
TRUE:  ['1846,michael jackson']
PRED:  ['1846,michael jackson.']
SAME:  []
NONE_TRUE_EQUAL:  ['1846,michael jackson']
NONE_PRED_EQUAL:  ['1846,michael jackson.']
TRUE:  ['1847,raymond baldwin']
PRED:  ['1847,raymond baldwin']
SAME:  ['1847,raymond baldwin']
TRUE:  ['1848,lighthouse boy club']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['1848,lighthouse boy club']
NONE_PRED_EQUAL:  ['']
TRUE:  ['1849,jeff munn']
PRED:  ['1849,jeff munn']
SAME:  ['1849,jeff munn']
TRUE:  ['1850,the okee dokee brothers', '1850,2015 mls all-star game']
PRED:  ['1850,the okee dokee brothers', '1850,2015 mls all-star game']
SAME:  ['1850,the okee dokee brothers', '1850,2015 mls all-star game']
TRUE:  ['1851,chip kelly']
PRED:  ['1851,chip kelly']
SAME:  ['1851,chip ke

SAME:  ['2062,bodhin kjolhede']
PARTIAL:  ['2062,bodhin kjolhede']
TRUE:  ['2063,1502nd infantry brigade', '2063,galatasaray handball team']
PRED:  ['2063,galatasaray handball team', '2063,infantry brigade (ready reserve)']
SAME:  ['2063,galatasaray handball team']
PARTIAL:  ['2063,galatasaray handball team']
TRUE:  ['2064,the sarah jane adventures']
PRED:  ['2064,sarah jane adventures']
SAME:  []
NONE_TRUE_EQUAL:  ['2064,the sarah jane adventures']
NONE_PRED_EQUAL:  ['2064,sarah jane adventures']
TRUE:  ['2065,open society foundation', '2065,susan weber soros']
PRED:  ['2065,open society foundation', '2065,susan weber soros']
SAME:  ['2065,open society foundation', '2065,susan weber soros']
TRUE:  ['2066,clinton foundation', '2066,edward j. perkins']
PRED:  ['2066,clinton foundation', '2066,edward j. perkins']
SAME:  ['2066,clinton foundation', '2066,edward j. perkins']
TRUE:  ['2067,fr. agnel multipurpose school and junior college']
PRED:  ['2067,agnel multipurpose', '2067,junior col

TRUE:  ['2278,warner bros. television']
PRED:  ['2278,warner bros. television']
SAME:  ['2278,warner bros. television']
TRUE:  ['2279,tofa ahin', '2279,ethopia']
PRED:  ['']
NONE_TRUE_LESS:  ['2279,tofa ahin', '2279,ethopia']
NONE_PRED_LESS:  ['']
TRUE:  ['2280,entertainment one music']
PRED:  ['2280,entertainment one music']
SAME:  ['2280,entertainment one music']
TRUE:  ['2281,ctenochaetus binotatus', '2281,cactinopterygii']
PRED:  ['2281,ctenochaetus binotatus', '2281,cactinopterygii']
SAME:  ['2281,ctenochaetus binotatus', '2281,cactinopterygii']
TRUE:  ['2282,sapindales']
PRED:  ['2282,sapindales']
SAME:  ['2282,sapindales']
TRUE:  ['2283,us']
PRED:  ['2283,us']
SAME:  ['2283,us']
TRUE:  ['2284,qun thnh temple']
PRED:  ['2284,qun thnh temple']
SAME:  ['2284,qun thnh temple']
TRUE:  ['2285,shakespeare', '2285,stanley cavell']
PRED:  ['2285,stanley cavell', '2285,shakespeare']
SAME:  ['2285,stanley cavell', '2285,shakespeare']
TRUE:  ['2286,chelsea f.c.', '2286,evraz']
PRED:  ['2286

TRUE:  ['2496,carl stokes', '2496,joseph stiglitz']
PRED:  ['2496,carl stokes', '2496,joseph stiglitz', '2496,us']
MORE:  ['2496,carl stokes', '2496,joseph stiglitz']
TRUE:  ['2497,roberto clemente bridge', '2497,homestead grays bridge']
PRED:  ['2497,homestead grays bridge', '2497,roberto clemente bridge']
SAME:  ['2497,homestead grays bridge', '2497,roberto clemente bridge']
TRUE:  ['2498,ontario']
PRED:  ['2498,ontario']
SAME:  ['2498,ontario']
TRUE:  ['2499,i love how you love me']
PRED:  ['2499,i love']
SAME:  []
NONE_TRUE_EQUAL:  ['2499,i love how you love me']
NONE_PRED_EQUAL:  ['2499,i love']
TRUE:  ['2500,pennsylvania']
PRED:  ['2500,pennsylvania', '2500,american']
MORE:  ['2500,pennsylvania']
TRUE:  ['2501,defenceman']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['2501,defenceman']
NONE_PRED_EQUAL:  ['']
TRUE:  ['2502,jerusalem institue of justice', '2502,gazaisrael']
PRED:  ['2502,jerusalem institue of justice', '2502,gazaisrael']
SAME:  ['2502,jerusalem institue of justice', '2

TRUE:  ['2709,neuroimaging']
PRED:  ['2709,neuroimaging']
SAME:  ['2709,neuroimaging']
TRUE:  ['2710,malaysian and chinese association']
PRED:  ['2710,malaysian', '2710,chinese']
NONE_TRUE_MORE:  ['2710,malaysian and chinese association']
NONE_PRED_MORE:  ['2710,malaysian', '2710,chinese']
TRUE:  ['2711,don berlin', '2711,michael sears']
PRED:  ['2711,berlin', '2711,michael sears']
SAME:  ['2711,michael sears']
PARTIAL:  ['2711,michael sears']
TRUE:  ['2712,svetlana navasardyan']
PRED:  ['2712,svetlana navasardyan']
SAME:  ['2712,svetlana navasardyan']
TRUE:  ['2713,ajith perera', '2713,neungin high school']
PRED:  ['2713,ajith perera', '2713,neungin high school']
SAME:  ['2713,ajith perera', '2713,neungin high school']
TRUE:  ['2714,austin college', '2714,royal australian air force']
PRED:  ['2714,royal australian air force', '2714,austin college']
SAME:  ['2714,royal australian air force', '2714,austin college']
TRUE:  ['2715,england national under-20 football team', '2715,middlesbro

SAME:  ['2933,vmf-155', '2933,vmf-218']
TRUE:  ['2934,amstelveen']
PRED:  ['2934,amstelveen']
SAME:  ['2934,amstelveen']
TRUE:  ['2935,ontario international airport', '2935,inland empire']
PRED:  ['2935,ontario international airport', '2935,inland empire']
SAME:  ['2935,ontario international airport', '2935,inland empire']
TRUE:  ['2936,erich bagge', '2936,erban ieica']
PRED:  ['2936,erich bagge']
LESS:  ['2936,erich bagge']
TRUE:  ['2937,peter frampton']
PRED:  ['2937,peter frampton']
SAME:  ['2937,peter frampton']
TRUE:  ['2938,yosemite park']
PRED:  ['2938,yosemite']
SAME:  []
NONE_TRUE_EQUAL:  ['2938,yosemite park']
NONE_PRED_EQUAL:  ['2938,yosemite']
TRUE:  ["2939,alice's wonderland"]
PRED:  ["2939,alice's wonderland"]
SAME:  ["2939,alice's wonderland"]
TRUE:  ['2940,estadio nacional de costa rica']
PRED:  ['2940,estadio nacional de costa rica', '2940,2011']
MORE:  ['2940,estadio nacional de costa rica']
TRUE:  ['2941,eric roth', '2941,charles lemaire']
PRED:  ['2941,charles lemai

PARTIAL:  ['3227,rising star']
TRUE:  ['3228,lee robinson']
PRED:  ['3228,lee robinson']
SAME:  ['3228,lee robinson']
TRUE:  ['3229,norway', '3229,swedish academy']
PRED:  ['3229,norway', '3229,swedish academy']
SAME:  ['3229,norway', '3229,swedish academy']
TRUE:  ['3230,burbank california']
PRED:  ['3230,burbank california']
SAME:  ['3230,burbank california']
TRUE:  ['3231,google web toolkit', '3231,manu cornet']
PRED:  ['3231,name', '3231,manu', '3231,google web toolkit']
MORE:  ['3231,google web toolkit']
TRUE:  ['3232,pope benedict xvi']
PRED:  ['3232,benedict']
SAME:  []
NONE_TRUE_EQUAL:  ['3232,pope benedict xvi']
NONE_PRED_EQUAL:  ['3232,benedict']
TRUE:  ['3233,tony award']
PRED:  ['3233,tony award']
SAME:  ['3233,tony award']
TRUE:  ['3234,falmouth university']
PRED:  ['3234,falmouth university']
SAME:  ['3234,falmouth university']
TRUE:  ['3235,khiladi 786']
PRED:  ['3235,khiladi 786']
SAME:  ['3235,khiladi 786']
TRUE:  ["3236,menetries's warbler"]
PRED:  ["3236,menetries's"

SAME:  []
NONE_TRUE_EQUAL:  ['3438,england national football team']
NONE_PRED_EQUAL:  ['3438,england']
TRUE:  ['3439,maria ludwika krasiska']
PRED:  ['3439,maria ludwika krasiska']
SAME:  ['3439,maria ludwika krasiska']
TRUE:  ['3440,the runaway bride']
PRED:  ['3440,the runaway bride', '3440,doctor who']
MORE:  ['3440,the runaway bride']
TRUE:  ['3441,the prodigal son']
PRED:  ['3441,the prodigal']
SAME:  []
NONE_TRUE_EQUAL:  ['3441,the prodigal son']
NONE_PRED_EQUAL:  ['3441,the prodigal']
TRUE:  ['3442,helsinki']
PRED:  ['3442,helsinki']
SAME:  ['3442,helsinki']
TRUE:  ['3443,common mushroom']
PRED:  ['3443,mushroom']
SAME:  []
NONE_TRUE_EQUAL:  ['3443,common mushroom']
NONE_PRED_EQUAL:  ['3443,mushroom']
TRUE:  ['3444,isa']
PRED:  ['3444,isa']
SAME:  ['3444,isa']
TRUE:  ['3445,fusajiro yamauchi', '3445,nintendo eshop']
PRED:  ['3445,fusajiro yamauchi', '3445,nintendo eshop']
SAME:  ['3445,fusajiro yamauchi', '3445,nintendo eshop']
TRUE:  ['3446,ladonia', '3446,probus (journal)']
PR

PRED:  ['3724,maharashtra']
SAME:  ['3724,maharashtra']
TRUE:  ['3725,colorado avalanche']
PRED:  ['3725,colorado avalanche']
SAME:  ['3725,colorado avalanche']
TRUE:  ['3726,2658 gingerich', '3726,harvard']
PRED:  ['3726,gingerich', '3726,harvard']
SAME:  ['3726,harvard']
PARTIAL:  ['3726,harvard']
TRUE:  ['3727,robert de niro']
PRED:  ['3727,robert de niro']
SAME:  ['3727,robert de niro']
TRUE:  ['3728,boston red sox']
PRED:  ['3728,boston red sox']
SAME:  ['3728,boston red sox']
TRUE:  ['3729,canberra']
PRED:  ['3729,canberra']
SAME:  ['3729,canberra']
TRUE:  ['3730,karinga bay']
PRED:  ['3730,karinga bay']
SAME:  ['3730,karinga bay']
TRUE:  ['3731,think', '3731,philosophy']
PRED:  ['']
NONE_TRUE_LESS:  ['3731,think', '3731,philosophy']
NONE_PRED_LESS:  ['']
TRUE:  ['3732,robert morris colonials']
PRED:  ['3732,american', '3732,robert morris colonials']
MORE:  ['3732,robert morris colonials']
TRUE:  ['3733,la liga']
PRED:  ['3733,la liga']
SAME:  ['3733,la liga']
TRUE:  ['3734,bucki

TRUE:  ['3943,san diego comic-con international']
PRED:  ['3943,san diego comic-con international']
SAME:  ['3943,san diego comic-con international']
TRUE:  ['3944,klang valley']
PRED:  ['3944,klang valley']
SAME:  ['3944,klang valley']
TRUE:  ['3945,norfolk southern railway']
PRED:  ['3945,norfolk southern railway']
SAME:  ['3945,norfolk southern railway']
TRUE:  ['3946,blanche bruce', '3946,george arceneaux, jr.']
PRED:  ['3946,george arceneaux, jr.', '3946,blanche bruce']
SAME:  ['3946,george arceneaux, jr.', '3946,blanche bruce']
TRUE:  ['3947,chaudhary devi lal']
PRED:  ['3947,chaudhary devi lal']
SAME:  ['3947,chaudhary devi lal']
TRUE:  ['3948,rhodes-haverty building']
PRED:  ['3948,rhodes-haverty building']
SAME:  ['3948,rhodes-haverty building']
TRUE:  ['3949,ptolemy xiv of egypt']
PRED:  ['3949,ptolemy xiv', '3949,egypt']
NONE_TRUE_MORE:  ['3949,ptolemy xiv of egypt']
NONE_PRED_MORE:  ['3949,ptolemy xiv', '3949,egypt']
TRUE:  ['3950,martin molony', '3950,best mate']
PRED:  ['

NONE_PRED_EQUAL:  ['4161,ohio']
TRUE:  ['4162,american boulevard']
PRED:  ['4162,american boulevard (metro transit']
SAME:  []
NONE_TRUE_EQUAL:  ['4162,american boulevard']
NONE_PRED_EQUAL:  ['4162,american boulevard (metro transit']
TRUE:  ['4163,michigan wolverines', '4163,chicago bears']
PRED:  ['4163,michigan wolverines', '4163,chicago bears']
SAME:  ['4163,michigan wolverines', '4163,chicago bears']
TRUE:  ['4164,liverpool playhouse']
PRED:  ['4164,liverpool playhouse']
SAME:  ['4164,liverpool playhouse']
TRUE:  ['4165,bangladesh']
PRED:  ['4165,one', '4165,bangladesh']
MORE:  ['4165,bangladesh']
TRUE:  ['4166,torrey pines gliderport']
PRED:  ['4166,torrey pines gliderport']
SAME:  ['4166,torrey pines gliderport']
TRUE:  ['4167,deion sanders']
PRED:  ['4167,deion sanders']
SAME:  ['4167,deion sanders']
TRUE:  ['4168,lori black', '4168,adrian a. basora']
PRED:  ['4168,adrian a. basora', '4168,lori black']
SAME:  ['4168,adrian a. basora', '4168,lori black']
TRUE:  ['4169,michael hou

SAME:  ['4381,national collegiate athletic association', '4381,nicholas s. zeppos']
TRUE:  ['4382,harry harlow']
PRED:  ['4382,phd', '4382,harry harlow']
MORE:  ['4382,harry harlow']
TRUE:  ['4383,pierre r. graham', '4383,william french smith']
PRED:  ['4383,william french smith', '4383,pierre r. graham']
SAME:  ['4383,william french smith', '4383,pierre r. graham']
TRUE:  ['4384,singapore', '4384,pulau ubin']
PRED:  ['4384,pulau ubin', '4384,singapore']
SAME:  ['4384,pulau ubin', '4384,singapore']
TRUE:  ['4385,nyc']
PRED:  ['4385,nyc']
SAME:  ['4385,nyc']
TRUE:  ['4386,inyo national forest']
PRED:  ['4386,inyo national forest']
SAME:  ['4386,inyo national forest']
TRUE:  ['4387,john paul ii']
PRED:  ['4387,john paul ii']
SAME:  ['4387,john paul ii']
TRUE:  ['4388,heathsville, virginia']
PRED:  ['4388,heathsville, virginia']
SAME:  ['4388,heathsville, virginia']
TRUE:  ['4389,sverre krogh sundbo', '4389,havard vad petersson']
PRED:  ['4389,sverre krogh sundbo']
LESS:  ['4389,sverre kr

LESS:  ['4604,karl ewald hasse']
TRUE:  ['4605,sierra nevada']
PRED:  ['4605,sierra nevada']
SAME:  ['4605,sierra nevada']
TRUE:  ['4606,animal']
PRED:  ['4606,animal kingdom']
SAME:  []
NONE_TRUE_EQUAL:  ['4606,animal']
NONE_PRED_EQUAL:  ['4606,animal kingdom']
TRUE:  ['4607,pau broca']
PRED:  ['4607,one', '4607,pau broca']
MORE:  ['4607,pau broca']
TRUE:  ['4608,chordate']
PRED:  ['4608,chordate']
SAME:  ['4608,chordate']
TRUE:  ['4609,sony', '4609,asus']
PRED:  ['4609,asus', '4609,sony']
SAME:  ['4609,asus', '4609,sony']
TRUE:  ['4610,country music']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4610,country music']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4611,instagram']
PRED:  ['']
SAME:  []
NONE_TRUE_EQUAL:  ['4611,instagram']
NONE_PRED_EQUAL:  ['']
TRUE:  ['4612,daniel gibson']
PRED:  ['4612,daniel gibson']
SAME:  ['4612,daniel gibson']
TRUE:  ['4613,homestead grays']
PRED:  ['4613,homestead grays']
SAME:  ['4613,homestead grays']
TRUE:  ['4614,harvard-westlake school', '4614,henry e. catto

PRED:  ['4818,christen-democratisch en vlaams']
SAME:  ['4818,christen-democratisch en vlaams']
TRUE:  ['4819,juno i']
PRED:  ['4819,juno i']
SAME:  ['4819,juno i']
TRUE:  ['4820,google web toolkit', '4820,playn']
PRED:  ['4820,playn']
LESS:  ['4820,playn']
TRUE:  ['4821,steven moffat']
PRED:  ['4821,steven moffat']
SAME:  ['4821,steven moffat']
TRUE:  ['4822,bbc two']
PRED:  ['4822,bbc two']
SAME:  ['4822,bbc two']
TRUE:  ['4823,tupelo mississippi']
PRED:  ['4823,tupelo mississippi']
SAME:  ['4823,tupelo mississippi']
TRUE:  ['4824,walt disney records']
PRED:  ['4824,walt disney records']
SAME:  ['4824,walt disney records']
TRUE:  ['4825,cohoes, ny']
PRED:  ['4825,cohoes', '4825,ny']
NONE_TRUE_MORE:  ['4825,cohoes, ny']
NONE_PRED_MORE:  ['4825,cohoes', '4825,ny']
TRUE:  ['4826,william campbell', '4826,columbia university']
PRED:  ['4826,columbia university', '4826,william campbell']
SAME:  ['4826,columbia university', '4826,william campbell']
TRUE:  ['4827,maharashtran']
PRED:  ['4827

In [34]:
# Re-display the summary values bound by the comparison cell, one per line,
# without re-running the comparison.
print(match, partial, less, more, none_match, none_less, none_more, predicted, sep='\n')

# `predicted` divided by the number of items in real_entities.
print(predicted / len(real_entities))

3190
330
135
229
872
110
134
5206
0.7838000602228244
