In [7]:
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
# Put the project's `fusion` directory on the import path; presumably this is
# where the project-local modules imported below live (verify against the repo
# layout). The relative path is resolved against the current working directory.
module_path = os.path.abspath('../../fusion')
# Guard the append so that re-running this cell does not stack duplicate
# entries onto sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

from preprocessing_datasets.preprocessing_utilities import ValueUtils
from preprocessing_datasets import load_dataset
from embedding_algorithms import sentence_embedding, set_embedding_model
from dimensionality_reduction_algorithms import dimension_reduction_algorithms
from cluster_algorithms import cluster_algorithm

from helper import load_by_index, get_author_candidates, getFinalAuthors, launchWithoutReductionFusion
from plot_tools import plotChart, plotCluster
from evaluation import *

In [8]:
# Single configuration mapping handed to the dataset loader, the embedding
# set-up and the fusion run in the cells below.
key_values = dict(
    # Embedding-model settings.
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # Which record attributes to embed, and with which embedding.
    attributes_list=['author'],
    embedding_type='inferSent',
    # Dataset selector.
    dataset='clean_book',
    # Clustering / blocking settings.
    cluster_method='hierarchy',
    num_clusters_rate=0.1,
    # NOTE(review): the key is spelled "thresold" (sic). It is presumably
    # looked up under this exact name by the project helpers, so it must not
    # be corrected here alone -- confirm against the helper code.
    block_length_thresold=0.2,
    acceptance_diff=0,
    block_weight=2,
)

In [9]:
# Load the dataset selected by key_values['dataset']; the loader prints the
# dataset name and its loading time. Going by the variable names, the four
# results are the dataset name, the records grouped by ISBN, the list of
# ISBNs and the ground-truth authors -- confirm against load_dataset.
(
    dataset_name,
    table_group_by_isbn,
    isbn_list,
    true_authors,
) = load_dataset(key_values)

dataset: clean_book
Loading time is: 2.7775821685791016


In [10]:
# Set up the embedding model from the settings in `key_values`. The recorded
# output echoes model_version, rnn_dim, model_type and char_level, and reports
# a setup time of roughly 270 s, so avoid re-running this cell needlessly.
set_embedding_model(key_values)

Vocab size : 2196017
model_version: 2
rnn_dim: 1024
model_type: bilstm
char_level: False
Setup time is: 270.20252323150635


In [11]:
# Run the fusion pipeline over the loaded dataset and report how long it took.
# The helper prints a detailed log for every ISBN it processes; judging by its
# name it skips the dimensionality-reduction step (confirm in helper.py).
#
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
finalAuthors, trueAuthors = launchWithoutReductionFusion(
    table_group_by_isbn, isbn_list, true_authors, key_values
)
print("Total time is: {0}".format(time.perf_counter() - start_time))

ISBN: 0007197160
true author: Marren Peter
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.7847087383270264
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.04017305374145508
Discarted candidate: [{'conservation officer': 2, 'former scientist': 2}, {'marren naturalist peter': 2}]
Possible candidate: [{'marren peter': 19}]
lengthNecessary: 5.0
[{'marren peter': 19}]
acceptance_diff 0
['marren peter'] VS true_author: ['marren peter']
ISBN: 0029011086
true author: Averill James R.,Nunley Elma P.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06728696823120117
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0026459693908691406
Discarted candidate: [{'elma nunley p': 4}, {'averill': 1, 'james r': 1}]
Possible candidate: [{'averill james r': 21}]
lengthNecessary: 5.4
[{'averill james r': 21}]
ac

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.09162306785583496
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.004332780838012695
Discarted candidate: [{'george witte': 1, 'glenn': 3}, {'van zutphen': 3}, {'schmitt': 3}, {'bernd': 3}]
Possible candidate: [{'bernd schmitt': 25}, {'glenn van zutphen': 19}]
lengthNecessary: 11.4
[{'bernd schmitt': 25}, {'glenn van zutphen': 19}]
acceptance_diff 0
['bernd schmitt', 'glenn van zutphen'] VS true_author: ['bernd schmitt', 'glenn van zutphen']
ISBN: 0241113873
true author: Critchley Julian
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.03892207145690918
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014612674713134766
Discarted candidate: [{'colin wheeler': 1}]
Possible candidate: [{'critchley': 1, 'critchley julian': 17}]
lengthNecessary: 3

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0656270980834961
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0029268264770507812
Discarted candidate: [{'willia': 1}, {'aldersey-williams h': 1}, {'aldersey hugh williams': 1}]
Possible candidate: [{'aldersey-willia hugh': 3, 'aldersey-williams hugh': 32, 'alderseyand hugh': 1}]
lengthNecessary: 7.800000000000001
[{'aldersey-willia hugh': 3, 'aldersey-williams hugh': 32, 'alderseyand hugh': 1}]
acceptance_diff 0
['aldersey-williams hugh'] VS true_author: ['aldersey-williams hugh']
ISBN: 039475994X
true author: Thomsen Moritz
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04504799842834473
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0012180805206298828
Discarted candidate: [{'maurice thompson': 1}]
Possible candidate: [{'moritz thomse

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0623927116394043
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0013201236724853516
Discarted candidate: []
Possible candidate: [{'alverson d dayton dr l ph': 4, 'alverson dayton dr l': 5, 'dayton dr l': 1}, {'alverson': 1, 'alverson d ph': 1, 'alverson dayton l': 4}]
lengthNecessary: 3.2
[{'alverson d dayton dr l ph': 4, 'alverson dayton dr l': 5, 'dayton dr l': 1}, {'alverson': 1, 'alverson d ph': 1, 'alverson dayton l': 4}]
acceptance_diff 0
['alverson dayton dr l'] VS true_author: ['alverson dayton l']
ISBN: 0670805122
true author: Katz Donald R.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.20036983489990234
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.012533903121948242
Discarted candidate: [{'donald katz': 18}, {'adult viking': 

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0728609561920166
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0025992393493652344
Discarted candidate: [{'beattie william': 5}, {'beatty dr william': 1}, {'beattie w': 1}]
Possible candidate: [{'a m ross': 1, 'alexander m ross': 22, 'alexander ross': 1}]
lengthNecessary: 6.2
[{'a m ross': 1, 'alexander m ross': 22, 'alexander ross': 1}]
acceptance_diff 0
['alexander m ross'] VS true_author: ['alexander m ross']
ISBN: 0802115462
true author: Weddle David
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.05199289321899414
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0015900135040283203
Discarted candidate: [{'american heritage': 1}]
Possible candidate: [{'david peckinpah re sam weddle': 1, 'david weddle': 16}]
lengthNecessary: 3.6
[{'david 

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06609702110290527
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.0036590099334716797
Discarted candidate: [{'miyun park': 7}, {'moby park': 2}, {'and miyun park': 1}, {'moby': 1}]
Possible candidate: [{'moby': 35}]
lengthNecessary: 9.200000000000001
[{'moby': 35}]
acceptance_diff 0
['moby'] VS true_author: ['miyun moby park']
ISBN: 1632384183
true author: Burt Carl
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.03575706481933594
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.000988006591796875
Discarted candidate: []
Possible candidate: [{'burt carl': 5, 'ny press research': 1}]
lengthNecessary: 1.2000000000000002
[{'burt carl': 5, 'ny press research': 1}]
acceptance_diff 0
['burt carl'] VS true_author: ['burt carl']
ISBN: 186152207X
true 

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3006141185760498
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014088153839111328
Discarted candidate: [{'doug welsch': 6}, {'doug welsh': 2, 'douglas welsh': 1, 'doyg welsh': 1, 'welsh': 2}, {'connie effefson': 1, 'connie ellefson': 3, 'connie ellefson l': 1, 'connie ellefson loc': 1}, {'lockhart thomas': 1, 'steph thomas': 1, 'stephens tom': 3}, {'ellefson stephens': 2, 'stephens': 1, 'stephens tho': 1}, {'l': 1}, {'dough welsh': 1}]
Possible candidate: [{'connie ellefson lockhart': 28, 'connie lockhart': 1}, {'l stephens thomas': 25}, {'douglas ellefson f welsh': 1, 'douglas f welsh': 23}]
lengthNecessary: 21.400000000000002
[{'connie ellefson lockhart': 28, 'connie lockhart': 1}, {'l stephens thomas': 25}, {'douglas ellefson f welsh': 1, 'douglas f welsh': 23}]
acceptance_diff 0
['connie ellefson lockhart', 'l stephens thomas', 'doug

lengthNecessary: 12.8
[{'alec guinness': 58}]
acceptance_diff 0
['alec guinness'] VS true_author: ['alec guinness']
ISBN: 029777283X
true author: Richards J. M.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0439610481262207
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0009751319885253906
Discarted candidate: []
Possible candidate: [{'j m richards': 5, 'j richards': 1, 'james maude richards': 1, 'm': 1}]
lengthNecessary: 1.6
[{'j m richards': 5, 'j richards': 1, 'james maude richards': 1, 'm': 1}]
acceptance_diff 0
['j m richards'] VS true_author: ['j m richards']
ISBN: 0297788124
true author: Hudson Rock,Davidson Sara
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0518641471862793
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0019731521606445312
Discarted candidate: [{'hudson r': 1

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.1098320484161377
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.0052337646484375
Discarted candidate: [{'beaton c m': 1}, {'ann beattie': 1}, {'ann beattie': 1}, {'ann beattie': 1}, {'ann beattie': 1}, {'ann beattie': 1}]
Possible candidate: [{'ann beattie': 56}]
lengthNecessary: 12.4
[{'ann beattie': 56}]
acceptance_diff 0
['ann beattie'] VS true_author: ['ann beattie']
ISBN: 0395138841
true author: Farris Martin T.,Sampson Roy J.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04817795753479004
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001026153564453125
Discarted candidate: []
Possible candidate: [{'farris m y': 1, 'farris martin t': 4, 'j r simpson': 1, 'j roy sampson': 1}]
lengthNecessary: 1.4000000000000001
[{'farris m y': 1, 'fa

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.061985015869140625
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.002619028091430664
Discarted candidate: [{'aa vv': 1}, {'weinberg': 1}, {'steven weinberg': 1}]
Possible candidate: [{'steven weinberg': 31}]
lengthNecessary: 6.800000000000001
[{'steven weinberg': 31}]
acceptance_diff 0
['steven weinberg'] VS true_author: ['steven weinberg']
ISBN: 0521880696
true author: Nucci Antonio,Papagiannaki Konstantina
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0639200210571289
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.004562854766845703
Discarted candidate: [{'konstantin papagiannaki': 1}, {'konstantina papagiannaki': 1}, {'konstantina papagiannaki': 1}, {'konstantina papagiannaki': 1}]
Possible candidate: [{'antonio nucci': 33}, {'konstant

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.18386197090148926
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01350092887878418
Discarted candidate: [{'d ph': 4}, {'d deborah ph tannen': 2}, {'deborah': 1}, {'phd tannen': 1}, {'deborah tannen': 1}, {'deborah tannen': 1}, {'deborah tannen': 1}, {'deborah tannen': 1}, {'deborah tannen': 1}]
Possible candidate: [{'deborah tannen': 91}]
lengthNecessary: 20.8
[{'deborah tannen': 91}]
acceptance_diff 0
['deborah tannen'] VS true_author: ['deborah tannen']
ISBN: 0723305390
true author: Lee Owen R.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.054650306701660156
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011341571807861328
Discarted candidate: []
Possible candidate: [{'lee owen': 1, 'lee owen r': 9}]
lengthNecessary: 2.0
[{'lee owen':

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.26383113861083984
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.0052030086517333984
Discarted candidate: [{'j orourke p': 2}, {"j o'rourke p": 2}, {'j o p rourke': 1}, {'doug kenney': 1}, {"j o'rourke p": 1}]
Possible candidate: [{"j o'rourke p": 52}]
lengthNecessary: 11.8
[{"j o'rourke p": 52}]
acceptance_diff 0
["j o'rourke p"] VS true_author: ["j o'rourke p"]
ISBN: 0872861600
true author: Ferlinghetti Lawrence
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04661679267883301
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.002457141876220703
Discarted candidate: [{'chris felver': 2, 'chris felver ferlinghetti lawrence photographs': 1}]
Possible candidate: [{'ferlinghetti lawrence': 14}]
lengthNecessary: 3.4000000000000004
[{'ferlinghetti 

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.19877004623413086
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.018341779708862305
Discarted candidate: [{'hosseini khal': 2}, {'hosseini': 1}, {'hosseini khaled': 1}, {'anne sinai': 1}, {'allen pollock': 1}, {'hosseini khaled': 1}, {'hosseini khaled': 1}, {'hosseini khaled': 1}, {'hosseini khaled': 1}]
Possible candidate: [{'hosseini khaled': 116}]
lengthNecessary: 25.200000000000003
[{'hosseini khaled': 116}]
acceptance_diff 0
['hosseini khaled'] VS true_author: ['hosseini khaled']
ISBN: 1575660024
true author: Wiggs Susan,Llywelyn Morgan,Samuel Barbara,Gellis Roberta
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.10704207420349121
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.005189180374145508
Discarted candidate: [{'llywelyn morgan

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.03381490707397461
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010187625885009766
Discarted candidate: []
Possible candidate: [{'garg ruby': 3, 'n ramanadha rao': 3, 'n ramananda rao': 1}]
lengthNecessary: 1.4000000000000001
[{'garg ruby': 3, 'n ramanadha rao': 3, 'n ramananda rao': 1}]
acceptance_diff 0
['garg ruby', 'n ramanadha rao'] VS true_author: ['garg ruby']
ISBN: 9766400024
true author: Gloudon A.,Tobisch C.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.02637195587158203
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0010988712310791016
Discarted candidate: []
Possible candidate: [{'a cloudon': 5, 'a gloudon': 1, 'c tobisch': 1}, {'ancile gloudon': 5}]
lengthNecessary: 2.4000000000000004
[{'a cloudon': 5, 'a gloudon': 1, 'c to

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.11892390251159668
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.003036022186279297
Discarted candidate: [{'g lewis': 3}, {'botanic gardens kew royal': 1}, {'g lewis p': 1}]
Possible candidate: [{'botanic gardens kew royal': 23}, {'gwilym lewis': 14, 'lewis wwilym': 1}]
lengthNecessary: 8.6
[{'botanic gardens kew royal': 23}, {'gwilym lewis': 14, 'lewis wwilym': 1}]
acceptance_diff 0
['botanic gardens kew royal', 'gwilym lewis'] VS true_author: ['gwilym lewis']
ISBN: 0873585267
true author: Drake Christin
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04417109489440918
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.00140380859375
Discarted candidate: [{'cristin drake': 1}]
Possible candidate: [{'christin drake': 18}]
lengthNecessary: 3.800

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.12256503105163574
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.0056040287017822266
Discarted candidate: [{'nelson thomas': 1}, {'lucian w': 1}, {'editing julia m pitkin': 1}, {'lucian maynard': 1}, {'lucian maynard': 1}]
Possible candidate: [{'kitty maynard': 31}, {'lucian maynard': 25}]
lengthNecessary: 12.200000000000001
[{'kitty maynard': 31}, {'lucian maynard': 25}]
acceptance_diff 0
['kitty maynard', 'lucian maynard'] VS true_author: ['kitty maynard', 'lucian maynard']
ISBN: 0521291186
true author: Gugler Josef,Flanagan William G.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.08503389358520508
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.003403902053833008
Discarted candidate: [{'flanagan g william': 7}, {'gugler j': 1}, {'flanag

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3632950782775879
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.1114509105682373
Discarted candidate: [{'philbrick': 3}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}, {'nathaniel philbrick': 1}]
Possible candidate: [{'nathaniel': 3, 'nathaniel philbrick': 301}]
lengthNecessary: 63.0
[{'nathaniel': 3, 'nathaniel philbrick': 301}]
acceptance_diff 0
['nathaniel philbrick'] VS true_author: ['nathaniel philbrick']
ISBN: 0671422324
true author: Davies Paul
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.07770204544067383
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0020711421966552734
Discarted c

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04323911666870117
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0012180805206298828
Discarted candidate: [{'gail janice thompson': 1}]
Possible candidate: [{'melanie panagiotopoulos': 9, 'melanie panagiotopoulos satter': 1}]
lengthNecessary: 2.2
[{'melanie panagiotopoulos': 9, 'melanie panagiotopoulos satter': 1}]
acceptance_diff 0
['melanie panagiotopoulos'] VS true_author: ['melanie panagiotopoulos']
ISBN: 1855100479
true author: Mitchell Mitch,Platt John
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.041284799575805664
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0013952255249023438
Discarted candidate: []
Possible candidate: [{'joh platt': 1, 'john platt': 5}, {'mitch mitchell': 6}]
lengthNecessary: 2.4000000000000004
[{'joh platt':

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.08561515808105469
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.0043277740478515625
Discarted candidate: [{'betina krahn m': 8}, {'betina krahn m': 1}, {'betina krahn': 1}, {'betina krahn': 1}, {'betina krahn': 1}]
Possible candidate: [{'betina krahn': 40}]
lengthNecessary: 10.4
[{'betina krahn': 40}]
acceptance_diff 0
['betina krahn'] VS true_author: ['betina krahn']
ISBN: 1555424155
true author: Desatnick Robert L.,Detzel Denis H.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.057170867919921875
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0022237300872802734
Discarted candidate: [{'desatnick l robert': 1}, {'denis detzel h': 1}]
Possible candidate: [{'desanick l robert': 1, 'desatnick l robert': 18}, {'denis detzel h': 12}]
lengthNec

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.08113408088684082
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.003530263900756836
Discarted candidate: [{'charles mark miller': 2}, {'harrisson john': 2}, {'mark mi': 1}]
Possible candidate: [{'mark miller': 20}, {'andrew maclauchian': 1, 'andrew maclauchlan': 19}]
lengthNecessary: 9.0
[{'mark miller': 20}, {'andrew maclauchian': 1, 'andrew maclauchlan': 19}]
acceptance_diff 0
['mark miller', 'andrew maclauchlan'] VS true_author: ['mark miller', 'andrew maclauchlan', 'harrisson john']
ISBN: 1612124380
true author: Cox Jeff
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.07145309448242188
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0022521018981933594
Discarted candidate: [{'mondavi tim': 1}, {'cox jeff': 1}, {'cox jeff': 1}]
Possible c

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06291580200195312
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014019012451171875
Discarted candidate: []
Possible candidate: [{'conn dr george h': 3, 'conn george': 1, 'conn george h': 1, 'conn george harold': 8, 'george h': 1}, {'conn': 1, 'elsie v': 1, 'h': 1, 'hanauer': 1, 'horse library lovers wilshire': 1}]
lengthNecessary: 3.8000000000000003
[{'conn dr george h': 3, 'conn george': 1, 'conn george h': 1, 'conn george harold': 8, 'george h': 1}, {'conn': 1, 'elsie v': 1, 'h': 1, 'hanauer': 1, 'horse library lovers wilshire': 1}]
acceptance_diff 0
['conn george harold', 'conn', 'elsie v', 'h', 'hanauer', 'horse library lovers wilshire'] VS true_author: ['conn george h']
ISBN: 0385293917
true author: Olsen Jack
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.036103963851

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.07733821868896484
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.002730131149291992
Discarted candidate: [{'alvin kerr': 1, 'federico helen': 1}, {'arie dezanger': 1, 'townsend': 1}]
Possible candidate: [{'adams': 1, 'adams charlotte': 19}, {'doris m townsend': 1, 'doris mcferran townsend': 8}]
lengthNecessary: 6.6000000000000005
[{'adams': 1, 'adams charlotte': 19}, {'doris m townsend': 1, 'doris mcferran townsend': 8}]
acceptance_diff 0
['adams charlotte', 'doris mcferran townsend'] VS true_author: ['adams charlotte', 'doris mcferran townsend']
ISBN: 0380018136
true author: Asimov Isaac
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.055634260177612305
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0018649101257324219
Discarted candidate:

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.19498395919799805
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.019299745559692383
Discarted candidate: [{'chaucher': 1, 'coghill nevill': 9}, {'chaucer': 1, 'chaucer geofrey': 1}, {'coghill nevil': 1}, {'coghill nevill': 1}, {'coghill nevill': 1}, {'coghill nevill': 1}, {'coghill nevill': 1}, {'coghill nevil trans': 1}, {'h liddell mark': 1}]
Possible candidate: [{'chaucer geoffrey': 103}]
lengthNecessary: 24.400000000000002
[{'chaucer geoffrey': 103}]
acceptance_diff 0
['chaucer geoffrey'] VS true_author: ['chaucer geoffrey']
ISBN: 0072859342
true author: Getlein Mark
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06912803649902344
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0028591156005859375
Discarted candidate: [{'gilbert rita':

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.17103099822998047
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.0158841609954834
Discarted candidate: [{'llywelyn': 1}, {'scott': 1}, {'llywelyn morgan': 1}, {'llywelyn morgan': 1}, {'llywelyn morgan': 1}, {'llywelyn morgan': 1}, {'llywelyn morgan': 1}, {'llywelyn morgan': 1}]
Possible candidate: [{'michael scott': 54}, {'llywelyn morgan': 50}]
lengthNecessary: 22.400000000000002
[{'michael scott': 54}, {'llywelyn morgan': 50}]
acceptance_diff 0
['michael scott', 'llywelyn morgan'] VS true_author: ['llywelyn morgan', 'michael scott']
ISBN: 0801040264
true author: Hendriksen William
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.033750057220458984
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001394033432006836
Discarted candidate: []
Po

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.0920100212097168
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.003821134567260742
Discarted candidate: [{'l nist sherrie': 6}, {'holschuh jodi patrick': 6}, {'holschuh jodi': 2}]
Possible candidate: [{'nist sherrie': 22}, {'holschuh jodi': 14}]
lengthNecessary: 10.0
[{'nist sherrie': 22}, {'holschuh jodi': 14}]
acceptance_diff 0
['nist sherrie', 'holschuh jodi'] VS true_author: ['nist sherrie', 'holschuh jodi patrick']
ISBN: 3775708529
true author: Sobek Werner,Jahn Helmut
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.05743002891540527
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0011768341064453125
Discarted candidate: []
Possible candidate: [{'anna susanne': 4, 'anna susanne texte von': 1, 'helmut jahn': 3, 'jahn': 1, 'kuhn nicola': 

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.05251312255859375
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0013310909271240234
Discarted candidate: [{'barnaby conrad iii': 1}]
Possible candidate: [{'barnaby conrad': 14}]
lengthNecessary: 3.0
[{'barnaby conrad': 14}]
acceptance_diff 0
['barnaby conrad'] VS true_author: ['barnaby conrad iii']
ISBN: 0310200059
true author: Wangerin Walter Jr.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.32227563858032227
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.010664939880371094
Discarted candidate: [{'walter': 1, 'walter wangerin': 13}, {'jr': 5, 'jr wangerin': 1}, {'walter wangerin': 1}, {'walter wangerin': 1}, {'walter wangerin': 1}, {'walter wangerin': 1}, {'jr walter wangerin': 1}]
Possible candidate: [{'jr walter wangerin': 52}]
length

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06528520584106445
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.0024912357330322266
Discarted candidate: [{'oliver r': 1}]
Possible candidate: [{'allen george': 14}, {'allen gerald': 10}, {'oliver richard': 9}]
lengthNecessary: 6.800000000000001
[{'allen george': 14}, {'allen gerald': 10}, {'oliver richard': 9}]
acceptance_diff 0
['allen george', 'allen gerald', 'oliver richard'] VS true_author: ['allen gerald', 'oliver richard']
ISBN: 0273703692
true author: Weetman Pauline
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04651308059692383
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0011758804321289062
Discarted candidate: []
Possible candidate: [{'pauline weetman': 11}, {'pauline prof weetman': 3}]
lengthNecessary: 2.8000000000000003
[

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.23304414749145508
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.005460977554321289
Discarted candidate: [{'henry mackendrick': 4}, {'lachlan mackendrick paul': 3}, {'henry macken': 1}, {'kendrick mac paul': 1}, {'mackendrick paul': 1}]
Possible candidate: [{'mackendrick paul': 43}]
lengthNecessary: 10.600000000000001
[{'mackendrick paul': 43}]
acceptance_diff 0
['mackendrick paul'] VS true_author: ['mackendrick paul']
ISBN: 159184438X
true author: McLean Bethany,Nocera Joe
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.31818199157714844
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014758825302124023
Discarted candidate: [{'joe nocera': 2}, {'bethany mclean': 1}, {'bethany mclean': 1}, {'bethany mclean': 1}, {'bethany mclean': 1}, {'bet

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06278419494628906
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.002450227737426758
Discarted candidate: [{'davis theo': 2}, {'aa vv': 1}]
Possible candidate: [{'davis theo': 24}]
lengthNecessary: 5.4
[{'davis theo': 24}]
acceptance_diff 0
['davis theo'] VS true_author: ['davis theo']
ISBN: 0814405436
true author: Perkins Dennis N. T.,Holtman Margaret P.,Kessler Paul R.,McCarthy Catherine
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.5132410526275635
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.06176877021789551
Discarted candidate: [{'catherine mccarthy': 38}, {'dennis perkins': 6}, {'d ph': 2}, {'dennis nt perkins': 1}, {'dennis n t': 1}, {'holtman margaret p': 1}, {'catherine mccarthy': 1}]
Possible candidate: [{'dennis n perkins t'

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06807494163513184
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.002990245819091797
Discarted candidate: []
Possible candidate: [{'aa vv': 1, 'e nisbet richard': 1, 'e nisbett richard': 1, 'holyoak j keith': 1, 'paul r thaga': 1, 'paul r thagard': 1, 'paul thagard': 1}, {'etc h holland john': 2, 'h holland john': 3, 'holland': 1, 'holland john': 1}]
lengthNecessary: 2.8000000000000003
[{'aa vv': 1, 'e nisbet richard': 1, 'e nisbett richard': 1, 'holyoak j keith': 1, 'paul r thaga': 1, 'paul r thagard': 1, 'paul thagard': 1}, {'etc h holland john': 2, 'h holland john': 3, 'holland': 1, 'holland john': 1}]
acceptance_diff 0
['aa vv', 'e nisbet richard', 'holyoak j keith', 'paul r thaga', 'h holland john'] VS true_author: ['h holland john', 'holyoak j keith', 'e nisbett richard', 'paul r thagard']
ISBN: 0715624008
true author: Miller Edwin Ha

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04700112342834473
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0015590190887451172
Discarted candidate: []
Possible candidate: [{'j richard': 1, 'j richard stillman': 1, 'joseph richard stillman': 5}]
lengthNecessary: 1.4000000000000001
[{'j richard': 1, 'j richard stillman': 1, 'joseph richard stillman': 5}]
acceptance_diff 0
['joseph richard stillman'] VS true_author: ['j richard stillman']
ISBN: 0491018347
true author: Sillitoe Alan
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.07000517845153809
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014917850494384766
Discarted candidate: [{'aan sillitoe': 1}]
Possible candidate: [{'alan sillitoe': 19}]
lengthNecessary: 4.0
[{'alan sillitoe': 19}]
acceptance_diff 0
['alan sillitoe'] VS true

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.06751489639282227
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0025453567504882812
Discarted candidate: []
Possible candidate: [{'l robert thomas': 8}, {'robert thomas': 7}]
lengthNecessary: 3.0
[{'l robert thomas': 8}, {'robert thomas': 7}]
acceptance_diff 0
['l robert thomas'] VS true_author: ['l robert thomas']
ISBN: 1401301495
true author: Squyres Steve
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.08057713508605957
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 6
Blocking time is: 0.0041048526763916016
Discarted candidate: [{'squyres steven': 10}, {'squyres steve steven': 1}, {'squyres steven w': 1}, {'squyres steven': 1}, {'squyres steven': 1}]
Possible candidate: [{'squyres steve': 39}]
lengthNecessary: 10.600000000000001
[{'squyres steve': 39}]
accep

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.03978300094604492
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001672983169555664
Discarted candidate: [{'boris vallejo': 2}]
Possible candidate: [{'david gerrold': 12}]
lengthNecessary: 2.8000000000000003
[{'david gerrold': 12}]
acceptance_diff 0
['david gerrold'] VS true_author: ['david gerrold']
ISBN: 048621513X
true author: Grieve M.
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.04910993576049805
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0018067359924316406
Discarted candidate: []
Possible candidate: [{'grieve maud': 15}, {'grieve kr?uter m naturheilkunde': 1, 'grieve m': 4}]
lengthNecessary: 4.0
[{'grieve maud': 15}, {'grieve kr?uter m naturheilkunde': 1, 'grieve m': 4}]
acceptance_diff 0
['grieve maud'] VS true_author: ['grie

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.7488420009613037
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.16349387168884277
Discarted candidate: [{'nancy': 24}, {'goldstone': 13}, {'lawrence': 6}, {'goldstone': 2}, {'nancy': 2}, {'goldstone': 1}, {'goldstone': 1}, {'goldstone': 1}]
Possible candidate: [{'goldstone lawrence': 118}, {'bazelon goldstone nancy': 2, 'goldstone nancy': 90}]
lengthNecessary: 52.0
[{'goldstone lawrence': 118}, {'bazelon goldstone nancy': 2, 'goldstone nancy': 90}]
acceptance_diff 0
['goldstone lawrence', 'goldstone nancy'] VS true_author: ['goldstone lawrence', 'goldstone nancy']
ISBN: 0394280210
true author: Rice Anne
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.09248590469360352
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.003425121307373047
Discar

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3493330478668213
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01731705665588379
Discarted candidate: [{'barr rosanne': 1}, {'barr roseanne~roseanne': 1}, {'roseanne': 1}, {'roseanne': 1}, {'barr roseann': 1}, {'roseanne': 1}, {'roseanne': 1}, {'roseanne': 1}, {'roseanne': 1}]
Possible candidate: [{'barr roseanne': 90}]
lengthNecessary: 19.8
[{'barr roseanne': 90}]
acceptance_diff 0
['barr roseanne'] VS true_author: ['barr roseanne']
ISBN: 0060163747
true author: Williamson Marianne
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.1579761505126953
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.009283781051635742
Discarted candidate: [{'corelli marie': 1}, {'marianne williamson': 1}, {'marianne williamson': 1}, {'marianne williamson': 1}, {

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3501718044281006
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.02358412742614746
Discarted candidate: [{'margaret peden sayers': 16, 'margaret peden sayers tr': 1}, {'allende isabelle': 10}, {'margaret peden': 2}, {'sayers': 1}, {'allende i': 1}, {'grace hill l': 1}, {'allende isabella': 1}, {'prof sayers': 1}, {'from margaret peden sayers spanish the': 1}]
Possible candidate: [{'allende isabel': 103}]
lengthNecessary: 27.6
[{'allende isabel': 103}]
acceptance_diff 0
['allende isabel'] VS true_author: ['allende isabel']
ISBN: 0060195339
true author: Leavy Jane
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.17328095436096191
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.012308835983276367
Discarted candidate: [{'jane leay': 1}, {'j leav

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.21578717231750488
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.01111602783203125
Discarted candidate: [{'pattie smith': 1}, {'patti smith': 1}, {'patti smith': 1}, {'patti smith': 1}, {'patti smith': 1}, {'patti smith': 1}, {'patti smith': 1}, {'patti smith': 1}]
Possible candidate: [{'patti smith': 80}]
lengthNecessary: 17.6
[{'patti smith': 80}]
acceptance_diff 0
['patti smith'] VS true_author: ['patti smith']
ISBN: 0061124230
true author: Brinkley Douglas
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.14663076400756836
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.007873058319091797
Discarted candidate: [{'brewer kyf': 1}, {'brinkley douglas': 1}, {'brinkley douglas': 1}, {'brinkley douglas g': 1}, {'brinkley douglas g': 1}, {'brinkl

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.35463976860046387
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.05412101745605469
Discarted candidate: [{'ury william': 14}, {'fisher roger ury': 1}, {'l william': 1}, {'bruce patton': 1}, {'bruce patton': 1}, {'bruce patton': 1}, {'bruce patton': 1}]
Possible candidate: [{'fisher roger': 85}, {'l ury william': 66}, {'bruce patton': 44}]
lengthNecessary: 43.0
[{'fisher roger': 85}, {'l ury william': 66}, {'bruce patton': 44}]
acceptance_diff 0
['fisher roger', 'l ury william', 'bruce patton'] VS true_author: ['fisher roger', 'ury william']
ISBN: 014019469X
true author: Boldt G. Laurence
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.26944589614868164
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01977825164794922
Discarted candidate: [

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.35942792892456055
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.013671875
Discarted candidate: [{'antonio damasio r': 4}, {'antonio damasio r': 2}, {'antonio damasio': 1}, {'antonio damasio': 1}, {'antonio damasio': 1}, {'antonio damasio': 1}, {'antonio damasio': 1}, {'antonio damasio': 1}]
Possible candidate: [{'antonio damasio': 76}]
lengthNecessary: 17.6
[{'antonio damasio': 76}]
acceptance_diff 0
['antonio damasio'] VS true_author: ['antonio damasio']
ISBN: 0151008116
true author: Martel Yann
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.17905497550964355
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014342069625854492
Discarted candidate: [{'martel yan': 1}, {'martel yann': 1}, {'martel yann': 1}, {'martel yann': 1}, {'martel yann

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.28638601303100586
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.023661136627197266
Discarted candidate: [{'morrison toni': 2}, {'morrisson toni': 1}, {'prize pulitzer': 1}, {'morrison toni': 1}, {'morrison toni': 1}, {'morrison toni': 1}, {'morrison toni': 1}, {'morrison toni': 1}, {'morrison toni': 1}]
Possible candidate: [{'morrison toni': 128}]
lengthNecessary: 27.6
[{'morrison toni': 128}]
acceptance_diff 0
['morrison toni'] VS true_author: ['morrison toni']
ISBN: 0307266745
true author: Clinton Bill
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.4751160144805908
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.037841796875
Discarted candidate: [{'bill clinton president': 4}, {'bill clinton etc': 3}, {'bill clinton etc president': 2},

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.28437089920043945
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.018178939819335938
Discarted candidate: [{'**debut *s-p-e-c-t-a-c-u-l-a-r book julie powell': 1}, {'cooking julie powell': 1}, {'cooking julie powell': 1}, {'judie powell': 1}, {'child julia julie powell': 1}, {'julie powell': 1}, {'julie powell': 1}, {'julie powell': 1}, {'julie powell': 1}]
Possible candidate: [{'julie powell': 100}]
lengthNecessary: 21.8
[{'julie powell': 100}]
acceptance_diff 0
['julie powell'] VS true_author: ['julie powell']
ISBN: 0316110744
true author: Brown Marc
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.14473509788513184
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.006371021270751953
Discarted candidate: [{'pfeiffer': 1}, {'mcclelland': 1}, {

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.21779203414916992
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.016773700714111328
Discarted candidate: [{'eddings leigh': 1}, {'d eddings': 1}, {'david eddings': 1}, {'david eddings': 1}, {'david eddings': 1}, {'david eddings': 1}, {'david eddings': 1}, {'david eddings': 1}, {'david eddings': 1}]
Possible candidate: [{'david eddings': 108}]
lengthNecessary: 23.400000000000002
[{'david eddings': 108}]
acceptance_diff 0
['david eddings'] VS true_author: ['david eddings']
ISBN: 0345383273
true author: Eddings David
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.1470940113067627
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.007753849029541016
Discarted candidate: [{'eddings leigh': 1}, {'david eddings leigh': 1}, {'david eddings': 1}, {'da

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.21204710006713867
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01710796356201172
Discarted candidate: [{'fonda jane': 1}, {'robin vitetta': 1}, {'mccarthy mignon': 1}, {'fonda jane': 1}, {'fonda jane': 1}, {'fonda jane': 1}, {'fonda jane': 1}, {'fonda jane': 1}, {'fonda jane': 1}]
Possible candidate: [{'fonda jane': 109}]
lengthNecessary: 23.6
[{'fonda jane': 109}]
acceptance_diff 0
['fonda jane'] VS true_author: ['fonda jane']
ISBN: 0375756450
true author: Morris Edmund
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.14536714553833008
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.00799417495727539
Discarted candidate: [{'e morris': 1}, {'edmund morris': 1}, {'edmund morris': 1}, {'edmund morris': 1}, {'edmund morris': 1}, {'edmund morr

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 1.2206380367279053
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.4909210205078125
Discarted candidate: [{'bourbeau katherine': 4}, {'bourbeau katherine photographs': 2}, {'brewster todd': 1}, {'brewster todd': 1}, {'brewster todd': 1}, {'brewster todd': 1}, {'brewster todd': 1}, {'brewster todd': 1}]
Possible candidate: [{'jennings peter': 212}, {'brewster todd': 193}]
lengthNecessary: 83.4
[{'jennings peter': 212}, {'brewster todd': 193}]
acceptance_diff 0
['jennings peter', 'brewster todd'] VS true_author: ['jennings peter', 'brewster todd']
ISBN: 0385495315
true author: Singh Simon
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.15623235702514648
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.008244991302490234
Discarted candidate: [{'ed

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.979600191116333
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.20694184303283691
Discarted candidate: [{'arthur fizdale gold robert': 4}, {'arhtur gold': 2, 'gold': 1}, {'arthur e fizdale gold robert': 2}, {'fizdale': 2}, {'and arthur gold': 2}, {'y': 2}, {'fizdale r': 1}, {'gold': 1}]
Possible candidate: [{'arthur gold': 144}, {'fizdale robert': 140}]
lengthNecessary: 60.2
[{'arthur gold': 144}, {'fizdale robert': 140}]
acceptance_diff 0
['arthur gold', 'fizdale robert'] VS true_author: ['arthur gold', 'fizdale robert']
ISBN: 0394534689
true author: Hamilton Ian
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.1612231731414795
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.006825923919677734
Discarted candidate: [{'albert huyskens': 1}, {'

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3794722557067871
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01374506950378418
Discarted candidate: [{'charles fecher': 3, 'charles louis': 1}, {'hl mencken': 1, 'mencken': 1}, {'a': 2}, {'fecher': 2}, {'a charles fecher h l mencken': 1}, {'by': 1}, {'hichens robert smythe': 1}, {'henry mencken': 1}]
Possible candidate: [{'h l mencken': 60}, {'a charles fecher': 31}]
lengthNecessary: 21.0
[{'h l mencken': 60}, {'a charles fecher': 31}]
acceptance_diff 0
['h l mencken', 'a charles fecher'] VS true_author: ['a charles fecher h l mencken']
ISBN: 0394571711
true author: White Edmund
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.19006109237670898
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.010460138320922852
Discarted candidate: [{'edmu

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.30678296089172363
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.012682199478149414
Discarted candidate: [{'minot': 1}, {'susan': 1}, {'minot susan': 1}, {'minot susan': 1}, {'minot susan': 1}, {'minot susan': 1}, {'minot susan': 1}, {'minot susan': 1}, {'minot susan': 1}]
Possible candidate: [{'minot susan': 93}]
lengthNecessary: 20.400000000000002
[{'minot susan': 93}]
acceptance_diff 0
['minot susan'] VS true_author: ['minot susan']
ISBN: 0395861624
true author: Martin Briggs Jacqueline
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.43541669845581055
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.06753110885620117
Discarted candidate: [{'azarian mary': 19, 'mary': 2}, {'briggs martin': 2}, {'azarian jacqueline mary': 2}, {'b jacquelin

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.20801091194152832
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.006947755813598633
Discarted candidate: [{'arnol emily mccully': 1}, {'arnold emily mccully': 1}, {'arnold emily mccully': 1}, {'arnold emily mccully': 1}, {'arnold emily mccully': 1}, {'arnold emily mccully': 1}, {'arnold emily mccully': 1}]
Possible candidate: [{'arnold emily mccully': 67}]
lengthNecessary: 14.8
[{'arnold emily mccully': 67}]
acceptance_diff 0
['arnold emily mccully'] VS true_author: ['arnold emily mccully']
ISBN: 0399237232
true author: Jacques Brian
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.17684125900268555
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.0080108642578125
Discarted candidate: [{'brain jacques': 1}, {'david elliot': 1}, {'brian jacques

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 1.04622483253479
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.17980003356933594
Discarted candidate: [{'goodall jane w': 3}, {'berman philip': 2}, {'goodall jane': 2}, {'berman phillip': 2}, {'berman l phillip': 2}, {'goodall jane': 1}, {'goodall jane w': 1}, {'berman phillip': 1}]
Possible candidate: [{'goodall jane': 207}, {'berman phillip': 169}]
lengthNecessary: 78.0
[{'goodall jane': 207}, {'berman phillip': 169}]
acceptance_diff 0
['goodall jane', 'berman phillip'] VS true_author: ['goodall jane', 'berman phillip']
ISBN: 0446530867
true author: Kiyosaki T. Robert,Lechter L Sharon
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3217771053314209
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.02453899383544922
Discarted candidate: [{'k

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.28782057762145996
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014202117919921875
Discarted candidate: [{'bobby hutchinson': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}, {'nora roberts': 1}]
Possible candidate: [{'nora roberts': 96}]
lengthNecessary: 21.0
[{'nora roberts': 96}]
acceptance_diff 0
['nora roberts'] VS true_author: ['nora roberts']
ISBN: 0515147184
true author: Robb D. J.,Gaffney Patricia,Blayney Mary,Ryan Langan Ruth
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.5734143257141113
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.058383941650390625
Discarted candidate: [{'langan ruth ryan': 32}, {'j robb': 9}, {'c r ryan'

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.2747490406036377
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.017775774002075195
Discarted candidate: [{'maclaine shirley': 2}, {'fowles': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}, {'maclaine shirley': 1}]
Possible candidate: [{'maclaine shirley': 89}]
lengthNecessary: 19.8
[{'maclaine shirley': 89}]
acceptance_diff 0
['maclaine shirley'] VS true_author: ['maclaine shirley']
ISBN: 0553272535
true author: Wiesel Elie
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.25545191764831543
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01134800910949707
Discarted candidate: [{'rodway stella': 10}, {'francois mauriac': 9}, {'brown mcafe

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.2310960292816162
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.027724027633666992
Discarted candidate: [{'fulrimari jeffrey': 2, 'fulvimari jeffrey': 10}, {'fulrimari jeffrey madonna': 4, 'fulvimari jeffrey madonna': 1, 'louise madonna': 1}, {'ciccone madonna': 1, 'madonna ritchie': 3}, {'blue': 1, 'fulvimari': 1, 'hand signed': 1}, {'blue in ink madonna written': 1, 'color delightful fulvimari in jeffrey whimsically': 1}, {'designed dj masuda toshiya': 1}, {'rock singer the': 1}, {'endpapers guaranteed pink': 1}, {'her is married name': 1}]
Possible candidate: [{'by madonna': 1, 'fulvimari madonna': 1, 'madona': 1, 'madonna': 95}]
lengthNecessary: 25.8
[{'by madonna': 1, 'fulvimari madonna': 1, 'madona': 1, 'madonna': 95}]
acceptance_diff 0
['madonna'] VS true_author: ['madonna']
ISBN: 0670063568
true author: Keillor Garrison
embedding_

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.17164897918701172
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.009370088577270508
Discarted candidate: [{'boyle coraghessan t': 2}, {'coraghessan': 1}, {'c': 1}, {'boyle c t': 1}, {'boyd malcolm': 1}, {'boyle c t': 1}, {'boyle c t': 1}]
Possible candidate: [{'boyle c t': 62}, {'boyle coraghessan t': 18, 'boyle t': 1}]
lengthNecessary: 17.8
[{'boyle c t': 62}, {'boyle coraghessan t': 18, 'boyle t': 1}]
acceptance_diff 0
['boyle c t', 'boyle coraghessan t'] VS true_author: ['boyle c t']
ISBN: 0670891916
true author: Bradford Sarah
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.13704705238342285
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.009546995162963867
Discarted candidate: [{'bradford sarah': 2}, {'kennedy': 1}, {'jacqueline onassis

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.32038307189941406
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.05305194854736328
Discarted candidate: [{'collier': 2}, {'horowitz peter': 1}, {'collier horo': 1}, {'peter': 1}, {'h perkin': 1}, {'collier peter': 1}, {'collier peter': 1}, {'horowitz': 1}]
Possible candidate: [{'collier peter': 110}, {'david horowitz': 101}]
lengthNecessary: 44.0
[{'collier peter': 110}, {'david horowitz': 101}]
acceptance_diff 0
['collier peter', 'david horowitz'] VS true_author: ['collier peter', 'david horowitz']
ISBN: 0671622617
true author: Prager Dennis,Telushkin Joseph
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.16665077209472656
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01790595054626465
Discarted candidate: [{'joseph telushkin': 2}, {'jo

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.276292085647583
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014751911163330078
Discarted candidate: [{'f hamilton john kennedy nigel': 1}, {'hamiltion nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}, {'hamilton nigel': 1}]
Possible candidate: [{'hamilton nigel': 103}]
lengthNecessary: 22.400000000000002
[{'hamilton nigel': 103}]
acceptance_diff 0
['hamilton nigel'] VS true_author: ['hamilton nigel']
ISBN: 0679422714
true author: Chang Jung,Halliday Jon
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.18587899208068848
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.01995396614074707
Discarted candidate: [{'chang jung': 1}, {'halliday jon'

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.47860074043273926
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.1451249122619629
Discarted candidate: [{'anne by rice': 3}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}, {'anne rice': 1}]
Possible candidate: [{'anne rice': 354}]
lengthNecessary: 73.0
[{'anne rice': 354}]
acceptance_diff 0
['anne rice'] VS true_author: ['anne rice']
ISBN: 0679492658
true author: Ford Richard
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.15945792198181152
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.014013051986694336
Discarted candidate: [{'ford richard': 2}, {'ford g m': 1}, {'ford richard': 1}, {'ford richard': 1}, {'ford richard': 1}, {'ford richard': 1}, {'ford richard

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.24318408966064453
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.0351560115814209
Discarted candidate: [{'contributor-jeff coplon': 1}, {'coplon sinatra~jeff tina': 1}, {'sinatra tina w': 1}, {'sinatra tina': 1}, {'sinatra tina': 1}, {'sinatra tina': 1}, {'sinatra tina': 1}, {'sinatra tina': 1}]
Possible candidate: [{'sinatra tina': 114}, {'coplon jeff': 56}]
lengthNecessary: 35.6
[{'sinatra tina': 114}, {'coplon jeff': 56}]
acceptance_diff 0
['sinatra tina', 'coplon jeff'] VS true_author: ['sinatra tina', 'coplon jeff']
ISBN: 068487170X
true author: Nesheim L. John
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.10337400436401367
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.005525827407836914
Discarted candidate: [{'john nesheim': 1}, {

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.2462756633758545
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 9
Blocking time is: 0.01242971420288086
Discarted candidate: [{'hugo victor': 1}, {'cramer jim': 1}, {'cramer j james': 1}, {'cramer j james': 1}, {'cramer j james': 1}, {'cramer j james': 1}, {'cramer j james': 1}, {'cramer j james': 1}]
Possible candidate: [{'cramer j james': 73}]
lengthNecessary: 16.2
[{'cramer j james': 73}]
acceptance_diff 0
['cramer j james'] VS true_author: ['cramer j james']
ISBN: 0743226755
true author: Loehr Jim,Schwartz Tony
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.44104599952697754
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.022349119186401367
Discarted candidate: [{'e james loehr': 3}, {'free press': 1}, {'loehr': 1}, {'schwartz tony': 1}, {'schwartz tony': 1

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.26139378547668457
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.037194013595581055
Discarted candidate: [{'b ering timothy': 9}, {'basil ering timothy': 2}, {'dicamillo kate': 2}, {'basil ering timothy': 1}, {'basil ering timothy': 1}, {'basil ering timothy': 1}, {'b ering timothy': 1}, {'basil ering timothy': 1}, {'basil ering timothy': 1}]
Possible candidate: [{'dicamillo kate': 164}]
lengthNecessary: 36.6
[{'dicamillo kate': 164}]
acceptance_diff 0
['dicamillo kate'] VS true_author: ['dicamillo kate']
ISBN: 0765312948
true author: Herbert Brian,Anderson J. Kevin
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.23520207405090332
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.026800870895385742
Discarted candidate: [{'anderson kevin': 2}

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.3319849967956543
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.031960248947143555
Discarted candidate: [{'ferguson william': 6}, {'a coe d foreword michael': 1, 'coe d michael': 3}, {'a': 1, 'by': 1}, {'collaboration ferguson in m william': 2}, {'coe d john michael royce': 1, 'collab john q royce': 1}, {'ferguson': 1}, {'collaboration in': 1}, {'ferguson m w willima': 1}]
Possible candidate: [{'fergusen m william': 1, 'ferguson m william': 78}, {'john q royce': 73, 'john royce': 1}]
lengthNecessary: 34.4
[{'fergusen m william': 1, 'ferguson m william': 78}, {'john q royce': 73, 'john royce': 1}]
acceptance_diff 0
['ferguson m william', 'john q royce'] VS true_author: ['ferguson m william', 'john q royce']
ISBN: 0809230410
true author: Wooden John,Jamison Steve
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.31226181983947754
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.033486127853393555
Discarted candidate: [{'a david noebel': 6}, {'david noebel': 5}, {'david noebel': 3}, {'lahaye time': 1}, {'lahaye~david noebel tim': 1}, {'dave minasian': 1}, {'f lahaye tim': 1}, {'a david noebel': 1}]
Possible candidate: [{'lahaye tim': 66}, {'david noebel': 52}]
lengthNecessary: 27.400000000000002
[{'lahaye tim': 66}, {'david noebel': 52}]
acceptance_diff 0
['lahaye tim', 'david noebel'] VS true_author: ['lahaye tim', 'david noebel']
ISBN: 0865475873
true author: McDonough William,Braungart Michael
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.7748420238494873
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.13173866271972656
Discarted candidate: [{'b

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 1.0043320655822754
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.014347076416015625
Discarted candidate: [{'e thomas woods': 11}, {'jr': 3}, {'thomas woods': 1}, {'aa vv': 1}, {'e jr thomas woods': 1}, {'e jr thomas woods': 1}]
Possible candidate: [{'e jr thomas woods': 50}]
lengthNecessary: 13.600000000000001
[{'e jr thomas woods': 50}]
acceptance_diff 0
['e jr thomas woods'] VS true_author: ['e jr thomas woods']
ISBN: 0898154286
true author: Miller Mark,Harrisson John
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.4453392028808594
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 10
Blocking time is: 0.019871950149536133
Discarted candidate: [{'ellen frank lois': 6}, {'charles mark miller': 3}, {'harrison john': 2}, {'ellen frank photographer-lois': 1}, {'harriss

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.21897220611572266
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.027601957321166992
Discarted candidate: [{'c graeme simsion': 4}, {'graeme simsion': 1}, {'graeme simsion': 1}, {'graeme simsion': 1}, {'graeme simsion': 1}, {'graeme simsion': 1}, {'graeme simsion': 1}]
Possible candidate: [{'graeme simsion': 69}]
lengthNecessary: 15.8
[{'graeme simsion': 69}]
acceptance_diff 0
['graeme simsion'] VS true_author: ['graeme simsion']
ISBN: 155661456X
true author: Oke Janette
embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.2105870246887207
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 7
Blocking time is: 0.005685091018676758
Discarted candidate: [{'janette oke': 2}, {'janet oke': 2}, {'janette oke': 1}, {'janette oke': 1}, {'janette oke': 1}, {'janette oke': 1}]
Poss

embedding_type: inferSent
attributes_list: ['author']
model_type: bilstm
char_level: False
Embedding time is: 0.29040098190307617
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 8
Blocking time is: 0.011083126068115234
Discarted candidate: [{'lewis r': 7}, {'lewis richard': 3}, {'collectif': 1}, {'green': 1}, {'in shrink still wrap': 1}, {'a': 1}, {'lewis richard w': 1}]
Possible candidate: [{'lewis richard w': 64}]
lengthNecessary: 15.8
[{'lewis richard w': 64}]
acceptance_diff 0
['lewis richard w'] VS true_author: ['lewis richard w']
Total time is: 169.44639897346497


# Old cases

In [13]:
# Score the authors chosen by the fusion run (finalAuthors) against the ground truth (trueAuthors).
# Per the recorded output, the call prints precision, recall and f1Score, and the cell
# displays the returned (precision, recall, f1Score) tuple.
# NOTE(review): the meaning of the third argument (1) is not visible in this notebook — confirm in getEvaluation.
getEvaluation(finalAuthors, trueAuthors, 1)

precision is 0.8682295877122069
recall is 0.9597855227882037
f1Score is 0.9117147707979626


(0.8682295877122069, 0.9597855227882037, 0.9117147707979626)

In [8]:
# Same evaluation call as the other "Old cases" cell: prints precision, recall and f1Score
# and displays the returned (precision, recall, f1Score) tuple.
# NOTE(review): the recorded scores differ from the previous cell, so finalAuthors presumably
# came from a different fusion run; the configuration that produced it is not recorded here.
getEvaluation(finalAuthors, trueAuthors, 1)

precision is 0.9014567266495287
recall is 0.9401251117068812
f1Score is 0.9203849518810149


(0.9014567266495287, 0.9401251117068812, 0.9203849518810149)

# With multiple winners allowed and similar candidates removed

In [7]:
# acceptance_diff = 3
# Evaluate the fusion result; the recorded output prints TP/FP/FN counts followed by
# precision, recall and f1Score, and the cell displays the (precision, recall, f1Score) tuple.
# NOTE(review): this cell does not set acceptance_diff itself. The label above is only valid if
# key_values['acceptance_diff'] was 3 when the fusion run that produced finalAuthors was executed.
getEvaluation(finalAuthors, trueAuthors, 1)

TP: 1058, FP: 230, FN: 61
precision is 0.8214285714285714
recall is 0.9454870420017873
f1Score is 0.8791026173660157


(0.8214285714285714, 0.9454870420017873, 0.8791026173660157)

In [13]:
# acceptance_diff = 2
# Evaluate the fusion result; the recorded output prints TP/FP/FN counts followed by
# precision, recall and f1Score, and the cell displays the (precision, recall, f1Score) tuple.
# NOTE(review): this cell does not set acceptance_diff itself; it relies on the value used by the
# preceding fusion run. The recorded counts are identical to the acceptance_diff = 3 and = 1 cells —
# confirm that finalAuthors was actually regenerated with this setting.
getEvaluation(finalAuthors, trueAuthors, 1)

TP: 1058, FP: 230, FN: 61
precision is 0.8214285714285714
recall is 0.9454870420017873
f1Score is 0.8791026173660157


(0.8214285714285714, 0.9454870420017873, 0.8791026173660157)

In [18]:
# acceptance_diff = 1
# Evaluate the fusion result; the recorded output prints TP/FP/FN counts followed by
# precision, recall and f1Score, and the cell displays the (precision, recall, f1Score) tuple.
# NOTE(review): this cell does not set acceptance_diff itself. The label above is only valid if
# key_values['acceptance_diff'] was 1 when the fusion run that produced finalAuthors was executed.
getEvaluation(finalAuthors, trueAuthors, 1)

TP: 1058, FP: 230, FN: 61
precision is 0.8214285714285714
recall is 0.9454870420017873
f1Score is 0.8791026173660157


(0.8214285714285714, 0.9454870420017873, 0.8791026173660157)

In [6]:
# acceptance_diff = 0
# Evaluate the fusion result; the recorded output prints TP/FP/FN counts followed by
# precision, recall and f1Score, and the cell displays the (precision, recall, f1Score) tuple.
# NOTE(review): acceptance_diff is not set in this cell; it is taken from the key_values used by the fusion run.
getEvaluation(finalAuthors, trueAuthors, 1) # scores differ from the other acceptance_diff runs, possibly because several candidates tie on the same vote count,
# or possibly because more than one block contains the same author

TP: 1028, FP: 140, FN: 91
precision is 0.8801369863013698
recall is 0.9186773905272565
f1Score is 0.8989943156974203


(0.8801369863013698, 0.9186773905272565, 0.8989943156974203)

# With block weight added (`block_weight`)

In [13]:
# block_weight = 2
# Evaluate the fusion result; the recorded output prints TP/FP/FN counts followed by
# precision, recall and f1Score, and the cell displays the (precision, recall, f1Score) tuple.
# NOTE(review): block_weight is not set in this cell; it comes from the key_values used by the fusion run.
# The recorded output is identical to the acceptance_diff = 0 cell — confirm block_weight had an effect.
getEvaluation(finalAuthors, trueAuthors, 1)

TP: 1028, FP: 140, FN: 91
precision is 0.8801369863013698
recall is 0.9186773905272565
f1Score is 0.8989943156974203


(0.8801369863013698, 0.9186773905272565, 0.8989943156974203)