In [2]:
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
# Make the project's `fusion` directory importable from this notebook.
# The path is relative, so it resolves against the notebook's working directory.
# (os.path.join with a single argument returns it unchanged, so it is dropped.)
module_path = os.path.abspath('../../fusion')
# Guard the append so re-running this cell does not pile up duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

from preprocessing_datasets.preprocessing_utilities import ValueUtils
from preprocessing_datasets import load_dataset
from embedding_algorithms import sentence_embedding, set_embedding_model
from dimensionality_reduction_algorithms import dimension_reduction_algorithms
from cluster_algorithms import cluster_algorithm

from helper import launchWithoutReductionFusionMovie
from plot_tools import plotChart, plotCluster
from evaluation import *

In [3]:
# Single configuration mapping handed to load_dataset, set_embedding_model and
# launchWithoutReductionFusionMovie in the cells below.
key_values = dict(
    # Embedding model settings (echoed back by set_embedding_model when it runs).
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # Which attribute is embedded, with which embedding, on which dataset.
    attributes_list=['newDirector'],
    embedding_type='inferSent',
    dataset='clean_movie',
    # Blocking step: clustering method and the rate reported next to NUM_CLUSTERS.
    cluster_method='hierarchy',
    num_clusters_rate=0.1,
    # Fusion tuning knobs — presumably consumed when picking winners per block;
    # TODO confirm in the helper module. The 'thresold' spelling is kept on
    # purpose: the key is looked up by name outside this notebook.
    block_length_thresold=0.35,
    acceptance_diff=2,
    block_weight=2,
)

In [4]:
# Load the dataset selected through key_values (the recorded run reports
# 'clean_movie') and unpack the four values the loader returns.
# NOTE(review): judging by the names, these are the dataset label, the records
# grouped per movie id, the movie ids, and the ground-truth directors — TODO confirm.
dataset_name, table_group_by_movie_id, list_movie_id, true_directors = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.7737767696380615


In [5]:
# Set up the embedding model from key_values. This is the slow step of the
# notebook: the recorded run reports roughly 183 s of setup time, so avoid
# re-running it unless the model settings change.
set_embedding_model(key_values)

Vocab size : 2196017
model_version: 2
rnn_dim: 1024
model_type: bilstm
char_level: False
Setup time is: 182.7321581840515


In [6]:
# Run the fusion helper over the loaded movies and report the elapsed time.
# The helper prints a per-movie trace (embedding, blocking, candidates) and
# returns the predicted and the true director lists, compared further below.
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed value cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(
    table_group_by_movie_id, list_movie_id, true_directors, key_values
)
print("Total time is: {0}".format(time.perf_counter() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.3788938522338867
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.1254100799560547
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 2
['chester e hal', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.037998199462890625
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011756420135498047
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.0499999999999998
[{'al

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.13192415237426758
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0014069080352783203
Discarted candidate: []
Possible candidate: [{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
lengthNecessary: 1.4
[{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
acceptance_diff 2
['anant mahadevan', 'ananth mahadevan narayan'] VS true_author: ['ananth mahadevan narayan']
movie_id: 14372
true director: Joel Coen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.10285210609436035
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0018069744110107422
Discarted candidate: []
Possible candidate: [{'coen joel': 13}, {'coen ethan': 7}]
lengthNecessary: 6.0
[{'coen joel': 13}, {'coen ethan': 7}]
acceptance_diff 2
['coen joel', 'coen ethan'] VS t

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.026520967483520508
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0008461475372314453
Discarted candidate: []
Possible candidate: [{'dudow slatan': 2, 'groschopp richard': 1, 'kurt maetzig': 1}]
lengthNecessary: 1.4
[{'dudow slatan': 2, 'groschopp richard': 1, 'kurt maetzig': 1}]
acceptance_diff 2
['dudow slatan', 'groschopp richard', 'kurt maetzig'] VS true_author: ['dudow slatan', 'kurt maetzig']
movie_id: 2672
true director: Christian Krones;Olaf S. Muller;Roland Schrotthofer;Florian Weigensamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.056252241134643555
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010461807250976562
Discarted candidate: []
Possible candidate: [{'christian krones': 1, 'florian weigensamer': 2, 'muller

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.043901920318603516
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0017271041870117188
Discarted candidate: [{'david n twohy': 1}, {'david twohy': 1}]
Possible candidate: [{'david twohy': 23}]
lengthNecessary: 7.75
[{'david twohy': 23}]
acceptance_diff 2
['david twohy'] VS true_author: ['david twohy']
movie_id: 80410
true director: Paul Ragsdale
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05280900001525879
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010848045349121094
Discarted candidate: []
Possible candidate: [{'alba angelica de': 3, 'paul ragsdale': 6}]
lengthNecessary: 3.15
[{'alba angelica de': 3, 'paul ragsdale': 6}]
acceptance_diff 2
['paul ragsdale'] VS true_author: ['paul ragsdale']
movie_id: 71705
true director: A

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03742718696594238
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012218952178955078
Discarted candidate: []
Possible candidate: [{'capra frank': 5, 'irvin willat': 3}]
lengthNecessary: 2.8
[{'capra frank': 5, 'irvin willat': 3}]
acceptance_diff 2
['capra frank', 'irvin willat'] VS true_author: ['capra frank']
movie_id: 109103
true director: Dick Zondag;Ralph Zondag;Phil Nibbelink;Simon Wells
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08632302284240723
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.0036759376525878906
Discarted candidate: [{'nibbelink phil': 13}, {'dick zondag': 12}, {'simon wells': 12}, {'ralph zondag': 12}, {'nibbelink phil': 1}]
Possible candidate: []
lengthNecessary: 15.5
[]
acceptance_diff 2
[] VS true_a

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.051235198974609375
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0015110969543457031
Discarted candidate: []
Possible candidate: [{'herrmann zschoche': 8}, {'dressel eleonore': 4}]
lengthNecessary: 3.1999999999999993
[{'herrmann zschoche': 8}, {'dressel eleonore': 4}]
acceptance_diff 2
['herrmann zschoche', 'dressel eleonore'] VS true_author: ['herrmann zschoche']
movie_id: 78918
true director: Marc Gracie;Tim Ferguson
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05316495895385742
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001348733901977539
Discarted candidate: []
Possible candidate: [{'gracie marc': 6}, {'ferguson tim': 5}]
lengthNecessary: 2.8499999999999996
[{'gracie marc': 6}, {'ferguson tim': 5}]
acceptance_diff 2
['

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04699587821960449
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001271963119506836
Discarted candidate: []
Possible candidate: [{'bobby roe': 2, 'jeff larson': 1}]
lengthNecessary: 1.0499999999999998
[{'bobby roe': 2, 'jeff larson': 1}]
acceptance_diff 2
['bobby roe', 'jeff larson'] VS true_author: ['bobby roe']
movie_id: 40997
true director: Cinzia Th Torrini
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04318809509277344
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010597705841064453
Discarted candidate: []
Possible candidate: [{'cinzia th torrini': 1, 'cinzia torrini': 4}]
lengthNecessary: 1.75
[{'cinzia th torrini': 1, 'cinzia torrini': 4}]
acceptance_diff 2
['cinzia torrini'] VS true_author: ['cinzia th torrini']
movie_

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.061018943786621094
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0012428760528564453
Discarted candidate: []
Possible candidate: [{'burton tim': 8}, {'johnson michael': 6, 'johnson mike': 1}]
lengthNecessary: 4.25
[{'burton tim': 8}, {'johnson michael': 6, 'johnson mike': 1}]
acceptance_diff 2
['burton tim', 'johnson michael'] VS true_author: ['johnson mike', 'burton tim']
movie_id: 96586
true director: Michael Preece
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.041188955307006836
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014009475708007812
Discarted candidate: [{'eric norris': 4}]
Possible candidate: [{'michael preece': 12}]
lengthNecessary: 4.6
[{'michael preece': 12}]
acceptance_diff 2
['michael preece'] VS true_autho

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04127693176269531
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0007610321044921875
Discarted candidate: []
Possible candidate: [{'createspace': 1, 'jonze spike': 2}]
lengthNecessary: 1.0499999999999998
[{'createspace': 1, 'jonze spike': 2}]
acceptance_diff 2
['createspace', 'jonze spike'] VS true_author: ['jonze spike']
movie_id: 9550
true director: Adam Rodgers
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04198598861694336
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011131763458251953
Discarted candidate: []
Possible candidate: [{'adam rodgers': 4, 'adam rogers': 3}]
lengthNecessary: 2.4499999999999997
[{'adam rodgers': 4, 'adam rogers': 3}]
acceptance_diff 2
['adam rodgers'] VS true_author: ['adam rodgers']
movie_id: 11

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.054594993591308594
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011000633239746094
Discarted candidate: []
Possible candidate: [{'antal nimr?d': 1, 'antal nimrod': 7}]
lengthNecessary: 2.8
[{'antal nimr?d': 1, 'antal nimrod': 7}]
acceptance_diff 2
['antal nimrod'] VS true_author: ['antal nimrod']
movie_id: 107875
true director: Gary Michael Schultz
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04452800750732422
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0009877681732177734
Discarted candidate: []
Possible candidate: [{'gary michael schultz': 4, 'gary schultz': 2}]
lengthNecessary: 2.0999999999999996
[{'gary michael schultz': 4, 'gary schultz': 2}]
acceptance_diff 2
['gary michael schultz', 'gary schultz'] VS true_author: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.037467241287231445
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0015974044799804688
Discarted candidate: []
Possible candidate: [{'a dupont e': 2, 'andre dupont ewald': 1}]
lengthNecessary: 1.0499999999999998
[{'a dupont e': 2, 'andre dupont ewald': 1}]
acceptance_diff 2
['a dupont e', 'andre dupont ewald'] VS true_author: ['a dupont e']
movie_id: 2982
true director: Silvio Caiozzi;Pablo Perelman
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03668713569641113
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010669231414794922
Discarted candidate: []
Possible candidate: [{'caiozzi silvio': 2, 'pablo perelman': 1}]
lengthNecessary: 1.0499999999999998
[{'caiozzi silvio': 2, 'pablo perelman': 1}]
acceptance_diff 2
['caiozzi silvio'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.049241065979003906
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0007669925689697266
Discarted candidate: []
Possible candidate: [{'filho kleber mendon?a': 1, 'filho kleber mendonca': 3, 'timo zhalnin': 2}]
lengthNecessary: 2.0999999999999996
[{'filho kleber mendon?a': 1, 'filho kleber mendonca': 3, 'timo zhalnin': 2}]
acceptance_diff 2
['filho kleber mendon?a', 'timo zhalnin'] VS true_author: ['filho kleber mendonca']
movie_id: 71289
true director: F. Javier Gutierrez
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06540489196777344
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0011761188507080078
Discarted candidate: [{'kwan stanley': 1}]
Possible candidate: [{'f gutierrez javier': 12, 'gutierrez javier': 1}]
lengthNecessary: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06518888473510742
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0016388893127441406
Discarted candidate: [{'emami paul': 4}]
Possible candidate: [{'roar uthaug': 11}]
lengthNecessary: 4.25
[{'roar uthaug': 11}]
acceptance_diff 2
['roar uthaug'] VS true_author: ['roar uthaug']
movie_id: 22057
true director: David Leland
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07822299003601074
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001074075698852539
Discarted candidate: []
Possible candidate: [{'david leland': 2, 'gary graver': 2}]
lengthNecessary: 1.4
[{'david leland': 2, 'gary graver': 2}]
acceptance_diff 2
['david leland', 'gary graver'] VS true_author: ['david leland']
movie_id: 81646
true director: Francois Ozon
embedding_typ

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04405713081359863
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0013849735260009766
Discarted candidate: []
Possible candidate: [{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
lengthNecessary: 2.4499999999999997
[{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
acceptance_diff 2
['aleksandar petrovi'] VS true_author: ['aleksandar petrovic']
movie_id: 5258
true director: Keven Undergaro
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.025721073150634766
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0013229846954345703
Discarted candidate: []
Possible candidate: [{'keven undergaro': 1, 'kevin undergaro': 1}]
lengthNecessary: 0.7
[{'keven undergaro': 1, 'kevin undergaro': 1}]
acceptance_diff 2
['keven undergaro'] VS true_a

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.037507057189941406
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001092672348022461
Discarted candidate: []
Possible candidate: [{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
lengthNecessary: 3.15
[{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
acceptance_diff 2
['vaclav vorli?ek'] VS true_author: ['vaclav vorlicek']
movie_id: 17664
true director: Roy Del Ruth
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03169369697570801
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0005559921264648438
Discarted candidate: []
Possible candidate: [{'del roy ruth': 1, 'liapis peter': 1}]
lengthNecessary: 0.7
[{'del roy ruth': 1, 'liapis peter': 1}]
acceptance_diff 2
['del roy ruth', 'liapis peter'] VS true_author: ['del roy ruth']
movie_id: 218

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05833697319030762
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0016179084777832031
Discarted candidate: []
Possible candidate: [{'allen francesco william': 2, 'allen will': 6}]
lengthNecessary: 2.8
[{'allen francesco william': 2, 'allen will': 6}]
acceptance_diff 2
['allen will'] VS true_author: ['allen will']
movie_id: 88768
true director: Rob Cohen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04947996139526367
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001917123794555664
Discarted candidate: [{'city mel': 3}]
Possible candidate: [{'cohen rob': 14}]
lengthNecessary: 4.949999999999999
[{'cohen rob': 14}]
acceptance_diff 2
['cohen rob'] VS true_author: ['cohen rob']
movie_id: 37013
true director: Luke Cresswell;Steve McNic

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.057327985763549805
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001409769058227539
Discarted candidate: []
Possible candidate: [{'bigas luna': 2, 'carles porta': 3, 'lara sergi': 3}]
lengthNecessary: 2.8
[{'bigas luna': 2, 'carles porta': 3, 'lara sergi': 3}]
acceptance_diff 2
['bigas luna', 'carles porta', 'lara sergi'] VS true_author: ['carles porta']
movie_id: 26682
true director: Mark Stouffer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04244589805603027
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0016510486602783203
Discarted candidate: []
Possible candidate: [{'benward luke': 3, 'mark stouffer': 4}]
lengthNecessary: 2.4499999999999997
[{'benward luke': 3, 'mark stouffer': 4}]
acceptance_diff 2
['benward luke', 'mark

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05499696731567383
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0013759136199951172
Discarted candidate: [{'flackett jennifer': 4}]
Possible candidate: [{'levin mark': 16}]
lengthNecessary: 6.0
[{'levin mark': 16}]
acceptance_diff 2
['levin mark'] VS true_author: ['levin mark']
movie_id: 6670
true director: Paolo Taviani;Vittorio Taviani
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0294189453125
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012018680572509766
Discarted candidate: []
Possible candidate: [{'paolo taviani': 5, 'taviani vittorio': 3}]
lengthNecessary: 2.8
[{'paolo taviani': 5, 'taviani vittorio': 3}]
acceptance_diff 2
['paolo taviani', 'taviani vittorio'] VS true_author: ['paolo taviani', 'taviani vittorio']
mov

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05712008476257324
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0016231536865234375
Discarted candidate: []
Possible candidate: [{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 'william witney': 2}]
lengthNecessary: 3.15
[{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 'william witney': 2}]
acceptance_diff 2
['english john'] VS true_author: ['william witney', 'english john']
movie_id: 22732
true director: Don Taylor
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06360316276550293
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0017118453979492188
Discarted candidate: [{'hodges mike': 1}]
Possible candidate: [{'don taylor': 18}, {'hodges mike': 9}]
lengthNecessary: 8.799999999999999
[{'don taylor': 18

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.030827999114990234
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011529922485351562
Discarted candidate: []
Possible candidate: [{'mart?nez ra?l': 1, 'martinez raul': 1}]
lengthNecessary: 0.7
[{'mart?nez ra?l': 1, 'martinez raul': 1}]
acceptance_diff 2
['mart?nez ra?l'] VS true_author: ['martinez raul']
movie_id: 43746
true director: Andrew Lau;Alan Mak
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05551505088806152
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0015938282012939453
Discarted candidate: [{'ralph rieckermann': 5}]
Possible candidate: [{'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5}, {'alan mak': 8}]
lengthNecessary: 7.75
[{'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5}, {'alan m

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.023814916610717773
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0008690357208251953
Discarted candidate: []
Possible candidate: [{'vasili zhuravlyov': 1, 'vasily zhuravlyov': 1}]
lengthNecessary: 0.7
[{'vasili zhuravlyov': 1, 'vasily zhuravlyov': 1}]
acceptance_diff 2
['vasili zhuravlyov'] VS true_author: ['vasili zhuravlyov']
movie_id: 67893
true director: Pete Chatmon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.047682762145996094
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010991096496582031
Discarted candidate: []
Possible candidate: [{'chatmon pete': 3, 'rick zahn': 2}]
lengthNecessary: 1.75
[{'chatmon pete': 3, 'rick zahn': 2}]
acceptance_diff 2
['chatmon pete', 'rick zahn'] VS true_author: ['chatmon pete']
movie_id

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.056304931640625
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0013959407806396484
Discarted candidate: []
Possible candidate: [{'lindberg mats': 9}, {'astrand carl': 6}]
lengthNecessary: 4.25
[{'lindberg mats': 9}, {'astrand carl': 6}]
acceptance_diff 2
['lindberg mats', 'astrand carl'] VS true_author: ['astrand carl', 'lindberg mats']
movie_id: 40393
true director: Ewing Scott
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03920602798461914
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001377105712890625
Discarted candidate: []
Possible candidate: [{'ewing scott': 8}, {'george sherman': 3}]
lengthNecessary: 2.8499999999999996
[{'ewing scott': 8}, {'george sherman': 3}]
acceptance_diff 2
['ewing scott', 'george sherman'] VS tru

Blocking time is: 0.0011899471282958984
Discarted candidate: []
Possible candidate: [{'kitamura ry?hei': 2, 'kitamura ryuhei': 5}]
lengthNecessary: 2.4499999999999997
[{'kitamura ry?hei': 2, 'kitamura ryuhei': 5}]
acceptance_diff 2
['kitamura ryuhei'] VS true_author: ['kitamura ryuhei']
movie_id: 76373
true director: Alice O'Fredericks;Jon Iversen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03275179862976074
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.00096893310546875
Discarted candidate: []
Possible candidate: [{"alice o'fredericks": 3, 'iversen jon': 2}]
lengthNecessary: 1.75
[{"alice o'fredericks": 3, 'iversen jon': 2}]
acceptance_diff 2
["alice o'fredericks", 'iversen jon'] VS true_author: ['iversen jon', "alice o'fredericks"]
movie_id: 38696
true director: Sean Carr
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embeddin

# ALL Cases

In [9]:
# Score the fused directors against the ground truth. The recorded result is a
# (precision, recall, f1Score) tuple.
# NOTE(review): the meaning of the third argument (1) is not visible here — TODO confirm.
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.4731404958677686
recall is 0.5725
f1Score is 0.5180995475113123


(0.4731404958677686, 0.5725, 0.5180995475113123)

In [13]:
# Experiment label from the original note: 0.3.
# NOTE(review): presumably block_length_thresold = 0.3 — TODO confirm.
getEvaluation(finalDirectors, trueDirectors, 1)  # 0.3

precision is 0.49230769230769234
recall is 0.56
f1Score is 0.5239766081871345


(0.49230769230769234, 0.56, 0.5239766081871345)

In [17]:
# Experiment label from the original note: 0.35.
# NOTE(review): presumably block_length_thresold = 0.35, the value used in
# key_values — TODO confirm.
getEvaluation(finalDirectors, trueDirectors, 1)  # 0.35

precision is 0.5022727272727273
recall is 0.5525
f1Score is 0.5261904761904763


(0.5022727272727273, 0.5525, 0.5261904761904763)

In [21]:
# Re-evaluation after fixing how multi-director values are split.
getEvaluation(finalDirectors, trueDirectors, 1)  # Fix directors split

precision is 0.9272727272727272
recall is 0.6891891891891891
f1Score is 0.7906976744186047


(0.9272727272727272, 0.6891891891891891, 0.7906976744186047)

# Add multiple winners from a block

In [6]:
# Evaluation after allowing several winners per block.
# NOTE(review): the recorded recall for this run is above 1.0, which is not a
# valid recall. The matching presumably counts some true directors more than
# once (e.g. near-duplicate name variants) — TODO confirm in the evaluation module.
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.8589041095890411
recall is 1.0591216216216217
f1Score is 0.9485627836611195


(0.8589041095890411, 1.0591216216216217, 0.9485627836611195)

In [7]:
# Inspect the predictions: the recorded output is a list of name lists, some
# empty. This displays the whole list, which is long.
finalDirectors

[['chester e hal', 'cyril frankel', 'hamer robert'],
 ['alexandre o philippe', 'alexandre philippe'],
 ['bird brad'],
 ['sherman vincent'],
 ['clarke fraser heston'],
 [],
 ['godfrey ho', 'kim si-hyun'],
 ['dave fleischer'],
 ["d'urville martin"],
 ['norton virgien', 'igor kovalyov'],
 ['bezucha thomas'],
 ['galinsky michael', 'hawley suki'],
 ['chatrichalerm yukoi', 'chatrichalerm yukol'],
 ['harvey parry', 'richard talmadge'],
 ['hara keiichi'],
 ['allegret yves', 'e portas rafael'],
 ['cohen larry', 'tannen william'],
 ['anant mahadevan', 'ananth mahadevan narayan'],
 ['coen joel', 'coen ethan'],
 ['dziga groupe vertov', 'godard jean-luc'],
 ['adam small', 'peter stuart'],
 ['caradog james', 'caradog james w'],
 ['lord phil'],
 ['al khaled nassiry soliman', 'antonio augugliaro', 'del gabriele grande'],
 ['albert magnoli', 'prince'],
 ['andrzej bartkowiak'],
 ['hugues martin', 'martin sandra'],
 ['chomsky j marvin'],
 ["o'haver tommy"],
 ['dewey richard', 'marrinan timothy'],
 ['a d 

In [8]:
# Inspect the ground truth in the same list-of-name-lists layout, for a
# side-by-side comparison with finalDirectors.
trueDirectors

[['hamer robert'],
 ['alexandre o philippe'],
 ['bird brad'],
 ['sherman vincent'],
 ['c fraser heston'],
 ['john landis', 'dante joe', 'george miller', 'spielberg steven'],
 ['godfrey ho'],
 ['dave fleischer'],
 ["d'urville martin"],
 ['norton virgien', 'igor kovalyov'],
 ['bezucha thomas'],
 ['hawley suki', 'galinsky michael'],
 ['chatrichalerm yukol'],
 ['richard talmadge', 'harvey parry'],
 ['hara keiichi'],
 ['allegret yves'],
 ['cohen larry', 'tannen william'],
 ['ananth mahadevan narayan'],
 ['coen joel'],
 ['godard jean-luc'],
 ['adam small', 'peter stuart'],
 ['caradog james w'],
 ['christopher miller', 'lord phil'],
 ['al khaled nassiry soliman', 'antonio augugliaro', 'del gabriele grande'],
 ['prince'],
 ['andrzej bartkowiak'],
 ['martin sandra', 'hugues martin'],
 ['chomsky j marvin'],
 ["o'haver tommy"],
 ['marrinan timothy', 'dewey richard'],
 ['chris hegedus', 'a d pennebaker'],
 ['paolo taviani', 'taviani vittorio'],
 ['johar karan'],
 ['anlo sepulveda', 'collins paul']

In [10]:
# Raw counts behind the metrics — presumably (TP, FP, FN), going by the helper's name.
# NOTE(review): the recorded third value is negative (-35), which is impossible
# for a count and is consistent with the recall above 1.0 — TODO investigate.
# This also relies on the underscore-prefixed helper being in scope; a wildcard
# import only brings such names in when the module lists them in __all__.
_getTPFPFN(finalDirectors, trueDirectors)

(627, 103, -35)

# Add multiple winners and remove similar ones

In [17]:
# Experiment: acceptance_diff = 3
# NOTE(review): the recorded metrics are identical to the acceptance_diff 2 run
# — confirm the setting was actually applied before this evaluation.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 560, FP: 102, FN: 32
precision is 0.8459214501510574
recall is 0.9459459459459459
f1Score is 0.8931419457735247


(0.8459214501510574, 0.9459459459459459, 0.8931419457735247)

In [6]:
# Experiment: acceptance_diff = 2 (the value kept in key_values)
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.8459214501510574
recall is 0.9459459459459459
f1Score is 0.8931419457735247


(0.8459214501510574, 0.9459459459459459, 0.8931419457735247)

In [9]:
# Experiment: acceptance_diff = 1
# In the recorded runs, precision rises and recall falls relative to acceptance_diff 2.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 503, FP: 73, FN: 89
precision is 0.8732638888888888
recall is 0.8496621621621622
f1Score is 0.8613013698630135


(0.8732638888888888, 0.8496621621621622, 0.8613013698630135)

In [13]:
# Experiment: acceptance_diff = 0
# Highest recorded precision of the acceptance_diff sweep, lowest recorded recall.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 425, FP: 39, FN: 167
precision is 0.915948275862069
recall is 0.7179054054054054
f1Score is 0.8049242424242425


(0.915948275862069, 0.7179054054054054, 0.8049242424242425)

# Add block_weight

In [6]:
# Experiment: block_weight = 5
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 549, FP: 92, FN: 43
precision is 0.8564742589703588
recall is 0.9273648648648649
f1Score is 0.8905109489051094


(0.8564742589703588, 0.9273648648648649, 0.8905109489051094)

In [10]:
# Experiment: block_weight = 3
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 551, FP: 94, FN: 41
precision is 0.8542635658914729
recall is 0.9307432432432432
f1Score is 0.8908649959579628


(0.8542635658914729, 0.9307432432432432, 0.8908649959579628)

In [14]:
# Experiment: block_weight = 2 (the value kept in key_values)
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 557, FP: 105, FN: 35
precision is 0.8413897280966768
recall is 0.9408783783783784
f1Score is 0.8883572567783095


(0.8413897280966768, 0.9408783783783784, 0.8883572567783095)

In [7]:
# Experiment: block_weight = 2, plus the "fix only higher than 15" change.
# NOTE(review): what "higher than 15" refers to is not visible here, and the
# recorded metrics are identical to the plain block_weight 2 run — confirm the
# change had any effect on this dataset.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 557, FP: 105, FN: 35
precision is 0.8413897280966768
recall is 0.9408783783783784
f1Score is 0.8883572567783095


(0.8413897280966768, 0.9408783783783784, 0.8883572567783095)

In [10]:
# Second configuration: the same settings as the first key_values cell except
# for num_clusters_rate, which is halved from 0.1 to 0.05 for this run.
key_values = dict(
    # Embedding model settings.
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # Which attribute is embedded, with which embedding, on which dataset.
    attributes_list=['newDirector'],
    embedding_type='inferSent',
    dataset='clean_movie',
    # Blocking step: clustering method and cluster rate (the value under test).
    cluster_method='hierarchy',
    num_clusters_rate=0.05,
    # Fusion tuning knobs — presumably consumed when picking winners per block;
    # TODO confirm. The 'thresold' spelling is kept on purpose: the key is
    # looked up by name outside this notebook.
    block_length_thresold=0.35,
    acceptance_diff=2,
    block_weight=2,
)

In [11]:
# Reload the dataset with the updated key_values so that the run below starts
# from freshly loaded inputs.
dataset_name, table_group_by_movie_id, list_movie_id, true_directors = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.29227328300476074


In [12]:
# Re-run the fusion helper with the current key_values and report the elapsed
# time. It returns the predicted and the true director lists.
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed value cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(
    table_group_by_movie_id, list_movie_id, true_directors, key_values
)
print("Total time is: {0}".format(time.perf_counter() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08913087844848633
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.005808830261230469
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 2
['chester e hal', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09842419624328613
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0013167858123779297
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.0499999999999998
[

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05925130844116211
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.002017974853515625
Discarted candidate: []
Possible candidate: [{'coen ethan': 7, 'coen joel': 13}]
lengthNecessary: 7.0
[{'coen ethan': 7, 'coen joel': 13}]
acceptance_diff 2
['coen joel'] VS true_author: ['coen joel']
movie_id: 2550
true director: Jean-Luc Godard
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04901003837585449
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0013661384582519531
Discarted candidate: []
Possible candidate: [{'dziga groupe vertov': 1, 'godard jean-luc': 3}]
lengthNecessary: 1.4
[{'dziga groupe vertov': 1, 'godard jean-luc': 3}]
acceptance_diff 2
['dziga groupe vertov', 'godard jean-luc'] VS true_author: ['godard jean-luc']
movie_id: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03664994239807129
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001432180404663086
Discarted candidate: []
Possible candidate: [{'michael tiddes': 11, 'mike tiddes': 2}]
lengthNecessary: 4.55
[{'michael tiddes': 11, 'mike tiddes': 2}]
acceptance_diff 2
['michael tiddes'] VS true_author: ['michael tiddes']
movie_id: 15123
true director: Jon Bulette;Stone Roberts
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0416569709777832
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0014438629150390625
Discarted candidate: []
Possible candidate: [{'bulette jon': 3, 'f roberts stone': 1, 'roberts stone': 1}]
lengthNecessary: 1.75
[{'bulette jon': 3, 'f roberts stone': 1, 'roberts stone': 1}]
acceptance_diff 2
['bulette jon', 'f roberts ston

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05183982849121094
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010409355163574219
Discarted candidate: []
Possible candidate: [{'k king': 6, 'kevin king': 3}]
lengthNecessary: 3.15
[{'k king': 6, 'kevin king': 3}]
acceptance_diff 2
['k king'] VS true_author: ['kevin king']
movie_id: 57809
true director: John Lamb
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.032645225524902344
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001020193099975586
Discarted candidate: []
Possible candidate: [{'hill jack': 2, 'john lamb': 1}]
lengthNecessary: 1.0499999999999998
[{'hill jack': 2, 'john lamb': 1}]
acceptance_diff 2
['hill jack', 'john lamb'] VS true_author: ['john lamb']
movie_id: 97278
true director: Norman Panama;Melvin Frank
embe

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.10343074798583984
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 3
Blocking time is: 0.003942251205444336
Discarted candidate: [{'nibbelink phil': 14}, {'simon wells': 12}]
Possible candidate: [{'dick zondag': 12, 'ralph zondag': 12}]
lengthNecessary: 16.5
[{'dick zondag': 12, 'ralph zondag': 12}]
acceptance_diff 2
['dick zondag', 'ralph zondag'] VS true_author: ['dick zondag', 'simon wells', 'ralph zondag', 'nibbelink phil']
movie_id: 76115
true director: Adam Friedman;Iain Kennedy
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06166982650756836
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0016710758209228516
Discarted candidate: []
Possible candidate: [{'adam friedman': 7, 'iain kennedy': 5}]
lengthNecessary: 4.199999999999999
[{'adam friedman':

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05391883850097656
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010271072387695312
Discarted candidate: []
Possible candidate: [{'arnfred morten': 1, 'lars trier von': 2}]
lengthNecessary: 1.0499999999999998
[{'arnfred morten': 1, 'lars trier von': 2}]
acceptance_diff 2
['arnfred morten', 'lars trier von'] VS true_author: ['lars trier von']
movie_id: 98793
true director: Mona Fastvold
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06294512748718262
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0015637874603271484
Discarted candidate: []
Possible candidate: [{'fastvold mona': 6, 'lerche mona': 2}]
lengthNecessary: 2.8
[{'fastvold mona': 6, 'lerche mona': 2}]
acceptance_diff 2
['fastvold mona'] VS true_author: ['fastvold mona'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0397801399230957
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.002101898193359375
Discarted candidate: []
Possible candidate: [{'castellani renato': 3, 'comencini luigi': 1, 'franco rossi': 1}]
lengthNecessary: 1.75
[{'castellani renato': 3, 'comencini luigi': 1, 'franco rossi': 1}]
acceptance_diff 2
['castellani renato', 'comencini luigi', 'franco rossi'] VS true_author: ['franco rossi', 'comencini luigi', 'castellani renato']
movie_id: 70161
true director: Stanley Kwan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03484511375427246
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0018868446350097656
Discarted candidate: []
Possible candidate: [{'chow thomas': 2, 'kwan stanley': 3}]
lengthNecessary: 1.75
[{'chow thomas': 2, 'k

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03395891189575195
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001149892807006836
Discarted candidate: []
Possible candidate: [{'arons rich': 3, 'audu paden': 1, 'dave marshall': 1}]
lengthNecessary: 1.75
[{'arons rich': 3, 'audu paden': 1, 'dave marshall': 1}]
acceptance_diff 2
['arons rich', 'audu paden', 'dave marshall'] VS true_author: ['arons rich', 'dave marshall', 'audu paden']
movie_id: 66521
true director: E. A. Dupont
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04121994972229004
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010161399841308594
Discarted candidate: []
Possible candidate: [{'a dupont e': 1, 'andr dupont ewald': 2, 'andre dupont ewald': 2}]
lengthNecessary: 1.75
[{'a dupont e': 1, 'andr dupont ewal

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.037277936935424805
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0014529228210449219
Discarted candidate: []
Possible candidate: [{'daniel ingsten': 1, 'jonas svensson': 2}]
lengthNecessary: 1.0499999999999998
[{'daniel ingsten': 1, 'jonas svensson': 2}]
acceptance_diff 2
['daniel ingsten', 'jonas svensson'] VS true_author: ['jonas svensson', 'daniel ingsten']
movie_id: 77030
true director: Gordon Douglas
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03125309944152832
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0012621879577636719
Discarted candidate: []
Possible candidate: [{'douglas gordon': 3, 'richard wilson': 1}]
lengthNecessary: 1.4
[{'douglas gordon': 3, 'richard wilson': 1}]
acceptance_diff 2
['douglas gordon', 'ri

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.15130090713500977
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.005629062652587891
Discarted candidate: []
Possible candidate: [{'forster lewis r': 3, 'foster lewis r': 8, 'mickey rooney': 1}]
lengthNecessary: 4.199999999999999
[{'forster lewis r': 3, 'foster lewis r': 8, 'mickey rooney': 1}]
acceptance_diff 2
['foster lewis r'] VS true_author: ['foster lewis r']
movie_id: 59613
true director: Basil Dearden;Will Hay
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06308102607727051
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0018999576568603516
Discarted candidate: []
Possible candidate: [{'basil dearden': 2, 'hay will': 3}]
lengthNecessary: 1.75
[{'basil dearden': 2, 'hay will': 3}]
acceptance_diff 2
['basil dearden', 'hay w

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05431699752807617
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0011718273162841797
Discarted candidate: []
Possible candidate: [{'garc rodrigo': 1, 'garcia rodrigo': 4}]
lengthNecessary: 1.75
[{'garc rodrigo': 1, 'garcia rodrigo': 4}]
acceptance_diff 2
['garcia rodrigo'] VS true_author: ['garcia rodrigo']
movie_id: 35679
true director: Albert Maysles;David Maysles;Charlotte Zwerin
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03257489204406738
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0009319782257080078
Discarted candidate: []
Possible candidate: [{'albert maysles': 5, 'charlotte zwerin': 3, 'david maysles': 5}]
lengthNecessary: 4.55
[{'albert maysles': 5, 'charlotte zwerin': 3, 'david maysles': 5}]
acceptance_diff 2
[

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06590414047241211
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0013289451599121094
Discarted candidate: []
Possible candidate: [{'f gutierrez javier': 12, 'gutierrez javier': 1, 'kwan stanley': 1}]
lengthNecessary: 4.8999999999999995
[{'f gutierrez javier': 12, 'gutierrez javier': 1, 'kwan stanley': 1}]
acceptance_diff 2
['f gutierrez javier'] VS true_author: ['f gutierrez javier']
movie_id: 88250
true director: Montgomery Tully
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.032411813735961914
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010530948638916016
Discarted candidate: []
Possible candidate: [{'david paltenghi': 1, 'montgomery tully': 2}]
lengthNecessary: 1.0499999999999998
[{'david paltenghi': 1, 'montgomery tully

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03692007064819336
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0012106895446777344
Discarted candidate: []
Possible candidate: [{'clark johnson': 1, 'john stockwell': 11}]
lengthNecessary: 4.199999999999999
[{'clark johnson': 1, 'john stockwell': 11}]
acceptance_diff 2
['john stockwell'] VS true_author: ['john stockwell']
movie_id: 29977
true director: Roar Uthaug
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.039765119552612305
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001180887222290039
Discarted candidate: []
Possible candidate: [{'emami paul': 4, 'roar uthaug': 11}]
lengthNecessary: 5.25
[{'emami paul': 4, 'roar uthaug': 11}]
acceptance_diff 2
['roar uthaug'] VS true_author: ['roar uthaug']
movie_id: 22057
true direc

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04058098793029785
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010039806365966797
Discarted candidate: []
Possible candidate: [{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
lengthNecessary: 2.4499999999999997
[{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
acceptance_diff 2
['aleksandar petrovi'] VS true_author: ['aleksandar petrovic']
movie_id: 5258
true director: Keven Undergaro
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03428769111633301
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0010900497436523438
Discarted candidate: []
Possible candidate: [{'keven undergaro': 1, 'kevin undergaro': 1}]
lengthNecessary: 0.7
[{'keven undergaro': 1, 'kevin undergaro': 1}]
acceptance_diff 2
['keven undergaro'] VS true_

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04054903984069824
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0018737316131591797
Discarted candidate: []
Possible candidate: [{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
lengthNecessary: 2.4499999999999997
[{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
acceptance_diff 2
['andrew lau', 'andrew lau wai-keung', 'lau wai-keung'] VS true_author: ['andrew lau']
movie_id: 51986
true director: Vaclav Vorlicek
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03808093070983887
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0009951591491699219
Discarted candidate: []
Possible candidate: [{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
lengthNecessary: 3.15
[{'vaclav vorli?ek': 7, 'vaclav vorlicek'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.055058956146240234
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0015490055084228516
Discarted candidate: []
Possible candidate: [{'jarkko laine t': 2, 'kates naama': 4}]
lengthNecessary: 2.0999999999999996
[{'jarkko laine t': 2, 'kates naama': 4}]
acceptance_diff 2
['jarkko laine t', 'kates naama'] VS true_author: ['kates naama']
movie_id: 40458
true director: Will Allen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04758715629577637
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0012118816375732422
Discarted candidate: []
Possible candidate: [{'allen francesco william': 2, 'allen will': 6}]
lengthNecessary: 2.8
[{'allen francesco william': 2, 'allen will': 6}]
acceptance_diff 2
['allen will'] VS true_author: ['allen will']


embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04361891746520996
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0016109943389892578
Discarted candidate: []
Possible candidate: [{'franco james': 5, 'pamela romanowsky': 4}]
lengthNecessary: 3.15
[{'franco james': 5, 'pamela romanowsky': 4}]
acceptance_diff 2
['franco james', 'pamela romanowsky'] VS true_author: ['pamela romanowsky', 'franco james']
movie_id: 61669
true director: John Erick Dowdle
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03882598876953125
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001730203628540039
Discarted candidate: []
Possible candidate: [{'dowdle erick john': 11, 'dowdle john': 1}]
lengthNecessary: 4.199999999999999
[{'dowdle erick john': 11, 'dowdle john': 1}]
acceptance_diff 2
['dowdle erick 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04750704765319824
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 2
Blocking time is: 0.0020380020141601562
Discarted candidate: []
Possible candidate: [{'guillaume ivernel': 11}, {'arthur qwak': 10}]
lengthNecessary: 6.35
[{'guillaume ivernel': 11}, {'arthur qwak': 10}]
acceptance_diff 2
['guillaume ivernel', 'arthur qwak'] VS true_author: ['guillaume ivernel', 'arthur qwak']
movie_id: 51887
true director: Mark Levin
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.045703887939453125
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0016520023345947266
Discarted candidate: []
Possible candidate: [{'flackett jennifer': 4, 'levin mark': 16}]
lengthNecessary: 7.0
[{'flackett jennifer': 4, 'levin mark': 16}]
acceptance_diff 2
['levin mark'] VS true_author: [

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.11394476890563965
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 2
Blocking time is: 0.0035676956176757812
Discarted candidate: []
Possible candidate: [{'don taylor': 18}, {'hodges mike': 10}]
lengthNecessary: 8.799999999999999
[{'don taylor': 18}, {'hodges mike': 10}]
acceptance_diff 2
['don taylor', 'hodges mike'] VS true_author: ['don taylor']
movie_id: 88880
true director: Alain Delon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.039281368255615234
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001461029052734375
Discarted candidate: []
Possible candidate: [{'alain delon': 3, 'davis robin': 2}]
lengthNecessary: 1.75
[{'alain delon': 3, 'davis robin': 2}]
acceptance_diff 2
['alain delon', 'davis robin'] VS true_author: ['alain delon']
movie_id: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04502105712890625
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0012810230255126953
Discarted candidate: []
Possible candidate: [{'alexandre bustillo': 5, 'julien maury': 4}]
lengthNecessary: 3.15
[{'alexandre bustillo': 5, 'julien maury': 4}]
acceptance_diff 2
['alexandre bustillo', 'julien maury'] VS true_author: ['julien maury', 'alexandre bustillo']
movie_id: 95021
true director: Lewis D. Collins;Vernon Keays
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.043869972229003906
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0012140274047851562
Discarted candidate: []
Possible candidate: [{'collins d lewis': 2, 'keays vernon': 1}]
lengthNecessary: 1.0499999999999998
[{'collins d lewis': 2, 'keays vernon': 1}]
acceptance_diff 2


embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.022563934326171875
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001455068588256836
Discarted candidate: []
Possible candidate: [{'giorgos lanthimos': 1, 'lanthimos yorgos': 4}]
lengthNecessary: 1.75
[{'giorgos lanthimos': 1, 'lanthimos yorgos': 4}]
acceptance_diff 2
['lanthimos yorgos'] VS true_author: ['lanthimos yorgos']
movie_id: 18454
true director: Jose Estrada
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.059719085693359375
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.001851797103881836
Discarted candidate: []
Possible candidate: [{'antonio estrada jose': 4, 'estrada jose': 8}]
lengthNecessary: 4.199999999999999
[{'antonio estrada jose': 4, 'estrada jose': 8}]
acceptance_diff 2
['estrada jose'] VS true_author: ['estr

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08705615997314453
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0016748905181884766
Discarted candidate: []
Possible candidate: [{'aranda vicente': 3, 'gomez raul': 2}]
lengthNecessary: 1.75
[{'aranda vicente': 3, 'gomez raul': 2}]
acceptance_diff 2
['aranda vicente', 'gomez raul'] VS true_author: ['aranda vicente']
movie_id: 64814
true director: R. Balki
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08261609077453613
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0037550926208496094
Discarted candidate: []
Possible candidate: [{'balki r': 4, 'ricky sandhu': 1}]
lengthNecessary: 1.75
[{'balki r': 4, 'ricky sandhu': 1}]
acceptance_diff 2
['balki r'] VS true_author: ['balki r']
movie_id: 87155
true director: Jiri Svoboda
embedd

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.1097407341003418
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.0015287399291992188
Discarted candidate: []
Possible candidate: [{'coppola ford francis': 19, 'coppola francis': 1}]
lengthNecessary: 7.0
[{'coppola ford francis': 19, 'coppola francis': 1}]
acceptance_diff 2
['coppola ford francis'] VS true_author: ['coppola ford francis']
movie_id: 105251
true director: Nic Hill;Scott Glosserman
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05119800567626953
cluster_method: hierarchy
num_clusters_rate: 0.05
NUM_CLUSTERS 1
Blocking time is: 0.002067089080810547
Discarted candidate: []
Possible candidate: [{'glosserman scott': 2, 'hill nic': 1}]
lengthNecessary: 1.0499999999999998
[{'glosserman scott': 2, 'hill nic': 1}]
acceptance_diff 2
['glosserman scott', 'hill ni

In [13]:
# Score the fused directors against the ground truth. getEvaluation prints
# TP/FP/FN, precision, recall and F1, and returns the tuple
# (precision, recall, f1Score), which is displayed as the cell result.
# NOTE(review): the meaning of the third positional argument (1) is not visible
# in this notebook -- confirm against evaluation.getEvaluation.
getEvaluation(
    finalDirectors,
    trueDirectors,
    1,
)

TP: 545, FP: 92, FN: 47
precision is 0.8555729984301413
recall is 0.9206081081081081
f1Score is 0.886899918633035


(0.8555729984301413, 0.9206081081081081, 0.886899918633035)

In [18]:
# Fusion-run configuration. Compared with the preceding configuration cell,
# this one lowers num_clusters_rate to 0.01 and raises block_weight to 5;
# every other entry is unchanged.
key_values = {
    # Sentence-encoder settings (model_type / char_level are echoed in the run log).
    'model_type': 'bilstm',
    'char_level': False,
    'model_version': 2,
    'rnn_dim': 1024,
    'verbose': 1,
    # Attribute(s) to embed, embedding back-end, and dataset to load.
    'attributes_list': ['newDirector'],
    'embedding_type': 'inferSent',
    'dataset': 'clean_movie',
    # Clustering / blocking settings.
    'cluster_method': 'hierarchy',
    'num_clusters_rate': 0.01,
    # The key is spelled 'thresold' (sic); it is runtime text, presumably read
    # by the helper code under this exact name, so it must not be "corrected" here.
    'block_length_thresold': 0.35,
    'acceptance_diff': 2,
    'block_weight': 5,
}

In [19]:
# Reload the dataset for the configuration currently held in `key_values`.
# load_dataset returns four values; it also logs the dataset name and the
# loading time (see the cell output). Presumably the dataset is chosen via
# key_values['dataset'] -- confirm against preprocessing_datasets.load_dataset.
(
    dataset_name,
    table_group_by_movie_id,
    list_movie_id,
    true_directors,
) = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.15001392364501953


In [20]:
# Run the fusion pipeline over the loaded movies with the current
# `key_values` and report the elapsed wall-clock time. The helper prints
# per-movie diagnostics (candidate blocks, lengthNecessary, predicted vs. true
# directors) and returns two values, bound here to finalDirectors and
# trueDirectors.
# NOTE(review): time.perf_counter() is the more robust clock for interval
# timing; time.time() is kept here to match the other timing cells.
start_time = time.time()
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(
    table_group_by_movie_id,
    list_movie_id,
    true_directors,
    key_values,
)
print("Total time is: {0}".format(time.time() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06668806076049805
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0020279884338378906
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 2
['chester e hal', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07591795921325684
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.004094123840332031
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.0499999999999998
[

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05695986747741699
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001851797103881836
Discarted candidate: []
Possible candidate: [{'cohen larry': 6, 'tannen william': 4}]
lengthNecessary: 3.5
[{'cohen larry': 6, 'tannen william': 4}]
acceptance_diff 2
['cohen larry', 'tannen william'] VS true_author: ['cohen larry', 'tannen william']
movie_id: 5820
true director: Ananth Narayan Mahadevan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04429292678833008
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014889240264892578
Discarted candidate: []
Possible candidate: [{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
lengthNecessary: 1.4
[{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
acceptance_diff 2
['anant mahadevan', 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06251168251037598
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011680126190185547
Discarted candidate: []
Possible candidate: [{'anlo sepulveda': 3, 'collins paul': 2}]
lengthNecessary: 1.75
[{'anlo sepulveda': 3, 'collins paul': 2}]
acceptance_diff 2
['anlo sepulveda', 'collins paul'] VS true_author: ['anlo sepulveda', 'collins paul']
movie_id: 31082
true director: Slatan Dudow;Kurt Maetzig
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06076407432556152
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014710426330566406
Discarted candidate: []
Possible candidate: [{'dudow slatan': 2, 'groschopp richard': 1, 'kurt maetzig': 1}]
lengthNecessary: 1.4
[{'dudow slatan': 2, 'groschopp richard': 1, 'kurt maetzig': 1}]
acceptance_d

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05232834815979004
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017650127410888672
Discarted candidate: []
Possible candidate: [{'david n twohy': 1, 'david twohy': 24}]
lengthNecessary: 8.75
[{'david n twohy': 1, 'david twohy': 24}]
acceptance_diff 2
['david twohy'] VS true_author: ['david twohy']
movie_id: 80410
true director: Paul Ragsdale
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04377603530883789
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010690689086914062
Discarted candidate: []
Possible candidate: [{'alba angelica de': 3, 'paul ragsdale': 6}]
lengthNecessary: 3.15
[{'alba angelica de': 3, 'paul ragsdale': 6}]
acceptance_diff 2
['paul ragsdale'] VS true_author: ['paul ragsdale']
movie_id: 71705
true director: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04735898971557617
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001432180404663086
Discarted candidate: []
Possible candidate: [{'barreto ramos saul': 1, 'gonzalez manuel mauricio': 2}]
lengthNecessary: 1.0499999999999998
[{'barreto ramos saul': 1, 'gonzalez manuel mauricio': 2}]
acceptance_diff 2
['barreto ramos saul', 'gonzalez manuel mauricio'] VS true_author: ['gonzalez manuel mauricio', 'barreto ramos saul']
movie_id: 57896
true director: Andrew V. McLaglen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.036238908767700195
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001377105712890625
Discarted candidate: []
Possible candidate: [{'andrew mclaglen': 2, 'andrew mclaglen v': 6}]
lengthNecessary: 2.8
[{'andrew mclaglen': 2,

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09455585479736328
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.004189968109130859
Discarted candidate: []
Possible candidate: [{'john stevenson': 25, 'mark osborne': 31}]
lengthNecessary: 19.599999999999998
[{'john stevenson': 25, 'mark osborne': 31}]
acceptance_diff 2
['mark osborne'] VS true_author: ['john stevenson', 'mark osborne']
movie_id: 5451
true director: Claudio Marques;Marilia Hughes
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03838205337524414
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012090206146240234
Discarted candidate: []
Possible candidate: [{'claudio marques': 3, 'guerreiro hughes marilia': 2, 'hughes marilia': 1}]
lengthNecessary: 2.0999999999999996
[{'claudio marques': 3, 'guerreiro hughes maril

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04787421226501465
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016093254089355469
Discarted candidate: []
Possible candidate: [{'emmanuel sapolsky': 2, 'spielberg steven': 8}]
lengthNecessary: 3.5
[{'emmanuel sapolsky': 2, 'spielberg steven': 8}]
acceptance_diff 2
['spielberg steven'] VS true_author: ['spielberg steven']
movie_id: 36774
true director: Davis Guggenheim
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04414081573486328
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012428760528564453
Discarted candidate: []
Possible candidate: [{'colin nutley': 2, 'davis guggenheim': 9}]
lengthNecessary: 3.8499999999999996
[{'colin nutley': 2, 'davis guggenheim': 9}]
acceptance_diff 2
['davis guggenheim'] VS true_author: ['davi

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08074712753295898
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016109943389892578
Discarted candidate: []
Possible candidate: [{'beverly sebastian': 8, 'ferd sebastian': 6}]
lengthNecessary: 4.8999999999999995
[{'beverly sebastian': 8, 'ferd sebastian': 6}]
acceptance_diff 2
['beverly sebastian', 'ferd sebastian'] VS true_author: ['ferd sebastian', 'beverly sebastian']
movie_id: 74621
true director: Taggart Siegel;Jon Betz
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06843709945678711
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011551380157470703
Discarted candidate: []
Possible candidate: [{'betz jon': 2, 'siegel taggart': 1}]
lengthNecessary: 1.0499999999999998
[{'betz jon': 2, 'siegel taggart': 1}]
acceptance_diff 2

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05665087699890137
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017580986022949219
Discarted candidate: []
Possible candidate: [{'denis hennelly': 6, 'denis hennelly henry': 6}]
lengthNecessary: 4.199999999999999
[{'denis hennelly': 6, 'denis hennelly henry': 6}]
acceptance_diff 2
['denis hennelly'] VS true_author: ['denis hennelly henry']
movie_id: 62628
true director: Henry Hathaway;Howard Hawks;Henry King;Henry Koster;Jean Negulesco
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07376408576965332
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0026750564575195312
Discarted candidate: []
Possible candidate: [{'hathaway henry': 7, 'hawks howard': 5, 'henry king': 5, 'henry koster': 5, 'jean negulesco': 5}]
lengthNecessary: 9.

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04954218864440918
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008330345153808594
Discarted candidate: []
Possible candidate: [{'emanuel goldman peter': 1, 'emmanuel goldman peter': 1}]
lengthNecessary: 0.7
[{'emanuel goldman peter': 1, 'emmanuel goldman peter': 1}]
acceptance_diff 2
['emanuel goldman peter'] VS true_author: ['emanuel goldman peter']
movie_id: 56553
true director: J. Lee Thompson
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05152297019958496
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016171932220458984
Discarted candidate: []
Possible candidate: [{'j lee thompson': 9, 'jack lee thompson': 2}]
lengthNecessary: 3.8499999999999996
[{'j lee thompson': 9, 'jack lee thompson': 2}]
acceptance_diff 2
['j lee 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06547403335571289
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017170906066894531
Discarted candidate: []
Possible candidate: [{'basil dearden': 2, 'hay will': 3}]
lengthNecessary: 1.75
[{'basil dearden': 2, 'hay will': 3}]
acceptance_diff 2
['basil dearden', 'hay will'] VS true_author: ['basil dearden', 'hay will']
movie_id: 77007
true director: Jim Isaac
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06984305381774902
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015459060668945312
Discarted candidate: []
Possible candidate: [{'isaac james': 7, 'isaac jim': 2}]
lengthNecessary: 3.15
[{'isaac james': 7, 'isaac jim': 2}]
acceptance_diff 2
['isaac james'] VS true_author: ['isaac jim']
movie_id: 95834
true director: Joseph M

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04397392272949219
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013909339904785156
Discarted candidate: []
Possible candidate: [{'jennifer prediger': 6, 'jess weixler': 4}]
lengthNecessary: 3.5
[{'jennifer prediger': 6, 'jess weixler': 4}]
acceptance_diff 2
['jennifer prediger', 'jess weixler'] VS true_author: ['jennifer prediger', 'jess weixler']
movie_id: 28227
true director: Paul Toogood;Lloyd Stanton
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05120992660522461
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0041408538818359375
Discarted candidate: []
Possible candidate: [{'lloyd stanton': 2, 'paul toogood': 3}]
lengthNecessary: 1.75
[{'lloyd stanton': 2, 'paul toogood': 3}]
acceptance_diff 2
['lloyd stanton', 'paul too

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06827187538146973
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002064228057861328
Discarted candidate: []
Possible candidate: [{'filho kleber mendon?a': 1, 'filho kleber mendonca': 3, 'timo zhalnin': 2}]
lengthNecessary: 2.0999999999999996
[{'filho kleber mendon?a': 1, 'filho kleber mendonca': 3, 'timo zhalnin': 2}]
acceptance_diff 2
['filho kleber mendon?a', 'timo zhalnin'] VS true_author: ['filho kleber mendonca']
movie_id: 71289
true director: F. Javier Gutierrez
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09617996215820312
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016121864318847656
Discarted candidate: []
Possible candidate: [{'f gutierrez javier': 12, 'gutierrez javier': 1, 'kwan stanley': 1}]
lengthNecessary: 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06941795349121094
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0005362033843994141
Discarted candidate: []
Possible candidate: [{'david leland': 2, 'gary graver': 2}]
lengthNecessary: 1.4
[{'david leland': 2, 'gary graver': 2}]
acceptance_diff 2
['david leland', 'gary graver'] VS true_author: ['david leland']
movie_id: 81646
true director: Francois Ozon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06289792060852051
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012810230255126953
Discarted candidate: []
Possible candidate: [{'fran?ois ozon': 1, 'francois ozon': 13}]
lengthNecessary: 4.8999999999999995
[{'fran?ois ozon': 1, 'francois ozon': 13}]
acceptance_diff 2
['francois ozon'] VS true_author: ['francois ozon']
movie_id:

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0334019660949707
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009028911590576172
Discarted candidate: []
Possible candidate: [{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
lengthNecessary: 2.4499999999999997
[{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
acceptance_diff 2
['aleksandar petrovi'] VS true_author: ['aleksandar petrovic']
movie_id: 5258
true director: Keven Undergaro
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.030726909637451172
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008919239044189453
Discarted candidate: []
Possible candidate: [{'keven undergaro': 1, 'kevin undergaro': 1}]
lengthNecessary: 0.7
[{'keven undergaro': 1, 'kevin undergaro': 1}]
acceptance_diff 2
['keven undergaro'] VS true_

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06631922721862793
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014278888702392578
Discarted candidate: []
Possible candidate: [{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
lengthNecessary: 2.4499999999999997
[{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
acceptance_diff 2
['andrew lau', 'andrew lau wai-keung', 'lau wai-keung'] VS true_author: ['andrew lau']
movie_id: 51986
true director: Vaclav Vorlicek
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04972410202026367
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016350746154785156
Discarted candidate: []
Possible candidate: [{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
lengthNecessary: 3.15
[{'vaclav vorli?ek': 7, 'vaclav vorlicek'

Blocking time is: 0.001110076904296875
Discarted candidate: []
Possible candidate: [{'jarkko laine t': 2, 'kates naama': 4}]
lengthNecessary: 2.0999999999999996
[{'jarkko laine t': 2, 'kates naama': 4}]
acceptance_diff 2
['jarkko laine t', 'kates naama'] VS true_author: ['kates naama']
movie_id: 40458
true director: Will Allen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05744218826293945
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010919570922851562
Discarted candidate: []
Possible candidate: [{'allen francesco william': 2, 'allen will': 6}]
lengthNecessary: 2.8
[{'allen francesco william': 2, 'allen will': 6}]
acceptance_diff 2
['allen will'] VS true_author: ['allen will']
movie_id: 88768
true director: Rob Cohen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.050585031509399414
cluster_method: hierarchy

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05449700355529785
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014848709106445312
Discarted candidate: []
Possible candidate: [{'ali zafar': 3, 'kabir khan': 12}]
lengthNecessary: 5.25
[{'ali zafar': 3, 'kabir khan': 12}]
acceptance_diff 2
['kabir khan'] VS true_author: ['kabir khan']
movie_id: 59765
true director: James Franco;Gus Van Sant
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.049421072006225586
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011949539184570312
Discarted candidate: []
Possible candidate: [{'franco james': 1, 'gus sant van': 3}]
lengthNecessary: 1.4
[{'franco james': 1, 'gus sant van': 3}]
acceptance_diff 2
['franco james', 'gus sant van'] VS true_author: ['franco james', 'gus sant van']
movie_id: 7

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.039048194885253906
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002873659133911133
Discarted candidate: []
Possible candidate: [{'albert lewin': 2, 'leroy mervyn': 9}]
lengthNecessary: 3.8499999999999996
[{'albert lewin': 2, 'leroy mervyn': 9}]
acceptance_diff 2
['leroy mervyn'] VS true_author: ['leroy mervyn']
movie_id: 42099
true director: Oldrich Lipsky
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.02289104461669922
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014410018920898438
Discarted candidate: []
Possible candidate: [{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
lengthNecessary: 1.0499999999999998
[{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
acceptance_diff 2
['lipsky old?ich'] VS true_author: ['lipsky oldrich']
mo

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.044699907302856445
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012688636779785156
Discarted candidate: []
Possible candidate: [{'adrian powers': 3, 'earl johan': 6}]
lengthNecessary: 3.15
[{'adrian powers': 3, 'earl johan': 6}]
acceptance_diff 2
['earl johan'] VS true_author: ['earl johan', 'adrian powers']
movie_id: 47937
true director: William Witney;John English
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03932023048400879
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015947818756103516
Discarted candidate: []
Possible candidate: [{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 'william witney': 2}]
lengthNecessary: 3.15
[{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 'william witney':

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05654597282409668
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0018613338470458984
Discarted candidate: []
Possible candidate: [{'alan mak': 8, 'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5, 'ralph rieckermann': 5}]
lengthNecessary: 8.75
[{'alan mak': 8, 'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5, 'ralph rieckermann': 5}]
acceptance_diff 2
['alan mak'] VS true_author: ['alan mak', 'andrew lau']
movie_id: 53643
true director: Benoit Philippon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.019322872161865234
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0006477832794189453
Discarted candidate: []
Possible candidate: [{'beno?t philippon': 1, 'benoit philippon': 1}]
lengthNecessary: 0.7
[{'be

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.030612945556640625
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001094818115234375
Discarted candidate: []
Possible candidate: [{'beebe ford': 2, 'ray taylor': 1}]
lengthNecessary: 1.0499999999999998
[{'beebe ford': 2, 'ray taylor': 1}]
acceptance_diff 2
['beebe ford', 'ray taylor'] VS true_author: ['beebe ford', 'ray taylor']
movie_id: 40583
true director: Andrei Konchalovsky
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0329442024230957
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010859966278076172
Discarted candidate: []
Possible candidate: [{'andrei konchalovsky': 8, 'andrey konchalovskiy': 1}]
lengthNecessary: 3.15
[{'andrei konchalovsky': 8, 'andrey konchalovskiy': 1}]
acceptance_diff 2
['andrei konchalovsky'] VS t

[{'ewing scott': 8, 'george sherman': 3}]
acceptance_diff 2
['ewing scott'] VS true_author: ['ewing scott']
movie_id: 35829
true director: Michael Paul Stephenson
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.052172183990478516
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015430450439453125
Discarted candidate: []
Possible candidate: [{'michael paul stephenson': 6, 'michael stephenson': 2}]
lengthNecessary: 2.8
[{'michael paul stephenson': 6, 'michael stephenson': 2}]
acceptance_diff 2
['michael paul stephenson'] VS true_author: ['michael paul stephenson']
movie_id: 27116
true director: Neil Marshall
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04349803924560547
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016770362854003906
Discarted candidate: []
Possible candida

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05110001564025879
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016660690307617188
Discarted candidate: []
Possible candidate: [{'d k krishna': 4, 'dk krishna': 1, 'nidimoru raj': 4}]
lengthNecessary: 3.15
[{'d k krishna': 4, 'dk krishna': 1, 'nidimoru raj': 4}]
acceptance_diff 2
['d k krishna', 'nidimoru raj'] VS true_author: ['dk krishna', 'nidimoru raj']
movie_id: 112475
true director: Francis Ford Coppola
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05689215660095215
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013949871063232422
Discarted candidate: []
Possible candidate: [{'coppola ford francis': 19, 'coppola francis': 1}]
lengthNecessary: 7.0
[{'coppola ford francis': 19, 'coppola francis': 1}]
acceptance_diff 2
[

In [21]:
# Score the predictions of the run above against the ground truth.
# As the output below shows, the call prints TP/FP/FN, precision, recall and
# F1, and returns the (precision, recall, f1Score) tuple, which is displayed
# as the cell result.
# NOTE(review): the meaning of the third positional argument (1) is not
# visible in this notebook -- confirm against evaluation.getEvaluation.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 543, FP: 85, FN: 49
precision is 0.8646496815286624
recall is 0.9172297297297297
f1Score is 0.8901639344262294


(0.8646496815286624, 0.9172297297297297, 0.8901639344262294)

In [23]:
# Experiment configuration handed to load_dataset and
# launchWithoutReductionFusionMovie in the cells below.
# The preceding run's log reports acceptance_diff 2; this run uses 5 for both
# acceptance_diff and block_weight.
key_values = dict(
    # --- sentence-embedding model ---
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # --- what gets embedded, and with which embedder ---
    attributes_list=['newDirector'],
    embedding_type='inferSent',
    # --- dataset selection ---
    dataset='clean_movie',
    # --- blocking / clustering ---
    cluster_method='hierarchy',
    num_clusters_rate=0.01,
    # NOTE: 'thresold' is misspelt, but the key is kept exactly as written
    # because it is looked up by name at runtime outside this notebook.
    block_length_thresold=0.35,
    # --- candidate acceptance ---
    acceptance_diff=5,
    block_weight=5,
)

In [24]:
# Load the dataset selected by key_values['dataset'] again under the updated
# configuration. The call prints the dataset name and the loading time (see
# output below) and returns four values that the fusion run in the next cell
# consumes.
# NOTE(review): the names suggest records grouped per movie id, the list of
# movie ids and the ground-truth directors -- confirm against load_dataset.
dataset_name, table_group_by_movie_id, list_movie_id, true_directors = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.14563894271850586


In [25]:
# Wall-clock timing of the whole fusion run with the configuration above.
start_time = time.time()
# Runs the fusion over the loaded movies. For each movie the helper logs the
# settings, embedding/blocking times, the candidate counts, acceptance_diff and
# the chosen names "VS true_author" (see the output below).
# NOTE(review): presumably finalDirectors holds the predicted names and
# trueDirectors the ground truth, in matching order -- verify in helper.py.
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(table_group_by_movie_id, list_movie_id, true_directors, key_values)
# Elapsed time in seconds, covering everything since start_time was taken.
print("Total time is: {0}".format(time.time() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07476401329040527
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010471343994140625
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 5
['chester e hal', 'cyril frankel', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.047369956970214844
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015077590942382812
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09779191017150879
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011298656463623047
Discarted candidate: []
Possible candidate: [{'cohen larry': 6, 'tannen william': 4}]
lengthNecessary: 3.5
[{'cohen larry': 6, 'tannen william': 4}]
acceptance_diff 5
['cohen larry', 'tannen william'] VS true_author: ['cohen larry', 'tannen william']
movie_id: 5820
true director: Ananth Narayan Mahadevan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0565488338470459
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016949176788330078
Discarted candidate: []
Possible candidate: [{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
lengthNecessary: 1.4
[{'anant mahadevan': 1, 'ananth mahadevan narayan': 3}]
acceptance_diff 5
['anant mahadevan', 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05399608612060547
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014967918395996094
Discarted candidate: []
Possible candidate: [{'paolo taviani': 4, 'taviani vittorio': 2}]
lengthNecessary: 2.0999999999999996
[{'paolo taviani': 4, 'taviani vittorio': 2}]
acceptance_diff 5
['paolo taviani', 'taviani vittorio'] VS true_author: ['paolo taviani', 'taviani vittorio']
movie_id: 80539
true director: Karan Johar
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05810403823852539
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012531280517578125
Discarted candidate: []
Possible candidate: [{'abhishek varman': 2, 'johar karan': 10}]
lengthNecessary: 4.199999999999999
[{'abhishek varman': 2, 'johar karan': 10}]
acceptance_diff 5
['johar ka

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06842589378356934
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0018188953399658203
Discarted candidate: []
Possible candidate: [{'david n twohy': 1, 'david twohy': 24}]
lengthNecessary: 8.75
[{'david n twohy': 1, 'david twohy': 24}]
acceptance_diff 5
['david twohy'] VS true_author: ['david twohy']
movie_id: 80410
true director: Paul Ragsdale
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.050789833068847656
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002146005630493164
Discarted candidate: []
Possible candidate: [{'alba angelica de': 3, 'paul ragsdale': 6}]
lengthNecessary: 3.15
[{'alba angelica de': 3, 'paul ragsdale': 6}]
acceptance_diff 5
['alba angelica de', 'paul ragsdale'] VS true_author: ['paul ragsdale']
movie_id: 7

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04395699501037598
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011241436004638672
Discarted candidate: []
Possible candidate: [{'capra frank': 5, 'irvin willat': 3}]
lengthNecessary: 2.8
[{'capra frank': 5, 'irvin willat': 3}]
acceptance_diff 5
['capra frank', 'irvin willat'] VS true_author: ['capra frank']
movie_id: 109103
true director: Dick Zondag;Ralph Zondag;Phil Nibbelink;Simon Wells
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07761478424072266
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0035338401794433594
Discarted candidate: []
Possible candidate: [{'dick zondag': 12, 'nibbelink phil': 14, 'ralph zondag': 12, 'simon wells': 12}]
lengthNecessary: 17.5
[{'dick zondag': 12, 'nibbelink phil': 14, 'ralph zondag': 1

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03824615478515625
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013599395751953125
Discarted candidate: []
Possible candidate: [{'fastvold mona': 6, 'lerche mona': 2}]
lengthNecessary: 2.8
[{'fastvold mona': 6, 'lerche mona': 2}]
acceptance_diff 5
['fastvold mona', 'lerche mona'] VS true_author: ['fastvold mona']
movie_id: 22203
true director: Tony Jopia
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.029690980911254883
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009720325469970703
Discarted candidate: []
Possible candidate: [{'jason rivers': 1, 'jopia tony': 3}]
lengthNecessary: 1.4
[{'jason rivers': 1, 'jopia tony': 3}]
acceptance_diff 5
['jason rivers', 'jopia tony'] VS true_author: ['jopia tony']
movie_id: 74692
true d

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07311391830444336
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001313924789428711
Discarted candidate: []
Possible candidate: [{'cinzia th torrini': 1, 'cinzia torrini': 4}]
lengthNecessary: 1.75
[{'cinzia th torrini': 1, 'cinzia torrini': 4}]
acceptance_diff 5
['cinzia th torrini'] VS true_author: ['cinzia th torrini']
movie_id: 7829
true director: Gu Changwei
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06946611404418945
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.004649162292480469
Discarted candidate: []
Possible candidate: [{'chang-wei gu': 2, 'changwei gu': 3}]
lengthNecessary: 1.75
[{'chang-wei gu': 2, 'changwei gu': 3}]
acceptance_diff 5
['chang-wei gu'] VS true_author: ['changwei gu']
movie_id: 102878
true direc

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07745099067687988
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0018150806427001953
Discarted candidate: []
Possible candidate: [{'eric norris': 4, 'michael preece': 12}]
lengthNecessary: 5.6
[{'eric norris': 4, 'michael preece': 12}]
acceptance_diff 5
['michael preece'] VS true_author: ['michael preece']
movie_id: 25199
true director: Jonas Cuaron
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.034062862396240234
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014698505401611328
Discarted candidate: []
Possible candidate: [{'cuar jon?s': 2, 'cuaron jonas': 8}]
lengthNecessary: 3.5
[{'cuar jon?s': 2, 'cuaron jonas': 8}]
acceptance_diff 5
['cuaron jonas'] VS true_author: ['cuaron jonas']
movie_id: 76604
true director: Cary Joji 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03591012954711914
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001714944839477539
Discarted candidate: []
Possible candidate: [{'jordan metzger': 4, 'kaley simpson': 2}]
lengthNecessary: 2.0999999999999996
[{'jordan metzger': 4, 'kaley simpson': 2}]
acceptance_diff 5
['jordan metzger', 'kaley simpson'] VS true_author: ['jordan metzger', 'kaley simpson']
movie_id: 101097
true director: Glenn Silber;Barry Alexander Brown
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05227494239807129
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009877681732177734
Discarted candidate: []
Possible candidate: [{'alexander barry brown': 3, 'emilio estevez': 2, 'glenn silber': 3}]
lengthNecessary: 2.8
[{'alexander barry brown': 3, 'emilio esteve

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06661033630371094
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014519691467285156
Discarted candidate: []
Possible candidate: [{'dillon jonathan': 4, 'dillon jonathan m': 3}]
lengthNecessary: 2.4499999999999997
[{'dillon jonathan': 4, 'dillon jonathan m': 3}]
acceptance_diff 5
['dillon jonathan'] VS true_author: ['dillon jonathan m']
movie_id: 33198
true director: Russell Hodge
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08235025405883789
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011210441589355469
Discarted candidate: []
Possible candidate: [{'communications inc roads': 1, 'hodge russell': 1}]
lengthNecessary: 0.7
[{'communications inc roads': 1, 'hodge russell': 1}]
acceptance_diff 5
['communications inc roads', '

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05345630645751953
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002270936965942383
Discarted candidate: []
Possible candidate: [{'bernard deyri?s': 2, 'bernard deyries': 13, 'kimio yabuki': 13}]
lengthNecessary: 9.799999999999999
[{'bernard deyri?s': 2, 'bernard deyries': 13, 'kimio yabuki': 13}]
acceptance_diff 5
['bernard deyries', 'kimio yabuki'] VS true_author: ['kimio yabuki', 'bernard deyries']
movie_id: 66319
true director: Stig Lasseby;Jan Gissberg
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04274582862854004
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010178089141845703
Discarted candidate: []
Possible candidate: [{'gissberg jan': 4, 'lasseby stig': 5}]
lengthNecessary: 3.15
[{'gissberg jan': 4, 'lasseby stig':

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06641626358032227
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002096891403198242
Discarted candidate: []
Possible candidate: [{'adrian hoven': 7, 'armstrong michael': 15}]
lengthNecessary: 7.699999999999999
[{'adrian hoven': 7, 'armstrong michael': 15}]
acceptance_diff 5
['armstrong michael'] VS true_author: ['armstrong michael']
movie_id: 48673
true director: Steve
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.038693904876708984
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001004934310913086
Discarted candidate: []
Possible candidate: [{'ellison steve': 1, 'flying lotus': 9, 'steve': 1}]
lengthNecessary: 3.8499999999999996
[{'ellison steve': 1, 'flying lotus': 9, 'steve': 1}]
acceptance_diff 5
['flying lotus'] VS true_au

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09100794792175293
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001970052719116211
Discarted candidate: []
Possible candidate: [{'cayrol jean': 3, 'claude durand': 2}]
lengthNecessary: 1.75
[{'cayrol jean': 3, 'claude durand': 2}]
acceptance_diff 5
['cayrol jean', 'claude durand'] VS true_author: ['cayrol jean', 'claude durand']
movie_id: 71373
true director: Devaki Singh;Luke Kenny
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0831911563873291
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010192394256591797
Discarted candidate: []
Possible candidate: [{'devaki singh': 2, 'kenny luke': 5}]
lengthNecessary: 2.4499999999999997
[{'devaki singh': 2, 'kenny luke': 5}]
acceptance_diff 5
['devaki singh', 'kenny luke'] VS true_auth

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04520702362060547
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013797283172607422
Discarted candidate: []
Possible candidate: [{'daisaku shirakawa': 2, 'osamu tezuka': 4, 'taiji yabushita': 2}]
lengthNecessary: 2.8
[{'daisaku shirakawa': 2, 'osamu tezuka': 4, 'taiji yabushita': 2}]
acceptance_diff 5
['daisaku shirakawa', 'osamu tezuka', 'taiji yabushita'] VS true_author: ['osamu tezuka', 'taiji yabushita']
movie_id: 87092
true director: Fred Weintraub;Tom Kuhn
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05584716796875
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014090538024902344
Discarted candidate: []
Possible candidate: [{'fred weintraub': 7, 'kuhn tom': 6}]
lengthNecessary: 4.55
[{'fred weintraub': 7, 'kuhn tom': 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04279494285583496
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014777183532714844
Discarted candidate: []
Possible candidate: [{'gallaga peque': 6, 'lore reyes': 3}]
lengthNecessary: 3.15
[{'gallaga peque': 6, 'lore reyes': 3}]
acceptance_diff 5
['gallaga peque', 'lore reyes'] VS true_author: ['lore reyes', 'gallaga peque']
movie_id: 39951
true director: Nathan Juran
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03914690017700195
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001728057861328125
Discarted candidate: []
Possible candidate: [{'h juran nathan': 6, 'juran nathan': 3}]
lengthNecessary: 3.15
[{'h juran nathan': 6, 'juran nathan': 3}]
acceptance_diff 5
['h juran nathan'] VS true_author: ['juran nathan']
movie_id: 1

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0281369686126709
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011987686157226562
Discarted candidate: []
Possible candidate: [{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
lengthNecessary: 3.15
[{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
acceptance_diff 5
['vaclav vorli?ek'] VS true_author: ['vaclav vorlicek']
movie_id: 17664
true director: Roy Del Ruth
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.034978628158569336
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0020830631256103516
Discarted candidate: []
Possible candidate: [{'del roy ruth': 1, 'liapis peter': 1}]
lengthNecessary: 0.7
[{'del roy ruth': 1, 'liapis peter': 1}]
acceptance_diff 5
['del roy ruth', 'liapis peter'] VS true_author: ['del roy ruth']
movie_id: 2

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04583096504211426
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009779930114746094
Discarted candidate: []
Possible candidate: [{'jarkko laine t': 2, 'kates naama': 4}]
lengthNecessary: 2.0999999999999996
[{'jarkko laine t': 2, 'kates naama': 4}]
acceptance_diff 5
['jarkko laine t', 'kates naama'] VS true_author: ['kates naama']
movie_id: 40458
true director: Will Allen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04348897933959961
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009987354278564453
Discarted candidate: []
Possible candidate: [{'allen francesco william': 2, 'allen will': 6}]
lengthNecessary: 2.8
[{'allen francesco william': 2, 'allen will': 6}]
acceptance_diff 5
['allen francesco william', 'allen will'] VS tr

Blocking time is: 0.0011677742004394531
Discarted candidate: []
Possible candidate: [{'dowdle erick john': 11, 'dowdle john': 1}]
lengthNecessary: 4.199999999999999
[{'dowdle erick john': 11, 'dowdle john': 1}]
acceptance_diff 5
['dowdle erick john'] VS true_author: ['dowdle erick john']
movie_id: 53127
true director: Terry Spencer Hesser
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04033493995666504
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001010894775390625
Discarted candidate: []
Possible candidate: [{'hesser spencer terry': 2, 'mazurek stephan': 1}]
lengthNecessary: 1.0499999999999998
[{'hesser spencer terry': 2, 'mazurek stephan': 1}]
acceptance_diff 5
['hesser spencer terry', 'mazurek stephan'] VS true_author: ['hesser spencer terry']
movie_id: 60953
true director: Kabir Khan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: F

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04998612403869629
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010187625885009766
Discarted candidate: []
Possible candidate: [{'albert lewin': 2, 'leroy mervyn': 9}]
lengthNecessary: 3.8499999999999996
[{'albert lewin': 2, 'leroy mervyn': 9}]
acceptance_diff 5
['leroy mervyn'] VS true_author: ['leroy mervyn']
movie_id: 42099
true director: Oldrich Lipsky
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.017541885375976562
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009601116180419922
Discarted candidate: []
Possible candidate: [{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
lengthNecessary: 1.0499999999999998
[{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
acceptance_diff 5
['lipsky old?ich'] VS true_author: ['lipsky oldrich']
m

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03550219535827637
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012221336364746094
Discarted candidate: []
Possible candidate: [{'adrian powers': 3, 'earl johan': 6}]
lengthNecessary: 3.15
[{'adrian powers': 3, 'earl johan': 6}]
acceptance_diff 5
['adrian powers', 'earl johan'] VS true_author: ['earl johan', 'adrian powers']
movie_id: 47937
true director: William Witney;John English
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.046241044998168945
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012998580932617188
Discarted candidate: []
Possible candidate: [{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 'william witney': 2}]
lengthNecessary: 3.15
[{'english john': 5, 'f joseph poland': 1, 'lively william': 1, 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05744504928588867
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017800331115722656
Discarted candidate: []
Possible candidate: [{'alan mak': 8, 'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5, 'ralph rieckermann': 5}]
lengthNecessary: 8.75
[{'alan mak': 8, 'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5, 'ralph rieckermann': 5}]
acceptance_diff 5
['alan mak', 'andrew lau', 'andrew lau wai-keung', 'lau wai-keung', 'ralph rieckermann'] VS true_author: ['alan mak', 'andrew lau']
movie_id: 53643
true director: Benoit Philippon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.019397974014282227
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012009143829345703
Discarted candidate: []
Possible candidate:

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05323505401611328
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013790130615234375
Discarted candidate: []
Possible candidate: [{'chatmon pete': 3, 'rick zahn': 2}]
lengthNecessary: 1.75
[{'chatmon pete': 3, 'rick zahn': 2}]
acceptance_diff 5
['chatmon pete', 'rick zahn'] VS true_author: ['chatmon pete']
movie_id: 29749
true director: Francis Jun Posadas
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05475616455078125
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.000659942626953125
Discarted candidate: []
Possible candidate: [{'francis jun posadas': 1, 'francis posadas': 1}]
lengthNecessary: 0.7
[{'francis jun posadas': 1, 'francis posadas': 1}]
acceptance_diff 5
['francis jun posadas'] VS true_author: ['francis jun posadas'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08217287063598633
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015361309051513672
Discarted candidate: []
Possible candidate: [{'l?a pool': 1, 'lea pool': 3}]
lengthNecessary: 1.4
[{'l?a pool': 1, 'lea pool': 3}]
acceptance_diff 5
['l?a pool'] VS true_author: ['lea pool']
movie_id: 69925
true director: Heidi Maria Faisst
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.050843238830566406
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001146078109741211
Discarted candidate: []
Possible candidate: [{'faissi heidi maria': 1, 'faisst heidi maria': 1}]
lengthNecessary: 0.7
[{'faissi heidi maria': 1, 'faisst heidi maria': 1}]
acceptance_diff 5
['faissi heidi maria'] VS true_author: ['faisst heidi maria']
movie_id: 47283
true directo

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.036071062088012695
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016391277313232422
Discarted candidate: []
Possible candidate: [{"alice o'fredericks": 3, 'iversen jon': 2}]
lengthNecessary: 1.75
[{"alice o'fredericks": 3, 'iversen jon': 2}]
acceptance_diff 5
["alice o'fredericks", 'iversen jon'] VS true_author: ['iversen jon', "alice o'fredericks"]
movie_id: 38696
true director: Sean Carr
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.047618865966796875
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0023300647735595703
Discarted candidate: []
Possible candidate: [{'carr sean': 1, 'carrigan sean': 5}]
lengthNecessary: 2.0999999999999996
[{'carr sean': 1, 'carrigan sean': 5}]
acceptance_diff 5
['carr sean'] VS true_author: ['c

In [26]:
# Score the fused directors from the run above (its log reports acceptance_diff 5)
# against the ground-truth directors.
# Per the recorded output, this prints TP/FP/FN, precision, recall and f1Score, and
# returns a (precision, recall, f1Score) tuple that the notebook displays.
# NOTE(review): the meaning of the third positional argument (1) is not visible in
# this notebook; presumably defined in the star-imported `evaluation` module - confirm.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 578, FP: 119, FN: 14
precision is 0.8292682926829268
recall is 0.9763513513513513
f1Score is 0.8968192397207138


(0.8292682926829268, 0.9763513513513513, 0.8968192397207138)

In [27]:
# Configuration for the next fusion run: acceptance_diff is 4 here, whereas the
# run logged above reported acceptance_diff 5.
key_values = dict(
    # Sentence-encoder options.
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # Attribute to fuse, embedding back-end and dataset.
    attributes_list=['newDirector'],
    embedding_type='inferSent',
    dataset='clean_movie',
    # Blocking / clustering options.
    cluster_method='hierarchy',
    num_clusters_rate=0.01,
    # NOTE(review): the key is spelled 'thresold'; it must keep matching whatever
    # key the fusion helper reads, so do not correct it in the notebook alone.
    # In the logs, lengthNecessary equals 0.35 x the summed candidate counts,
    # so it is presumably derived from this value - confirm in the helper.
    block_length_thresold=0.35,
    acceptance_diff=4,
    block_weight=5,
)

In [28]:
# Reload the dataset selected by key_values; the recorded output shows this prints
# the dataset name ('clean_movie') and the loading time.
# NOTE(review): presumably redundant when 'dataset' has not changed since the last
# load - confirm the previous run does not mutate these tables before removing it.
dataset_name, table_group_by_movie_id, list_movie_id, true_directors = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.1672508716583252


In [29]:
# Run the fusion pipeline over every movie with the current key_values and report
# the elapsed time. With 'verbose': 1 the helper logs each movie's candidates and
# the chosen directors (see the recorded output below).
# perf_counter() is a monotonic clock, so the measured duration cannot be skewed by
# system clock adjustments that happen while the run is in progress.
start_time = time.perf_counter()
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(
    table_group_by_movie_id, list_movie_id, true_directors, key_values
)
print("Total time is: {0}".format(time.perf_counter() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09439802169799805
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009961128234863281
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 4
['chester e hal', 'cyril frankel', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04845905303955078
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001750946044921875
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.0

Blocking time is: 0.0011799335479736328
Discarted candidate: []
Possible candidate: [{'allegret yves': 2, 'e portas rafael': 1}]
lengthNecessary: 1.0499999999999998
[{'allegret yves': 2, 'e portas rafael': 1}]
acceptance_diff 4
['allegret yves', 'e portas rafael'] VS true_author: ['allegret yves']
movie_id: 24035
true director: William Tannen;Larry Cohen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05853009223937988
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001767873764038086
Discarted candidate: []
Possible candidate: [{'cohen larry': 6, 'tannen william': 4}]
lengthNecessary: 3.5
[{'cohen larry': 6, 'tannen william': 4}]
acceptance_diff 4
['cohen larry', 'tannen william'] VS true_author: ['cohen larry', 'tannen william']
movie_id: 5820
true director: Ananth Narayan Mahadevan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Em

Blocking time is: 0.0017571449279785156
Discarted candidate: []
Possible candidate: [{'a d pennebaker': 9, 'chris hegedus': 7}]
lengthNecessary: 5.6
[{'a d pennebaker': 9, 'chris hegedus': 7}]
acceptance_diff 4
['a d pennebaker', 'chris hegedus'] VS true_author: ['chris hegedus', 'a d pennebaker']
movie_id: 17046
true director: Paolo Taviani;Vittorio Taviani
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03634190559387207
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001177072525024414
Discarted candidate: []
Possible candidate: [{'paolo taviani': 4, 'taviani vittorio': 2}]
lengthNecessary: 2.0999999999999996
[{'paolo taviani': 4, 'taviani vittorio': 2}]
acceptance_diff 4
['paolo taviani', 'taviani vittorio'] VS true_author: ['paolo taviani', 'taviani vittorio']
movie_id: 80539
true director: Karan Johar
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bils

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05067610740661621
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016469955444335938
Discarted candidate: []
Possible candidate: [{'freddie young': 1, 'gene kelly': 8}]
lengthNecessary: 3.15
[{'freddie young': 1, 'gene kelly': 8}]
acceptance_diff 4
['gene kelly'] VS true_author: ['gene kelly']
movie_id: 71075
true director: David Twohy
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0750892162322998
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002680063247680664
Discarted candidate: []
Possible candidate: [{'david n twohy': 1, 'david twohy': 24}]
lengthNecessary: 8.75
[{'david n twohy': 1, 'david twohy': 24}]
acceptance_diff 4
['david twohy'] VS true_author: ['david twohy']
movie_id: 80410
true director: Paul Ragsdale
embeddin

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04908299446105957
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013401508331298828
Discarted candidate: []
Possible candidate: [{'glass nancy': 2, 'loschiavo michele': 1}]
lengthNecessary: 1.0499999999999998
[{'glass nancy': 2, 'loschiavo michele': 1}]
acceptance_diff 4
['glass nancy', 'loschiavo michele'] VS true_author: ['loschiavo michele', 'glass nancy']
movie_id: 40048
true director: Manuel Gonzalez Mauricio;Saul Barreto Ramos
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0491938591003418
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013871192932128906
Discarted candidate: []
Possible candidate: [{'barreto ramos saul': 1, 'gonzalez manuel mauricio': 2}]
lengthNecessary: 1.0499999999999998
[{'barreto ramos saul': 1, 'g

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03885912895202637
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002487659454345703
Discarted candidate: []
Possible candidate: [{'claudio marques': 3, 'guerreiro hughes marilia': 2, 'hughes marilia': 1}]
lengthNecessary: 2.0999999999999996
[{'claudio marques': 3, 'guerreiro hughes marilia': 2, 'hughes marilia': 1}]
acceptance_diff 4
['claudio marques', 'guerreiro hughes marilia', 'hughes marilia'] VS true_author: ['claudio marques', 'hughes marilia']
movie_id: 29810
true director: Herrmann Zschoche
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03184390068054199
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015721321105957031
Discarted candidate: []
Possible candidate: [{'dressel eleonore': 4, 'herrmann zschoche': 8}]
length

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0306851863861084
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010161399841308594
Discarted candidate: []
Possible candidate: [{'masha zur': 2, 'yonathan zur': 1}]
lengthNecessary: 1.0499999999999998
[{'masha zur': 2, 'yonathan zur': 1}]
acceptance_diff 4
['masha zur', 'yonathan zur'] VS true_author: ['masha zur', 'yonathan zur']
movie_id: 110303
true director: Nick Broomfield;Rudi Dolezal
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0514681339263916
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013270378112792969
Discarted candidate: []
Possible candidate: [{'broomfield nick': 10, 'dolezal rudi': 7}]
lengthNecessary: 5.949999999999999
[{'broomfield nick': 10, 'dolezal rudi': 7}]
acceptance_diff 4
['broomfield nick', 'dol

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05074000358581543
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008168220520019531
Discarted candidate: []
Possible candidate: [{'d hogan w': 4, 'david hogan': 3}]
lengthNecessary: 2.4499999999999997
[{'d hogan w': 4, 'david hogan': 3}]
acceptance_diff 4
['d hogan w', 'david hogan'] VS true_author: ['d hogan w']
movie_id: 27210
true director: Honey Lauren;Carlos Ramos Jr.
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03640389442443848
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.000993967056274414
Discarted candidate: []
Possible candidate: [{'carlos jr ramos': 1, 'honey lauren': 3}]
lengthNecessary: 1.4
[{'carlos jr ramos': 1, 'honey lauren': 3}]
acceptance_diff 4
['carlos jr ramos', 'honey lauren'] VS true_author: ['carl

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03474926948547363
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011310577392578125
Discarted candidate: []
Possible candidate: [{'boivin patrick': 4, 'olivier roberge': 2}]
lengthNecessary: 2.0999999999999996
[{'boivin patrick': 4, 'olivier roberge': 2}]
acceptance_diff 4
['boivin patrick', 'olivier roberge'] VS true_author: ['boivin patrick', 'olivier roberge']
movie_id: 56198
true director: Jordan Metzger;Kaley Simpson
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04544520378112793
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009059906005859375
Discarted candidate: []
Possible candidate: [{'jordan metzger': 4, 'kaley simpson': 2}]
lengthNecessary: 2.0999999999999996
[{'jordan metzger': 4, 'kaley simpson': 2}]
acceptance

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08297491073608398
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016548633575439453
Discarted candidate: []
Possible candidate: [{'dillon jonathan': 4, 'dillon jonathan m': 3}]
lengthNecessary: 2.4499999999999997
[{'dillon jonathan': 4, 'dillon jonathan m': 3}]
acceptance_diff 4
['dillon jonathan'] VS true_author: ['dillon jonathan m']
movie_id: 33198
true director: Russell Hodge
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04786491394042969
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0006830692291259766
Discarted candidate: []
Possible candidate: [{'communications inc roads': 1, 'hodge russell': 1}]
lengthNecessary: 0.7
[{'communications inc roads': 1, 'hodge russell': 1}]
acceptance_diff 4
['communications inc roads', '

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03734707832336426
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0018341541290283203
Discarted candidate: []
Possible candidate: [{'koji shiraishi': 1, 'masayuki ochiai': 1, 'nakamura yoshihiro': 2, 'norio tsuruta': 1, 'shimizu takashi': 1}]
lengthNecessary: 2.0999999999999996
[{'koji shiraishi': 1, 'masayuki ochiai': 1, 'nakamura yoshihiro': 2, 'norio tsuruta': 1, 'shimizu takashi': 1}]
acceptance_diff 4
['koji shiraishi', 'masayuki ochiai', 'nakamura yoshihiro', 'norio tsuruta', 'shimizu takashi'] VS true_author: ['koji shiraishi', 'nakamura yoshihiro', 'shimizu takashi', 'masayuki ochiai', 'norio tsuruta']
movie_id: 69398
true director: Bernard Deyries;Kimio Yabuki
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04680299758911133
cluster_method: hierarchy
num_clu

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06819295883178711
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002015829086303711
Discarted candidate: []
Possible candidate: [{'adrian hoven': 7, 'armstrong michael': 15}]
lengthNecessary: 7.699999999999999
[{'adrian hoven': 7, 'armstrong michael': 15}]
acceptance_diff 4
['armstrong michael'] VS true_author: ['armstrong michael']
movie_id: 48673
true director: Steve
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08689403533935547
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013759136199951172
Discarted candidate: []
Possible candidate: [{'ellison steve': 1, 'flying lotus': 9, 'steve': 1}]
lengthNecessary: 3.8499999999999996
[{'ellison steve': 1, 'flying lotus': 9, 'steve': 1}]
acceptance_diff 4
['flying lotus'] VS true_au

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03803706169128418
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012700557708740234
Discarted candidate: []
Possible candidate: [{'amber edwards': 2, 'dave davidson': 4}]
lengthNecessary: 2.0999999999999996
[{'amber edwards': 2, 'dave davidson': 4}]
acceptance_diff 4
['amber edwards', 'dave davidson'] VS true_author: ['dave davidson', 'amber edwards']
movie_id: 62327
true director: Maggie Betts
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03860616683959961
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015869140625
Discarted candidate: []
Possible candidate: [{'betts maggie': 1, 'betts margaret': 5}]
lengthNecessary: 2.0999999999999996
[{'betts maggie': 1, 'betts margaret': 5}]
acceptance_diff 4
['betts maggie', 'betts marg

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06651020050048828
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002126932144165039
Discarted candidate: []
Possible candidate: [{'ceyton kristina': 2, 'jennifer kent': 25, 'kristian moliere': 2}]
lengthNecessary: 10.149999999999999
[{'ceyton kristina': 2, 'jennifer kent': 25, 'kristian moliere': 2}]
acceptance_diff 4
['jennifer kent'] VS true_author: ['jennifer kent']
movie_id: 51499
true director: Simon Wincer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.048635244369506836
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012869834899902344
Discarted candidate: []
Possible candidate: [{'coote greg': 3, 'hogan paul': 3, 'simon wincer': 12}]
lengthNecessary: 6.3
[{'coote greg': 3, 'hogan paul': 3, 'simon wincer': 12}]
acceptanc

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04748702049255371
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011761188507080078
Discarted candidate: []
Possible candidate: [{'advani nikhil': 6, 'advani nikkhil': 6}]
lengthNecessary: 4.199999999999999
[{'advani nikhil': 6, 'advani nikkhil': 6}]
acceptance_diff 4
['advani nikhil'] VS true_author: ['advani nikhil']
movie_id: 60567
true director: Brian Yuzna;Christophe Gans;Shusuke Kaneko
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.030069828033447266
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010080337524414062
Discarted candidate: []
Possible candidate: [{'brian yuzna': 1, 'christophe gans': 2, 'kaneko shusuke': 1}]
lengthNecessary: 1.4
[{'brian yuzna': 1, 'christophe gans': 2, 'kaneko shusuke': 1}]
acceptance_diff

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.033956050872802734
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014081001281738281
Discarted candidate: []
Possible candidate: [{'hugo rodr?guez': 2, 'hugo rodriguez': 4}]
lengthNecessary: 2.0999999999999996
[{'hugo rodr?guez': 2, 'hugo rodriguez': 4}]
acceptance_diff 4
['hugo rodr?guez'] VS true_author: ['hugo rodriguez']
movie_id: 70399
true director: Markus Heidingsfelder;Min Tesch
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.041364431381225586
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012981891632080078
Discarted candidate: []
Possible candidate: [{'heidingsfelder markus': 3, 'min tesch': 2}]
lengthNecessary: 1.75
[{'heidingsfelder markus': 3, 'min tesch': 2}]
acceptance_diff 4
['heidingsfelder markus', 'min tesc

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03165388107299805
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013439655303955078
Discarted candidate: []
Possible candidate: [{'cresswell luke': 2, 'mcnicholas steve': 1}]
lengthNecessary: 1.0499999999999998
[{'cresswell luke': 2, 'mcnicholas steve': 1}]
acceptance_diff 4
['cresswell luke', 'mcnicholas steve'] VS true_author: ['cresswell luke', 'mcnicholas steve']
movie_id: 24567
true director: Craig R. Baxley
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.052433013916015625
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001027822494506836
Discarted candidate: []
Possible candidate: [{'baxley craig': 2, 'baxley craig r': 7}]
lengthNecessary: 3.15
[{'baxley craig': 2, 'baxley craig r': 7}]
acceptance_diff 4
['baxley craig r'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04173016548156738
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014469623565673828
Discarted candidate: []
Possible candidate: [{'ali zafar': 3, 'kabir khan': 12}]
lengthNecessary: 5.25
[{'ali zafar': 3, 'kabir khan': 12}]
acceptance_diff 4
['kabir khan'] VS true_author: ['kabir khan']
movie_id: 59765
true director: James Franco;Gus Van Sant
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.045021772384643555
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011048316955566406
Discarted candidate: []
Possible candidate: [{'franco james': 1, 'gus sant van': 3}]
lengthNecessary: 1.4
[{'franco james': 1, 'gus sant van': 3}]
acceptance_diff 4
['franco james', 'gus sant van'] VS true_author: ['franco james', 'gus sant van']
movie_id: 7

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05423307418823242
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001461029052734375
Discarted candidate: []
Possible candidate: [{'flackett jennifer': 4, 'levin mark': 16}]
lengthNecessary: 7.0
[{'flackett jennifer': 4, 'levin mark': 16}]
acceptance_diff 4
['levin mark'] VS true_author: ['levin mark']
movie_id: 6670
true director: Paolo Taviani;Vittorio Taviani
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.025259017944335938
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009832382202148438
Discarted candidate: []
Possible candidate: [{'paolo taviani': 5, 'taviani vittorio': 3}]
lengthNecessary: 2.8
[{'paolo taviani': 5, 'taviani vittorio': 3}]
acceptance_diff 4
['paolo taviani', 'taviani vittorio'] VS true_author: ['paolo tav

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06484103202819824
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017852783203125
Discarted candidate: []
Possible candidate: [{'don taylor': 18, 'hodges mike': 10}]
lengthNecessary: 9.799999999999999
[{'don taylor': 18, 'hodges mike': 10}]
acceptance_diff 4
['don taylor'] VS true_author: ['don taylor']
movie_id: 88880
true director: Alain Delon
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03967928886413574
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010271072387695312
Discarted candidate: []
Possible candidate: [{'alain delon': 3, 'davis robin': 2}]
lengthNecessary: 1.75
[{'alain delon': 3, 'davis robin': 2}]
acceptance_diff 4
['alain delon', 'davis robin'] VS true_author: ['alain delon']
movie_id: 85048
true director: U

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03642916679382324
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001605987548828125
Discarted candidate: []
Possible candidate: [{'daniele huillet': 3, 'jean-marie straub': 4}]
lengthNecessary: 2.4499999999999997
[{'daniele huillet': 3, 'jean-marie straub': 4}]
acceptance_diff 4
['daniele huillet', 'jean-marie straub'] VS true_author: ['jean-marie straub', 'daniele huillet']
movie_id: 41579
true director: Darren Grodsky;Danny Jacobs
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0450439453125
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011448860168457031
Discarted candidate: []
Possible candidate: [{'danny jacobs': 4, 'darren grodsky': 8}]
lengthNecessary: 4.199999999999999
[{'danny jacobs': 4, 'darren grodsky': 8}]
accepta

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.02497720718383789
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010752677917480469
Discarted candidate: []
Possible candidate: [{'francis girod': 3, 'regis wargnier': 2}]
lengthNecessary: 1.75
[{'francis girod': 3, 'regis wargnier': 2}]
acceptance_diff 4
['francis girod', 'regis wargnier'] VS true_author: ['francis girod']
movie_id: 82468
true director: Gerard Pires
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04629087448120117
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010471343994140625
Discarted candidate: []
Possible candidate: [{'g?rard pir': 1, 'gerard pires': 10}]
lengthNecessary: 3.8499999999999996
[{'g?rard pir': 1, 'gerard pires': 10}]
acceptance_diff 4
['gerard pires'] VS true_author: ['gerard pires']
movie_

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.032070159912109375
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013408660888671875
Discarted candidate: []
Possible candidate: [{'aranda vicente': 3, 'gomez raul': 2}]
lengthNecessary: 1.75
[{'aranda vicente': 3, 'gomez raul': 2}]
acceptance_diff 4
['aranda vicente', 'gomez raul'] VS true_author: ['aranda vicente']
movie_id: 64814
true director: R. Balki
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03271889686584473
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009908676147460938
Discarted candidate: []
Possible candidate: [{'balki r': 4, 'ricky sandhu': 1}]
lengthNecessary: 1.75
[{'balki r': 4, 'ricky sandhu': 1}]
acceptance_diff 4
['balki r', 'ricky sandhu'] VS true_author: ['balki r']
movie_id: 87155
true director: Ji

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04422926902770996
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001007080078125
Discarted candidate: []
Possible candidate: [{'chang cheh': 8, 'hsueh li pao': 4, 'hsueh-li pao': 2}]
lengthNecessary: 4.8999999999999995
[{'chang cheh': 8, 'hsueh li pao': 4, 'hsueh-li pao': 2}]
acceptance_diff 4
['chang cheh', 'hsueh li pao'] VS true_author: ['chang cheh', 'hsueh-li pao']
movie_id: 92814
true director: Fritz Lang;Rene Sti
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04624676704406738
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009810924530029297
Discarted candidate: []
Possible candidate: [{'fritz lang': 6, 'rene sti': 2}]
lengthNecessary: 2.8
[{'fritz lang': 6, 'rene sti': 2}]
acceptance_diff 4
['fritz lang', 'rene sti'] V

In [30]:
# Score the fused directors from the preceding run against the ground-truth directors.
# The call prints the TP/FP/FN counts plus precision, recall and F1; the returned
# (precision, recall, f1) tuple is the last expression, so it is shown as the cell result.
# NOTE(review): the meaning of the third argument (1) is not visible in this notebook;
# confirm it against the getEvaluation definition (presumably from the `evaluation` star import).
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 574, FP: 111, FN: 18
precision is 0.8379562043795621
recall is 0.9695945945945946
f1Score is 0.8989819890368052


(0.8379562043795621, 0.9695945945945946, 0.8989819890368052)

In [31]:
# Configuration for the next fusion run; it is handed to load_dataset and
# launchWithoutReductionFusionMovie in the cells that follow.
key_values = dict(
    # Encoder settings (these four are the values set_embedding_model echoes on setup).
    model_type='bilstm',
    char_level=False,
    model_version=2,
    rnn_dim=1024,
    verbose=1,
    # Input attribute, embedding back-end and dataset.
    attributes_list=['newDirector'],
    embedding_type='inferSent',
    dataset='clean_movie',
    # Clustering settings; both are echoed in the run log just before NUM_CLUSTERS.
    cluster_method='hierarchy',
    num_clusters_rate=0.01,
    # Candidate-selection settings.
    # The spelling 'block_length_thresold' is kept on purpose: it is the runtime key.
    # In the run logs, lengthNecessary matches 0.35 x the summed candidate counts.
    block_length_thresold=0.35,
    # Echoed per movie in the run log as "acceptance_diff 3".
    acceptance_diff=3,
    # NOTE(review): the effect of block_weight is not visible in the logs; confirm in the helper.
    block_weight=5,
)

In [32]:
# Load the dataset selected by key_values; the call prints the dataset name and the loading time.
# table_group_by_movie_id, list_movie_id and true_directors feed launchWithoutReductionFusionMovie
# in the next cell; dataset_name is not used by any cell visible in this part of the notebook.
dataset_name, table_group_by_movie_id, list_movie_id, true_directors = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.19414305686950684


In [33]:
# Run the fusion pass over every movie with the current key_values and report the elapsed time.
# perf_counter() is used instead of time.time(): it is a monotonic, high-resolution clock meant
# for measuring intervals, so a wall-clock adjustment during the run cannot skew the result.
# start_time therefore holds a perf_counter reading, not an epoch timestamp.
start_time = time.perf_counter()
# finalDirectors / trueDirectors are the two sequences later scored by getEvaluation.
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(table_group_by_movie_id, list_movie_id, true_directors, key_values)
print("Total time is: {0}".format(time.perf_counter() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08547687530517578
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.00683903694152832
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 3
['chester e hal', 'cyril frankel', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04525899887084961
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008950233459472656
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.04

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07518482208251953
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008068084716796875
Discarted candidate: []
Possible candidate: [{'allegret yves': 2, 'e portas rafael': 1}]
lengthNecessary: 1.0499999999999998
[{'allegret yves': 2, 'e portas rafael': 1}]
acceptance_diff 3
['allegret yves', 'e portas rafael'] VS true_author: ['allegret yves']
movie_id: 24035
true director: William Tannen;Larry Cohen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.058032989501953125
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002110004425048828
Discarted candidate: []
Possible candidate: [{'cohen larry': 6, 'tannen william': 4}]
lengthNecessary: 3.5
[{'cohen larry': 6, 'tannen william': 4}]
acceptance_diff 3
['cohen larry', 'tannen william'] VS

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04141974449157715
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0026068687438964844
Discarted candidate: []
Possible candidate: [{'paolo taviani': 4, 'taviani vittorio': 2}]
lengthNecessary: 2.0999999999999996
[{'paolo taviani': 4, 'taviani vittorio': 2}]
acceptance_diff 3
['paolo taviani', 'taviani vittorio'] VS true_author: ['paolo taviani', 'taviani vittorio']
movie_id: 80539
true director: Karan Johar
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.11144804954528809
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0018024444580078125
Discarted candidate: []
Possible candidate: [{'abhishek varman': 2, 'johar karan': 10}]
lengthNecessary: 4.199999999999999
[{'abhishek varman': 2, 'johar karan': 10}]
acceptance_diff 3
['johar ka

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06923699378967285
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.002228975296020508
Discarted candidate: []
Possible candidate: [{'david n twohy': 1, 'david twohy': 24}]
lengthNecessary: 8.75
[{'david n twohy': 1, 'david twohy': 24}]
acceptance_diff 3
['david twohy'] VS true_author: ['david twohy']
movie_id: 80410
true director: Paul Ragsdale
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06709122657775879
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012748241424560547
Discarted candidate: []
Possible candidate: [{'alba angelica de': 3, 'paul ragsdale': 6}]
lengthNecessary: 3.15
[{'alba angelica de': 3, 'paul ragsdale': 6}]
acceptance_diff 3
['alba angelica de', 'paul ragsdale'] VS true_author: ['paul ragsdale']
movie_id: 71

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04885673522949219
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016870498657226562
Discarted candidate: []
Possible candidate: [{'capra frank': 5, 'irvin willat': 3}]
lengthNecessary: 2.8
[{'capra frank': 5, 'irvin willat': 3}]
acceptance_diff 3
['capra frank', 'irvin willat'] VS true_author: ['capra frank']
movie_id: 109103
true director: Dick Zondag;Ralph Zondag;Phil Nibbelink;Simon Wells
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.09589695930480957
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.005850076675415039
Discarted candidate: []
Possible candidate: [{'dick zondag': 12, 'nibbelink phil': 14, 'ralph zondag': 12, 'simon wells': 12}]
lengthNecessary: 17.5
[{'dick zondag': 12, 'nibbelink phil': 14, 'ralph zondag': 12

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05591702461242676
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010008811950683594
Discarted candidate: []
Possible candidate: [{'arnfred morten': 1, 'lars trier von': 2}]
lengthNecessary: 1.0499999999999998
[{'arnfred morten': 1, 'lars trier von': 2}]
acceptance_diff 3
['arnfred morten', 'lars trier von'] VS true_author: ['lars trier von']
movie_id: 98793
true director: Mona Fastvold
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03827691078186035
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009682178497314453
Discarted candidate: []
Possible candidate: [{'fastvold mona': 6, 'lerche mona': 2}]
lengthNecessary: 2.8
[{'fastvold mona': 6, 'lerche mona': 2}]
acceptance_diff 3
['fastvold mona'] VS true_author: ['fastvold mona'

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.036009788513183594
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008080005645751953
Discarted candidate: []
Possible candidate: [{'chow thomas': 2, 'kwan stanley': 3}]
lengthNecessary: 1.75
[{'chow thomas': 2, 'kwan stanley': 3}]
acceptance_diff 3
['chow thomas', 'kwan stanley'] VS true_author: ['kwan stanley']
movie_id: 43578
true director: Eric Lewald;Glenn Morgan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04013204574584961
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008571147918701172
Discarted candidate: []
Possible candidate: [{'eric lewald': 2, 'glenn morgan': 1}]
lengthNecessary: 1.0499999999999998
[{'eric lewald': 2, 'glenn morgan': 1}]
acceptance_diff 3
['eric lewald', 'glenn morgan'] VS true_author: ['glenn 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05880999565124512
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012540817260742188
Discarted candidate: []
Possible candidate: [{'frantisek pilat': 2, 'otakar vavra': 1}]
lengthNecessary: 1.0499999999999998
[{'frantisek pilat': 2, 'otakar vavra': 1}]
acceptance_diff 3
['frantisek pilat', 'otakar vavra'] VS true_author: ['otakar vavra']
movie_id: 80619
true director: Steven C. Miller
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08931398391723633
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001123189926147461
Discarted candidate: []
Possible candidate: [{'c miller steven': 8, 'miller steven': 1}]
lengthNecessary: 3.15
[{'c miller steven': 8, 'miller steven': 1}]
acceptance_diff 3
['c miller steven'] VS true_author: ['c mill

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06496000289916992
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001878976821899414
Discarted candidate: []
Possible candidate: [{'jared p scott': 4, 'jared scott': 1}]
lengthNecessary: 1.75
[{'jared p scott': 4, 'jared scott': 1}]
acceptance_diff 3
['jared p scott'] VS true_author: ['jared p scott']
movie_id: 61778
true director: Vuk Rsumovic
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0366361141204834
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0014579296112060547
Discarted candidate: []
Possible candidate: [{'rsumovi vuk': 1, 'rsumovic vuk': 1}]
lengthNecessary: 0.7
[{'rsumovi vuk': 1, 'rsumovic vuk': 1}]
acceptance_diff 3
['rsumovi vuk'] VS true_author: ['rsumovic vuk']
movie_id: 10800
true director: Maurice Elvey;Mil

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0387578010559082
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.000827789306640625
Discarted candidate: []
Possible candidate: [{'gary michael schultz': 4, 'gary schultz': 2}]
lengthNecessary: 2.0999999999999996
[{'gary michael schultz': 4, 'gary schultz': 2}]
acceptance_diff 3
['gary michael schultz', 'gary schultz'] VS true_author: ['gary michael schultz']
movie_id: 93298
true director: Roger Corman
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.052349090576171875
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010399818420410156
Discarted candidate: []
Possible candidate: [{'b charles griffith': 2, 'corman roger': 6, 'mel welles': 2}]
lengthNecessary: 3.5
[{'b charles griffith': 2, 'corman roger': 6, 'mel welles': 2}]
accept

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.044776201248168945
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009260177612304688
Discarted candidate: []
Possible candidate: [{'christoph schaub': 2, 'michael schindhelm': 1}]
lengthNecessary: 1.0499999999999998
[{'christoph schaub': 2, 'michael schindhelm': 1}]
acceptance_diff 3
['christoph schaub', 'michael schindhelm'] VS true_author: ['christoph schaub', 'michael schindhelm']
movie_id: 68284
true director: E. A. Dupont
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03775525093078613
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009469985961914062
Discarted candidate: []
Possible candidate: [{'a dupont e': 2, 'andre dupont ewald': 1}]
lengthNecessary: 1.0499999999999998
[{'a dupont e': 2, 'andre dupont ewald': 1}]
acc

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03783607482910156
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008480548858642578
Discarted candidate: []
Possible candidate: [{'fabrice ferrari': 2, 'gilles perret': 1}]
lengthNecessary: 1.0499999999999998
[{'fabrice ferrari': 2, 'gilles perret': 1}]
acceptance_diff 3
['fabrice ferrari', 'gilles perret'] VS true_author: ['fabrice ferrari', 'gilles perret']
movie_id: 67503
true director: Robert Clampett
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03631591796875
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010838508605957031
Discarted candidate: []
Possible candidate: [{'bob clampett': 5, 'clampett robert': 3}]
lengthNecessary: 2.8
[{'bob clampett': 5, 'clampett robert': 3}]
acceptance_diff 3
['bob clampett', 'clampett 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.02807021141052246
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0006570816040039062
Discarted candidate: []
Possible candidate: [{'andy de emmony': 1, 'andy deemmony': 1}]
lengthNecessary: 0.7
[{'andy de emmony': 1, 'andy deemmony': 1}]
acceptance_diff 3
['andy de emmony'] VS true_author: ['andy de emmony']
movie_id: 54649
true director:  Fridrik Thor Fridriksson
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.027373075485229492
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011272430419921875
Discarted candidate: []
Possible candidate: [{'a?ar friadegrik friadegriksson': 2, 'fridrik fridriksson thor': 2}]
lengthNecessary: 1.4
[{'a?ar friadegrik friadegriksson': 2, 'fridrik fridriksson thor': 2}]
acceptance_diff 3
['a?ar friad

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.060243844985961914
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016150474548339844
Discarted candidate: []
Possible candidate: [{'blais matthew taylor': 1, 'mack trevor': 3}]
lengthNecessary: 1.4
[{'blais matthew taylor': 1, 'mack trevor': 3}]
acceptance_diff 3
['blais matthew taylor', 'mack trevor'] VS true_author: ['mack trevor']
movie_id: 8218
true director: Raj Khosla
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04224205017089844
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013768672943115234
Discarted candidate: []
Possible candidate: [{'carnevale marcos': 2, 'khosla raj': 5}]
lengthNecessary: 2.4499999999999997
[{'carnevale marcos': 2, 'khosla raj': 5}]
acceptance_diff 3
['carnevale marcos', 'khosla raj'] VS true_

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.029648780822753906
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009720325469970703
Discarted candidate: []
Possible candidate: [{'brown clarence': 2, 'maurice tourneur': 1}]
lengthNecessary: 1.0499999999999998
[{'brown clarence': 2, 'maurice tourneur': 1}]
acceptance_diff 3
['brown clarence', 'maurice tourneur'] VS true_author: ['brown clarence', 'maurice tourneur']
movie_id: 40680
true director: Sam Wainwright Douglas;Brenda Greene Mitchell
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05204033851623535
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010502338409423828
Discarted candidate: []
Possible candidate: [{'brenda greene mitchell': 2, 'brenda mitchell': 3, 'douglas sam wainwright': 3}]
lengthNecessary: 2.8
[{'brend

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03580594062805176
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0008282661437988281
Discarted candidate: []
Possible candidate: [{'heidingsfelder markus': 3, 'min tesch': 2}]
lengthNecessary: 1.75
[{'heidingsfelder markus': 3, 'min tesch': 2}]
acceptance_diff 3
['heidingsfelder markus', 'min tesch'] VS true_author: ['min tesch', 'heidingsfelder markus']
movie_id: 46047
true director: Erick M. Crespo;Kiki Melendez
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04634904861450195
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013980865478515625
Discarted candidate: []
Possible candidate: [{'crespo erick': 2, 'crespo erick m': 2, 'kiki melendez': 2}]
lengthNecessary: 2.0999999999999996
[{'crespo erick': 2, 'crespo erick m': 2, 'k

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03978896141052246
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0009529590606689453
Discarted candidate: []
Possible candidate: [{'jaa tony': 4, 'panna rittikrai': 2}]
lengthNecessary: 2.0999999999999996
[{'jaa tony': 4, 'panna rittikrai': 2}]
acceptance_diff 3
['jaa tony', 'panna rittikrai'] VS true_author: ['jaa tony', 'panna rittikrai']
movie_id: 92190
true director: Jeffrey Obrow;Stephen Carpenter
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.054350852966308594
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.001373291015625
Discarted candidate: []
Possible candidate: [{'carpenter stephen': 10, 'jeffrey obrow': 8}]
lengthNecessary: 6.3
[{'carpenter stephen': 10, 'jeffrey obrow': 8}]
acceptance_diff 3
['carpenter stephen', '

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05156087875366211
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011441707611083984
Discarted candidate: []
Possible candidate: [{'g hunt jeffrey': 6, 'hunt jeffrey': 2}]
lengthNecessary: 2.8
[{'g hunt jeffrey': 6, 'hunt jeffrey': 2}]
acceptance_diff 3
['g hunt jeffrey'] VS true_author: ['hunt jeffrey']
movie_id: 30379
true director: Philip Wang;Wesley Chan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03840208053588867
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010292530059814453
Discarted candidate: []
Possible candidate: [{'chan wesley': 6, 'philip wang': 5}]
lengthNecessary: 3.8499999999999996
[{'chan wesley': 6, 'philip wang': 5}]
acceptance_diff 3
['chan wesley', 'philip wang'] VS true_author: ['philip wang', 'chan

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.035083770751953125
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0005297660827636719
Discarted candidate: []
Possible candidate: [{'agel romi': 2, 'holger wick': 1}]
lengthNecessary: 1.0499999999999998
[{'agel romi': 2, 'holger wick': 1}]
acceptance_diff 3
['agel romi', 'holger wick'] VS true_author: ['holger wick', 'agel romi']
movie_id: 83371
true director: Clifford Bestall
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.039359092712402344
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012650489807128906
Discarted candidate: []
Possible candidate: [{'bestall cliff': 1, 'bestall clifford': 2, 'freeman morgan': 1, 'lori mccreary': 1}]
lengthNecessary: 1.75
[{'bestall cliff': 1, 'bestall clifford': 2, 'freeman morgan': 1, 'lori

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04122495651245117
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0017638206481933594
Discarted candidate: []
Possible candidate: [{'adrien brody': 2, 'ford kevin': 1}]
lengthNecessary: 1.0499999999999998
[{'adrien brody': 2, 'ford kevin': 1}]
acceptance_diff 3
['adrien brody', 'ford kevin'] VS true_author: ['ford kevin', 'adrien brody']
movie_id: 24097
true director: Jeffrey Delman
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05065608024597168
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0011141300201416016
Discarted candidate: []
Possible candidate: [{'delman jeffery': 6, 'delman jeffrey': 2}]
lengthNecessary: 2.8
[{'delman jeffery': 6, 'delman jeffrey': 2}]
acceptance_diff 3
['delman jeffery'] VS true_author: ['delman jef

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04170393943786621
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013217926025390625
Discarted candidate: []
Possible candidate: [{'alexandre bustillo': 5, 'julien maury': 4}]
lengthNecessary: 3.15
[{'alexandre bustillo': 5, 'julien maury': 4}]
acceptance_diff 3
['alexandre bustillo', 'julien maury'] VS true_author: ['julien maury', 'alexandre bustillo']
movie_id: 95021
true director: Lewis D. Collins;Vernon Keays
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06783628463745117
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.000946044921875
Discarted candidate: []
Possible candidate: [{'collins d lewis': 2, 'keays vernon': 1}]
lengthNecessary: 1.0499999999999998
[{'collins d lewis': 2, 'keays vernon': 1}]
acceptance_diff 3
['col

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0363919734954834
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0013189315795898438
Discarted candidate: []
Possible candidate: [{'g?rard pir': 1, 'gerard pires': 10}]
lengthNecessary: 3.8499999999999996
[{'g?rard pir': 1, 'gerard pires': 10}]
acceptance_diff 3
['gerard pires'] VS true_author: ['gerard pires']
movie_id: 88603
true director: Karel Zeman
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04199409484863281
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0012371540069580078
Discarted candidate: []
Possible candidate: [{'francis gross': 1, 'karel zeman': 7}]
lengthNecessary: 2.8
[{'francis gross': 1, 'karel zeman': 7}]
acceptance_diff 3
['karel zeman'] VS true_author: ['karel zeman']
movie_id: 5239
true director: William

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03953409194946289
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0015399456024169922
Discarted candidate: []
Possible candidate: [{'ji?i svoboda': 6, 'jiri svoboda': 1}]
lengthNecessary: 2.4499999999999997
[{'ji?i svoboda': 6, 'jiri svoboda': 1}]
acceptance_diff 3
['ji?i svoboda'] VS true_author: ['jiri svoboda']
movie_id: 19545
true director: Charles Chu;Gavin Kelly
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.034085988998413086
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0010018348693847656
Discarted candidate: []
Possible candidate: [{'charles chu': 1, 'gavin kelly': 2}]
lengthNecessary: 1.0499999999999998
[{'charles chu': 1, 'gavin kelly': 2}]
acceptance_diff 3
['charles chu', 'gavin kelly'] VS true_author: ['charles c

Blocking time is: 0.0013310909271240234
Discarted candidate: []
Possible candidate: [{'chris notarile r': 4, 'kim santiago': 7}]
lengthNecessary: 3.8499999999999996
[{'chris notarile r': 4, 'kim santiago': 7}]
acceptance_diff 3
['chris notarile r', 'kim santiago'] VS true_author: ['kim santiago', 'chris notarile r']
movie_id: 87533
true director: Chang Cheh;Pao Hsueh-Li
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04977703094482422
cluster_method: hierarchy
num_clusters_rate: 0.01
NUM_CLUSTERS 1
Blocking time is: 0.0016269683837890625
Discarted candidate: []
Possible candidate: [{'chang cheh': 8, 'hsueh li pao': 4, 'hsueh-li pao': 2}]
lengthNecessary: 4.8999999999999995
[{'chang cheh': 8, 'hsueh li pao': 4, 'hsueh-li pao': 2}]
acceptance_diff 3
['chang cheh'] VS true_author: ['chang cheh', 'hsueh-li pao']
movie_id: 92814
true director: Fritz Lang;Rene Sti
embedding_type: inferSent
attributes_list: ['newDirector']
m

In [34]:
# Score the run configured above (acceptance_diff=3, block_weight=5) against the ground truth.
# The call prints the TP/FP/FN counts plus precision, recall and F1; the returned
# (precision, recall, f1) tuple is the last expression, so it is shown as the cell result.
# NOTE(review): the meaning of the third argument (1) is not visible in this notebook;
# confirm it against the getEvaluation definition (presumably from the `evaluation` star import).
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 568, FP: 98, FN: 24
precision is 0.8528528528528528
recall is 0.9594594594594594
f1Score is 0.90302066772655


(0.8528528528528528, 0.9594594594594594, 0.90302066772655)