In [1]:
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
# Make the project's `fusion` directory importable. The path is relative, so
# it resolves against the notebook's current working directory.
module_path = os.path.abspath('../../fusion')
# Append only once so re-running this cell does not stack duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

from preprocessing_datasets.preprocessing_utilities import ValueUtils
from preprocessing_datasets import load_dataset
from embedding_algorithms import sentence_embedding, set_embedding_model
from dimensionality_reduction_algorithms import dimension_reduction_algorithms
from cluster_algorithms import cluster_algorithm

from helper import launchWithoutReductionFusionMovie
from plot_tools import plotChart, plotCluster
from evaluation import *

In [2]:
# Single configuration dict handed unchanged to load_dataset,
# set_embedding_model and launchWithoutReductionFusionMovie below.
# NOTE(review): 'block_length_thresold' is misspelled ("threshold"), but it is a
# runtime key - presumably read by name inside the helpers - so it must not be
# renamed in this notebook alone.
key_values = {
    "model_type": "bilstm",
    "char_level": False,
    "model_version": 2,
    "rnn_dim": 1024,
    "verbose": 1,
    "attributes_list": ["newDirector"],
    "embedding_type": "inferSent",
    "dataset": "clean_movie",
    "cluster_method": "hierarchy",
    "num_clusters_rate": 0.1,
    "block_length_thresold": 0.35,
    "acceptance_diff": 3,
    "block_weight": 5,
}

In [3]:
# Load the dataset selected through key_values (the run log echoes
# "dataset: clean_movie"). Three of the four results are passed on to
# launchWithoutReductionFusionMovie further down.
(
    dataset_name,
    table_group_by_movie_id,
    list_movie_id,
    true_directors,
) = load_dataset(key_values)

dataset: clean_movie
Loading time is: 0.5085608959197998


In [4]:
# Set up the embedding model from key_values. The recorded run logged
# "Setup time is: 274.13..." for this cell, so avoid re-running it needlessly.
set_embedding_model(key_values)

Vocab size : 2196017
model_version: 2
rnn_dim: 1024
model_type: bilstm
char_level: False
Setup time is: 274.1313548088074


In [5]:
# Run the fusion step and report how long it took. It yields the predicted
# (finalDirectors) and reference (trueDirectors) lists scored by the
# getEvaluation cells below.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
start_time = time.perf_counter()
finalDirectors, trueDirectors = launchWithoutReductionFusionMovie(
    table_group_by_movie_id, list_movie_id, true_directors, key_values
)
print("Total time is: {0}".format(time.perf_counter() - start_time))

movie_id: 73952
true director: Robert Hamer
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 1.0827579498291016
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.16681194305419922
Discarted candidate: []
Possible candidate: [{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
lengthNecessary: 2.4499999999999997
[{'chester e hal': 2, 'cyril frankel': 1, 'hamer robert': 4}]
acceptance_diff 3
['chester e hal', 'cyril frankel', 'hamer robert'] VS true_author: ['hamer robert']
movie_id: 1655
true director: Alexandre O. Philippe
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05323505401611328
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012118816375732422
Discarted candidate: []
Possible candidate: [{'alexandre o philippe': 2, 'alexandre philippe': 1}]
lengthNecessary: 1.04999

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0711367130279541
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0016930103302001953
Discarted candidate: []
Possible candidate: [{'coen joel': 13}, {'coen ethan': 7}]
lengthNecessary: 7.0
[{'coen joel': 13}, {'coen ethan': 7}]
acceptance_diff 3
['coen joel', 'coen ethan'] VS true_author: ['coen joel']
movie_id: 2550
true director: Jean-Luc Godard
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.041131019592285156
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012249946594238281
Discarted candidate: []
Possible candidate: [{'dziga groupe vertov': 1, 'godard jean-luc': 3}]
lengthNecessary: 1.4
[{'dziga groupe vertov': 1, 'godard jean-luc': 3}]
acceptance_diff 3
['dziga groupe vertov', 'godard jean-luc'] VS true_author: ['godard jean

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06740593910217285
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001203775405883789
Discarted candidate: []
Possible candidate: [{'christian krones': 1, 'florian weigensamer': 2, 'muller olaf s': 1, 'roland schrotthofer': 1}]
lengthNecessary: 1.75
[{'christian krones': 1, 'florian weigensamer': 2, 'muller olaf s': 1, 'roland schrotthofer': 1}]
acceptance_diff 3
['christian krones', 'florian weigensamer', 'muller olaf s', 'roland schrotthofer'] VS true_author: ['florian weigensamer', 'muller olaf s', 'roland schrotthofer', 'christian krones']
movie_id: 91427
true director: Robert Rossen
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.057060956954956055
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012161731719970703
Discarted can

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.11100912094116211
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0014100074768066406
Discarted candidate: []
Possible candidate: [{'alba angelica de': 3, 'paul ragsdale': 6}]
lengthNecessary: 3.15
[{'alba angelica de': 3, 'paul ragsdale': 6}]
acceptance_diff 3
['alba angelica de', 'paul ragsdale'] VS true_author: ['paul ragsdale']
movie_id: 71705
true director: Allan Arkush
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0816340446472168
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0019259452819824219
Discarted candidate: [{'dante joe': 6}]
Possible candidate: [{'allan arkush': 13}]
lengthNecessary: 6.6499999999999995
[{'allan arkush': 13}]
acceptance_diff 3
['allan arkush'] VS true_author: ['allan arkush']
movie_id: 91114
true 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.044571876525878906
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0013720989227294922
Discarted candidate: []
Possible candidate: [{'capra frank': 5, 'irvin willat': 3}]
lengthNecessary: 2.8
[{'capra frank': 5, 'irvin willat': 3}]
acceptance_diff 3
['capra frank', 'irvin willat'] VS true_author: ['capra frank']
movie_id: 109103
true director: Dick Zondag;Ralph Zondag;Phil Nibbelink;Simon Wells
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.10863995552062988
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 5
Blocking time is: 0.0037970542907714844
Discarted candidate: [{'nibbelink phil': 13}, {'dick zondag': 12}, {'simon wells': 12}, {'ralph zondag': 12}, {'nibbelink phil': 1}]
Possible candidate: []
lengthNecessary: 17.5
[]
acceptance_diff 3
[] VS true_

Discarted candidate: [{'jean negulesco': 3}]
Possible candidate: [{'otto preminger': 14}]
lengthNecessary: 5.949999999999999
[{'otto preminger': 14}]
acceptance_diff 3
['otto preminger'] VS true_author: ['otto preminger']
movie_id: 51150
true director: Sabine Krayenbuhl;Zeva Oelbaum
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.032354116439819336
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001132965087890625
Discarted candidate: []
Possible candidate: [{'krayenb?hl sabine': 1, 'krayenbuhl sabine': 3, 'oelbaum zeva': 3}]
lengthNecessary: 2.4499999999999997
[{'krayenb?hl sabine': 1, 'krayenbuhl sabine': 3, 'oelbaum zeva': 3}]
acceptance_diff 3
['krayenb?hl sabine', 'oelbaum zeva'] VS true_author: ['krayenbuhl sabine', 'oelbaum zeva']
movie_id: 92248
true director: Lars Von Trier
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embed

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05275440216064453
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012607574462890625
Discarted candidate: []
Possible candidate: [{'cinzia th torrini': 1, 'cinzia torrini': 4}]
lengthNecessary: 1.75
[{'cinzia th torrini': 1, 'cinzia torrini': 4}]
acceptance_diff 3
['cinzia th torrini'] VS true_author: ['cinzia th torrini']
movie_id: 7829
true director: Gu Changwei
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0319209098815918
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0013430118560791016
Discarted candidate: []
Possible candidate: [{'chang-wei gu': 2, 'changwei gu': 3}]
lengthNecessary: 1.75
[{'chang-wei gu': 2, 'changwei gu': 3}]
acceptance_diff 3
['chang-wei gu'] VS true_author: ['changwei gu']
movie_id: 102878
true direct

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04183602333068848
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0013470649719238281
Discarted candidate: []
Possible candidate: [{'cuar jon?s': 2, 'cuaron jonas': 8}]
lengthNecessary: 3.5
[{'cuar jon?s': 2, 'cuaron jonas': 8}]
acceptance_diff 3
['cuaron jonas'] VS true_author: ['cuaron jonas']
movie_id: 76604
true director: Cary Joji Fukunaga
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06020617485046387
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014929771423339844
Discarted candidate: [{'cary fukunaga joji': 4}]
Possible candidate: [{'cary fukunaga': 16}]
lengthNecessary: 7.0
[{'cary fukunaga': 16}]
acceptance_diff 3
['cary fukunaga'] VS true_author: ['cary fukunaga joji']
movie_id: 93149
true director: Otakar Vavra
embe

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04436516761779785
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.001008749008178711
Discarted candidate: []
Possible candidate: [{'buster keaton': 2, 'charles lamont': 1}]
lengthNecessary: 1.0499999999999998
[{'buster keaton': 2, 'charles lamont': 1}]
acceptance_diff 3
['buster keaton', 'charles lamont'] VS true_author: ['charles lamont']
movie_id: 73875
true director: Spike Jonze
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.044313907623291016
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012729167938232422
Discarted candidate: []
Possible candidate: [{'createspace': 1, 'jonze spike': 2}]
lengthNecessary: 1.0499999999999998
[{'createspace': 1, 'jonze spike': 2}]
acceptance_diff 3
['createspace', 'jonze spike'] VS true_author:

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05053591728210449
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011153221130371094
Discarted candidate: []
Possible candidate: [{'dillon jonathan': 4, 'dillon jonathan m': 3}]
lengthNecessary: 2.4499999999999997
[{'dillon jonathan': 4, 'dillon jonathan m': 3}]
acceptance_diff 3
['dillon jonathan'] VS true_author: ['dillon jonathan m']
movie_id: 33198
true director: Russell Hodge
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07757401466369629
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0014309883117675781
Discarted candidate: []
Possible candidate: [{'communications inc roads': 1, 'hodge russell': 1}]
lengthNecessary: 0.7
[{'communications inc roads': 1, 'hodge russell': 1}]
acceptance_diff 3
['communications inc roads', 'ho

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0587770938873291
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0015118122100830078
Discarted candidate: []
Possible candidate: [{'albert pereira-lazaro': 3, 'emmanuel klotz': 5}]
lengthNecessary: 2.8
[{'albert pereira-lazaro': 3, 'emmanuel klotz': 5}]
acceptance_diff 3
['albert pereira-lazaro', 'emmanuel klotz'] VS true_author: ['albert pereira-lazaro', 'emmanuel klotz']
movie_id: 23189
true director: Masayuki Ochiai;Norio Tsuruta;Takashi Shimizu;Yoshihiro Nakamura;Koji Shiraishi
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05068683624267578
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.002492189407348633
Discarted candidate: []
Possible candidate: [{'koji shiraishi': 1, 'masayuki ochiai': 1, 'nakamura yoshihiro': 2, 'norio t

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.17585206031799316
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012972354888916016
Discarted candidate: []
Possible candidate: [{'diego juan solanas': 8, 'juan solanas': 1}]
lengthNecessary: 3.15
[{'diego juan solanas': 8, 'juan solanas': 1}]
acceptance_diff 3
['diego juan solanas'] VS true_author: ['juan solanas']
movie_id: 72374
true director: Norton Virgien;John Eng
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.1595151424407959
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 4
Blocking time is: 0.005997180938720703
Discarted candidate: [{'norton virgien': 2}, {'eng john': 1}]
Possible candidate: [{'eng john': 19}, {'norton virgien': 17}]
lengthNecessary: 13.649999999999999
[{'eng john': 19}, {'norton virgien': 17}]
acceptance_diff 3
['eng john', 

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.12772607803344727
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0031251907348632812
Discarted candidate: [{'cukor george': 4}]
Possible candidate: [{'minnelli vincente': 10}]
lengthNecessary: 4.8999999999999995
[{'minnelli vincente': 10}]
acceptance_diff 3
['minnelli vincente'] VS true_author: ['minnelli vincente']
movie_id: 39473
true director: Bill L. Norton
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.12980103492736816
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001500844955444336
Discarted candidate: [{'bill norton': 3}]
Possible candidate: [{'bill l norton': 10}]
lengthNecessary: 4.55
[{'bill l norton': 10}]
acceptance_diff 3
['bill l norton'] VS true_author: ['bill l norton']
movie_id: 1357
true director: Glenn Triggs

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.07393002510070801
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0020308494567871094
Discarted candidate: [{'fran?ois ozon': 1}]
Possible candidate: [{'francois ozon': 13}]
lengthNecessary: 4.8999999999999995
[{'francois ozon': 13}]
acceptance_diff 3
['francois ozon'] VS true_author: ['francois ozon']
movie_id: 94563
true director: Neil Meschino;Giulio De Santi
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08908390998840332
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0014951229095458984
Discarted candidate: []
Possible candidate: [{'de giulio santi': 7}, {'meschino neil': 4}]
lengthNecessary: 3.8499999999999996
[{'de giulio santi': 7}, {'meschino neil': 4}]
acceptance_diff 3
['de giulio santi', 'meschino neil'] VS true_author

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05845308303833008
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0008578300476074219
Discarted candidate: []
Possible candidate: [{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
lengthNecessary: 2.4499999999999997
[{'aleksandar petrovi': 5, 'aleksandar petrovic': 2}]
acceptance_diff 3
['aleksandar petrovi'] VS true_author: ['aleksandar petrovic']
movie_id: 5258
true director: Keven Undergaro
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04489326477050781
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011730194091796875
Discarted candidate: []
Possible candidate: [{'keven undergaro': 1, 'kevin undergaro': 1}]
lengthNecessary: 0.7
[{'keven undergaro': 1, 'kevin undergaro': 1}]
acceptance_diff 3
['keven undergaro'] VS true_au

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06284499168395996
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0014190673828125
Discarted candidate: []
Possible candidate: [{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
lengthNecessary: 2.4499999999999997
[{'andrew lau': 3, 'andrew lau wai-keung': 2, 'lau wai-keung': 2}]
acceptance_diff 3
['andrew lau', 'andrew lau wai-keung', 'lau wai-keung'] VS true_author: ['andrew lau']
movie_id: 51986
true director: Vaclav Vorlicek
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.04052615165710449
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0021600723266601562
Discarted candidate: []
Possible candidate: [{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]
lengthNecessary: 3.15
[{'vaclav vorli?ek': 7, 'vaclav vorlicek': 2}]

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.08758401870727539
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.0030510425567626953
Discarted candidate: [{'c fraser heston': 1}]
Possible candidate: [{'c fraser heston': 12}, {'clarke fraser heston': 10}]
lengthNecessary: 8.049999999999999
[{'c fraser heston': 12}, {'clarke fraser heston': 10}]
acceptance_diff 3
['c fraser heston'] VS true_author: ['c fraser heston']
movie_id: 104370
true director: Matej Chlupacek
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05637383460998535
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.008890151977539062
Discarted candidate: []
Possible candidate: [{'chlupacek matej': 4, 'michal samir': 2}]
lengthNecessary: 2.0999999999999996
[{'chlupacek matej': 4, 'michal samir': 2}]
acceptance_diff 3
['

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.050415754318237305
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0023050308227539062
Discarted candidate: []
Possible candidate: [{'hesser spencer terry': 2, 'mazurek stephan': 1}]
lengthNecessary: 1.0499999999999998
[{'hesser spencer terry': 2, 'mazurek stephan': 1}]
acceptance_diff 3
['hesser spencer terry', 'mazurek stephan'] VS true_author: ['hesser spencer terry']
movie_id: 60953
true director: Kabir Khan
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.10300278663635254
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.0015330314636230469
Discarted candidate: [{'ali zafar': 3}]
Possible candidate: [{'kabir khan': 12}]
lengthNecessary: 5.25
[{'kabir khan': 12}]
acceptance_diff 3
['kabir khan'] VS true_author: ['kabir khan']
movi

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.10145783424377441
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 2
Blocking time is: 0.001239776611328125
Discarted candidate: [{'albert lewin': 2}]
Possible candidate: [{'leroy mervyn': 9}]
lengthNecessary: 3.8499999999999996
[{'leroy mervyn': 9}]
acceptance_diff 3
['leroy mervyn'] VS true_author: ['leroy mervyn']
movie_id: 42099
true director: Oldrich Lipsky
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.02631831169128418
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0008950233459472656
Discarted candidate: []
Possible candidate: [{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
lengthNecessary: 1.0499999999999998
[{'lipsky old?ich': 2, 'lipsky oldrich': 1}]
acceptance_diff 3
['lipsky old?ich'] VS true_author: ['lipsky oldrich']
movie_id: 27606
true dir

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.06923818588256836
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0014719963073730469
Discarted candidate: []
Possible candidate: [{'dean fuller marcus': 8, 'fuller marcus': 1}]
lengthNecessary: 3.15
[{'dean fuller marcus': 8, 'fuller marcus': 1}]
acceptance_diff 3
['dean fuller marcus'] VS true_author: ['dean fuller marcus']
movie_id: 33240
true director: Johan Earl;Adrian Powers
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05029463768005371
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011372566223144531
Discarted candidate: []
Possible candidate: [{'adrian powers': 3, 'earl johan': 6}]
lengthNecessary: 3.15
[{'adrian powers': 3, 'earl johan': 6}]
acceptance_diff 3
['adrian powers', 'earl johan'] VS true_author: ['earl johan

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05319619178771973
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0035037994384765625
Discarted candidate: []
Possible candidate: [{'mart?nez ra?l': 1, 'martinez raul': 1}]
lengthNecessary: 0.7
[{'mart?nez ra?l': 1, 'martinez raul': 1}]
acceptance_diff 3
['mart?nez ra?l'] VS true_author: ['martinez raul']
movie_id: 43746
true director: Andrew Lau;Alan Mak
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.13544511795043945
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.004640817642211914
Discarted candidate: [{'alan mak': 8}, {'ralph rieckermann': 5}]
Possible candidate: [{'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5}]
lengthNecessary: 8.75
[{'andrew lau': 4, 'andrew lau wai-keung': 3, 'lau wai-keung': 5}]
acceptance

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.02835822105407715
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0012569427490234375
Discarted candidate: []
Possible candidate: [{'bulaji veljko': 6, 'bulajic veljko': 2}]
lengthNecessary: 2.8
[{'bulaji veljko': 6, 'bulajic veljko': 2}]
acceptance_diff 3
['bulaji veljko'] VS true_author: ['bulajic veljko']
movie_id: 21344
true director: Vasili Zhuravlyov
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.03289198875427246
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010921955108642578
Discarted candidate: []
Possible candidate: [{'vasili zhuravlyov': 1, 'vasily zhuravlyov': 1}]
lengthNecessary: 0.7
[{'vasili zhuravlyov': 1, 'vasily zhuravlyov': 1}]
acceptance_diff 3
['vasili zhuravlyov'] VS true_author: ['vasili zhuravlyov']
movi

embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.0692739486694336
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 3
Blocking time is: 0.004591226577758789
Discarted candidate: [{"greg o'connor": 2}, {'gavin oconnor': 1}]
Possible candidate: [{"gavin o'connor": 20}]
lengthNecessary: 8.049999999999999
[{"gavin o'connor": 20}]
acceptance_diff 3
["gavin o'connor"] VS true_author: ["gavin o'connor"]
movie_id: 8315
true director: Lea Pool
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.043144941329956055
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0010387897491455078
Discarted candidate: []
Possible candidate: [{'l?a pool': 1, 'lea pool': 3}]
lengthNecessary: 1.4
[{'l?a pool': 1, 'lea pool': 3}]
acceptance_diff 3
['l?a pool'] VS true_author: ['lea pool']
movie_id: 69925
true director: Heidi Maria Faisst


embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.05451798439025879
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.003557920455932617
Discarted candidate: []
Possible candidate: [{'films substratum': 1, 'frindel jeremy': 7}]
lengthNecessary: 2.8
[{'films substratum': 1, 'frindel jeremy': 7}]
acceptance_diff 3
['frindel jeremy'] VS true_author: ['frindel jeremy']
movie_id: 105721
true director: Georg Maas
embedding_type: inferSent
attributes_list: ['newDirector']
model_type: bilstm
char_level: False
Embedding time is: 0.056394100189208984
cluster_method: hierarchy
num_clusters_rate: 0.1
NUM_CLUSTERS 1
Blocking time is: 0.0011169910430908203
Discarted candidate: []
Possible candidate: [{'georg maas': 3, 'judith kaufmann': 2}]
lengthNecessary: 1.75
[{'georg maas': 3, 'judith kaufmann': 2}]
acceptance_diff 3
['georg maas', 'judith kaufmann'] VS true_author: ['georg maas']
movie_id: 88941

# ALL Cases

In [9]:
# Score predicted vs. reference directors; the returned tuple matches the
# printed (precision, recall, f1Score).
# NOTE(review): the meaning of the third argument (1) is not visible in this
# notebook - confirm against the evaluation module.
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.4731404958677686
recall is 0.5725
f1Score is 0.5180995475113123


(0.4731404958677686, 0.5725, 0.5180995475113123)

In [13]:
getEvaluation(finalDirectors, trueDirectors, 1)  # 0.3 - presumably the block_length_thresold used for this run; TODO confirm

precision is 0.49230769230769234
recall is 0.56
f1Score is 0.5239766081871345


(0.49230769230769234, 0.56, 0.5239766081871345)

In [17]:
getEvaluation(finalDirectors, trueDirectors, 1)  # 0.35 - presumably block_length_thresold (matches the value in key_values); TODO confirm

precision is 0.5022727272727273
recall is 0.5525
f1Score is 0.5261904761904763


(0.5022727272727273, 0.5525, 0.5261904761904763)

In [21]:
getEvaluation(finalDirectors, trueDirectors, 1)  # Re-scored after fixing how directors are split (presumably the ';'-separated names; TODO confirm)

precision is 0.9272727272727272
recall is 0.6891891891891891
f1Score is 0.7906976744186047


(0.9272727272727272, 0.6891891891891891, 0.7906976744186047)

# Add multiple winners from a block

In [6]:
# NOTE(review): the recorded recall for this run (1.0591...) exceeds 1, which
# is not a valid recall. It is consistent with the negative third count in the
# _getTPFPFN output further down (627 / (627 - 35)), so the counting in the
# evaluation code needs checking before these numbers are trusted.
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.8589041095890411
recall is 1.0591216216216217
f1Score is 0.9485627836611195


(0.8589041095890411, 1.0591216216216217, 0.9485627836611195)

In [7]:
# Inspect the predicted director names: one list per movie, which appears to be
# index-aligned with trueDirectors (compare the two outputs entry by entry).
finalDirectors

[['chester e hal', 'cyril frankel', 'hamer robert'],
 ['alexandre o philippe', 'alexandre philippe'],
 ['bird brad'],
 ['sherman vincent'],
 ['clarke fraser heston'],
 [],
 ['godfrey ho', 'kim si-hyun'],
 ['dave fleischer'],
 ["d'urville martin"],
 ['norton virgien', 'igor kovalyov'],
 ['bezucha thomas'],
 ['galinsky michael', 'hawley suki'],
 ['chatrichalerm yukoi', 'chatrichalerm yukol'],
 ['harvey parry', 'richard talmadge'],
 ['hara keiichi'],
 ['allegret yves', 'e portas rafael'],
 ['cohen larry', 'tannen william'],
 ['anant mahadevan', 'ananth mahadevan narayan'],
 ['coen joel', 'coen ethan'],
 ['dziga groupe vertov', 'godard jean-luc'],
 ['adam small', 'peter stuart'],
 ['caradog james', 'caradog james w'],
 ['lord phil'],
 ['al khaled nassiry soliman', 'antonio augugliaro', 'del gabriele grande'],
 ['albert magnoli', 'prince'],
 ['andrzej bartkowiak'],
 ['hugues martin', 'martin sandra'],
 ['chomsky j marvin'],
 ["o'haver tommy"],
 ['dewey richard', 'marrinan timothy'],
 ['a d 

In [8]:
# Inspect the reference director names: one list per movie, which appears to be
# index-aligned with finalDirectors.
trueDirectors

[['hamer robert'],
 ['alexandre o philippe'],
 ['bird brad'],
 ['sherman vincent'],
 ['c fraser heston'],
 ['john landis', 'dante joe', 'george miller', 'spielberg steven'],
 ['godfrey ho'],
 ['dave fleischer'],
 ["d'urville martin"],
 ['norton virgien', 'igor kovalyov'],
 ['bezucha thomas'],
 ['hawley suki', 'galinsky michael'],
 ['chatrichalerm yukol'],
 ['richard talmadge', 'harvey parry'],
 ['hara keiichi'],
 ['allegret yves'],
 ['cohen larry', 'tannen william'],
 ['ananth mahadevan narayan'],
 ['coen joel'],
 ['godard jean-luc'],
 ['adam small', 'peter stuart'],
 ['caradog james w'],
 ['christopher miller', 'lord phil'],
 ['al khaled nassiry soliman', 'antonio augugliaro', 'del gabriele grande'],
 ['prince'],
 ['andrzej bartkowiak'],
 ['martin sandra', 'hugues martin'],
 ['chomsky j marvin'],
 ["o'haver tommy"],
 ['marrinan timothy', 'dewey richard'],
 ['chris hegedus', 'a d pennebaker'],
 ['paolo taviani', 'taviani vittorio'],
 ['johar karan'],
 ['anlo sepulveda', 'collins paul']

In [10]:
# Raw counts behind the scores. The recorded output (627, 103, -35) reproduces
# the precision/recall printed above when read as (TP, FP, FN).
# NOTE(review): a negative FN is invalid and is what pushes recall above 1.
# NOTE(review): underscore-prefixed names are not exported by
# `from evaluation import *` unless the module lists them in __all__, so this
# call may raise NameError on a fresh kernel - confirm or import it explicitly.
_getTPFPFN(finalDirectors, trueDirectors)

(627, 103, -35)

# Add multiple winners and remove similar ones

In [17]:
# Scores for the run with key_values['acceptance_diff'] = 3.
# finalDirectors/trueDirectors must be regenerated by re-running the fusion
# cell after changing the setting; this call only scores what is in memory.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 560, FP: 102, FN: 32
precision is 0.8459214501510574
recall is 0.9459459459459459
f1Score is 0.8931419457735247


(0.8459214501510574, 0.9459459459459459, 0.8931419457735247)

In [6]:
# Scores for the run labelled acceptance_diff = 2.
# NOTE(review): the recorded scores are identical to the acceptance_diff = 3
# run and the execution count is lower (In [6]), so this output may come from
# stale results - re-run the fusion cell with acceptance_diff = 2 to confirm.
getEvaluation(finalDirectors, trueDirectors, 1)

precision is 0.8459214501510574
recall is 0.9459459459459459
f1Score is 0.8931419457735247


(0.8459214501510574, 0.9459459459459459, 0.8931419457735247)

In [9]:
# Scores for the run labelled acceptance_diff = 1. Compared with the
# acceptance_diff = 3 run, the recorded precision is higher and recall lower.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 503, FP: 73, FN: 89
precision is 0.8732638888888888
recall is 0.8496621621621622
f1Score is 0.8613013698630135


(0.8732638888888888, 0.8496621621621622, 0.8613013698630135)

In [13]:
# Scores for the run labelled acceptance_diff = 0. Of the recorded
# acceptance_diff runs, this one has the highest precision and the lowest recall.
getEvaluation(finalDirectors, trueDirectors, 1)

TP: 425, FP: 39, FN: 167
precision is 0.915948275862069
recall is 0.7179054054054054
f1Score is 0.8049242424242425


(0.915948275862069, 0.7179054054054054, 0.8049242424242425)