In [1]:
import glob
import time
import os
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import MinMaxScaler
import pickle
from argparse import ArgumentParser, Namespace
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from itertools import chain
from tqdm import tqdm
import copy
import shutil
import pickle

I assume that the candidate generation and feature genration has already be run on the training and dev tables

In [2]:
es_url = 'http://ckg07:9200'
es_index = 'wikidatadwd-augmented'

# Input Paths

# GDrive Path: /table-linker-dataset/2019-iswc_challenge_data/t2dv2/canonical-with-context/t2dv2-train-canonical/
train_path = "/Users/amandeep/Github/table-linker/data/SemTabR4_T2dv2/train-canonical"

# GDrive Path: /table-linker-dataset/2019-iswc_challenge_data/t2dv2/canonical-with-context/t2dv2-dev-canonical/
dev_path = "/Users/amandeep/Github/table-linker/data/t2dv2/t2dv2-dev-canonical"

# GDrive Path: /table-linker-dataset/2019-iswc_challenge_data/t2dv2/ground_truth/Xinting_GT_csv
ground_truth_files = "/Users/amandeep/Github/table-linker/data/SemTabR4_T2dv2/GT"

# can be downloaded from https://github.com/usc-isi-i2/table-linker-pipelines/blob/main/table-linker-full-pipeline/models/weighted_lr.pkl
classifier_model_path = '/Users/amandeep/Github/table-linker-pipelines/table-linker-full-pipeline/models/weighted_lr.pkl'


# OUTPUT PATHS
output_path = "/Users/amandeep/Github/table-linker/data/SemTabR4_T2dv2/table-linker"
train_output_path = f'{output_path}/train1-output'
dev_output_path = f'{output_path}/dev-output'

# increase version to create a new folder for an experiment
PREVIOUS_VERSION = "v10_342_files"
VERSION = "v12"

train_candidate_path = f'{train_output_path}/{VERSION}/candidates'
train_feature_path = f'{train_output_path}/{VERSION}/features'
train_missing_candidates_path = f'{train_output_path}/{VERSION}/train_missing_candidates_path'

dev_candidate_path = f'{dev_output_path}/{VERSION}/candidates'
dev_feature_path = f'{dev_output_path}/{VERSION}/features'
dev_output_predictions = f'{dev_output_path}/{VERSION}/dev_predictions'
dev_predictions_top_k = f'{dev_output_path}/{VERSION}/dev_predictions_top_k'
dev_colorized_path = f'{dev_output_path}/{VERSION}/dev_predictions_colorized'
dev_metrics_path = f'{dev_output_path}/{VERSION}/dev_predictions_metrics'
dev_missing_candidates_path = f'{dev_output_path}/{VERSION}/dev_missing_candidates_path'

aux_field = 'graph_embedding_complex,class_count,property_count,context'


train_prop_count = f'{train_output_path}/{VERSION}/train_prop_count' 
train_class_count = f'{train_output_path}/{VERSION}/train_class_count'
train_context_path = f'{train_output_path}/{VERSION}/train_context'
train_graph_embedding = f'{train_output_path}/{VERSION}/train_graph_embedding'

dev_prop_count = f'{dev_output_path}/{VERSION}/dev_prop_count'
dev_class_count = f'{dev_output_path}/{VERSION}/dev_class_count'
dev_context_path = f'{dev_output_path}/{VERSION}/dev_context'
dev_graph_embedding = f'{dev_output_path}/{VERSION}/dev_graph_embedding'

temp_dir = f'{output_path}/temp'

pos_output = f'{temp_dir}/training_data/pos_features.pkl'
neg_output = f'{temp_dir}/training_data/neg_features.pkl'
min_max_scaler_path = f'{temp_dir}/training_data/normalization_factor.pkl'

final_score_column = 'siamese_prediction'

model_save_path = f'{dev_output_path}/{VERSION}/saved_models'
best_model_path = ''

copy_candidates_from_previous_version = False

In [3]:
!mkdir -p "$temp_dir"

!mkdir -p "$train_prop_count"
!mkdir -p "$dev_prop_count"
!mkdir -p "$train_class_count"
!mkdir -p "$dev_class_count"
!mkdir -p "$train_graph_embedding"
!mkdir -p "$dev_graph_embedding"
!mkdir -p "$train_context_path"
!mkdir -p "$dev_context_path"

!mkdir -p "$train_candidate_path"
!mkdir -p "$dev_candidate_path"

!mkdir -p "$train_feature_path"
!mkdir -p "$dev_feature_path"

!mkdir -p "$temp_dir/training_data"
!mkdir -p "$dev_output_predictions"
!mkdir -p "$model_save_path"
!mkdir -p "$dev_predictions_top_k"
!mkdir -p "$dev_colorized_path"
!mkdir -p "$dev_metrics_path"
!mkdir -p "$dev_missing_candidates_path"
!mkdir -p "$train_missing_candidates_path"

In [4]:
if copy_candidates_from_previous_version:
    !cp $dev_output_path/$PREVIOUS_VERSION/candidates/*csv $dev_output_path/$VERSION/candidates
    !cp $dev_output_path/$PREVIOUS_VERSION/dev_prop_count/* $dev_output_path/$VERSION/dev_prop_count
    !cp $dev_output_path/$PREVIOUS_VERSION/dev_class_count/* $dev_output_path/$VERSION/dev_class_count
    !cp $dev_output_path/$PREVIOUS_VERSION/dev_context/* $dev_output_path/$VERSION/dev_context
    !cp $dev_output_path/$PREVIOUS_VERSION/dev_graph_embedding/* $dev_output_path/$VERSION/dev_graph_embedding
    
    !cp $train_output_path/$PREVIOUS_VERSION/candidates/*csv $train_output_path/$VERSION/candidates
    !cp $train_output_path/$PREVIOUS_VERSION/train_prop_count/* $train_output_path/$VERSION/train_prop_count
    !cp $train_output_path/$PREVIOUS_VERSION/train_class_count/* $train_output_path/$VERSION/train_class_count
    !cp $train_output_path/$PREVIOUS_VERSION/train_context/* $train_output_path/$VERSION/train_context
    !cp $train_output_path/$PREVIOUS_VERSION/train_graph_embedding/* $train_output_path/$VERSION/train_graph_embedding

In [7]:
features = ['pagerank','retrieval_score','monge_elkan','monge_elkan_aliases','des_cont_jaccard',
            'jaro_winkler','levenshtein','singleton','num_char','num_tokens',
           'lof_class_count_tf_idf_score', 'lof_property_count_tf_idf_score',
           'lof-graph-embedding-score', 'lof-reciprocal-rank', 'context_score']

In [8]:
classifier_features = ['aligned_pagerank', 'smallest_qnode_number', 'monge_elkan', 'des_cont_jaccard_normalized']

## Candidate Generation

In [7]:
def candidate_generation(path, gt_path, output_path, class_count_path, prop_count_path, context_path, graph_embedding):
    file_list = glob.glob(path + '/*.csv')
    for i, file in tqdm(enumerate(file_list)):
        st = time.time()
        filename = file.split('/')[-1]
#         print(f"{filename}: {i+1} of {len(file_list)}")
        gt_file = f"{ground_truth_files}/{filename}"
        output_file = f"{output_path}/{filename}"
        
        !tl clean -c label -o label_clean "$file" / \
        --url $es_url --index $es_index \
        get-fuzzy-augmented-matches -c label_clean \
        --auxiliary-fields {aux_field} \
        --auxiliary-folder "$temp_dir" / \
        --url $es_url --index $es_index \
        get-exact-matches -c label_clean \
        --auxiliary-fields {aux_field} \
        --auxiliary-folder "$temp_dir" / \
        / get-kgtk-search-matches -c label_clean  \
        --auxiliary-fields {aux_field} \
        --auxiliary-folder "$temp_dir" \
        / ground-truth-labeler --gt-file "$gt_file" > "$output_file"
        
        for field in aux_field.split(','):
            aux_list = []
            for f in glob.glob(f'{temp_dir}/*{field}.tsv'):
                aux_list.append(pd.read_csv(f, sep='\t', dtype=object))
            aux_df = pd.concat(aux_list).drop_duplicates(subset=['qnode'])
            if field == 'class_count':
                class_count_file = f"{class_count_path}/{filename.strip('.csv')}_class_count.tsv"
                aux_df.to_csv(class_count_file, sep='\t', index=False)
            elif field == 'property_count':
                prop_count_file = f"{prop_count_path}/{filename.strip('.csv')}_prop_count.tsv"
                aux_df.to_csv(prop_count_file, sep='\t', index=False)
            elif field == 'context':
                context_file = f"{context_path}/{filename.strip('.csv')}_context.tsv"
                aux_df.to_csv(context_file, sep='\t', index=False)
            else:
                graph_embedding_file = f"{graph_embedding}/{filename.strip('.csv')}_graph_embedding_complex.tsv"
                aux_df.to_csv(graph_embedding_file, sep='\t', index=False)
        
#         print(time.time() - st)


In [8]:
if not copy_candidates_from_previous_version:
    candidate_generation(train_path, ground_truth_files, train_candidate_path, train_class_count, train_prop_count, train_context_path,train_graph_embedding)

0it [00:00, ?it/s]

clean Time: 0.0038008689880371094s
get-fuzzy-augmented-matches Time: 5.1828460693359375s
get-exact-matches Time: 0.6038298606872559s
get-kgtk-search-matches Time: 4.174046993255615s
ground-truth-labeler Time: 0.09036588668823242s


1it [00:19, 19.19s/it]

clean Time: 0.0024907588958740234s
get-fuzzy-augmented-matches Time: 3.837916135787964s
get-exact-matches Time: 0.5225467681884766s
get-kgtk-search-matches Time: 3.588423013687134s
ground-truth-labeler Time: 0.2516970634460449s


2it [00:36, 17.93s/it]

clean Time: 0.0053522586822509766s
get-fuzzy-augmented-matches Time: 7.758697986602783s
get-exact-matches Time: 0.7729170322418213s
get-kgtk-search-matches Time: 11.220542907714844s
ground-truth-labeler Time: 0.15458369255065918s


3it [01:05, 22.97s/it]

clean Time: 0.004917144775390625s
get-fuzzy-augmented-matches Time: 12.519254207611084s
get-exact-matches Time: 0.9404559135437012s
get-kgtk-search-matches Time: 13.555083990097046s
ground-truth-labeler Time: 0.20128393173217773s


4it [01:40, 27.89s/it]

clean Time: 0.0027971267700195312s
get-fuzzy-augmented-matches Time: 7.006508111953735s
get-exact-matches Time: 1.3908112049102783s
get-kgtk-search-matches Time: 7.939384937286377s
ground-truth-labeler Time: 0.1173560619354248s


5it [02:08, 27.82s/it]

clean Time: 0.0054590702056884766s
get-fuzzy-augmented-matches Time: 7.854846954345703s
get-exact-matches Time: 0.8723340034484863s
get-kgtk-search-matches Time: 25.897789001464844s
ground-truth-labeler Time: 0.20546507835388184s


6it [02:52, 33.53s/it]

clean Time: 0.0025892257690429688s
get-fuzzy-augmented-matches Time: 8.463303804397583s
get-exact-matches Time: 0.4312129020690918s
get-kgtk-search-matches Time: 8.38702917098999s
ground-truth-labeler Time: 0.11772704124450684s


7it [03:18, 30.77s/it]

clean Time: 0.004010915756225586s
get-fuzzy-augmented-matches Time: 6.462490081787109s
get-exact-matches Time: 1.344177007675171s
get-kgtk-search-matches Time: 5.611929178237915s
ground-truth-labeler Time: 0.13200092315673828s


8it [03:39, 27.86s/it]

clean Time: 0.00601506233215332s
get-fuzzy-augmented-matches Time: 7.041852951049805s
get-exact-matches Time: 1.0252890586853027s
get-kgtk-search-matches Time: 5.822709083557129s
ground-truth-labeler Time: 0.12175202369689941s


9it [04:04, 26.76s/it]

clean Time: 0.0038809776306152344s
get-fuzzy-augmented-matches Time: 7.260751008987427s
get-exact-matches Time: 0.39313387870788574s
get-kgtk-search-matches Time: 6.925559043884277s
ground-truth-labeler Time: 0.11767292022705078s


10it [04:28, 26.03s/it]

clean Time: 0.0061190128326416016s
get-fuzzy-augmented-matches Time: 6.0345778465271s
Command: get-exact-matches
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connection.py", line 169, in _new_conn
    conn = connection.create_connection(
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/util/connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connect

11it [04:48, 24.34s/it]

clean Time: 0.009142875671386719s
get-fuzzy-augmented-matches Time: 5.926518201828003s
get-exact-matches Time: 1.1749060153961182s
get-kgtk-search-matches Time: 3.5283660888671875s
ground-truth-labeler Time: 0.2692840099334717s


12it [05:16, 25.29s/it]

clean Time: 0.005278110504150391s
get-fuzzy-augmented-matches Time: 4.887967824935913s
get-exact-matches Time: 0.6357731819152832s
get-kgtk-search-matches Time: 5.784508943557739s
ground-truth-labeler Time: 0.06714105606079102s


13it [05:43, 25.74s/it]

clean Time: 0.005784273147583008s
get-fuzzy-augmented-matches Time: 9.952754974365234s
get-exact-matches Time: 1.242366075515747s
get-kgtk-search-matches Time: 4.334038972854614s
ground-truth-labeler Time: 0.17670559883117676s


14it [06:09, 25.88s/it]

clean Time: 0.004316806793212891s
get-fuzzy-augmented-matches Time: 6.263353109359741s
get-exact-matches Time: 0.7766590118408203s
get-kgtk-search-matches Time: 10.9512779712677s
ground-truth-labeler Time: 0.1997220516204834s


15it [06:38, 26.81s/it]

clean Time: 0.0070459842681884766s
get-fuzzy-augmented-matches Time: 6.14279317855835s
get-exact-matches Time: 0.35429906845092773s
get-kgtk-search-matches Time: 6.189280986785889s
ground-truth-labeler Time: 0.16070199012756348s


16it [07:02, 26.14s/it]

clean Time: 0.003894805908203125s
get-fuzzy-augmented-matches Time: 9.774265050888062s
get-exact-matches Time: 0.6550338268280029s
get-kgtk-search-matches Time: 13.597098350524902s
ground-truth-labeler Time: 0.15918374061584473s


17it [07:35, 28.20s/it]

clean Time: 0.0032858848571777344s
get-fuzzy-augmented-matches Time: 5.88588285446167s
get-exact-matches Time: 0.6868579387664795s
get-kgtk-search-matches Time: 7.069318056106567s
ground-truth-labeler Time: 0.1474149227142334s


18it [07:56, 25.97s/it]

clean Time: 0.010612010955810547s
get-fuzzy-augmented-matches Time: 9.447230339050293s
get-exact-matches Time: 0.5733201503753662s
get-kgtk-search-matches Time: 3.3838820457458496s
ground-truth-labeler Time: 0.18084001541137695s


19it [08:20, 25.35s/it]

clean Time: 0.007569074630737305s
get-fuzzy-augmented-matches Time: 9.001677989959717s
get-exact-matches Time: 0.8601832389831543s
get-kgtk-search-matches Time: 22.63483190536499s
ground-truth-labeler Time: 0.2527458667755127s


20it [09:04, 30.98s/it]

clean Time: 0.0022220611572265625s
get-fuzzy-augmented-matches Time: 2.984344005584717s
get-exact-matches Time: 0.5546019077301025s
get-kgtk-search-matches Time: 3.613651752471924s
ground-truth-labeler Time: 0.06818389892578125s


21it [09:22, 26.90s/it]

clean Time: 0.004821062088012695s
get-fuzzy-augmented-matches Time: 6.683928728103638s
get-exact-matches Time: 0.45075201988220215s
get-kgtk-search-matches Time: 4.664046049118042s
ground-truth-labeler Time: 0.13589215278625488s


22it [09:41, 24.68s/it]

clean Time: 0.0020799636840820312s
get-fuzzy-augmented-matches Time: 4.707327842712402s
get-exact-matches Time: 0.32107019424438477s
get-kgtk-search-matches Time: 3.4913909435272217s
ground-truth-labeler Time: 0.05269026756286621s


23it [09:54, 21.16s/it]

clean Time: 0.005624055862426758s
get-fuzzy-augmented-matches Time: 6.29867696762085s
get-exact-matches Time: 0.6687088012695312s
get-kgtk-search-matches Time: 7.396997928619385s
ground-truth-labeler Time: 0.11291098594665527s


24it [10:16, 21.56s/it]

clean Time: 0.00191497802734375s
get-fuzzy-augmented-matches Time: 6.824878931045532s
get-exact-matches Time: 0.8373019695281982s
get-kgtk-search-matches Time: 2.3136439323425293s
ground-truth-labeler Time: 0.053192853927612305s


25it [10:35, 20.76s/it]

clean Time: 0.0050122737884521484s
get-fuzzy-augmented-matches Time: 2.908484697341919s
get-exact-matches Time: 0.16090798377990723s
get-kgtk-search-matches Time: 11.478424072265625s
ground-truth-labeler Time: 0.1075589656829834s


26it [10:58, 21.30s/it]

clean Time: 0.007780790328979492s
get-fuzzy-augmented-matches Time: 8.87652039527893s
get-exact-matches Time: 1.1975388526916504s
get-kgtk-search-matches Time: 6.22927713394165s
ground-truth-labeler Time: 0.08661389350891113s


27it [11:23, 22.52s/it]

clean Time: 0.007157087326049805s
get-fuzzy-augmented-matches Time: 16.51269006729126s
get-exact-matches Time: 1.0194509029388428s
get-kgtk-search-matches Time: 16.30362582206726s
ground-truth-labeler Time: 0.23952507972717285s


28it [12:09, 29.57s/it]

clean Time: 0.004361867904663086s
get-fuzzy-augmented-matches Time: 5.90547513961792s
get-exact-matches Time: 0.5279879570007324s
get-kgtk-search-matches Time: 8.500985145568848s
ground-truth-labeler Time: 0.14929723739624023s


29it [12:30, 27.01s/it]

clean Time: 0.004938840866088867s
get-fuzzy-augmented-matches Time: 8.554004907608032s
get-exact-matches Time: 0.7138712406158447s
get-kgtk-search-matches Time: 11.227036952972412s
ground-truth-labeler Time: 0.16188311576843262s


30it [13:01, 27.98s/it]

clean Time: 0.0059661865234375s
get-fuzzy-augmented-matches Time: 7.711696147918701s
get-exact-matches Time: 0.6562349796295166s
get-kgtk-search-matches Time: 14.934393882751465s
ground-truth-labeler Time: 0.23491811752319336s


31it [13:34, 29.72s/it]

clean Time: 0.002441883087158203s
get-fuzzy-augmented-matches Time: 4.231873989105225s
get-exact-matches Time: 0.17674589157104492s
get-kgtk-search-matches Time: 2.6033542156219482s
ground-truth-labeler Time: 0.05278801918029785s


32it [13:50, 25.37s/it]

clean Time: 0.012055158615112305s
get-fuzzy-augmented-matches Time: 10.328552007675171s
get-exact-matches Time: 1.0811772346496582s
get-kgtk-search-matches Time: 6.9580957889556885s
ground-truth-labeler Time: 0.22024774551391602s


33it [14:18, 26.32s/it]

clean Time: 0.004453897476196289s
get-fuzzy-augmented-matches Time: 6.954061031341553s
get-exact-matches Time: 1.5882878303527832s
get-kgtk-search-matches Time: 7.926398277282715s
ground-truth-labeler Time: 0.3209407329559326s


34it [14:43, 25.82s/it]

clean Time: 0.005774021148681641s
get-fuzzy-augmented-matches Time: 7.316712141036987s
get-exact-matches Time: 0.5019838809967041s
get-kgtk-search-matches Time: 7.441135883331299s
ground-truth-labeler Time: 0.15256690979003906s


35it [15:08, 25.52s/it]

clean Time: 0.0032167434692382812s
get-fuzzy-augmented-matches Time: 4.644268989562988s
get-exact-matches Time: 0.5609591007232666s
get-kgtk-search-matches Time: 3.015625s
ground-truth-labeler Time: 0.05562591552734375s


36it [15:23, 22.59s/it]

clean Time: 0.005581855773925781s
get-fuzzy-augmented-matches Time: 6.46367883682251s
get-exact-matches Time: 0.5120241641998291s
get-kgtk-search-matches Time: 7.584712982177734s
ground-truth-labeler Time: 0.12448310852050781s


37it [15:48, 23.19s/it]

clean Time: 0.0031728744506835938s
get-fuzzy-augmented-matches Time: 10.423254013061523s
get-exact-matches Time: 0.45426201820373535s
get-kgtk-search-matches Time: 13.403637886047363s
ground-truth-labeler Time: 0.1894679069519043s


38it [16:21, 26.12s/it]

clean Time: 0.0031359195709228516s
get-fuzzy-augmented-matches Time: 6.760684013366699s
get-exact-matches Time: 0.6263868808746338s
get-kgtk-search-matches Time: 5.966264009475708s
ground-truth-labeler Time: 0.15210914611816406s


39it [16:41, 24.24s/it]

clean Time: 0.0014150142669677734s
get-fuzzy-augmented-matches Time: 2.3641929626464844s
get-exact-matches Time: 0.3410468101501465s
get-kgtk-search-matches Time: 3.359851837158203s
ground-truth-labeler Time: 0.049750566482543945s


40it [16:53, 20.70s/it]

clean Time: 0.0019388198852539062s
get-fuzzy-augmented-matches Time: 4.466360092163086s
get-exact-matches Time: 0.495499849319458s
get-kgtk-search-matches Time: 7.175814867019653s
ground-truth-labeler Time: 0.05928301811218262s


41it [17:13, 20.48s/it]

clean Time: 0.0034759044647216797s
get-fuzzy-augmented-matches Time: 8.073223114013672s
get-exact-matches Time: 0.7186770439147949s
get-kgtk-search-matches Time: 8.978694677352905s
ground-truth-labeler Time: 0.09632205963134766s


42it [17:39, 21.93s/it]

clean Time: 0.002696990966796875s
get-fuzzy-augmented-matches Time: 5.118952989578247s
get-exact-matches Time: 0.7132110595703125s
get-kgtk-search-matches Time: 4.122593879699707s
ground-truth-labeler Time: 0.05235695838928223s


43it [17:58, 21.29s/it]

clean Time: 0.0054891109466552734s
get-fuzzy-augmented-matches Time: 4.2400221824646s
get-exact-matches Time: 0.17987680435180664s
get-kgtk-search-matches Time: 8.725259065628052s
ground-truth-labeler Time: 0.08852672576904297s


44it [18:20, 21.34s/it]

clean Time: 0.005663871765136719s
get-fuzzy-augmented-matches Time: 8.780416011810303s
get-exact-matches Time: 1.3978688716888428s
get-kgtk-search-matches Time: 9.279215097427368s
ground-truth-labeler Time: 0.1549210548400879s


45it [18:49, 23.65s/it]

clean Time: 0.0031778812408447266s
get-fuzzy-augmented-matches Time: 4.591247797012329s
get-exact-matches Time: 0.5723450183868408s
get-kgtk-search-matches Time: 8.660943984985352s
ground-truth-labeler Time: 0.0991511344909668s


46it [19:12, 23.37s/it]

clean Time: 0.00397491455078125s
get-fuzzy-augmented-matches Time: 6.342136859893799s
get-exact-matches Time: 0.4899098873138428s
get-kgtk-search-matches Time: 6.16115665435791s
ground-truth-labeler Time: 0.0862588882446289s


47it [19:32, 22.61s/it]

clean Time: 0.0065250396728515625s
get-fuzzy-augmented-matches Time: 9.295779943466187s
get-exact-matches Time: 0.9430408477783203s
get-kgtk-search-matches Time: 11.173500061035156s
ground-truth-labeler Time: 0.17873883247375488s


48it [20:03, 25.07s/it]

clean Time: 0.002747058868408203s
get-fuzzy-augmented-matches Time: 3.8516781330108643s
get-exact-matches Time: 0.23694586753845215s
get-kgtk-search-matches Time: 3.2364578247070312s
ground-truth-labeler Time: 0.11264801025390625s


49it [20:19, 22.44s/it]

clean Time: 0.002549409866333008s
get-fuzzy-augmented-matches Time: 6.2447190284729s
get-exact-matches Time: 0.5188329219818115s
get-kgtk-search-matches Time: 9.12427806854248s
ground-truth-labeler Time: 0.09526300430297852s


50it [20:43, 22.80s/it]

clean Time: 0.0013778209686279297s
get-fuzzy-augmented-matches Time: 4.8062779903411865s
get-exact-matches Time: 0.33710694313049316s
get-kgtk-search-matches Time: 4.294665813446045s
ground-truth-labeler Time: 0.0562891960144043s


51it [21:00, 21.17s/it]

clean Time: 0.0019729137420654297s
get-fuzzy-augmented-matches Time: 11.283444881439209s
get-exact-matches Time: 0.24293303489685059s
get-kgtk-search-matches Time: 2.0672359466552734s
ground-truth-labeler Time: 0.04927492141723633s


52it [21:19, 20.29s/it]

clean Time: 0.010110855102539062s
get-fuzzy-augmented-matches Time: 10.096020936965942s
get-exact-matches Time: 0.5506012439727783s
get-kgtk-search-matches Time: 12.937490940093994s
ground-truth-labeler Time: 0.14938092231750488s


53it [21:53, 24.61s/it]

clean Time: 0.003506898880004883s
get-fuzzy-augmented-matches Time: 9.672940015792847s
Command: get-exact-matches
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connection.py", line 169, in _new_conn
    conn = connection.create_connection(
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/util/connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connec

54it [22:13, 22.98s/it]

clean Time: 0.005051851272583008s
get-fuzzy-augmented-matches Time: 9.71813678741455s
get-exact-matches Time: 0.6394579410552979s
get-kgtk-search-matches Time: 12.547319173812866s
ground-truth-labeler Time: 0.7724311351776123s


55it [22:46, 26.24s/it]

clean Time: 0.0038928985595703125s
get-fuzzy-augmented-matches Time: 8.01764178276062s
get-exact-matches Time: 0.7472379207611084s
get-kgtk-search-matches Time: 10.039378881454468s
ground-truth-labeler Time: 0.14532089233398438s


56it [23:14, 26.51s/it]

clean Time: 0.006964921951293945s
get-fuzzy-augmented-matches Time: 7.78526496887207s
get-exact-matches Time: 0.5102260112762451s
get-kgtk-search-matches Time: 6.044971942901611s
ground-truth-labeler Time: 0.12265300750732422s


57it [23:35, 25.06s/it]

clean Time: 0.0018110275268554688s
get-fuzzy-augmented-matches Time: 3.8503940105438232s
get-exact-matches Time: 0.4837210178375244s
get-kgtk-search-matches Time: 1.4400408267974854s
ground-truth-labeler Time: 0.05547189712524414s


58it [23:48, 21.45s/it]

clean Time: 0.001708984375s
get-fuzzy-augmented-matches Time: 4.861485958099365s
get-exact-matches Time: 0.5801742076873779s
get-kgtk-search-matches Time: 0.9638500213623047s
ground-truth-labeler Time: 0.04981088638305664s


59it [24:02, 19.25s/it]

clean Time: 0.0013680458068847656s
get-fuzzy-augmented-matches Time: 4.772050142288208s
get-exact-matches Time: 0.28783369064331055s
get-kgtk-search-matches Time: 6.090597152709961s
ground-truth-labeler Time: 0.05741000175476074s


60it [24:22, 19.34s/it]

clean Time: 0.0018239021301269531s
get-fuzzy-augmented-matches Time: 4.290143013000488s
get-exact-matches Time: 0.5488948822021484s
get-kgtk-search-matches Time: 2.1866509914398193s
ground-truth-labeler Time: 0.0593411922454834s


61it [24:37, 18.00s/it]

clean Time: 0.002002239227294922s
get-fuzzy-augmented-matches Time: 5.58064603805542s
get-exact-matches Time: 1.536257028579712s
get-kgtk-search-matches Time: 4.548943996429443s
ground-truth-labeler Time: 0.06988906860351562s


62it [24:58, 18.95s/it]

clean Time: 0.006055116653442383s
get-fuzzy-augmented-matches Time: 7.948517799377441s
get-exact-matches Time: 0.481917142868042s
get-kgtk-search-matches Time: 20.440701246261597s
ground-truth-labeler Time: 0.1636509895324707s


63it [25:37, 25.08s/it]

clean Time: 0.0017418861389160156s
get-fuzzy-augmented-matches Time: 5.246600151062012s
get-exact-matches Time: 0.19492506980895996s
get-kgtk-search-matches Time: 4.3263099193573s
ground-truth-labeler Time: 0.07278776168823242s


64it [25:53, 22.19s/it]

clean Time: 0.003326416015625s
get-fuzzy-augmented-matches Time: 4.967069864273071s
get-exact-matches Time: 0.623892068862915s
get-kgtk-search-matches Time: 10.276692867279053s
ground-truth-labeler Time: 0.11377310752868652s


65it [26:17, 22.74s/it]

clean Time: 0.005278825759887695s
get-fuzzy-augmented-matches Time: 7.875754117965698s
get-exact-matches Time: 0.8778910636901855s
get-kgtk-search-matches Time: 20.60933494567871s
ground-truth-labeler Time: 0.1969301700592041s


66it [26:55, 27.46s/it]

clean Time: 0.006986141204833984s
get-fuzzy-augmented-matches Time: 8.867820978164673s
get-exact-matches Time: 0.9334499835968018s
get-kgtk-search-matches Time: 25.34157109260559s
ground-truth-labeler Time: 0.2383708953857422s


67it [27:42, 33.29s/it]

clean Time: 0.00292205810546875s
get-fuzzy-augmented-matches Time: 6.895409107208252s
get-exact-matches Time: 0.5510332584381104s
get-kgtk-search-matches Time: 8.201827049255371s
ground-truth-labeler Time: 0.13010001182556152s


68it [28:08, 31.06s/it]

clean Time: 0.0038483142852783203s
get-fuzzy-augmented-matches Time: 11.820307970046997s
get-exact-matches Time: 0.9639780521392822s
get-kgtk-search-matches Time: 9.264647006988525s
ground-truth-labeler Time: 0.1335887908935547s


69it [28:39, 31.16s/it]

clean Time: 0.002269268035888672s
get-fuzzy-augmented-matches Time: 5.351563930511475s
get-exact-matches Time: 0.2322711944580078s
get-kgtk-search-matches Time: 3.8116631507873535s
ground-truth-labeler Time: 0.04990887641906738s


70it [28:56, 26.64s/it]

clean Time: 0.002825021743774414s
get-fuzzy-augmented-matches Time: 3.6970717906951904s
get-exact-matches Time: 0.2813749313354492s
get-kgtk-search-matches Time: 3.1737868785858154s
ground-truth-labeler Time: 0.05576181411743164s


71it [29:10, 22.98s/it]

clean Time: 0.0036041736602783203s
get-fuzzy-augmented-matches Time: 5.885469198226929s
get-exact-matches Time: 0.6507260799407959s
get-kgtk-search-matches Time: 8.168703079223633s
ground-truth-labeler Time: 0.09668803215026855s


72it [29:33, 23.03s/it]

clean Time: 0.007236003875732422s
get-fuzzy-augmented-matches Time: 7.469182014465332s
get-exact-matches Time: 0.6030259132385254s
get-kgtk-search-matches Time: 9.802597999572754s
ground-truth-labeler Time: 0.16977810859680176s


73it [30:01, 24.50s/it]

clean Time: 0.007811784744262695s
entered except
Command: get-fuzzy-augmented-matches
Error Message: Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connection.py", line 169, in _new_conn
    conn = connection.create_connection(
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/util/connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connectionpool.py", line 699, in ur

74it [30:19, 22.47s/it]

clean Time: 0.0036590099334716797s
get-fuzzy-augmented-matches Time: 8.103681087493896s
get-exact-matches Time: 0.6450331211090088s
get-kgtk-search-matches Time: 9.193456888198853s
ground-truth-labeler Time: 0.13250398635864258s


75it [30:45, 23.64s/it]

clean Time: 0.0023772716522216797s
get-fuzzy-augmented-matches Time: 4.80174994468689s
get-exact-matches Time: 0.7455871105194092s
get-kgtk-search-matches Time: 4.8745338916778564s
ground-truth-labeler Time: 0.05861926078796387s


76it [31:03, 21.79s/it]

clean Time: 0.0036988258361816406s
get-fuzzy-augmented-matches Time: 6.022672891616821s
get-exact-matches Time: 0.5033118724822998s
get-kgtk-search-matches Time: 4.439256191253662s
ground-truth-labeler Time: 0.11343598365783691s


77it [31:21, 20.89s/it]

clean Time: 0.0016789436340332031s
get-fuzzy-augmented-matches Time: 3.1941568851470947s
get-exact-matches Time: 0.5428290367126465s
get-kgtk-search-matches Time: 1.542959213256836s
ground-truth-labeler Time: 0.07061409950256348s


78it [31:34, 18.48s/it]

clean Time: 0.0021390914916992188s
get-fuzzy-augmented-matches Time: 4.354590177536011s
get-exact-matches Time: 0.40432095527648926s
get-kgtk-search-matches Time: 1.6738290786743164s
ground-truth-labeler Time: 0.05244612693786621s


79it [31:50, 17.64s/it]

clean Time: 0.004929065704345703s
get-fuzzy-augmented-matches Time: 6.3756279945373535s
get-exact-matches Time: 0.32355308532714844s
get-kgtk-search-matches Time: 1.267899751663208s
ground-truth-labeler Time: 0.05329012870788574s


80it [32:06, 17.27s/it]

clean Time: 0.002088308334350586s
get-fuzzy-augmented-matches Time: 2.376239061355591s
get-exact-matches Time: 0.12414693832397461s
get-kgtk-search-matches Time: 3.0842418670654297s
ground-truth-labeler Time: 0.060648202896118164s


81it [32:20, 16.15s/it]

clean Time: 0.002084970474243164s
get-fuzzy-augmented-matches Time: 4.773196220397949s
get-exact-matches Time: 0.5824489593505859s
get-kgtk-search-matches Time: 9.9011869430542s
ground-truth-labeler Time: 0.1581737995147705s


82it [32:45, 18.76s/it]

clean Time: 0.002095937728881836s
get-fuzzy-augmented-matches Time: 4.987047910690308s
get-exact-matches Time: 0.2097001075744629s
get-kgtk-search-matches Time: 1.4859368801116943s
ground-truth-labeler Time: 0.05413389205932617s


83it [33:00, 17.71s/it]

clean Time: 0.0038499832153320312s
get-fuzzy-augmented-matches Time: 5.699875831604004s
get-exact-matches Time: 0.4673912525177002s
get-kgtk-search-matches Time: 1.8953542709350586s
ground-truth-labeler Time: 0.08750081062316895s


84it [33:18, 17.73s/it]

clean Time: 0.004982948303222656s
get-fuzzy-augmented-matches Time: 9.06934404373169s
get-exact-matches Time: 0.3609001636505127s
get-kgtk-search-matches Time: 7.935024976730347s
ground-truth-labeler Time: 0.11849117279052734s


85it [33:41, 19.39s/it]

clean Time: 0.002219676971435547s
get-fuzzy-augmented-matches Time: 7.034233808517456s
get-exact-matches Time: 0.7253968715667725s
get-kgtk-search-matches Time: 5.764862060546875s
ground-truth-labeler Time: 0.07626128196716309s


86it [34:02, 19.86s/it]

clean Time: 0.007641792297363281s
get-fuzzy-augmented-matches Time: 6.286259889602661s
get-exact-matches Time: 0.7500698566436768s
get-kgtk-search-matches Time: 3.48136305809021s
ground-truth-labeler Time: 0.10296797752380371s


87it [34:21, 19.67s/it]

clean Time: 0.008544921875s
get-fuzzy-augmented-matches Time: 7.219648838043213s
get-exact-matches Time: 1.364182949066162s
get-kgtk-search-matches Time: 6.957329034805298s
ground-truth-labeler Time: 0.14041376113891602s


88it [34:46, 21.34s/it]

clean Time: 0.005450010299682617s
get-fuzzy-augmented-matches Time: 5.988386869430542s
get-exact-matches Time: 0.7795050144195557s
get-kgtk-search-matches Time: 13.784618854522705s
ground-truth-labeler Time: 0.3250007629394531s


89it [35:17, 24.21s/it]

clean Time: 0.0014700889587402344s
get-fuzzy-augmented-matches Time: 4.518582820892334s
get-exact-matches Time: 0.780249834060669s
get-kgtk-search-matches Time: 2.4708070755004883s
ground-truth-labeler Time: 0.0653069019317627s


90it [35:35, 22.24s/it]

clean Time: 0.0029439926147460938s
get-fuzzy-augmented-matches Time: 5.147885799407959s
get-exact-matches Time: 0.22154903411865234s
get-kgtk-search-matches Time: 4.655900001525879s
ground-truth-labeler Time: 0.08425331115722656s


91it [35:52, 20.56s/it]

clean Time: 0.005628108978271484s
get-fuzzy-augmented-matches Time: 6.783630847930908s
get-exact-matches Time: 0.5752220153808594s
get-kgtk-search-matches Time: 11.543673753738403s
ground-truth-labeler Time: 0.15513014793395996s


92it [36:17, 22.12s/it]

clean Time: 0.0052258968353271484s
get-fuzzy-augmented-matches Time: 5.9830217361450195s
get-exact-matches Time: 0.8004941940307617s
get-kgtk-search-matches Time: 5.504356861114502s
ground-truth-labeler Time: 0.10471415519714355s


93it [36:38, 21.63s/it]

clean Time: 0.004136085510253906s
get-fuzzy-augmented-matches Time: 3.5202789306640625s
get-exact-matches Time: 0.18530702590942383s
get-kgtk-search-matches Time: 5.61520791053772s
ground-truth-labeler Time: 0.09771203994750977s


94it [36:55, 20.27s/it]

clean Time: 0.0013229846954345703s
get-fuzzy-augmented-matches Time: 4.628463268280029s
get-exact-matches Time: 0.6889388561248779s
get-kgtk-search-matches Time: 2.124969005584717s
ground-truth-labeler Time: 0.05520510673522949s


95it [37:11, 18.99s/it]

clean Time: 0.005760908126831055s
get-fuzzy-augmented-matches Time: 3.723716974258423s
get-exact-matches Time: 0.17166996002197266s
get-kgtk-search-matches Time: 2.7997560501098633s
ground-truth-labeler Time: 0.0525209903717041s


96it [37:26, 17.80s/it]

clean Time: 0.003632068634033203s
get-fuzzy-augmented-matches Time: 4.162777900695801s
get-exact-matches Time: 0.9443740844726562s
get-kgtk-search-matches Time: 6.968183994293213s
ground-truth-labeler Time: 0.3494608402252197s


97it [37:48, 19.12s/it]

clean Time: 0.0031189918518066406s
get-fuzzy-augmented-matches Time: 6.67361307144165s
get-exact-matches Time: 0.5519068241119385s
get-kgtk-search-matches Time: 11.21340012550354s
ground-truth-labeler Time: 0.12828493118286133s


98it [38:14, 21.25s/it]

clean Time: 0.004827976226806641s
get-fuzzy-augmented-matches Time: 6.977615118026733s
get-exact-matches Time: 0.277087926864624s
get-kgtk-search-matches Time: 6.469156980514526s
ground-truth-labeler Time: 0.12406611442565918s


99it [38:35, 21.02s/it]

clean Time: 0.005635976791381836s
get-fuzzy-augmented-matches Time: 7.030617952346802s
get-exact-matches Time: 0.8276698589324951s
get-kgtk-search-matches Time: 8.979559898376465s
ground-truth-labeler Time: 0.503972053527832s


100it [39:02, 22.74s/it]

clean Time: 0.0012731552124023438s
get-fuzzy-augmented-matches Time: 3.5406720638275146s
get-exact-matches Time: 0.3545663356781006s
get-kgtk-search-matches Time: 3.808173179626465s
ground-truth-labeler Time: 0.06366920471191406s


101it [39:17, 20.58s/it]

clean Time: 0.002238035202026367s
get-fuzzy-augmented-matches Time: 4.837245225906372s
get-exact-matches Time: 0.4826319217681885s
get-kgtk-search-matches Time: 1.9396729469299316s
ground-truth-labeler Time: 0.056424856185913086s


102it [39:32, 18.98s/it]

clean Time: 0.003554105758666992s
get-fuzzy-augmented-matches Time: 6.549211025238037s
get-exact-matches Time: 0.4950692653656006s
get-kgtk-search-matches Time: 6.634460210800171s
ground-truth-labeler Time: 0.09486699104309082s


103it [39:52, 19.18s/it]

clean Time: 0.002218008041381836s
get-fuzzy-augmented-matches Time: 4.722104072570801s
get-exact-matches Time: 0.22895503044128418s
get-kgtk-search-matches Time: 3.5613391399383545s
ground-truth-labeler Time: 0.0529329776763916s


104it [40:09, 18.43s/it]

clean Time: 0.0035011768341064453s
get-fuzzy-augmented-matches Time: 3.760166883468628s
get-exact-matches Time: 0.2958681583404541s
get-kgtk-search-matches Time: 4.062509775161743s
ground-truth-labeler Time: 0.4108109474182129s


105it [40:26, 17.98s/it]

clean Time: 0.007508039474487305s
get-fuzzy-augmented-matches Time: 6.009843111038208s
get-exact-matches Time: 0.6659829616546631s
get-kgtk-search-matches Time: 3.7838358879089355s
ground-truth-labeler Time: 0.6114029884338379s


106it [40:45, 18.47s/it]

clean Time: 0.0033788681030273438s
get-fuzzy-augmented-matches Time: 8.130220174789429s
get-exact-matches Time: 1.0332601070404053s
get-kgtk-search-matches Time: 11.35867714881897s
ground-truth-labeler Time: 0.18488383293151855s


107it [41:15, 21.87s/it]

clean Time: 0.004992961883544922s
get-fuzzy-augmented-matches Time: 9.71511197090149s
get-exact-matches Time: 0.8225722312927246s
get-kgtk-search-matches Time: 9.541840076446533s
ground-truth-labeler Time: 0.13813090324401855s


108it [41:44, 24.10s/it]

clean Time: 0.00290679931640625s
get-fuzzy-augmented-matches Time: 5.868821382522583s
get-exact-matches Time: 0.7426788806915283s
get-kgtk-search-matches Time: 6.5078418254852295s
ground-truth-labeler Time: 0.07400703430175781s


109it [42:04, 22.68s/it]

clean Time: 0.0029320716857910156s
get-fuzzy-augmented-matches Time: 5.227061986923218s
get-exact-matches Time: 0.33248114585876465s
get-kgtk-search-matches Time: 2.2488200664520264s
ground-truth-labeler Time: 0.0923299789428711s


110it [42:18, 20.15s/it]

clean Time: 0.0016908645629882812s
get-fuzzy-augmented-matches Time: 4.278937101364136s
get-exact-matches Time: 0.21819210052490234s
get-kgtk-search-matches Time: 4.745203018188477s
ground-truth-labeler Time: 0.06180405616760254s


111it [42:37, 19.81s/it]

clean Time: 0.0027709007263183594s
get-fuzzy-augmented-matches Time: 6.745830059051514s
get-exact-matches Time: 0.28629302978515625s
get-kgtk-search-matches Time: 6.63723087310791s
ground-truth-labeler Time: 0.09674501419067383s


112it [43:00, 20.77s/it]

clean Time: 0.0013740062713623047s
get-fuzzy-augmented-matches Time: 4.984714031219482s
get-exact-matches Time: 0.7918961048126221s
get-kgtk-search-matches Time: 1.8638181686401367s
ground-truth-labeler Time: 0.05538821220397949s


113it [43:17, 19.53s/it]

clean Time: 0.0037512779235839844s
get-fuzzy-augmented-matches Time: 4.852689027786255s
get-exact-matches Time: 0.3271181583404541s
get-kgtk-search-matches Time: 2.4608659744262695s
ground-truth-labeler Time: 0.042024850845336914s


114it [43:33, 18.53s/it]

clean Time: 0.0023338794708251953s
get-fuzzy-augmented-matches Time: 3.0625219345092773s
get-exact-matches Time: 0.4902670383453369s
get-kgtk-search-matches Time: 6.1821019649505615s
ground-truth-labeler Time: 0.09143710136413574s


115it [43:51, 18.31s/it]

clean Time: 0.0024237632751464844s
get-fuzzy-augmented-matches Time: 6.1610541343688965s
get-exact-matches Time: 0.35873889923095703s
get-kgtk-search-matches Time: 9.07139277458191s
ground-truth-labeler Time: 0.0953516960144043s


116it [44:15, 20.12s/it]

clean Time: 0.004270076751708984s
get-fuzzy-augmented-matches Time: 7.182606935501099s
get-exact-matches Time: 0.6745321750640869s
get-kgtk-search-matches Time: 5.946465015411377s
ground-truth-labeler Time: 0.6910479068756104s


117it [44:38, 21.07s/it]

clean Time: 0.001886129379272461s
get-fuzzy-augmented-matches Time: 5.307080030441284s
get-exact-matches Time: 0.657905101776123s
get-kgtk-search-matches Time: 5.846879005432129s
ground-truth-labeler Time: 0.06776618957519531s


118it [44:59, 20.83s/it]

clean Time: 0.005509138107299805s
get-fuzzy-augmented-matches Time: 6.313590049743652s
get-exact-matches Time: 0.8338851928710938s
get-kgtk-search-matches Time: 8.206748247146606s
ground-truth-labeler Time: 0.10543298721313477s


119it [45:22, 21.66s/it]

clean Time: 0.003579854965209961s
get-fuzzy-augmented-matches Time: 8.400014638900757s
get-exact-matches Time: 1.4211771488189697s
get-kgtk-search-matches Time: 10.194504976272583s
ground-truth-labeler Time: 0.49068689346313477s


120it [45:52, 24.02s/it]

clean Time: 0.0038890838623046875s
get-fuzzy-augmented-matches Time: 9.067251920700073s
get-exact-matches Time: 0.4445011615753174s
get-kgtk-search-matches Time: 10.841367244720459s
ground-truth-labeler Time: 0.12008500099182129s


121it [46:20, 25.32s/it]

clean Time: 0.018635034561157227s
get-fuzzy-augmented-matches Time: 24.02668786048889s
get-exact-matches Time: 2.015214681625366s
get-kgtk-search-matches Time: 45.1687490940094s
ground-truth-labeler Time: 0.5877318382263184s


122it [47:44, 42.84s/it]

clean Time: 0.0015671253204345703s
get-fuzzy-augmented-matches Time: 5.605024099349976s
get-exact-matches Time: 0.9105663299560547s
get-kgtk-search-matches Time: 3.4941561222076416s
ground-truth-labeler Time: 0.06360769271850586s


123it [48:05, 36.24s/it]

clean Time: 0.0017969608306884766s
get-fuzzy-augmented-matches Time: 5.124270677566528s
get-exact-matches Time: 0.8159568309783936s
get-kgtk-search-matches Time: 2.046556234359741s
ground-truth-labeler Time: 0.06584596633911133s


124it [48:22, 30.56s/it]

clean Time: 0.0033118724822998047s
get-fuzzy-augmented-matches Time: 11.155898809432983s
get-exact-matches Time: 1.536834955215454s
get-kgtk-search-matches Time: 15.577663898468018s
ground-truth-labeler Time: 0.17157816886901855s


125it [49:00, 32.69s/it]

clean Time: 0.00514984130859375s
get-fuzzy-augmented-matches Time: 6.501666784286499s
get-exact-matches Time: 0.6815853118896484s
get-kgtk-search-matches Time: 8.806877851486206s
ground-truth-labeler Time: 0.13013124465942383s


126it [49:23, 29.99s/it]

clean Time: 0.0023431777954101562s
get-fuzzy-augmented-matches Time: 3.4683141708374023s
get-exact-matches Time: 0.4101829528808594s
get-kgtk-search-matches Time: 2.4088566303253174s
ground-truth-labeler Time: 0.09083271026611328s


127it [49:35, 24.62s/it]

clean Time: 0.003117084503173828s
get-fuzzy-augmented-matches Time: 5.749831914901733s
get-exact-matches Time: 0.706510066986084s
get-kgtk-search-matches Time: 3.7263848781585693s
ground-truth-labeler Time: 0.44763803482055664s


128it [49:55, 22.97s/it]

clean Time: 0.0016329288482666016s
get-fuzzy-augmented-matches Time: 4.511094808578491s
get-exact-matches Time: 0.7996029853820801s
get-kgtk-search-matches Time: 2.923048973083496s
ground-truth-labeler Time: 0.03780984878540039s


129it [50:10, 20.66s/it]

clean Time: 0.0022211074829101562s
get-fuzzy-augmented-matches Time: 4.173630952835083s
get-exact-matches Time: 0.5615530014038086s
get-kgtk-search-matches Time: 4.5388898849487305s
ground-truth-labeler Time: 0.6041219234466553s


130it [50:27, 19.75s/it]

clean Time: 0.0014271736145019531s
get-fuzzy-augmented-matches Time: 4.729512929916382s
get-exact-matches Time: 0.3144659996032715s
get-kgtk-search-matches Time: 2.185692071914673s
ground-truth-labeler Time: 0.050794124603271484s


131it [50:42, 18.19s/it]

clean Time: 0.002437114715576172s
get-fuzzy-augmented-matches Time: 4.930420875549316s
get-exact-matches Time: 0.7997779846191406s
get-kgtk-search-matches Time: 6.157778024673462s
ground-truth-labeler Time: 0.05203890800476074s


132it [51:02, 18.87s/it]

clean Time: 0.008481979370117188s
get-fuzzy-augmented-matches Time: 5.17395806312561s
get-exact-matches Time: 0.17488932609558105s
get-kgtk-search-matches Time: 2.7262189388275146s
ground-truth-labeler Time: 0.052726030349731445s


133it [51:20, 18.54s/it]

clean Time: 0.0038290023803710938s
get-fuzzy-augmented-matches Time: 5.397194862365723s
get-exact-matches Time: 0.3890819549560547s
get-kgtk-search-matches Time: 5.231549263000488s
ground-truth-labeler Time: 0.12443804740905762s


134it [51:38, 18.41s/it]

clean Time: 0.0049092769622802734s
get-fuzzy-augmented-matches Time: 7.648902893066406s
get-exact-matches Time: 0.3052709102630615s
get-kgtk-search-matches Time: 3.2274348735809326s
ground-truth-labeler Time: 0.10199213027954102s


135it [51:57, 18.48s/it]

clean Time: 0.0021779537200927734s
get-fuzzy-augmented-matches Time: 4.238616943359375s
get-exact-matches Time: 0.7286107540130615s
get-kgtk-search-matches Time: 4.299221992492676s
ground-truth-labeler Time: 0.07673096656799316s


136it [52:15, 18.30s/it]

clean Time: 0.003000974655151367s
get-fuzzy-augmented-matches Time: 7.301132917404175s
get-exact-matches Time: 0.7804131507873535s
get-kgtk-search-matches Time: 9.456622838973999s
ground-truth-labeler Time: 0.11588406562805176s


137it [52:42, 20.86s/it]

clean Time: 0.001905202865600586s
get-fuzzy-augmented-matches Time: 5.159571409225464s
get-exact-matches Time: 0.6010477542877197s
get-kgtk-search-matches Time: 4.57928204536438s
ground-truth-labeler Time: 0.05541801452636719s


138it [53:01, 20.33s/it]

clean Time: 0.0017397403717041016s
get-fuzzy-augmented-matches Time: 3.987497091293335s
get-exact-matches Time: 0.7644286155700684s
get-kgtk-search-matches Time: 4.307087659835815s
ground-truth-labeler Time: 0.306225061416626s


139it [53:19, 19.66s/it]

clean Time: 0.0017719268798828125s
get-fuzzy-augmented-matches Time: 4.839114189147949s
get-exact-matches Time: 0.973886251449585s
get-kgtk-search-matches Time: 2.2403411865234375s
ground-truth-labeler Time: 0.32903075218200684s


140it [53:36, 18.86s/it]

clean Time: 0.0037908554077148438s
get-fuzzy-augmented-matches Time: 9.113769054412842s
get-exact-matches Time: 0.7993736267089844s
get-kgtk-search-matches Time: 8.86905813217163s
ground-truth-labeler Time: 0.13483476638793945s


141it [54:03, 21.43s/it]

clean Time: 0.004686117172241211s
get-fuzzy-augmented-matches Time: 7.590083122253418s
get-exact-matches Time: 0.9071559906005859s
get-kgtk-search-matches Time: 19.591675996780396s
ground-truth-labeler Time: 0.22042202949523926s


142it [54:42, 26.55s/it]

clean Time: 0.006738185882568359s
get-fuzzy-augmented-matches Time: 8.837846040725708s
get-exact-matches Time: 0.7018969058990479s
get-kgtk-search-matches Time: 7.407633066177368s
ground-truth-labeler Time: 0.09597492218017578s


143it [55:08, 26.59s/it]

clean Time: 0.0012469291687011719s
get-fuzzy-augmented-matches Time: 5.432693958282471s
get-exact-matches Time: 0.643043041229248s
get-kgtk-search-matches Time: 6.206825017929077s
ground-truth-labeler Time: 0.060271263122558594s


144it [55:29, 24.83s/it]

clean Time: 0.003041982650756836s
get-fuzzy-augmented-matches Time: 7.34176778793335s
get-exact-matches Time: 1.0578408241271973s
get-kgtk-search-matches Time: 11.94385814666748s
ground-truth-labeler Time: 0.16162800788879395s


145it [55:59, 26.34s/it]

clean Time: 0.003909111022949219s
get-fuzzy-augmented-matches Time: 11.187220811843872s
get-exact-matches Time: 0.3097190856933594s
get-kgtk-search-matches Time: 4.290462017059326s
ground-truth-labeler Time: 0.0993039608001709s


146it [56:24, 25.98s/it]

clean Time: 0.0016510486602783203s
get-fuzzy-augmented-matches Time: 4.156270980834961s
get-exact-matches Time: 0.26084208488464355s
get-kgtk-search-matches Time: 3.2336409091949463s
ground-truth-labeler Time: 0.05545997619628906s


147it [56:38, 22.45s/it]

clean Time: 0.008681058883666992s
get-fuzzy-augmented-matches Time: 9.248597145080566s
get-exact-matches Time: 0.9196972846984863s
get-kgtk-search-matches Time: 9.611428022384644s
ground-truth-labeler Time: 0.213242769241333s


148it [57:07, 24.26s/it]

clean Time: 0.002312898635864258s
get-fuzzy-augmented-matches Time: 4.496909141540527s
get-exact-matches Time: 0.3746607303619385s
get-kgtk-search-matches Time: 2.319927930831909s
ground-truth-labeler Time: 0.05185508728027344s


149it [57:21, 21.20s/it]

clean Time: 0.005073070526123047s
get-fuzzy-augmented-matches Time: 9.677266836166382s
get-exact-matches Time: 0.8416140079498291s
get-kgtk-search-matches Time: 11.359586000442505s
ground-truth-labeler Time: 0.16153192520141602s


150it [57:53, 24.42s/it]

clean Time: 0.0024619102478027344s
get-fuzzy-augmented-matches Time: 5.122930288314819s
get-exact-matches Time: 0.6551089286804199s
get-kgtk-search-matches Time: 2.830828905105591s
ground-truth-labeler Time: 0.05440783500671387s


151it [58:11, 22.52s/it]

clean Time: 0.0033807754516601562s
get-fuzzy-augmented-matches Time: 6.296602725982666s
get-exact-matches Time: 0.6885108947753906s
get-kgtk-search-matches Time: 7.799170017242432s
ground-truth-labeler Time: 0.1545121669769287s


152it [58:32, 22.10s/it]

clean Time: 0.004599094390869141s
get-fuzzy-augmented-matches Time: 5.635630130767822s
get-exact-matches Time: 0.39882898330688477s
get-kgtk-search-matches Time: 10.57449197769165s
ground-truth-labeler Time: 0.3204970359802246s


153it [58:59, 23.54s/it]

clean Time: 0.002053976058959961s
get-fuzzy-augmented-matches Time: 3.628720998764038s
get-exact-matches Time: 0.12501907348632812s
get-kgtk-search-matches Time: 2.433256149291992s
ground-truth-labeler Time: 0.05087590217590332s


154it [59:14, 20.86s/it]

clean Time: 0.004996061325073242s
get-fuzzy-augmented-matches Time: 6.512391805648804s
get-exact-matches Time: 1.3172359466552734s
get-kgtk-search-matches Time: 9.234317064285278s
ground-truth-labeler Time: 0.12874627113342285s


155it [59:41, 22.67s/it]

clean Time: 0.0015079975128173828s
get-fuzzy-augmented-matches Time: 4.602887868881226s
get-exact-matches Time: 0.21935415267944336s
get-kgtk-search-matches Time: 2.44448184967041s
ground-truth-labeler Time: 0.05764603614807129s


156it [59:56, 20.39s/it]

clean Time: 0.002920866012573242s
get-fuzzy-augmented-matches Time: 7.74867582321167s
get-exact-matches Time: 0.47690606117248535s
get-kgtk-search-matches Time: 12.95213007926941s
ground-truth-labeler Time: 0.09548497200012207s


157it [1:00:24, 22.91s/it]

clean Time: 0.002203226089477539s
get-fuzzy-augmented-matches Time: 5.877694129943848s
get-exact-matches Time: 0.45393896102905273s
get-kgtk-search-matches Time: 7.592811107635498s
ground-truth-labeler Time: 0.09596896171569824s


158it [1:00:43, 21.61s/it]

clean Time: 0.005674839019775391s
get-fuzzy-augmented-matches Time: 8.37878680229187s
get-exact-matches Time: 0.7924189567565918s
get-kgtk-search-matches Time: 7.257237911224365s
ground-truth-labeler Time: 0.1558527946472168s


159it [1:01:08, 22.68s/it]

clean Time: 0.0037622451782226562s
get-fuzzy-augmented-matches Time: 5.853076934814453s
get-exact-matches Time: 0.7307047843933105s
get-kgtk-search-matches Time: 6.486939907073975s
ground-truth-labeler Time: 0.1293020248413086s


160it [1:01:29, 22.17s/it]

clean Time: 0.00446319580078125s
get-fuzzy-augmented-matches Time: 6.733047008514404s
get-exact-matches Time: 0.4913818836212158s
get-kgtk-search-matches Time: 6.456459999084473s
ground-truth-labeler Time: 0.12700390815734863s


161it [1:01:51, 22.20s/it]

clean Time: 0.0033416748046875s
get-fuzzy-augmented-matches Time: 5.877784013748169s
get-exact-matches Time: 0.33452391624450684s
get-kgtk-search-matches Time: 3.9204907417297363s
ground-truth-labeler Time: 0.090087890625s


162it [1:02:10, 21.01s/it]

clean Time: 0.002975940704345703s
get-fuzzy-augmented-matches Time: 7.253192901611328s
get-exact-matches Time: 0.4686279296875s
get-kgtk-search-matches Time: 9.337208986282349s
ground-truth-labeler Time: 0.11654281616210938s


163it [1:02:34, 22.15s/it]

clean Time: 0.0026810169219970703s
get-fuzzy-augmented-matches Time: 4.707445859909058s
get-exact-matches Time: 0.4109072685241699s
get-kgtk-search-matches Time: 3.78096604347229s
ground-truth-labeler Time: 0.11002683639526367s


164it [1:02:52, 20.64s/it]

clean Time: 0.0021691322326660156s
get-fuzzy-augmented-matches Time: 2.502487897872925s
get-exact-matches Time: 0.13871383666992188s
get-kgtk-search-matches Time: 4.471682071685791s
ground-truth-labeler Time: 0.05773806571960449s


165it [1:03:05, 18.45s/it]

clean Time: 0.004879951477050781s
get-fuzzy-augmented-matches Time: 7.854609727859497s
Command: get-exact-matches
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connection.py", line 169, in _new_conn
    conn = connection.create_connection(
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/util/connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connec

166it [1:03:22, 18.13s/it]

clean Time: 0.009784936904907227s
get-fuzzy-augmented-matches Time: 12.582686185836792s
get-exact-matches Time: 1.4667117595672607s
get-kgtk-search-matches Time: 4.210125923156738s
ground-truth-labeler Time: 0.17140412330627441s


167it [1:03:50, 20.98s/it]

clean Time: 0.005184173583984375s
get-fuzzy-augmented-matches Time: 7.986837148666382s
get-exact-matches Time: 0.5295498371124268s
get-kgtk-search-matches Time: 9.716063976287842s
ground-truth-labeler Time: 0.11233687400817871s


168it [1:04:18, 23.24s/it]

clean Time: 0.0020558834075927734s
get-fuzzy-augmented-matches Time: 4.116482973098755s
get-exact-matches Time: 0.19123411178588867s
get-kgtk-search-matches Time: 2.782572031021118s
ground-truth-labeler Time: 0.05292797088623047s


169it [1:04:33, 20.74s/it]

clean Time: 0.001506805419921875s
get-fuzzy-augmented-matches Time: 4.725539207458496s
get-exact-matches Time: 0.658308744430542s
get-kgtk-search-matches Time: 4.872506141662598s
ground-truth-labeler Time: 0.07819914817810059s


170it [1:04:52, 20.19s/it]

clean Time: 0.0017940998077392578s
get-fuzzy-augmented-matches Time: 4.4767680168151855s
get-exact-matches Time: 0.6762878894805908s
get-kgtk-search-matches Time: 5.2915380001068115s
ground-truth-labeler Time: 0.07040691375732422s


171it [1:05:09, 19.14s/it]

clean Time: 0.003515005111694336s
get-fuzzy-augmented-matches Time: 6.08174991607666s
get-exact-matches Time: 0.4481201171875s
get-kgtk-search-matches Time: 8.530226230621338s
ground-truth-labeler Time: 0.11669611930847168s


172it [1:05:31, 19.91s/it]

clean Time: 0.008122920989990234s
get-fuzzy-augmented-matches Time: 8.782397031784058s
get-exact-matches Time: 1.7991721630096436s
get-kgtk-search-matches Time: 26.59605312347412s
ground-truth-labeler Time: 0.2429802417755127s


173it [1:06:18, 28.23s/it]

clean Time: 0.0056607723236083984s
get-fuzzy-augmented-matches Time: 6.556663751602173s
get-exact-matches Time: 0.42346787452697754s
get-kgtk-search-matches Time: 3.980943202972412s
ground-truth-labeler Time: 0.1122591495513916s


174it [1:06:39, 26.09s/it]

clean Time: 0.00167083740234375s
get-fuzzy-augmented-matches Time: 5.741302967071533s
get-exact-matches Time: 1.2198078632354736s
get-kgtk-search-matches Time: 6.371896743774414s
ground-truth-labeler Time: 0.07238292694091797s


175it [1:07:02, 25.11s/it]

clean Time: 0.002440929412841797s
get-fuzzy-augmented-matches Time: 2.7017459869384766s
get-exact-matches Time: 0.16025686264038086s
get-kgtk-search-matches Time: 1.782891035079956s
ground-truth-labeler Time: 0.056668758392333984s


176it [1:07:13, 20.67s/it]

clean Time: 0.0031080245971679688s
get-fuzzy-augmented-matches Time: 7.276435852050781s
get-exact-matches Time: 0.5783810615539551s
get-kgtk-search-matches Time: 8.925456047058105s
ground-truth-labeler Time: 0.1005711555480957s


177it [1:07:35, 21.13s/it]

clean Time: 0.00973200798034668s
get-fuzzy-augmented-matches Time: 9.54050326347351s
get-exact-matches Time: 0.4003622531890869s
get-kgtk-search-matches Time: 7.114161968231201s
ground-truth-labeler Time: 0.1784801483154297s


178it [1:08:00, 22.37s/it]

clean Time: 0.0038290023803710938s
get-fuzzy-augmented-matches Time: 7.383312940597534s
get-exact-matches Time: 0.7911691665649414s
get-kgtk-search-matches Time: 11.05840015411377s
ground-truth-labeler Time: 0.1320807933807373s


179it [1:08:28, 24.09s/it]

clean Time: 0.0014238357543945312s
get-fuzzy-augmented-matches Time: 6.329135894775391s
get-exact-matches Time: 0.2237861156463623s
get-kgtk-search-matches Time: 4.571716070175171s
ground-truth-labeler Time: 0.06416106224060059s


180it [1:08:47, 22.66s/it]

clean Time: 0.0026400089263916016s
get-fuzzy-augmented-matches Time: 5.656649827957153s
get-exact-matches Time: 0.6862301826477051s
get-kgtk-search-matches Time: 7.4478089809417725s
ground-truth-labeler Time: 0.13918495178222656s


181it [1:09:10, 22.61s/it]

clean Time: 0.005932807922363281s
get-fuzzy-augmented-matches Time: 6.225890874862671s
get-exact-matches Time: 0.4821908473968506s
get-kgtk-search-matches Time: 4.815340042114258s
ground-truth-labeler Time: 0.14121389389038086s


182it [1:09:29, 21.60s/it]

clean Time: 0.0035898685455322266s
get-fuzzy-augmented-matches Time: 5.167649984359741s
get-exact-matches Time: 0.2592039108276367s
get-kgtk-search-matches Time: 5.27944540977478s
ground-truth-labeler Time: 0.09467387199401855s


183it [1:09:46, 20.20s/it]

clean Time: 0.002397775650024414s
get-fuzzy-augmented-matches Time: 3.5267131328582764s
get-exact-matches Time: 0.29036498069763184s
get-kgtk-search-matches Time: 4.1916282176971436s
ground-truth-labeler Time: 0.0735781192779541s


184it [1:10:01, 18.58s/it]

clean Time: 0.0021047592163085938s
get-fuzzy-augmented-matches Time: 6.128638982772827s
get-exact-matches Time: 0.5958280563354492s
get-kgtk-search-matches Time: 1.7668321132659912s
ground-truth-labeler Time: 0.055953264236450195s


185it [1:10:18, 18.16s/it]

clean Time: 0.004775285720825195s
get-fuzzy-augmented-matches Time: 6.056613922119141s
get-exact-matches Time: 0.678375244140625s
get-kgtk-search-matches Time: 3.900815010070801s
ground-truth-labeler Time: 0.08795523643493652s


186it [1:10:39, 18.89s/it]

clean Time: 0.0029230117797851562s
get-fuzzy-augmented-matches Time: 3.241847038269043s
get-exact-matches Time: 0.19417309761047363s
get-kgtk-search-matches Time: 4.469947814941406s
ground-truth-labeler Time: 0.12162995338439941s


187it [1:10:55, 18.24s/it]

clean Time: 0.006269216537475586s
get-fuzzy-augmented-matches Time: 8.605547904968262s
get-exact-matches Time: 6.366891145706177s
get-kgtk-search-matches Time: 13.616717100143433s
ground-truth-labeler Time: 0.14189386367797852s


188it [1:11:32, 23.88s/it]

clean Time: 0.003793954849243164s
get-fuzzy-augmented-matches Time: 8.080109119415283s
get-exact-matches Time: 0.3332390785217285s
get-kgtk-search-matches Time: 8.009139060974121s
ground-truth-labeler Time: 0.1800088882446289s


189it [1:11:59, 24.56s/it]

clean Time: 0.005640268325805664s
get-fuzzy-augmented-matches Time: 7.65778923034668s
get-exact-matches Time: 0.7310647964477539s
get-kgtk-search-matches Time: 8.202054023742676s
ground-truth-labeler Time: 0.11756229400634766s


190it [1:12:20, 23.77s/it]

clean Time: 0.0016710758209228516s
get-fuzzy-augmented-matches Time: 4.116004943847656s
get-exact-matches Time: 0.21355295181274414s
get-kgtk-search-matches Time: 2.079987049102783s
ground-truth-labeler Time: 0.05173921585083008s


191it [1:12:34, 20.68s/it]

clean Time: 0.005342960357666016s
get-fuzzy-augmented-matches Time: 6.639777898788452s
get-exact-matches Time: 0.7288830280303955s
get-kgtk-search-matches Time: 3.7986161708831787s
ground-truth-labeler Time: 0.14698386192321777s


192it [1:12:54, 20.47s/it]

clean Time: 0.0020639896392822266s
get-fuzzy-augmented-matches Time: 4.586341142654419s
get-exact-matches Time: 0.6814000606536865s
get-kgtk-search-matches Time: 6.208409070968628s
ground-truth-labeler Time: 0.07482790946960449s


193it [1:13:14, 20.47s/it]

clean Time: 0.0021440982818603516s
get-fuzzy-augmented-matches Time: 2.90198016166687s
get-exact-matches Time: 0.14211297035217285s
get-kgtk-search-matches Time: 4.311450004577637s
ground-truth-labeler Time: 0.04197192192077637s


194it [1:13:30, 19.00s/it]

clean Time: 0.0017368793487548828s
get-fuzzy-augmented-matches Time: 5.367893934249878s
get-exact-matches Time: 0.16594696044921875s
get-kgtk-search-matches Time: 2.463038206100464s
ground-truth-labeler Time: 0.0521540641784668s


195it [1:13:46, 18.15s/it]

clean Time: 0.003634214401245117s
get-fuzzy-augmented-matches Time: 4.634432077407837s
get-exact-matches Time: 0.27943921089172363s
get-kgtk-search-matches Time: 3.5035510063171387s
ground-truth-labeler Time: 0.05360293388366699s


196it [1:14:03, 17.67s/it]

clean Time: 0.0013079643249511719s
get-fuzzy-augmented-matches Time: 3.913490056991577s
get-exact-matches Time: 0.4595451354980469s
get-kgtk-search-matches Time: 4.123016357421875s
ground-truth-labeler Time: 0.10508990287780762s


197it [1:14:17, 16.70s/it]

clean Time: 0.0036950111389160156s
get-fuzzy-augmented-matches Time: 5.452135324478149s
get-exact-matches Time: 0.6361219882965088s
get-kgtk-search-matches Time: 2.4291770458221436s
ground-truth-labeler Time: 0.08393311500549316s


198it [1:14:31, 15.77s/it]

clean Time: 0.007233142852783203s
get-fuzzy-augmented-matches Time: 7.532589912414551s
get-exact-matches Time: 0.8917670249938965s
get-kgtk-search-matches Time: 8.659622192382812s
ground-truth-labeler Time: 0.1509549617767334s


199it [1:14:58, 19.20s/it]

clean Time: 0.0018720626831054688s
get-fuzzy-augmented-matches Time: 3.8006319999694824s
get-exact-matches Time: 0.139024019241333s
get-kgtk-search-matches Time: 7.350301265716553s
ground-truth-labeler Time: 0.06447696685791016s


200it [1:15:16, 19.00s/it]

clean Time: 0.0039920806884765625s
get-fuzzy-augmented-matches Time: 5.995997905731201s
get-exact-matches Time: 0.7981278896331787s
get-kgtk-search-matches Time: 9.649457931518555s
ground-truth-labeler Time: 0.16771912574768066s


201it [1:15:41, 20.77s/it]

clean Time: 0.004791975021362305s
get-fuzzy-augmented-matches Time: 4.115300893783569s
get-exact-matches Time: 0.1591050624847412s
get-kgtk-search-matches Time: 2.971560001373291s
ground-truth-labeler Time: 0.06782102584838867s


202it [1:15:57, 19.34s/it]

clean Time: 0.0019288063049316406s
get-fuzzy-augmented-matches Time: 6.702276706695557s
get-exact-matches Time: 2.9914278984069824s
get-kgtk-search-matches Time: 8.87910509109497s
ground-truth-labeler Time: 0.1135869026184082s


203it [1:16:24, 21.45s/it]

clean Time: 0.003654003143310547s
get-fuzzy-augmented-matches Time: 6.676293134689331s
get-exact-matches Time: 0.7909820079803467s
get-kgtk-search-matches Time: 6.2918901443481445s
ground-truth-labeler Time: 0.13564491271972656s


204it [1:16:43, 20.86s/it]

clean Time: 0.00429987907409668s
get-fuzzy-augmented-matches Time: 5.278741121292114s
get-exact-matches Time: 0.23140931129455566s
get-kgtk-search-matches Time: 2.969419002532959s
ground-truth-labeler Time: 0.10408711433410645s


205it [1:16:56, 18.43s/it]

clean Time: 0.0024280548095703125s
get-fuzzy-augmented-matches Time: 3.8675920963287354s
get-exact-matches Time: 0.6206250190734863s
get-kgtk-search-matches Time: 6.043370962142944s
ground-truth-labeler Time: 0.08615493774414062s


206it [1:17:15, 18.64s/it]

clean Time: 0.0016300678253173828s
get-fuzzy-augmented-matches Time: 5.902835130691528s
get-exact-matches Time: 0.5623419284820557s
get-kgtk-search-matches Time: 6.47199821472168s
ground-truth-labeler Time: 0.06868505477905273s


207it [1:17:37, 19.47s/it]

clean Time: 0.002766132354736328s
get-fuzzy-augmented-matches Time: 4.256191968917847s
get-exact-matches Time: 0.19954681396484375s
get-kgtk-search-matches Time: 3.3265018463134766s
ground-truth-labeler Time: 0.05428719520568848s


208it [1:17:53, 18.44s/it]

clean Time: 0.0034322738647460938s
get-fuzzy-augmented-matches Time: 8.610133171081543s
get-exact-matches Time: 0.5403890609741211s
get-kgtk-search-matches Time: 10.77859878540039s
ground-truth-labeler Time: 0.13072514533996582s


209it [1:18:20, 21.27s/it]

clean Time: 0.014020204544067383s
get-fuzzy-augmented-matches Time: 15.220197916030884s
get-exact-matches Time: 1.4060869216918945s
get-kgtk-search-matches Time: 42.022873878479004s
ground-truth-labeler Time: 0.44199609756469727s


210it [1:19:33, 36.62s/it]

clean Time: 0.0035409927368164062s
get-fuzzy-augmented-matches Time: 5.868659973144531s
get-exact-matches Time: 0.5425398349761963s
get-kgtk-search-matches Time: 5.281031847000122s
ground-truth-labeler Time: 0.09195923805236816s


211it [1:19:52, 31.24s/it]

clean Time: 0.003686189651489258s
get-fuzzy-augmented-matches Time: 4.41287088394165s
get-exact-matches Time: 0.6461927890777588s
get-kgtk-search-matches Time: 1.9843039512634277s
ground-truth-labeler Time: 0.12456512451171875s


212it [1:20:07, 26.38s/it]

clean Time: 0.0023610591888427734s
get-fuzzy-augmented-matches Time: 4.688507795333862s
get-exact-matches Time: 0.8230159282684326s
get-kgtk-search-matches Time: 3.2897651195526123s
ground-truth-labeler Time: 0.07452511787414551s


213it [1:20:24, 23.79s/it]

clean Time: 0.0038449764251708984s
get-fuzzy-augmented-matches Time: 5.157814979553223s
get-exact-matches Time: 0.19170904159545898s
get-kgtk-search-matches Time: 4.298790216445923s
ground-truth-labeler Time: 0.0886831283569336s


214it [1:20:42, 22.05s/it]

clean Time: 0.011902809143066406s
get-fuzzy-augmented-matches Time: 13.62979006767273s
get-exact-matches Time: 1.097646713256836s
get-kgtk-search-matches Time: 24.249501943588257s
ground-truth-labeler Time: 0.29887890815734863s


215it [1:21:31, 30.03s/it]

clean Time: 0.0038123130798339844s
get-fuzzy-augmented-matches Time: 5.64444375038147s
get-exact-matches Time: 0.46779799461364746s
get-kgtk-search-matches Time: 3.223586082458496s
ground-truth-labeler Time: 0.152662992477417s


216it [1:21:48, 26.09s/it]

clean Time: 0.0019240379333496094s
get-fuzzy-augmented-matches Time: 6.092751979827881s
get-exact-matches Time: 0.9447638988494873s
get-kgtk-search-matches Time: 8.595371007919312s
ground-truth-labeler Time: 0.10948014259338379s


217it [1:22:13, 25.79s/it]

clean Time: 0.0063359737396240234s
get-fuzzy-augmented-matches Time: 9.776829957962036s
get-exact-matches Time: 0.5844018459320068s
get-kgtk-search-matches Time: 8.739258766174316s
ground-truth-labeler Time: 0.18471312522888184s


218it [1:22:42, 26.69s/it]

clean Time: 0.003675222396850586s
get-fuzzy-augmented-matches Time: 10.106834888458252s
get-exact-matches Time: 0.7166979312896729s
get-kgtk-search-matches Time: 15.479468822479248s
ground-truth-labeler Time: 0.3901040554046631s


219it [1:23:19, 29.97s/it]

clean Time: 0.004823923110961914s
get-fuzzy-augmented-matches Time: 5.813137054443359s
get-exact-matches Time: 0.6577489376068115s
get-kgtk-search-matches Time: 7.015290975570679s
ground-truth-labeler Time: 0.13608694076538086s


220it [1:23:42, 27.81s/it]

clean Time: 0.0018410682678222656s
get-fuzzy-augmented-matches Time: 3.2397818565368652s
get-exact-matches Time: 0.1673741340637207s
get-kgtk-search-matches Time: 3.3970398902893066s
ground-truth-labeler Time: 0.08177995681762695s


221it [1:23:58, 24.14s/it]

clean Time: 0.007249116897583008s
get-fuzzy-augmented-matches Time: 8.523739099502563s
get-exact-matches Time: 0.4288918972015381s
get-kgtk-search-matches Time: 9.327737808227539s
ground-truth-labeler Time: 0.11897969245910645s


222it [1:24:22, 24.27s/it]

clean Time: 0.006507158279418945s
get-fuzzy-augmented-matches Time: 7.1794281005859375s
get-exact-matches Time: 0.5761637687683105s
get-kgtk-search-matches Time: 10.811218976974487s
ground-truth-labeler Time: 0.1285719871520996s


223it [1:24:50, 25.15s/it]

clean Time: 0.002002239227294922s
get-fuzzy-augmented-matches Time: 5.001084804534912s
get-exact-matches Time: 0.7119259834289551s
get-kgtk-search-matches Time: 5.544676780700684s
ground-truth-labeler Time: 0.05881810188293457s


224it [1:25:10, 23.71s/it]

clean Time: 0.0014760494232177734s
get-fuzzy-augmented-matches Time: 5.905512809753418s
get-exact-matches Time: 0.6774337291717529s
get-kgtk-search-matches Time: 5.274375915527344s
ground-truth-labeler Time: 0.07135891914367676s


225it [1:25:30, 22.75s/it]

clean Time: 0.002843141555786133s
get-fuzzy-augmented-matches Time: 9.453239917755127s
get-exact-matches Time: 1.3437910079956055s
get-kgtk-search-matches Time: 10.19993281364441s
ground-truth-labeler Time: 0.16952872276306152s


226it [1:26:01, 25.18s/it]

clean Time: 0.004866838455200195s
get-fuzzy-augmented-matches Time: 14.326548099517822s
get-exact-matches Time: 0.788032054901123s
get-kgtk-search-matches Time: 12.210520029067993s
ground-truth-labeler Time: 0.48731493949890137s


227it [1:26:37, 28.50s/it]

clean Time: 0.0019249916076660156s
get-fuzzy-augmented-matches Time: 4.641663074493408s
get-exact-matches Time: 0.41632890701293945s
get-kgtk-search-matches Time: 2.225053310394287s
ground-truth-labeler Time: 0.05604100227355957s


228it [1:26:52, 24.20s/it]

clean Time: 0.005341053009033203s
get-fuzzy-augmented-matches Time: 10.351420879364014s
get-exact-matches Time: 0.8163039684295654s
get-kgtk-search-matches Time: 3.740494728088379s
ground-truth-labeler Time: 0.10450887680053711s


229it [1:27:15, 24.01s/it]

clean Time: 0.0041599273681640625s
get-fuzzy-augmented-matches Time: 6.431804180145264s
get-exact-matches Time: 0.5449738502502441s
get-kgtk-search-matches Time: 6.879248142242432s
ground-truth-labeler Time: 0.19121098518371582s


230it [1:27:39, 23.87s/it]

clean Time: 0.007809877395629883s
get-fuzzy-augmented-matches Time: 7.871191024780273s
get-exact-matches Time: 0.589514970779419s
get-kgtk-search-matches Time: 9.003137826919556s
ground-truth-labeler Time: 0.13791584968566895s


231it [1:28:07, 25.11s/it]

clean Time: 0.006028175354003906s
get-fuzzy-augmented-matches Time: 5.765671968460083s
Command: get-exact-matches
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connection.py", line 169, in _new_conn
    conn = connection.create_connection(
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/util/connection.py", line 73, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/urllib3/connec

232it [1:28:20, 21.61s/it]

clean Time: 0.004398822784423828s
get-fuzzy-augmented-matches Time: 9.209438800811768s
get-exact-matches Time: 0.6503579616546631s
get-kgtk-search-matches Time: 11.066428899765015s
ground-truth-labeler Time: 0.11472201347351074s


233it [1:28:48, 23.57s/it]

clean Time: 0.0024437904357910156s
get-fuzzy-augmented-matches Time: 4.1428728103637695s
get-exact-matches Time: 0.3165547847747803s
get-kgtk-search-matches Time: 4.516558885574341s
ground-truth-labeler Time: 0.05492901802062988s


234it [1:29:06, 21.92s/it]

clean Time: 0.0016720294952392578s
get-fuzzy-augmented-matches Time: 2.690260171890259s
get-exact-matches Time: 0.26442694664001465s
get-kgtk-search-matches Time: 3.8448989391326904s
ground-truth-labeler Time: 0.04730820655822754s


235it [1:29:20, 19.48s/it]

clean Time: 0.0029299259185791016s
get-fuzzy-augmented-matches Time: 4.5641350746154785s
get-exact-matches Time: 0.2648601531982422s
get-kgtk-search-matches Time: 3.953627109527588s
ground-truth-labeler Time: 0.05316615104675293s


236it [1:29:37, 18.71s/it]

clean Time: 0.003787994384765625s
get-fuzzy-augmented-matches Time: 6.5788609981536865s
get-exact-matches Time: 0.5049350261688232s
get-kgtk-search-matches Time: 8.924455165863037s
ground-truth-labeler Time: 0.13726282119750977s


237it [1:30:01, 20.17s/it]

clean Time: 0.003777027130126953s
get-fuzzy-augmented-matches Time: 4.425922155380249s
get-exact-matches Time: 0.5645961761474609s
get-kgtk-search-matches Time: 7.755577802658081s
ground-truth-labeler Time: 0.09657406806945801s


238it [1:30:22, 20.55s/it]

clean Time: 0.0032188892364501953s
get-fuzzy-augmented-matches Time: 8.019428968429565s
get-exact-matches Time: 0.6003928184509277s
get-kgtk-search-matches Time: 10.75025200843811s
ground-truth-labeler Time: 0.13353395462036133s


239it [1:30:48, 22.07s/it]

clean Time: 0.0054781436920166016s
get-fuzzy-augmented-matches Time: 7.365311145782471s
get-exact-matches Time: 0.704401969909668s
get-kgtk-search-matches Time: 12.692087888717651s
ground-truth-labeler Time: 0.18085002899169922s


240it [1:31:17, 24.18s/it]

clean Time: 0.007748842239379883s
get-fuzzy-augmented-matches Time: 8.410703897476196s
get-exact-matches Time: 0.5196559429168701s
get-kgtk-search-matches Time: 9.96609115600586s
ground-truth-labeler Time: 0.17098331451416016s


241it [1:31:45, 25.39s/it]

clean Time: 0.005426168441772461s
get-fuzzy-augmented-matches Time: 8.834304094314575s
get-exact-matches Time: 0.42194199562072754s
get-kgtk-search-matches Time: 7.290678024291992s
ground-truth-labeler Time: 0.1521611213684082s


242it [1:32:10, 25.12s/it]

clean Time: 0.0066051483154296875s
get-fuzzy-augmented-matches Time: 4.511843919754028s
get-exact-matches Time: 0.4974989891052246s
get-kgtk-search-matches Time: 3.973966121673584s
ground-truth-labeler Time: 0.10562491416931152s


243it [1:32:26, 22.58s/it]

clean Time: 0.00516200065612793s
get-fuzzy-augmented-matches Time: 7.598479986190796s
get-exact-matches Time: 0.6135001182556152s
get-kgtk-search-matches Time: 8.918837070465088s
ground-truth-labeler Time: 0.1115882396697998s


244it [1:32:51, 23.26s/it]

clean Time: 0.003487825393676758s
get-fuzzy-augmented-matches Time: 5.336464881896973s
get-exact-matches Time: 0.572512149810791s
get-kgtk-search-matches Time: 9.82937502861023s
ground-truth-labeler Time: 0.13146591186523438s


245it [1:33:12, 22.61s/it]

clean Time: 0.0018398761749267578s
get-fuzzy-augmented-matches Time: 3.298936128616333s
get-exact-matches Time: 0.46838998794555664s
get-kgtk-search-matches Time: 1.474055290222168s
ground-truth-labeler Time: 0.10791277885437012s


246it [1:33:23, 18.97s/it]

clean Time: 0.0019032955169677734s
get-fuzzy-augmented-matches Time: 3.663440227508545s
get-exact-matches Time: 0.23183274269104004s
get-kgtk-search-matches Time: 4.1451661586761475s
ground-truth-labeler Time: 0.05835103988647461s


247it [1:33:38, 18.04s/it]

clean Time: 0.0015320777893066406s
get-fuzzy-augmented-matches Time: 6.016354084014893s
get-exact-matches Time: 0.1722731590270996s
get-kgtk-search-matches Time: 3.050755023956299s
ground-truth-labeler Time: 0.09305500984191895s


248it [1:33:56, 17.92s/it]

clean Time: 0.0029740333557128906s
get-fuzzy-augmented-matches Time: 5.161556005477905s
get-exact-matches Time: 0.3036379814147949s
get-kgtk-search-matches Time: 4.996438026428223s
ground-truth-labeler Time: 0.09555387496948242s


249it [1:34:15, 18.32s/it]

clean Time: 0.002855062484741211s
get-fuzzy-augmented-matches Time: 3.910118341445923s
get-exact-matches Time: 0.13173484802246094s
get-kgtk-search-matches Time: 2.285789966583252s
ground-truth-labeler Time: 0.05211806297302246s


250it [1:34:29, 17.02s/it]

clean Time: 0.0050160884857177734s
get-fuzzy-augmented-matches Time: 6.293877840042114s
get-exact-matches Time: 1.0335090160369873s
get-kgtk-search-matches Time: 8.332074880599976s
ground-truth-labeler Time: 0.3037269115447998s


251it [1:34:55, 19.62s/it]

clean Time: 0.0061490535736083984s
get-fuzzy-augmented-matches Time: 6.9735331535339355s
get-exact-matches Time: 0.6086139678955078s
get-kgtk-search-matches Time: 11.255608797073364s
ground-truth-labeler Time: 0.15691113471984863s


252it [1:35:21, 21.55s/it]

clean Time: 0.005091190338134766s
get-fuzzy-augmented-matches Time: 3.9078361988067627s
get-exact-matches Time: 0.17725896835327148s
get-kgtk-search-matches Time: 5.354984998703003s
ground-truth-labeler Time: 0.0895240306854248s


253it [1:35:37, 19.75s/it]

clean Time: 0.0035638809204101562s
get-fuzzy-augmented-matches Time: 5.268578052520752s
get-exact-matches Time: 0.6815180778503418s
get-kgtk-search-matches Time: 3.3391261100769043s
ground-truth-labeler Time: 0.06365203857421875s


254it [1:35:53, 18.76s/it]

clean Time: 0.03202009201049805s
get-fuzzy-augmented-matches Time: 38.887641191482544s
get-exact-matches Time: 1.715867042541504s
get-kgtk-search-matches Time: 56.52703595161438s
ground-truth-labeler Time: 0.960658073425293s


255it [1:37:46, 47.05s/it]

clean Time: 0.00344085693359375s
get-fuzzy-augmented-matches Time: 5.826447248458862s
get-exact-matches Time: 0.819716215133667s
get-kgtk-search-matches Time: 5.4246132373809814s
ground-truth-labeler Time: 0.09762692451477051s


256it [1:38:03, 38.12s/it]

clean Time: 0.0015859603881835938s
get-fuzzy-augmented-matches Time: 5.356257200241089s
get-exact-matches Time: 0.5974819660186768s
get-kgtk-search-matches Time: 4.1398351192474365s
ground-truth-labeler Time: 0.056333065032958984s


257it [1:38:21, 31.87s/it]

clean Time: 0.001644134521484375s
get-fuzzy-augmented-matches Time: 4.027535915374756s
get-exact-matches Time: 0.42353105545043945s
get-kgtk-search-matches Time: 4.248929738998413s
ground-truth-labeler Time: 0.0690302848815918s


258it [1:38:38, 27.49s/it]

clean Time: 0.0038559436798095703s
get-fuzzy-augmented-matches Time: 6.7314231395721436s
get-exact-matches Time: 0.7791292667388916s
get-kgtk-search-matches Time: 10.05292272567749s
ground-truth-labeler Time: 0.13897085189819336s


259it [1:39:04, 26.96s/it]

clean Time: 0.0025568008422851562s
get-fuzzy-augmented-matches Time: 5.275799036026001s
get-exact-matches Time: 1.4877300262451172s
get-kgtk-search-matches Time: 7.667104959487915s
ground-truth-labeler Time: 0.5466198921203613s


260it [1:39:29, 26.47s/it]

clean Time: 0.0026941299438476562s
get-fuzzy-augmented-matches Time: 6.6278767585754395s
get-exact-matches Time: 0.9335958957672119s
get-kgtk-search-matches Time: 10.110841035842896s
ground-truth-labeler Time: 0.15547704696655273s


261it [1:39:55, 26.38s/it]

clean Time: 0.005067110061645508s
get-fuzzy-augmented-matches Time: 8.4149010181427s
get-exact-matches Time: 0.5908620357513428s
get-kgtk-search-matches Time: 10.166334867477417s
ground-truth-labeler Time: 0.16745877265930176s


262it [1:40:21, 26.13s/it]

clean Time: 0.0021131038665771484s
get-fuzzy-augmented-matches Time: 3.6544649600982666s
get-exact-matches Time: 0.7174332141876221s
get-kgtk-search-matches Time: 3.542135000228882s
ground-truth-labeler Time: 0.05729985237121582s


263it [1:40:37, 23.04s/it]

clean Time: 0.0014650821685791016s
get-fuzzy-augmented-matches Time: 2.894084930419922s
get-exact-matches Time: 0.1680288314819336s
get-kgtk-search-matches Time: 3.47430419921875s
ground-truth-labeler Time: 0.059545040130615234s


264it [1:40:51, 20.53s/it]

clean Time: 0.002399921417236328s
get-fuzzy-augmented-matches Time: 2.325747013092041s
get-exact-matches Time: 0.1434321403503418s
get-kgtk-search-matches Time: 4.299068212509155s
ground-truth-labeler Time: 0.06125998497009277s


265it [1:41:07, 19.07s/it]

clean Time: 0.0038590431213378906s
get-fuzzy-augmented-matches Time: 7.806210994720459s
get-exact-matches Time: 0.9621520042419434s
get-kgtk-search-matches Time: 11.20511794090271s
ground-truth-labeler Time: 0.1559600830078125s


266it [1:41:36, 21.95s/it]

clean Time: 0.0013689994812011719s
get-fuzzy-augmented-matches Time: 3.8259294033050537s
get-exact-matches Time: 0.1326441764831543s
get-kgtk-search-matches Time: 1.3415019512176514s
ground-truth-labeler Time: 0.07928824424743652s


267it [1:41:49, 19.28s/it]

clean Time: 0.002022266387939453s
get-fuzzy-augmented-matches Time: 2.7658002376556396s
get-exact-matches Time: 0.20805716514587402s
get-kgtk-search-matches Time: 3.0507309436798096s
ground-truth-labeler Time: 0.04819607734680176s


268it [1:42:03, 17.91s/it]

clean Time: 0.0042498111724853516s
get-fuzzy-augmented-matches Time: 9.503325939178467s
get-exact-matches Time: 0.6210448741912842s
get-kgtk-search-matches Time: 10.925993919372559s
ground-truth-labeler Time: 0.21126699447631836s


269it [1:42:34, 21.71s/it]

clean Time: 0.004068851470947266s
get-fuzzy-augmented-matches Time: 4.868340969085693s
get-exact-matches Time: 0.3972201347351074s
get-kgtk-search-matches Time: 2.826047897338867s
ground-truth-labeler Time: 0.06491518020629883s


270it [1:42:48, 19.40s/it]

clean Time: 0.0013680458068847656s
get-fuzzy-augmented-matches Time: 4.0245208740234375s
get-exact-matches Time: 0.20464205741882324s
get-kgtk-search-matches Time: 5.012685775756836s
ground-truth-labeler Time: 0.055856943130493164s


271it [1:43:05, 18.68s/it]

clean Time: 0.005712985992431641s
get-fuzzy-augmented-matches Time: 5.957664728164673s
get-exact-matches Time: 0.2881040573120117s
get-kgtk-search-matches Time: 6.757540941238403s
ground-truth-labeler Time: 0.1195688247680664s


272it [1:43:27, 19.81s/it]

clean Time: 0.002980947494506836s
get-fuzzy-augmented-matches Time: 4.117595195770264s
get-exact-matches Time: 0.2102208137512207s
get-kgtk-search-matches Time: 3.383401870727539s
ground-truth-labeler Time: 0.07597112655639648s


273it [1:43:44, 18.81s/it]

clean Time: 0.01098179817199707s
get-fuzzy-augmented-matches Time: 12.443819761276245s
get-exact-matches Time: 0.9924538135528564s
get-kgtk-search-matches Time: 9.382867813110352s
ground-truth-labeler Time: 0.24023175239562988s


274it [1:44:16, 22.68s/it]

clean Time: 0.003818035125732422s
get-fuzzy-augmented-matches Time: 5.462256193161011s
get-exact-matches Time: 0.48569393157958984s
get-kgtk-search-matches Time: 6.248327970504761s
ground-truth-labeler Time: 0.09754800796508789s


275it [1:44:36, 21.92s/it]

clean Time: 0.001783132553100586s
get-fuzzy-augmented-matches Time: 4.418865919113159s
get-exact-matches Time: 0.16690397262573242s
get-kgtk-search-matches Time: 3.361786127090454s
ground-truth-labeler Time: 0.057823896408081055s


276it [1:44:49, 19.29s/it]

clean Time: 0.002103090286254883s
get-fuzzy-augmented-matches Time: 2.8075389862060547s
get-exact-matches Time: 0.13931608200073242s
get-kgtk-search-matches Time: 4.39774489402771s
ground-truth-labeler Time: 0.06474924087524414s


277it [1:45:04, 18.08s/it]

clean Time: 0.0021500587463378906s
get-fuzzy-augmented-matches Time: 5.7479798793792725s
get-exact-matches Time: 0.4489879608154297s
get-kgtk-search-matches Time: 7.569272041320801s
ground-truth-labeler Time: 0.06207895278930664s


278it [1:45:26, 19.15s/it]

clean Time: 0.011679887771606445s
get-fuzzy-augmented-matches Time: 9.676113843917847s
get-exact-matches Time: 1.1317932605743408s
get-kgtk-search-matches Time: 14.076818943023682s
ground-truth-labeler Time: 0.23225808143615723s


279it [1:46:00, 23.55s/it]

clean Time: 0.0029230117797851562s
get-fuzzy-augmented-matches Time: 4.023844003677368s
get-exact-matches Time: 0.22359013557434082s
get-kgtk-search-matches Time: 6.339627981185913s
ground-truth-labeler Time: 0.09499812126159668s


280it [1:46:19, 22.31s/it]

clean Time: 0.0015826225280761719s
get-fuzzy-augmented-matches Time: 3.0041017532348633s
get-exact-matches Time: 0.20493698120117188s
get-kgtk-search-matches Time: 3.24294114112854s
ground-truth-labeler Time: 0.053240060806274414s


281it [1:46:33, 19.92s/it]

clean Time: 0.00716400146484375s
get-fuzzy-augmented-matches Time: 5.8075270652771s
get-exact-matches Time: 0.2682669162750244s
get-kgtk-search-matches Time: 4.266334056854248s
ground-truth-labeler Time: 0.10976099967956543s


282it [1:46:53, 19.78s/it]

clean Time: 0.0024662017822265625s
get-fuzzy-augmented-matches Time: 3.9039511680603027s
get-exact-matches Time: 0.2536020278930664s
get-kgtk-search-matches Time: 2.2856638431549072s
ground-truth-labeler Time: 0.04811882972717285s


283it [1:47:06, 17.80s/it]

clean Time: 0.0023031234741210938s
get-fuzzy-augmented-matches Time: 4.840934991836548s
get-exact-matches Time: 0.44896507263183594s
get-kgtk-search-matches Time: 5.279635906219482s
ground-truth-labeler Time: 0.062461137771606445s


284it [1:47:24, 17.71s/it]

clean Time: 0.004302978515625s
get-fuzzy-augmented-matches Time: 4.737617015838623s
get-exact-matches Time: 0.4021339416503906s
get-kgtk-search-matches Time: 7.227486848831177s
ground-truth-labeler Time: 0.6469557285308838s


285it [1:47:46, 19.01s/it]

clean Time: 0.003484010696411133s
get-fuzzy-augmented-matches Time: 5.8594911098480225s
get-exact-matches Time: 0.6272487640380859s
get-kgtk-search-matches Time: 6.0638267993927s
ground-truth-labeler Time: 0.11441278457641602s


286it [1:48:08, 19.96s/it]

clean Time: 0.003258228302001953s
get-fuzzy-augmented-matches Time: 6.6280999183654785s
get-exact-matches Time: 0.4372520446777344s
get-kgtk-search-matches Time: 8.512571811676025s
ground-truth-labeler Time: 0.12044095993041992s


287it [1:48:32, 21.24s/it]

clean Time: 0.002034902572631836s
get-fuzzy-augmented-matches Time: 3.4326107501983643s
get-exact-matches Time: 0.2903931140899658s
get-kgtk-search-matches Time: 2.054901123046875s
ground-truth-labeler Time: 0.053872108459472656s


288it [1:48:45, 18.87s/it]

clean Time: 0.004935026168823242s
get-fuzzy-augmented-matches Time: 6.41339898109436s
get-exact-matches Time: 0.6174347400665283s
get-kgtk-search-matches Time: 9.168295860290527s
ground-truth-labeler Time: 0.1739509105682373s


289it [1:49:10, 20.60s/it]

clean Time: 0.0013549327850341797s
get-fuzzy-augmented-matches Time: 3.6308741569519043s
get-exact-matches Time: 0.1633908748626709s
get-kgtk-search-matches Time: 2.6217689514160156s
ground-truth-labeler Time: 0.04322409629821777s


290it [1:49:22, 18.05s/it]

clean Time: 0.02114105224609375s
get-fuzzy-augmented-matches Time: 8.02890396118164s
get-exact-matches Time: 0.7799222469329834s
get-kgtk-search-matches Time: 9.920065641403198s
ground-truth-labeler Time: 0.18119382858276367s


291it [1:49:47, 20.22s/it]

clean Time: 0.002218008041381836s
get-fuzzy-augmented-matches Time: 2.667747974395752s
get-exact-matches Time: 0.20936179161071777s
get-kgtk-search-matches Time: 4.332314729690552s
ground-truth-labeler Time: 0.05772066116333008s


292it [1:50:02, 18.63s/it]

clean Time: 0.0021059513092041016s
get-fuzzy-augmented-matches Time: 3.0608181953430176s
get-exact-matches Time: 0.7479331493377686s
get-kgtk-search-matches Time: 1.4161031246185303s
ground-truth-labeler Time: 0.052951812744140625s


293it [1:50:17, 17.40s/it]

clean Time: 0.0051419734954833984s
get-fuzzy-augmented-matches Time: 8.175443887710571s
get-exact-matches Time: 0.5844969749450684s
get-kgtk-search-matches Time: 10.846532821655273s
ground-truth-labeler Time: 0.15186214447021484s


294it [1:50:45, 20.80s/it]

clean Time: 0.00744318962097168s
get-fuzzy-augmented-matches Time: 7.960134983062744s
get-exact-matches Time: 0.3560447692871094s
get-kgtk-search-matches Time: 2.410137176513672s
ground-truth-labeler Time: 0.10752677917480469s


295it [1:51:05, 20.49s/it]

clean Time: 0.0017452239990234375s
get-fuzzy-augmented-matches Time: 8.5223228931427s
get-exact-matches Time: 0.20270109176635742s
get-kgtk-search-matches Time: 3.44073224067688s
ground-truth-labeler Time: 0.05257892608642578s


296it [1:51:26, 20.43s/it]

clean Time: 0.002382993698120117s
get-fuzzy-augmented-matches Time: 6.705013990402222s
get-exact-matches Time: 0.23419904708862305s
get-kgtk-search-matches Time: 10.993705034255981s
ground-truth-labeler Time: 0.14090394973754883s


297it [1:51:50, 21.75s/it]

clean Time: 0.004208087921142578s
get-fuzzy-augmented-matches Time: 6.048009157180786s
get-exact-matches Time: 0.6906948089599609s
get-kgtk-search-matches Time: 1.7432451248168945s
ground-truth-labeler Time: 0.07752513885498047s


298it [1:52:05, 19.66s/it]

clean Time: 0.002347230911254883s
get-fuzzy-augmented-matches Time: 2.856778144836426s
get-exact-matches Time: 0.2125701904296875s
get-kgtk-search-matches Time: 4.908327102661133s
ground-truth-labeler Time: 0.5430848598480225s


299it [1:52:22, 18.89s/it]

clean Time: 0.0013709068298339844s
get-fuzzy-augmented-matches Time: 4.045334100723267s
get-exact-matches Time: 0.5584549903869629s
get-kgtk-search-matches Time: 3.6111791133880615s
ground-truth-labeler Time: 0.09921717643737793s


300it [1:52:38, 17.99s/it]

clean Time: 0.004211902618408203s
get-fuzzy-augmented-matches Time: 8.370994091033936s
get-exact-matches Time: 0.626323938369751s
get-kgtk-search-matches Time: 7.952007055282593s
ground-truth-labeler Time: 0.15335583686828613s


301it [1:53:05, 20.66s/it]

clean Time: 0.0032308101654052734s
get-fuzzy-augmented-matches Time: 5.070088863372803s
get-exact-matches Time: 0.135026216506958s
get-kgtk-search-matches Time: 2.7025630474090576s
ground-truth-labeler Time: 0.046636104583740234s


302it [1:53:20, 19.09s/it]

clean Time: 0.00251007080078125s
get-fuzzy-augmented-matches Time: 4.607146978378296s
get-exact-matches Time: 0.5842108726501465s
get-kgtk-search-matches Time: 5.234753131866455s
ground-truth-labeler Time: 0.2080538272857666s


303it [1:53:40, 19.25s/it]

clean Time: 0.0020880699157714844s
get-fuzzy-augmented-matches Time: 2.812695026397705s
get-exact-matches Time: 0.2934110164642334s
get-kgtk-search-matches Time: 1.5828831195831299s
ground-truth-labeler Time: 0.09221482276916504s


304it [1:53:50, 16.40s/it]

clean Time: 0.002864837646484375s
get-fuzzy-augmented-matches Time: 8.211759090423584s
get-exact-matches Time: 0.7618801593780518s
get-kgtk-search-matches Time: 10.161789894104004s
ground-truth-labeler Time: 0.12696194648742676s


305it [1:54:14, 18.72s/it]

clean Time: 0.0034279823303222656s
get-fuzzy-augmented-matches Time: 6.0460779666900635s
get-exact-matches Time: 0.7165200710296631s
get-kgtk-search-matches Time: 5.919242858886719s
ground-truth-labeler Time: 0.09797906875610352s


306it [1:54:36, 19.83s/it]

clean Time: 0.003041982650756836s
get-fuzzy-augmented-matches Time: 4.98759913444519s
get-exact-matches Time: 0.7018039226531982s
get-kgtk-search-matches Time: 3.269407033920288s
ground-truth-labeler Time: 0.08643388748168945s


307it [1:54:54, 19.29s/it]

clean Time: 0.0020537376403808594s
get-fuzzy-augmented-matches Time: 3.195754051208496s
get-exact-matches Time: 0.12639808654785156s
get-kgtk-search-matches Time: 3.9579648971557617s
ground-truth-labeler Time: 0.07144784927368164s


308it [1:55:11, 18.37s/it]

clean Time: 0.001554250717163086s
get-fuzzy-augmented-matches Time: 4.6982409954071045s
get-exact-matches Time: 0.1992480754852295s
get-kgtk-search-matches Time: 3.2847177982330322s
ground-truth-labeler Time: 0.05543017387390137s


309it [1:55:26, 17.35s/it]

clean Time: 0.005615949630737305s
get-fuzzy-augmented-matches Time: 6.286852121353149s
get-exact-matches Time: 0.39594483375549316s
get-kgtk-search-matches Time: 6.672632932662964s
ground-truth-labeler Time: 0.09269928932189941s


310it [1:55:47, 18.64s/it]

clean Time: 0.0027818679809570312s
get-fuzzy-augmented-matches Time: 4.3874781131744385s
get-exact-matches Time: 0.5576512813568115s
get-kgtk-search-matches Time: 5.988785982131958s
ground-truth-labeler Time: 0.11032485961914062s


311it [1:56:07, 18.97s/it]

clean Time: 0.006734132766723633s
get-fuzzy-augmented-matches Time: 11.904087781906128s
get-exact-matches Time: 0.9240849018096924s
get-kgtk-search-matches Time: 11.935362100601196s
ground-truth-labeler Time: 0.14529991149902344s


312it [1:56:38, 22.45s/it]

clean Time: 0.0018808841705322266s
get-fuzzy-augmented-matches Time: 4.87531304359436s
get-exact-matches Time: 0.7237730026245117s
get-kgtk-search-matches Time: 4.862874746322632s
ground-truth-labeler Time: 0.0659639835357666s


313it [1:56:55, 21.05s/it]

clean Time: 0.010166168212890625s
get-fuzzy-augmented-matches Time: 15.32623291015625s
get-exact-matches Time: 1.07785964012146s
get-kgtk-search-matches Time: 32.33040690422058s
ground-truth-labeler Time: 0.3543570041656494s


314it [1:57:55, 32.51s/it]

clean Time: 0.0024569034576416016s
get-fuzzy-augmented-matches Time: 5.186562776565552s
get-exact-matches Time: 0.7661738395690918s
get-kgtk-search-matches Time: 4.165821313858032s
ground-truth-labeler Time: 0.06752204895019531s


315it [1:58:14, 28.65s/it]

clean Time: 0.010567903518676758s
get-fuzzy-augmented-matches Time: 14.38807988166809s
get-exact-matches Time: 1.1231768131256104s
get-kgtk-search-matches Time: 26.948652744293213s
ground-truth-labeler Time: 0.3236689567565918s


316it [1:59:09, 36.35s/it]

clean Time: 0.0017080307006835938s
get-fuzzy-augmented-matches Time: 3.18570613861084s
get-exact-matches Time: 0.7994627952575684s
get-kgtk-search-matches Time: 4.8159332275390625s
ground-truth-labeler Time: 0.05492591857910156s


317it [1:59:26, 30.59s/it]

clean Time: 0.009460926055908203s
get-fuzzy-augmented-matches Time: 9.350749969482422s
get-exact-matches Time: 0.8377342224121094s
get-kgtk-search-matches Time: 22.00022292137146s
ground-truth-labeler Time: 0.19711709022521973s


318it [2:00:07, 33.93s/it]

clean Time: 0.02197408676147461s
get-fuzzy-augmented-matches Time: 29.81496286392212s
get-exact-matches Time: 1.450834035873413s
get-kgtk-search-matches Time: 44.48745083808899s
ground-truth-labeler Time: 0.4739201068878174s


319it [2:01:36, 50.47s/it]

clean Time: 0.009742021560668945s
get-fuzzy-augmented-matches Time: 12.97999906539917s
get-exact-matches Time: 0.6682851314544678s
get-kgtk-search-matches Time: 20.748570203781128s
ground-truth-labeler Time: 0.2800610065460205s


320it [2:02:17, 47.61s/it]

clean Time: 0.0011241436004638672s
get-fuzzy-augmented-matches Time: 3.1947200298309326s
get-exact-matches Time: 0.9254679679870605s
get-kgtk-search-matches Time: 3.1169960498809814s
ground-truth-labeler Time: 0.04977679252624512s


321it [2:02:33, 38.13s/it]

clean Time: 0.01970076560974121s
get-fuzzy-augmented-matches Time: 28.628719806671143s
get-exact-matches Time: 1.8839390277862549s
get-kgtk-search-matches Time: 43.52040696144104s
ground-truth-labeler Time: 0.5816812515258789s


322it [2:04:02, 53.18s/it]

clean Time: 0.024069786071777344s
get-fuzzy-augmented-matches Time: 34.50380182266235s
get-exact-matches Time: 3.0177290439605713s
get-kgtk-search-matches Time: 83.04582095146179s
ground-truth-labeler Time: 0.7717299461364746s


323it [2:06:19, 78.29s/it]

clean Time: 0.009299993515014648s
get-fuzzy-augmented-matches Time: 20.935270071029663s
get-exact-matches Time: 2.055806875228882s
get-kgtk-search-matches Time: 79.7972400188446s
ground-truth-labeler Time: 0.7466678619384766s


324it [2:08:16, 89.89s/it]

clean Time: 0.009071111679077148s
get-fuzzy-augmented-matches Time: 16.82182288169861s
get-exact-matches Time: 1.7921299934387207s
get-kgtk-search-matches Time: 58.2157142162323s
ground-truth-labeler Time: 0.3968009948730469s


325it [2:09:45, 89.67s/it]

clean Time: 0.0023810863494873047s
get-fuzzy-augmented-matches Time: 3.1219329833984375s
get-exact-matches Time: 0.2677450180053711s
get-kgtk-search-matches Time: 5.4323060512542725s
ground-truth-labeler Time: 0.06241726875305176s


326it [2:10:02, 67.96s/it]

clean Time: 0.010728120803833008s
get-fuzzy-augmented-matches Time: 17.599432229995728s
get-exact-matches Time: 2.925524950027466s
get-kgtk-search-matches Time: 67.32521510124207s
ground-truth-labeler Time: 0.6054189205169678s


327it [2:11:45, 78.59s/it]

clean Time: 0.0009739398956298828s
get-fuzzy-augmented-matches Time: 2.3499319553375244s
get-exact-matches Time: 0.4504568576812744s
get-kgtk-search-matches Time: 2.1018619537353516s
ground-truth-labeler Time: 0.044249773025512695s


328it [2:11:55, 58.01s/it]

clean Time: 0.002582073211669922s
get-fuzzy-augmented-matches Time: 5.222877025604248s
get-exact-matches Time: 1.3325109481811523s
get-kgtk-search-matches Time: 7.255189895629883s
ground-truth-labeler Time: 0.11456990242004395s


329it [2:12:17, 47.11s/it]

clean Time: 0.0044977664947509766s
get-fuzzy-augmented-matches Time: 7.116717100143433s
get-exact-matches Time: 0.7733960151672363s
get-kgtk-search-matches Time: 18.451555967330933s
ground-truth-labeler Time: 0.1700899600982666s


330it [2:12:53, 43.89s/it]

clean Time: 0.0044672489166259766s
get-fuzzy-augmented-matches Time: 9.330379009246826s
get-exact-matches Time: 1.1977930068969727s
get-kgtk-search-matches Time: 17.83202815055847s
ground-truth-labeler Time: 0.18925094604492188s


331it [2:13:32, 42.33s/it]

clean Time: 0.0016350746154785156s
get-fuzzy-augmented-matches Time: 2.2901268005371094s
get-exact-matches Time: 0.20204901695251465s
get-kgtk-search-matches Time: 3.2183899879455566s
ground-truth-labeler Time: 0.05096793174743652s


332it [2:13:43, 32.80s/it]

clean Time: 0.003407716751098633s
get-fuzzy-augmented-matches Time: 10.738755226135254s
get-exact-matches Time: 1.7697477340698242s
get-kgtk-search-matches Time: 29.3773136138916s
ground-truth-labeler Time: 0.5406091213226318s


333it [2:14:36, 38.84s/it]

clean Time: 0.003817319869995117s
get-fuzzy-augmented-matches Time: 5.077666759490967s
get-exact-matches Time: 0.44835495948791504s
get-kgtk-search-matches Time: 8.245548009872437s
ground-truth-labeler Time: 0.0904550552368164s


334it [2:14:59, 34.21s/it]

clean Time: 0.023808002471923828s
get-fuzzy-augmented-matches Time: 23.815918684005737s
get-exact-matches Time: 1.9148502349853516s
get-kgtk-search-matches Time: 39.03243708610535s
ground-truth-labeler Time: 0.7154450416564941s


335it [2:16:19, 47.92s/it]

clean Time: 0.004461050033569336s
get-fuzzy-augmented-matches Time: 7.262033939361572s
get-exact-matches Time: 0.9061529636383057s
get-kgtk-search-matches Time: 22.725807905197144s
ground-truth-labeler Time: 0.2655909061431885s


336it [2:17:01, 46.16s/it]

clean Time: 0.0018448829650878906s
get-fuzzy-augmented-matches Time: 2.927417755126953s
get-exact-matches Time: 0.7255580425262451s
get-kgtk-search-matches Time: 3.2772738933563232s
ground-truth-labeler Time: 0.05540609359741211s


337it [2:17:17, 36.97s/it]

clean Time: 0.01573014259338379s
get-fuzzy-augmented-matches Time: 15.746215343475342s
get-exact-matches Time: 3.2672457695007324s
get-kgtk-search-matches Time: 42.15046691894531s
ground-truth-labeler Time: 0.43537211418151855s


338it [2:18:32, 48.49s/it]

clean Time: 0.00692296028137207s
get-fuzzy-augmented-matches Time: 8.06748104095459s
get-exact-matches Time: 0.8970718383789062s
get-kgtk-search-matches Time: 18.49109387397766s
ground-truth-labeler Time: 0.26598095893859863s


339it [2:19:08, 44.80s/it]

clean Time: 0.0019230842590332031s
get-fuzzy-augmented-matches Time: 2.7406342029571533s
get-exact-matches Time: 0.7543296813964844s
get-kgtk-search-matches Time: 3.3601348400115967s
ground-truth-labeler Time: 0.039768218994140625s


340it [2:19:22, 35.61s/it]

clean Time: 0.004926919937133789s
get-fuzzy-augmented-matches Time: 5.597031116485596s
get-exact-matches Time: 0.2886998653411865s
get-kgtk-search-matches Time: 8.571640014648438s
ground-truth-labeler Time: 0.10552978515625s


341it [2:19:45, 31.88s/it]

clean Time: 0.018839120864868164s
get-fuzzy-augmented-matches Time: 21.067628860473633s
get-exact-matches Time: 2.4339699745178223s
get-kgtk-search-matches Time: 41.69972801208496s
ground-truth-labeler Time: 0.547119140625s


342it [2:21:06, 24.76s/it]


In [10]:
if not copy_candidates_from_previous_version:
    candidate_generation(dev_path, ground_truth_files, dev_candidate_path, dev_class_count, dev_prop_count, dev_context_path, dev_graph_embedding)

0it [00:00, ?it/s]

clean Time: 0.005154609680175781s
get-fuzzy-augmented-matches Time: 9.847553014755249s
get-exact-matches Time: 1.1910877227783203s
get-kgtk-search-matches Time: 19.845626831054688s
ground-truth-labeler Time: 0.20462584495544434s


1it [00:40, 40.76s/it]

clean Time: 0.0016167163848876953s
get-fuzzy-augmented-matches Time: 3.5746359825134277s
get-exact-matches Time: 0.3863821029663086s
get-kgtk-search-matches Time: 5.325264930725098s
ground-truth-labeler Time: 0.07149434089660645s


2it [00:57, 26.70s/it]

clean Time: 0.0023360252380371094s
get-fuzzy-augmented-matches Time: 3.0465199947357178s
get-exact-matches Time: 0.2678048610687256s
get-kgtk-search-matches Time: 4.211138963699341s
ground-truth-labeler Time: 0.06421923637390137s


3it [01:12, 21.29s/it]

clean Time: 0.00816798210144043s
get-fuzzy-augmented-matches Time: 9.412149906158447s
get-exact-matches Time: 0.6508927345275879s
get-kgtk-search-matches Time: 18.2010018825531s
ground-truth-labeler Time: 0.4265782833099365s


4it [01:52, 28.53s/it]

clean Time: 0.0014672279357910156s
get-fuzzy-augmented-matches Time: 3.022275686264038s
get-exact-matches Time: 0.3390350341796875s
get-kgtk-search-matches Time: 6.324038028717041s
ground-truth-labeler Time: 0.06607818603515625s


5it [02:09, 24.64s/it]

clean Time: 0.004533052444458008s
get-fuzzy-augmented-matches Time: 8.217726945877075s
get-exact-matches Time: 1.179988145828247s
get-kgtk-search-matches Time: 25.477843046188354s
ground-truth-labeler Time: 0.264786958694458s


6it [02:55, 31.70s/it]

clean Time: 0.0072307586669921875s
get-fuzzy-augmented-matches Time: 10.73344373703003s
get-exact-matches Time: 0.7932617664337158s
get-kgtk-search-matches Time: 15.908459901809692s
ground-truth-labeler Time: 0.2966949939727783s


7it [03:31, 33.12s/it]

clean Time: 0.00979304313659668s
get-fuzzy-augmented-matches Time: 14.7984139919281s
get-exact-matches Time: 1.862421989440918s
get-kgtk-search-matches Time: 41.22956299781799s
ground-truth-labeler Time: 0.49227309226989746s


8it [04:41, 44.89s/it]

clean Time: 0.003983974456787109s
get-fuzzy-augmented-matches Time: 5.238208055496216s
get-exact-matches Time: 0.5212647914886475s
get-kgtk-search-matches Time: 14.30022406578064s
ground-truth-labeler Time: 0.14105892181396484s


9it [05:11, 34.66s/it]


## Feature Generation

In [10]:
def feature_generation(candidate_dir, embedding_dir, class_count_dir, property_count_dir, context_path, output_path):
    file_list = glob.glob(candidate_dir + '/*.csv')
    for i, file in tqdm(enumerate(file_list)):
        filename = file.split('/')[-1]
#         print(f"{filename}: {i+1} of {len(file_list)}")
        embedding_file = f"{embedding_dir}/{filename.strip('.csv')}_graph_embedding_complex.tsv"
        class_count_file = f"{class_count_dir}/{filename.strip('.csv')}_class_count.tsv"
        property_count_file = f"{property_count_dir}/{filename.strip('.csv')}_prop_count.tsv"
        context_file = f"{context_path}/{filename.strip('.csv')}_context.tsv"
        output_file = f"{output_path}/{filename}"
        classifier_features_str = ",".join(classifier_features)
        !tl align-page-rank $file \
            / string-similarity -i --method symmetric_monge_elkan:tokenizer=word -o monge_elkan \
            / string-similarity -i --method symmetric_monge_elkan:tokenizer=word -c label_clean kg_aliases -o monge_elkan_aliases \
            / string-similarity -i --method jaro_winkler -o jaro_winkler \
            / string-similarity -i --method levenshtein -o levenshtein \
            / string-similarity -i --method jaccard:tokenizer=word -c kg_descriptions context -o des_cont_jaccard \
            / normalize-scores -c des_cont_jaccard / smallest-qnode-number \
            / mosaic-features -c kg_labels --num-char --num-tokens \
            / create-singleton-feature -o singleton \
            / vote-by-classifier  \
            --prob-threshold 0.995 \
            --features $classifier_features_str \
            --model $classifier_model_path \
            / score-using-embedding \
            --column-vector-strategy centroid-of-lof \
            --lof-strategy ems-mv \
            -o lof-graph-embedding-score \
            --embedding-file $embedding_file \
            / generate-reciprocal-rank  \
            -c lof-graph-embedding-score \
            -o lof-reciprocal-rank \
            / compute-tf-idf  \
            --feature-file $class_count_file \
            --feature-name class_count \
            --singleton-column singleton \
            -o lof_class_count_tf_idf_score \
            / compute-tf-idf \
            --feature-file $property_count_file \
            --feature-name property_count \
            --singleton-column singleton \
            -o lof_property_count_tf_idf_score \
            / context-match --context-file $context_file \
            -o context_score \
            > $output_file

In [None]:
feature_generation(train_candidate_path, train_graph_embedding, train_class_count, train_prop_count, train_context_path, train_feature_path)

0it [00:00, ?it/s]

align-page-rank Time: 0.22191905975341797s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.466212749481201s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.080178022384644s
string-similarity-['jaro_winkler'] Time: 0.495136022567749s
string-similarity-['levenshtein'] Time: 5.341928958892822s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05250382423400879s
normalize-scores-des_cont_jaccard Time: 0.026294946670532227s
smallest-qnode-number Time: 0.21240901947021484s
mosaic-features Time: 0.010735750198364258s
creat-singleton-feature Time: 0.16389822959899902s
vote-by-classifier Time: 2.3569889068603516s
Qnodes to lookup: 3148
Qnodes from file: 3089
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 36.75325894355774s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2624509334564209s
compute-tf-idf-class_count Time: 38.3315007686615s
compute-tf-idf-property_co

1it [01:15, 75.37s/it]

align-page-rank Time: 0.14556026458740234s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.3670871257781982s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.0737719535827637s
string-similarity-['jaro_winkler'] Time: 0.7165331840515137s
string-similarity-['levenshtein'] Time: 5.139523029327393s
string-similarity-['jaccard:tokenizer=word'] Time: 0.029052257537841797s
normalize-scores-des_cont_jaccard Time: 0.01286005973815918s
smallest-qnode-number Time: 0.08622193336486816s
mosaic-features Time: 0.006398200988769531s
creat-singleton-feature Time: 0.39304304122924805s
vote-by-classifier Time: 0.9565598964691162s
Qnodes to lookup: 436
Qnodes from file: 436
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 22.45358395576477s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11409997940063477s
compute-tf-idf-class_count Time: 24.281742811203003s
compute-tf-idf-property_count Time: 23.49156093597412s
context-match Tim

2it [01:51, 52.32s/it]

align-page-rank Time: 0.1822497844696045s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.407083034515381s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.970815896987915s
string-similarity-['jaro_winkler'] Time: 0.637199878692627s
string-similarity-['levenshtein'] Time: 4.0634119510650635s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09019112586975098s
normalize-scores-des_cont_jaccard Time: 0.03568124771118164s
smallest-qnode-number Time: 0.2843031883239746s
mosaic-features Time: 0.020635128021240234s
creat-singleton-feature Time: 0.21683192253112793s
vote-by-classifier Time: 0.39275503158569336s
Qnodes to lookup: 5632
Qnodes from file: 5492
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 27.18977379798889s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3908050060272217s
compute-tf-idf-class_count T

3it [03:52, 83.78s/it]

align-page-rank Time: 0.2417311668395996s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.739609956741333s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.893061876296997s
string-similarity-['jaro_winkler'] Time: 0.44855594635009766s
string-similarity-['levenshtein'] Time: 1.762470006942749s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11628317832946777s
normalize-scores-des_cont_jaccard Time: 0.039465904235839844s
smallest-qnode-number Time: 0.444382905960083s
mosaic-features Time: 0.018981218338012695s
creat-singleton-feature Time: 0.4857971668243408s
vote-by-classifier Time: 1.6326749324798584s
Qnodes to lookup: 6078
Qnodes from file: 6000
Outlier removal generates 6 lof-voted candidates
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 21.720832109451294s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.

4it [05:12, 82.24s/it]

align-page-rank Time: 0.29036593437194824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.397026777267456s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.4751088619232178s
string-similarity-['jaro_winkler'] Time: 0.3221399784088135s
string-similarity-['levenshtein'] Time: 1.5705199241638184s
string-similarity-['jaccard:tokenizer=word'] Time: 0.10025310516357422s
normalize-scores-des_cont_jaccard Time: 0.030749082565307617s
smallest-qnode-number Time: 0.2596428394317627s
mosaic-features Time: 0.08944582939147949s
creat-singleton-feature Time: 0.16361093521118164s
vote-by-classifier Time: 0.4927029609680176s
Qnodes to lookup: 3170
Qnodes from file: 3137
Outlier removal generates 47 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 18.782434940338135s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4448421001434326s
compute-tf-idf-class_coun

5it [06:24, 78.51s/it]

align-page-rank Time: 0.3201777935028076s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.440642833709717s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.169529676437378s
string-similarity-['jaro_winkler'] Time: 0.8065342903137207s
string-similarity-['levenshtein'] Time: 4.484777927398682s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20517683029174805s
normalize-scores-des_cont_jaccard Time: 0.04726386070251465s
smallest-qnode-number Time: 0.5110902786254883s
mosaic-features Time: 0.02406907081604004s
creat-singleton-feature Time: 0.3344881534576416s
vote-by-classifier Time: 0.46608805656433105s
Qnodes to lookup: 11144
Qnodes from file: 10843
Outlier removal generates 88 lof-voted candidates
score-using-embedding Time: 36.32066011428833s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5817511081695557s
compute-tf-idf-class_count Time: 38.942898988723755s
compute-tf-idf-property_count Time: 38.69187617301941s
context-match Time:

6it [08:07, 86.88s/it]

align-page-rank Time: 0.15323996543884277s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6703331470489502s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6225190162658691s
string-similarity-['jaro_winkler'] Time: 0.18364214897155762s
string-similarity-['levenshtein'] Time: 0.5542268753051758s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06367301940917969s
normalize-scores-des_cont_jaccard Time: 0.021943092346191406s
smallest-qnode-number Time: 0.1625680923461914s
mosaic-features Time: 0.011867046356201172s
creat-singleton-feature Time: 0.14670991897583008s
vote-by-classifier Time: 1.6387760639190674s
Qnodes to lookup: 3718
Qnodes from file: 3703
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 16.342185020446777s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.49239277839660645s
compute-tf-idf-class_count Time: 17.373814582824707s
compute-tf-idf-property_count Time: 16.737354040145874s
context-matc

7it [08:43, 70.17s/it]

align-page-rank Time: 0.29534101486206055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.187575101852417s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.19346022605896s
string-similarity-['jaro_winkler'] Time: 0.6769380569458008s
string-similarity-['levenshtein'] Time: 3.3761367797851562s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08540511131286621s
normalize-scores-des_cont_jaccard Time: 0.033429861068725586s
smallest-qnode-number Time: 0.26169681549072266s
mosaic-features Time: 0.01955413818359375s
creat-singleton-feature Time: 0.20443177223205566s
vote-by-classifier Time: 0.922921895980835s
Qnodes to lookup: 3241
Qnodes from file: 3132
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 25.630057096481323s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.377716064453125s
compute-tf-idf-class_count T

8it [10:19, 78.40s/it]

align-page-rank Time: 0.1909501552581787s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.080425977706909s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.307936906814575s
string-similarity-['jaro_winkler'] Time: 0.9214770793914795s
string-similarity-['levenshtein'] Time: 7.903357982635498s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06733918190002441s
normalize-scores-des_cont_jaccard Time: 0.027918100357055664s
smallest-qnode-number Time: 0.17572903633117676s
mosaic-features Time: 0.01629185676574707s
creat-singleton-feature Time: 0.15119600296020508s
vote-by-classifier Time: 0.6667740345001221s
Qnodes to lookup: 2874
Qnodes from file: 2685
Outlier removal generates 20 lof-voted candidates
_centroid_of_lof: Missing 1 of 31
Outlier removal generates 18 lof-voted candidates
score-using-embedding Time: 42.585469245910645s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2979562282562256s
compute-tf-idf-class_count Time: 44.406031

9it [13:00, 104.19s/it]

align-page-rank Time: 0.17138290405273438s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9779548645019531s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.7871880531311035s
string-similarity-['jaro_winkler'] Time: 0.23455381393432617s
string-similarity-['levenshtein'] Time: 0.8426852226257324s
string-similarity-['jaccard:tokenizer=word'] Time: 0.057170867919921875s
normalize-scores-des_cont_jaccard Time: 0.03038311004638672s
smallest-qnode-number Time: 0.17876100540161133s
mosaic-features Time: 0.01130819320678711s
creat-singleton-feature Time: 0.1302809715270996s
vote-by-classifier Time: 0.6341960430145264s
Qnodes to lookup: 2560
Qnodes from file: 2531
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 16.807559967041016s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.27141308784484863s
compute-tf-idf-class_count Time: 18.262060165405273s
compute-tf-idf-property_count Time: 19.506273984909058s
context-match

10it [13:30, 81.34s/it]

Command: align-page-rank
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl/cli/align-page-rank.py", line 29, in run
    df = pd.read_csv(kwargs['input_file'], dtype=object)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 610, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 462, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 819, in __init__
    self._engine = self._make_engine(self.engine)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 1050, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/Users/amandeep/Github/table-linker/tl_env/lib/

11it [13:52, 63.18s/it]

align-page-rank Time: 0.1693110466003418s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.671731948852539s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8249878883361816s
string-similarity-['jaro_winkler'] Time: 0.43677401542663574s
string-similarity-['levenshtein'] Time: 1.8030998706817627s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0719001293182373s
normalize-scores-des_cont_jaccard Time: 0.02872490882873535s
smallest-qnode-number Time: 0.16878414154052734s
mosaic-features Time: 0.015111923217773438s
creat-singleton-feature Time: 0.15172886848449707s
vote-by-classifier Time: 0.43934011459350586s
Qnodes to lookup: 2018
Qnodes from file: 1950
Outlier removal generates 7 lof-voted candidates
Outlier removal generates 48 lof-voted candidates
score-using-embedding Time: 17.89047384262085s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.32752394676208496s
compute-tf-idf-class_count Time: 19.026007175445557s
compute-tf-idf-propert

12it [15:15, 69.34s/it]

align-page-rank Time: 0.11927294731140137s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4386570453643799s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9289951324462891s
string-similarity-['jaro_winkler'] Time: 0.12350201606750488s
string-similarity-['levenshtein'] Time: 0.45486903190612793s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03873181343078613s
normalize-scores-des_cont_jaccard Time: 0.014291048049926758s
smallest-qnode-number Time: 0.0892329216003418s
mosaic-features Time: 0.0061528682708740234s
creat-singleton-feature Time: 0.06980490684509277s
vote-by-classifier Time: 1.6479308605194092s
Qnodes to lookup: 1981
Qnodes from file: 1962
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 14.200151920318604s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.33505797386169434s
compute-tf-idf-class_count Time: 15.93990683555603s
compute-tf-idf-property_count Time: 16.106279134750366s
context-matc

13it [15:42, 56.41s/it]

align-page-rank Time: 0.1800401210784912s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6560869216918945s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4597651958465576s
string-similarity-['jaro_winkler'] Time: 0.15399503707885742s
string-similarity-['levenshtein'] Time: 0.5235037803649902s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05253028869628906s
normalize-scores-des_cont_jaccard Time: 0.02128314971923828s
smallest-qnode-number Time: 0.24175620079040527s
mosaic-features Time: 0.01070094108581543s
creat-singleton-feature Time: 0.11869215965270996s
vote-by-classifier Time: 0.49817991256713867s
Qnodes to lookup: 4609
Qnodes from file: 4594
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 14.576766967773438s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26140499114990234s
compute-tf-idf-class_count Time: 15.625056982040405s
compute-tf-idf-property_count Time: 15.848875045776367s
context-match 

14it [16:14, 48.92s/it]

align-page-rank Time: 0.24260401725769043s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8345563411712646s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.85605525970459s
string-similarity-['jaro_winkler'] Time: 0.8406703472137451s
string-similarity-['levenshtein'] Time: 4.43608283996582s
string-similarity-['jaccard:tokenizer=word'] Time: 0.14168286323547363s
normalize-scores-des_cont_jaccard Time: 0.03721308708190918s
smallest-qnode-number Time: 0.25301194190979004s
mosaic-features Time: 0.01974177360534668s
creat-singleton-feature Time: 0.202347993850708s
vote-by-classifier Time: 0.4638991355895996s
Qnodes to lookup: 4505
Qnodes from file: 4371
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
_centroid_of_lof: Missing 1 of 39
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 25.77737331390381s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.376207828521728

15it [17:28, 56.51s/it]

align-page-rank Time: 0.36925601959228516s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2643089294433594s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5488500595092773s
string-similarity-['jaro_winkler'] Time: 0.5485508441925049s
string-similarity-['levenshtein'] Time: 2.8003358840942383s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11690020561218262s
normalize-scores-des_cont_jaccard Time: 0.029495716094970703s
smallest-qnode-number Time: 0.24399113655090332s
mosaic-features Time: 0.016225099563598633s
creat-singleton-feature Time: 0.1690077781677246s
vote-by-classifier Time: 1.5214471817016602s
Qnodes to lookup: 1444
Qnodes from file: 1411
Outlier removal generates 86 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
Outlier removal generates 62 lof-voted candidates
score-using-embedding Time: 20.610105991363525s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.32782888412475586s
compute-tf-idf-class_c

16it [18:28, 57.61s/it]

align-page-rank Time: 0.22034525871276855s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2235431671142578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7162396907806396s
string-similarity-['jaro_winkler'] Time: 0.3470189571380615s
string-similarity-['levenshtein'] Time: 1.3270270824432373s
string-similarity-['jaccard:tokenizer=word'] Time: 0.14793777465820312s
normalize-scores-des_cont_jaccard Time: 0.032171010971069336s
smallest-qnode-number Time: 0.3105449676513672s
mosaic-features Time: 0.01495671272277832s
creat-singleton-feature Time: 0.17876386642456055s
vote-by-classifier Time: 0.9227807521820068s
Qnodes to lookup: 5482
Qnodes from file: 5401
Outlier removal generates 6 lof-voted candidates
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 18.787561893463135s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3635408878326416s
compute-tf-idf-class_count

17it [19:41, 62.23s/it]

align-page-rank Time: 0.21584486961364746s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5998170375823975s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.578638076782227s
string-similarity-['jaro_winkler'] Time: 0.4490089416503906s
string-similarity-['levenshtein'] Time: 2.337824821472168s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0710611343383789s
normalize-scores-des_cont_jaccard Time: 0.026790857315063477s
smallest-qnode-number Time: 0.17948269844055176s
mosaic-features Time: 0.013099908828735352s
creat-singleton-feature Time: 0.15649700164794922s
vote-by-classifier Time: 0.44353413581848145s
Qnodes to lookup: 2770
Qnodes from file: 2720
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 22.797819137573242s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3223133087158203s
compute-tf-idf-class_count Time: 24.648716926574707s
compute-tf-idf-proper

18it [21:08, 69.62s/it]

align-page-rank Time: 0.23296713829040527s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 23.501874923706055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 21.17334818840027s
string-similarity-['jaro_winkler'] Time: 2.009003162384033s
string-similarity-['levenshtein'] Time: 26.66276478767395s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1027991771697998s
normalize-scores-des_cont_jaccard Time: 0.03942608833312988s
smallest-qnode-number Time: 0.31923985481262207s
mosaic-features Time: 0.027196884155273438s
creat-singleton-feature Time: 0.22952890396118164s
vote-by-classifier Time: 0.49118804931640625s
Qnodes to lookup: 3523
Qnodes from file: 3370
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 64 lof-voted candidates
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 85.1189079284668s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.437824010848999s
compute-tf-idf-class_count Tim

19it [23:12, 86.14s/it]

align-page-rank Time: 0.4514901638031006s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8720991611480713s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.020314931869507s
string-similarity-['jaro_winkler'] Time: 1.0727949142456055s
string-similarity-['levenshtein'] Time: 4.3900511264801025s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20263886451721191s
normalize-scores-des_cont_jaccard Time: 0.0438539981842041s
smallest-qnode-number Time: 0.5356152057647705s
mosaic-features Time: 0.021579980850219727s
creat-singleton-feature Time: 0.31695008277893066s
vote-by-classifier Time: 0.5832619667053223s
Qnodes to lookup: 10475
Qnodes from file: 10241
Outlier removal generates 79 lof-voted candidates
score-using-embedding Time: 32.096360206604004s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.041888952255249s
compute-tf-idf-class_count Time: 35.09846305847168s
compute-tf-idf-property_count Time: 35.66751194000244s
context-match Time:

20it [24:54, 90.89s/it]

align-page-rank Time: 0.07955503463745117s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4803040027618408s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7814750671386719s
string-similarity-['jaro_winkler'] Time: 0.14049291610717773s
string-similarity-['levenshtein'] Time: 0.6524200439453125s
string-similarity-['jaccard:tokenizer=word'] Time: 0.048246145248413086s
normalize-scores-des_cont_jaccard Time: 0.014909982681274414s
smallest-qnode-number Time: 0.13018083572387695s
mosaic-features Time: 0.00629878044128418s
creat-singleton-feature Time: 0.07436919212341309s
vote-by-classifier Time: 0.9337608814239502s
Qnodes to lookup: 680
Qnodes from file: 678
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 13.568814992904663s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2718329429626465s
compute-tf-idf-class_count Time: 14.24359393119812s
compute-tf-idf-property_count Time: 14.4166738986969s
context-match Tim

21it [25:26, 73.10s/it]

align-page-rank Time: 0.2687098979949951s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8407349586486816s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1707541942596436s
string-similarity-['jaro_winkler'] Time: 0.41336512565612793s
string-similarity-['levenshtein'] Time: 1.8720717430114746s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09167289733886719s
normalize-scores-des_cont_jaccard Time: 0.02497100830078125s
smallest-qnode-number Time: 0.3125760555267334s
mosaic-features Time: 0.015583038330078125s
creat-singleton-feature Time: 0.16436409950256348s
vote-by-classifier Time: 1.135585069656372s
Qnodes to lookup: 1448
Qnodes from file: 1392
_centroid_of_lof: Missing 7 of 13
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
_centroid_of_lof: Missing 12 of 34
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
score-using-embedding Time: 20.3

22it [27:06, 81.23s/it]

align-page-rank Time: 0.2247450351715088s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7426619529724121s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2067630290985107s
string-similarity-['jaro_winkler'] Time: 0.1747138500213623s
string-similarity-['levenshtein'] Time: 0.9188830852508545s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0277862548828125s
normalize-scores-des_cont_jaccard Time: 0.013276815414428711s
smallest-qnode-number Time: 0.13803625106811523s
mosaic-features Time: 0.0063059329986572266s
creat-singleton-feature Time: 0.06808781623840332s
vote-by-classifier Time: 0.4212648868560791s
Qnodes to lookup: 1646
Qnodes from file: 1613
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 11.450648784637451s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13244295120239258s
compute-tf-idf-class_count Time: 12.062746047973633s
compute-tf-idf-property_count Time: 12.257724046707153s
context-match

23it [27:34, 65.11s/it]

align-page-rank Time: 0.20837783813476562s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3199238777160645s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.9662280082702637s
string-similarity-['jaro_winkler'] Time: 0.4927248954772949s
string-similarity-['levenshtein'] Time: 3.1523473262786865s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06562399864196777s
normalize-scores-des_cont_jaccard Time: 0.030173301696777344s
smallest-qnode-number Time: 0.23969697952270508s
mosaic-features Time: 0.013410091400146484s
creat-singleton-feature Time: 0.16813898086547852s
vote-by-classifier Time: 1.0774929523468018s
Qnodes to lookup: 3280
Qnodes from file: 3231
Outlier removal generates 10 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 87 lof-voted candidates
score-using-embedding Time: 22.83120894432068s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3123898506164551s
compute-tf-idf-class_co

24it [28:41, 65.85s/it]

align-page-rank Time: 0.16942620277404785s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.43797993659973145s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5128140449523926s
string-similarity-['jaro_winkler'] Time: 0.10579204559326172s
string-similarity-['levenshtein'] Time: 0.5582730770111084s
string-similarity-['jaccard:tokenizer=word'] Time: 0.033249855041503906s
normalize-scores-des_cont_jaccard Time: 0.014711141586303711s
smallest-qnode-number Time: 0.08874392509460449s
mosaic-features Time: 0.005562782287597656s
creat-singleton-feature Time: 0.22860193252563477s
vote-by-classifier Time: 0.8361680507659912s
Qnodes to lookup: 1612
Qnodes from file: 1599
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 13.54197382926941s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2802259922027588s
compute-tf-idf-class_count Time: 14.264541149139404s
compute-tf-idf-property_count Time: 14.36652398109436s
context-match

25it [29:09, 54.27s/it]

align-page-rank Time: 0.18608498573303223s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.600172758102417s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0642180442810059s
string-similarity-['jaro_winkler'] Time: 0.21756196022033691s
string-similarity-['levenshtein'] Time: 0.7816851139068604s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11466288566589355s
normalize-scores-des_cont_jaccard Time: 0.024045705795288086s
smallest-qnode-number Time: 0.1575942039489746s
mosaic-features Time: 0.010433197021484375s
creat-singleton-feature Time: 0.12186431884765625s
vote-by-classifier Time: 0.45584988594055176s
Qnodes to lookup: 558
Qnodes from file: 540
Outlier removal generates 30 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 14.468991994857788s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.228593111038208s
compute-tf-idf-class_count Time: 15.255612134933472s
compute-tf-idf-propert

26it [29:48, 49.80s/it]

align-page-rank Time: 0.21099591255187988s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0510482788085938s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.063758134841919s
string-similarity-['jaro_winkler'] Time: 0.31270718574523926s
string-similarity-['levenshtein'] Time: 2.303151845932007s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04595828056335449s
normalize-scores-des_cont_jaccard Time: 0.0211029052734375s
smallest-qnode-number Time: 0.1619260311126709s
mosaic-features Time: 0.017129898071289062s
creat-singleton-feature Time: 0.11446380615234375s
vote-by-classifier Time: 0.4469897747039795s
Qnodes to lookup: 2729
Qnodes from file: 2680
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 14.61606764793396s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2987849712371826s
compute-tf-idf-class_count Time: 17.31546187400818s
compute-tf-idf-property_co

27it [30:23, 45.48s/it]

align-page-rank Time: 0.31612396240234375s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.453608989715576s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.134101867675781s
string-similarity-['jaro_winkler'] Time: 0.623002290725708s
string-similarity-['levenshtein'] Time: 2.4563229084014893s
string-similarity-['jaccard:tokenizer=word'] Time: 0.21607589721679688s
normalize-scores-des_cont_jaccard Time: 0.05309700965881348s
smallest-qnode-number Time: 0.45186400413513184s
mosaic-features Time: 0.021207809448242188s
creat-singleton-feature Time: 0.3025479316711426s
vote-by-classifier Time: 0.7665190696716309s
Qnodes to lookup: 10045
Qnodes from file: 9916
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
_centroid_of_lof: Missing 1 of 31
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
score-using-embed

28it [34:54, 112.94s/it]

align-page-rank Time: 0.26503419876098633s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.2377049922943115s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.606300115585327s
string-similarity-['jaro_winkler'] Time: 0.6080911159515381s
string-similarity-['levenshtein'] Time: 3.3248419761657715s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1291821002960205s
normalize-scores-des_cont_jaccard Time: 0.03343319892883301s
smallest-qnode-number Time: 0.28175830841064453s
mosaic-features Time: 0.015367984771728516s
creat-singleton-feature Time: 0.19360733032226562s
vote-by-classifier Time: 0.8674817085266113s
Qnodes to lookup: 3439
Qnodes from file: 3263
Outlier removal generates 12 lof-voted candidates
_centroid_of_lof: Missing 1 of 53
Outlier removal generates 32 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
score-using-embedding Time: 24.20465612411499s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3971970081

29it [36:41, 111.24s/it]

align-page-rank Time: 0.241196870803833s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.531484842300415s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.798301935195923s
string-similarity-['jaro_winkler'] Time: 0.7332038879394531s
string-similarity-['levenshtein'] Time: 3.0068092346191406s
string-similarity-['jaccard:tokenizer=word'] Time: 0.10284304618835449s
normalize-scores-des_cont_jaccard Time: 0.03513693809509277s
smallest-qnode-number Time: 0.7757987976074219s
mosaic-features Time: 0.01667308807373047s
creat-singleton-feature Time: 0.514441967010498s
vote-by-classifier Time: 0.9363052845001221s
Qnodes to lookup: 4528
Qnodes from file: 4449
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 24.710729837417603s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.39455294609069824s
compute-tf-idf-class_count Time

30it [38:14, 105.82s/it]

align-page-rank Time: 0.2872750759124756s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.930117845535278s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.375874996185303s
string-similarity-['jaro_winkler'] Time: 1.0393507480621338s
string-similarity-['levenshtein'] Time: 5.804741144180298s
string-similarity-['jaccard:tokenizer=word'] Time: 0.14723610877990723s
normalize-scores-des_cont_jaccard Time: 0.04409599304199219s
smallest-qnode-number Time: 0.39743804931640625s
mosaic-features Time: 0.023051977157592773s
creat-singleton-feature Time: 0.2636547088623047s
vote-by-classifier Time: 0.5783779621124268s
Qnodes to lookup: 6461
Qnodes from file: 6157
Outlier removal generates 11 lof-voted candidates
_centroid_of_lof: Missing 1 of 25
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 35 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 32.04588532447815s
generate-reciprocal-ra

31it [41:19, 129.54s/it]

align-page-rank Time: 0.16049408912658691s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6735720634460449s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8139183521270752s
string-similarity-['jaro_winkler'] Time: 0.18364739418029785s
string-similarity-['levenshtein'] Time: 0.8373939990997314s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028224945068359375s
normalize-scores-des_cont_jaccard Time: 0.013648748397827148s
smallest-qnode-number Time: 0.10056304931640625s
mosaic-features Time: 0.005699872970581055s
creat-singleton-feature Time: 0.06482696533203125s
vote-by-classifier Time: 0.7932648658752441s
Qnodes to lookup: 1224
Qnodes from file: 1217
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 14.286099910736084s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12818479537963867s
compute-tf-idf-class_count Time: 15.891145944595337s
compute-tf-idf-property_count Time: 16.048994779586792s
context-ma

32it [41:45, 98.36s/it] 

align-page-rank Time: 0.30181121826171875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.980037689208984s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.681357860565186s
string-similarity-['jaro_winkler'] Time: 1.8188591003417969s
string-similarity-['levenshtein'] Time: 19.34166431427002s
string-similarity-['jaccard:tokenizer=word'] Time: 0.25290584564208984s
normalize-scores-des_cont_jaccard Time: 0.05204415321350098s
smallest-qnode-number Time: 0.5018959045410156s
mosaic-features Time: 0.026032209396362305s
creat-singleton-feature Time: 0.2949650287628174s
vote-by-classifier Time: 0.4815490245819092s
Qnodes to lookup: 695
Qnodes from file: 688
Outlier removal generates 1683 lof-voted candidates
Outlier removal generates 1671 lof-voted candidates
Outlier removal generates 260 lof-voted candidates
Outlier removal generates 1716 lof-voted candidates
Outlier removal generates 1681 lof-voted candidates
score-using-embedding Time: 61.0771353244781

33it [44:04, 110.73s/it]

align-page-rank Time: 0.27081799507141113s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.133849859237671s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.089594841003418s
string-similarity-['jaro_winkler'] Time: 0.9892888069152832s
string-similarity-['levenshtein'] Time: 6.538455963134766s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15264892578125s
normalize-scores-des_cont_jaccard Time: 0.041113853454589844s
smallest-qnode-number Time: 0.3232100009918213s
mosaic-features Time: 0.021724939346313477s
creat-singleton-feature Time: 0.27727413177490234s
vote-by-classifier Time: 0.5515220165252686s
Qnodes to lookup: 3950
Qnodes from file: 3880
Outlier removal generates 32 lof-voted candidates
Outlier removal generates 30 lof-voted candidates
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 33.16235423088074s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.41322922706604004s
compute-tf-idf-class_count Tim

34it [46:13, 116.23s/it]

align-page-rank Time: 0.27636289596557617s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.33416485786438s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.827623128890991s
string-similarity-['jaro_winkler'] Time: 0.7492351531982422s
string-similarity-['levenshtein'] Time: 5.3868420124053955s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1308879852294922s
normalize-scores-des_cont_jaccard Time: 0.03816819190979004s
smallest-qnode-number Time: 0.40238404273986816s
mosaic-features Time: 0.018597126007080078s
creat-singleton-feature Time: 0.2179579734802246s
vote-by-classifier Time: 0.41831302642822266s
Qnodes to lookup: 2109
Qnodes from file: 2022
_centroid_of_lof: Missing 5 of 28
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
_centroid_of_lof: Missing 6 of 28
Outlier removal generates 14 lof-voted candidates
_centroid_of_lof: Missing 6 of 83
Outlier removal generates 47 lof-voted candidates


35it [49:18, 136.69s/it]

align-page-rank Time: 0.06435990333557129s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.37332892417907715s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5898241996765137s
string-similarity-['jaro_winkler'] Time: 0.11661291122436523s
string-similarity-['levenshtein'] Time: 0.35870885848999023s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02885293960571289s
normalize-scores-des_cont_jaccard Time: 0.013348102569580078s
smallest-qnode-number Time: 0.08679914474487305s
mosaic-features Time: 0.005861759185791016s
creat-singleton-feature Time: 0.06346702575683594s
vote-by-classifier Time: 0.3974292278289795s
Qnodes to lookup: 1988
Qnodes from file: 1944
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 18 of 80
Outlier removal generates 37 lof-voted candidates
score-using-embedding Time: 9.907135963439941s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11860489845275879s
compute-tf-idf-cla

36it [49:40, 102.37s/it]

align-page-rank Time: 0.18305301666259766s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7967369556427s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.0900208950042725s
string-similarity-['jaro_winkler'] Time: 0.4585108757019043s
string-similarity-['levenshtein'] Time: 2.032763719558716s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11078619956970215s
normalize-scores-des_cont_jaccard Time: 0.03308606147766113s
smallest-qnode-number Time: 0.25725317001342773s
mosaic-features Time: 0.013532876968383789s
creat-singleton-feature Time: 0.15766406059265137s
vote-by-classifier Time: 0.44179201126098633s
Qnodes to lookup: 3037
Qnodes from file: 2964
Outlier removal generates 20 lof-voted candidates
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 20.262189865112305s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3433511257171631s
compute-tf-idf-class_count Ti

37it [50:35, 88.18s/it] 

align-page-rank Time: 0.2750968933105469s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.206120014190674s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.4332380294799805s
string-similarity-['jaro_winkler'] Time: 0.7399671077728271s
string-similarity-['levenshtein'] Time: 3.982267141342163s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15157079696655273s
normalize-scores-des_cont_jaccard Time: 0.04647684097290039s
smallest-qnode-number Time: 0.3561251163482666s
mosaic-features Time: 0.024779081344604492s
creat-singleton-feature Time: 0.2574639320373535s
vote-by-classifier Time: 0.3983180522918701s
Qnodes to lookup: 5145
Qnodes from file: 5094
Outlier removal generates 62 lof-voted candidates
Outlier removal generates 36 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  re

38it [51:14, 73.30s/it]

align-page-rank Time: 0.21095013618469238s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8637120723724365s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.953979015350342s
string-similarity-['jaro_winkler'] Time: 0.4723927974700928s
string-similarity-['levenshtein'] Time: 2.394927978515625s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11847090721130371s
normalize-scores-des_cont_jaccard Time: 0.03792572021484375s
smallest-qnode-number Time: 0.25523924827575684s
mosaic-features Time: 0.017827987670898438s
creat-singleton-feature Time: 0.2082839012145996s
vote-by-classifier Time: 0.43957996368408203s
Qnodes to lookup: 2681
Qnodes from file: 2613
Outlier removal generates 25 lof-voted candidates
Outlier removal generates 25 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
score-using-embedding Time: 19.919408082962036s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4153571128845215s
compute-tf-idf-class_coun

39it [52:46, 78.93s/it]

align-page-rank Time: 0.06661009788513184s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.31082892417907715s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.690748929977417s
string-similarity-['jaro_winkler'] Time: 0.09606075286865234s
string-similarity-['levenshtein'] Time: 0.34234070777893066s
string-similarity-['jaccard:tokenizer=word'] Time: 0.026268959045410156s
normalize-scores-des_cont_jaccard Time: 0.013698101043701172s
smallest-qnode-number Time: 0.5324299335479736s
mosaic-features Time: 0.005525827407836914s
creat-singleton-feature Time: 0.060710906982421875s
vote-by-classifier Time: 0.9216749668121338s
Qnodes to lookup: 1819
Qnodes from file: 1769
_centroid_of_lof: Missing 1 of 16
Outlier removal generates 9 lof-voted candidates
score-using-embedding Time: 13.354278326034546s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.43119001388549805s
compute-tf-idf-class_count Time: 14.286355018615723s
compute-tf-idf-property_count Ti

40it [53:10, 62.55s/it]

align-page-rank Time: 0.061714887619018555s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.534264087677002s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0302839279174805s
string-similarity-['jaro_winkler'] Time: 0.19436907768249512s
string-similarity-['levenshtein'] Time: 1.0623250007629395s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027520179748535156s
normalize-scores-des_cont_jaccard Time: 0.013473033905029297s
smallest-qnode-number Time: 0.09056472778320312s
mosaic-features Time: 0.004251003265380859s
creat-singleton-feature Time: 0.05141186714172363s
vote-by-classifier Time: 0.7798829078674316s
Qnodes to lookup: 1819
Qnodes from file: 1745
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 15.111902952194214s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12313699722290039s
compute-tf-idf-class_count Time: 16.18557906150818s
compute-tf-idf-property_count Time: 17.157230138778687s
context-matc

41it [53:40, 52.74s/it]

align-page-rank Time: 0.20574498176574707s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2893218994140625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5679171085357666s
string-similarity-['jaro_winkler'] Time: 0.28183913230895996s
string-similarity-['levenshtein'] Time: 1.5289151668548584s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05539512634277344s
normalize-scores-des_cont_jaccard Time: 0.023118019104003906s
smallest-qnode-number Time: 0.16461396217346191s
mosaic-features Time: 0.011038064956665039s
creat-singleton-feature Time: 0.11924099922180176s
vote-by-classifier Time: 0.5016670227050781s
Qnodes to lookup: 3844
Qnodes from file: 3776
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 15.401278018951416s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.23058509826660156s
compute-tf-idf-class_count Time: 17.06067681312561s
compute-tf-idf-prop

42it [54:33, 52.81s/it]

align-page-rank Time: 0.05777287483215332s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4660370349884033s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8588647842407227s
string-similarity-['jaro_winkler'] Time: 0.26788997650146484s
string-similarity-['levenshtein'] Time: 2.173022985458374s
string-similarity-['jaccard:tokenizer=word'] Time: 0.025839805603027344s
normalize-scores-des_cont_jaccard Time: 0.013778924942016602s
smallest-qnode-number Time: 0.08528375625610352s
mosaic-features Time: 0.006055355072021484s
creat-singleton-feature Time: 0.06148219108581543s
vote-by-classifier Time: 0.819270133972168s
Qnodes to lookup: 1898
Qnodes from file: 1876
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 17.269015073776245s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12871193885803223s
compute-tf-idf-class_count Time: 18.38685178756714s
compute-tf-idf-property_count Time: 18.946374893188477s
context-match

43it [55:07, 47.32s/it]

align-page-rank Time: 0.10508918762207031s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.491322994232178s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5635011196136475s
string-similarity-['jaro_winkler'] Time: 1.85545015335083s
string-similarity-['levenshtein'] Time: 18.075154781341553s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05546402931213379s
normalize-scores-des_cont_jaccard Time: 0.020823001861572266s
smallest-qnode-number Time: 0.16204094886779785s
mosaic-features Time: 0.016387939453125s
creat-singleton-feature Time: 0.10243797302246094s
vote-by-classifier Time: 0.761328935623169s
Qnodes to lookup: 130
Qnodes from file: 130
Outlier removal generates 51 lof-voted candidates
Outlier removal generates 47 lof-voted candidates
score-using-embedding Time: 47.28055500984192s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2143700122833252s
compute-tf-idf-class_count Time: 48.73297309875488s
compute-tf-idf-property_count

44it [56:14, 53.19s/it]

align-page-rank Time: 0.25421690940856934s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5090501308441162s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.654761791229248s
string-similarity-['jaro_winkler'] Time: 0.4260849952697754s
string-similarity-['levenshtein'] Time: 1.6724762916564941s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08588600158691406s
normalize-scores-des_cont_jaccard Time: 0.03540205955505371s
smallest-qnode-number Time: 0.24725079536437988s
mosaic-features Time: 0.018239974975585938s
creat-singleton-feature Time: 0.19725918769836426s
vote-by-classifier Time: 0.4630570411682129s
Qnodes to lookup: 4609
Qnodes from file: 4545
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 6 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
Outlier removal generates 32 lof-voted candidates
score-using-embedding Time: 19.231431245803833s
generate-reciprocal-rank-lof-graph-embe

45it [57:38, 62.44s/it]

align-page-rank Time: 0.21363401412963867s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6435470581054688s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.841602087020874s
string-similarity-['jaro_winkler'] Time: 0.3028910160064697s
string-similarity-['levenshtein'] Time: 1.3503029346466064s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09513425827026367s
normalize-scores-des_cont_jaccard Time: 0.023797988891601562s
smallest-qnode-number Time: 0.18118977546691895s
mosaic-features Time: 0.011518001556396484s
creat-singleton-feature Time: 0.14357495307922363s
vote-by-classifier Time: 1.0349161624908447s
Qnodes to lookup: 2011
Qnodes from file: 1998
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 16.94954490661621s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5658807754516602s
compute-tf-idf-class_count Time: 18.851240158081055s
compute-tf-idf-proper

46it [58:47, 64.34s/it]

align-page-rank Time: 0.2787940502166748s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6677029132843018s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2690558433532715s
string-similarity-['jaro_winkler'] Time: 0.37789273262023926s
string-similarity-['levenshtein'] Time: 2.15378475189209s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05102109909057617s
normalize-scores-des_cont_jaccard Time: 0.022461891174316406s
smallest-qnode-number Time: 0.18640780448913574s
mosaic-features Time: 0.00978398323059082s
creat-singleton-feature Time: 0.12769317626953125s
vote-by-classifier Time: 1.0087618827819824s
Qnodes to lookup: 2861
Qnodes from file: 2798
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 18.561089038848877s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2575218677520752s
compute-tf-idf-class_count Time: 20.821699857711792s
compute-tf-idf-propert

47it [59:32, 58.47s/it]

align-page-rank Time: 0.29912710189819336s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.562235116958618s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.036427021026611s
string-similarity-['jaro_winkler'] Time: 1.0712668895721436s
string-similarity-['levenshtein'] Time: 8.275910139083862s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15378403663635254s
normalize-scores-des_cont_jaccard Time: 0.04940080642700195s
smallest-qnode-number Time: 0.3872110843658447s
mosaic-features Time: 0.024929046630859375s
creat-singleton-feature Time: 0.2968728542327881s
vote-by-classifier Time: 0.6715991497039795s
Qnodes to lookup: 5945
Qnodes from file: 5809
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 33 lof-voted candidates
Outlier removal generates 37 lof-voted candidates
score-using-embedding Time: 42.20905876159668s
generate-reciprocal-rank-lof-graph-embedding-score Time

48it [1:04:31, 130.82s/it]

align-page-rank Time: 0.18755507469177246s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8625140190124512s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1079697608947754s
string-similarity-['jaro_winkler'] Time: 0.27396273612976074s
string-similarity-['levenshtein'] Time: 1.5137038230895996s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0619959831237793s
normalize-scores-des_cont_jaccard Time: 0.02432394027709961s
smallest-qnode-number Time: 0.17332768440246582s
mosaic-features Time: 0.012330055236816406s
creat-singleton-feature Time: 0.14709091186523438s
vote-by-classifier Time: 0.46396708488464355s
Qnodes to lookup: 503
Qnodes from file: 500
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 11.70074200630188s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26207995414733887s
compute-tf-idf-class_count Time: 12.466116905212402s
compute-tf-idf-proper

49it [1:05:08, 102.41s/it]

align-page-rank Time: 0.1801769733428955s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.0016140937805176s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.674067974090576s
string-similarity-['jaro_winkler'] Time: 0.35938191413879395s
string-similarity-['levenshtein'] Time: 2.126654863357544s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05829906463623047s
normalize-scores-des_cont_jaccard Time: 0.02464008331298828s
smallest-qnode-number Time: 0.16625690460205078s
mosaic-features Time: 0.011440038681030273s
creat-singleton-feature Time: 0.12142205238342285s
vote-by-classifier Time: 0.44810914993286133s
Qnodes to lookup: 3543
Qnodes from file: 3482
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
score-using-embedding Time: 18.154332160949707s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4772510528564453s
compute-tf-idf-class_count Time: 19.99893093109131s
compute-tf-idf-propert

50it [1:06:12, 90.99s/it] 

align-page-rank Time: 0.14225006103515625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.41178011894226074s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7807810306549072s
string-similarity-['jaro_winkler'] Time: 0.1472780704498291s
string-similarity-['levenshtein'] Time: 0.7073853015899658s
string-similarity-['jaccard:tokenizer=word'] Time: 0.034143924713134766s
normalize-scores-des_cont_jaccard Time: 0.015674829483032227s
smallest-qnode-number Time: 0.48662710189819336s
mosaic-features Time: 0.005280017852783203s
creat-singleton-feature Time: 0.06063222885131836s
vote-by-classifier Time: 0.7169320583343506s
Qnodes to lookup: 1626
Qnodes from file: 1568
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 14.152900218963623s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11375284194946289s
compute-tf-idf-class_count Time: 14.649774074554443s
compute-tf-idf-property_count Time: 14.7308030128479s
context-matc

51it [1:06:41, 72.32s/it]

align-page-rank Time: 0.21102285385131836s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7571020126342773s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3561789989471436s
string-similarity-['jaro_winkler'] Time: 0.15076375007629395s
string-similarity-['levenshtein'] Time: 0.8507130146026611s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02744293212890625s
normalize-scores-des_cont_jaccard Time: 0.013573169708251953s
smallest-qnode-number Time: 0.09206986427307129s
mosaic-features Time: 0.005831003189086914s
creat-singleton-feature Time: 0.06249809265136719s
vote-by-classifier Time: 0.39522409439086914s
Qnodes to lookup: 1195
Qnodes from file: 1185
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 12.330118894577026s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11710119247436523s
compute-tf-idf-class_count Time: 13.061742782592773s
compute-tf-idf-property_count Time: 13.225816249847412s
context-mat

52it [1:07:02, 57.03s/it]

align-page-rank Time: 0.17620205879211426s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9063329696655273s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.568876028060913s
string-similarity-['jaro_winkler'] Time: 0.4419362545013428s
string-similarity-['levenshtein'] Time: 2.251168966293335s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08023238182067871s
normalize-scores-des_cont_jaccard Time: 0.0340731143951416s
smallest-qnode-number Time: 0.26251888275146484s
mosaic-features Time: 0.017033100128173828s
creat-singleton-feature Time: 0.19495129585266113s
vote-by-classifier Time: 0.8304708003997803s
Qnodes to lookup: 5438
Qnodes from file: 5348
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 20.76374888420105s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.36098504066467285s
compute-tf-idf-class_count

53it [1:08:26, 65.20s/it]

Command: align-page-rank
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl/cli/align-page-rank.py", line 29, in run
    df = pd.read_csv(kwargs['input_file'], dtype=object)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 610, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 462, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 819, in __init__
    self._engine = self._make_engine(self.engine)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 1050, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/Users/amandeep/Github/table-linker/tl_env/lib/

54it [1:08:50, 52.70s/it]

align-page-rank Time: 0.16301608085632324s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.2876908779144287s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.05954384803772s
string-similarity-['jaro_winkler'] Time: 0.637092113494873s
string-similarity-['levenshtein'] Time: 4.059168100357056s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07095694541931152s
normalize-scores-des_cont_jaccard Time: 0.024085044860839844s
smallest-qnode-number Time: 0.22826313972473145s
mosaic-features Time: 0.011851072311401367s
creat-singleton-feature Time: 0.264603853225708s
vote-by-classifier Time: 0.40013885498046875s
Qnodes to lookup: 5281
Qnodes from file: 4766
Outlier removal generates 9 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
_centroid_of_lof: Missing 1 of 29
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 25.810741901397705s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.350904703140

55it [1:10:10, 60.83s/it]

align-page-rank Time: 0.18071484565734863s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.1531639099121094s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.446923017501831s
string-similarity-['jaro_winkler'] Time: 0.47228074073791504s
string-similarity-['levenshtein'] Time: 2.2608418464660645s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07214975357055664s
normalize-scores-des_cont_jaccard Time: 0.027817249298095703s
smallest-qnode-number Time: 0.2332460880279541s
mosaic-features Time: 0.012686014175415039s
creat-singleton-feature Time: 0.18730521202087402s
vote-by-classifier Time: 0.4177858829498291s
Qnodes to lookup: 4911
Qnodes from file: 4785
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
_centroid_of_lof: Missing 1 of 8
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 20.746389150619507s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.404715776

56it [1:11:31, 66.93s/it]

align-page-rank Time: 0.1680760383605957s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.198808193206787s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.241769313812256s
string-similarity-['jaro_winkler'] Time: 0.44269895553588867s
string-similarity-['levenshtein'] Time: 2.54994797706604s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09271907806396484s
normalize-scores-des_cont_jaccard Time: 0.030719995498657227s
smallest-qnode-number Time: 0.245377779006958s
mosaic-features Time: 0.015121936798095703s
creat-singleton-feature Time: 0.16115999221801758s
vote-by-classifier Time: 0.4476752281188965s
Qnodes to lookup: 3345
Qnodes from file: 3297
Outlier removal generates 11 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 25
Outlier removal generates 5 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 21.48074698448181s
genera

57it [1:12:37, 66.58s/it]

align-page-rank Time: 0.18145084381103516s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.619858980178833s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8629181385040283s
string-similarity-['jaro_winkler'] Time: 0.14717912673950195s
string-similarity-['levenshtein'] Time: 0.7117788791656494s
string-similarity-['jaccard:tokenizer=word'] Time: 0.038336992263793945s
normalize-scores-des_cont_jaccard Time: 0.014500856399536133s
smallest-qnode-number Time: 0.09563112258911133s
mosaic-features Time: 0.005962848663330078s
creat-singleton-feature Time: 0.06876993179321289s
vote-by-classifier Time: 0.8306779861450195s
Qnodes to lookup: 2160
Qnodes from file: 2100
Outlier removal generates 3 lof-voted candidates
score-using-embedding Time: 14.33990216255188s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12216305732727051s
compute-tf-idf-class_count Time: 14.959078073501587s
compute-tf-idf-property_count Time: 15.155030965805054s
context-match

58it [1:13:04, 54.80s/it]

align-page-rank Time: 0.08515191078186035s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.42809510231018066s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.20847511291503906s
string-similarity-['jaro_winkler'] Time: 0.12142586708068848s
string-similarity-['levenshtein'] Time: 0.8264381885528564s
string-similarity-['jaccard:tokenizer=word'] Time: 0.019484996795654297s
normalize-scores-des_cont_jaccard Time: 0.01002812385559082s
smallest-qnode-number Time: 0.4899427890777588s
mosaic-features Time: 0.004955768585205078s
creat-singleton-feature Time: 0.06118297576904297s
vote-by-classifier Time: 0.788815975189209s
Qnodes to lookup: 1288
Qnodes from file: 1286
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-embedding Time: 13.202474117279053s
Command: gen

59it [1:13:30, 46.16s/it]

align-page-rank Time: 0.14477896690368652s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4122660160064697s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4798431396484375s
string-similarity-['jaro_winkler'] Time: 0.10531306266784668s
string-similarity-['levenshtein'] Time: 0.4335799217224121s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03464794158935547s
normalize-scores-des_cont_jaccard Time: 0.01293802261352539s
smallest-qnode-number Time: 0.09181475639343262s
mosaic-features Time: 0.0047168731689453125s
creat-singleton-feature Time: 0.06323003768920898s
vote-by-classifier Time: 1.540571928024292s
Qnodes to lookup: 1662
Qnodes from file: 1641
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 14.32714295387268s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1291959285736084s
compute-tf-idf-class_count Time: 14.839890241622925s
compute-tf-idf-property_count Time: 14.967584133148193s
context-match T

60it [1:13:59, 40.93s/it]

align-page-rank Time: 0.12911009788513184s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4505758285522461s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.773252010345459s
string-similarity-['jaro_winkler'] Time: 0.11125397682189941s
string-similarity-['levenshtein'] Time: 0.4433779716491699s
string-similarity-['jaccard:tokenizer=word'] Time: 0.031783103942871094s
normalize-scores-des_cont_jaccard Time: 0.01430201530456543s
smallest-qnode-number Time: 0.0955040454864502s
mosaic-features Time: 0.006726741790771484s
creat-singleton-feature Time: 0.06974673271179199s
vote-by-classifier Time: 0.4300956726074219s
Qnodes to lookup: 2191
Qnodes from file: 2141
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 13.540699005126953s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12769389152526855s
compute-tf-idf-class_count Time: 14.006237030029297s
compute-tf-idf-property_count Time: 14.177700996398926s
context-match 

61it [1:14:24, 36.30s/it]

align-page-rank Time: 0.05405998229980469s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.46631383895874023s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0129711627960205s
string-similarity-['jaro_winkler'] Time: 0.12227392196655273s
string-similarity-['levenshtein'] Time: 0.4127218723297119s
string-similarity-['jaccard:tokenizer=word'] Time: 0.036000728607177734s
normalize-scores-des_cont_jaccard Time: 0.015210390090942383s
smallest-qnode-number Time: 0.08846592903137207s
mosaic-features Time: 0.007043123245239258s
creat-singleton-feature Time: 0.07800722122192383s
vote-by-classifier Time: 0.42019081115722656s
Qnodes to lookup: 2689
Qnodes from file: 2644
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 10.682899951934814s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.19045805931091309s
compute-tf-idf-class_count Time: 11.397541046142578s
compute-tf-idf-property_count Time: 11.601673126220703s
context-m

62it [1:14:53, 34.13s/it]

align-page-rank Time: 0.2521779537200928s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.837203025817871s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.845712184906006s
string-similarity-['jaro_winkler'] Time: 0.6022629737854004s
string-similarity-['levenshtein'] Time: 3.28604793548584s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08554792404174805s
normalize-scores-des_cont_jaccard Time: 0.02609705924987793s
smallest-qnode-number Time: 0.3484780788421631s
mosaic-features Time: 0.015279054641723633s
creat-singleton-feature Time: 0.21602201461791992s
vote-by-classifier Time: 0.42220640182495117s
Qnodes to lookup: 8650
Qnodes from file: 8563
Outlier removal generates 61 lof-voted candidates
score-using-embedding Time: 29.574202060699463s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4623751640319824s
compute-tf-idf-class_count Time: 32.58653283119202s
compute-tf-idf-property_count Time: 32.08116388320923s
context-match Time: 1

63it [1:17:58, 79.23s/it]

align-page-rank Time: 0.05549478530883789s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3480682373046875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7416961193084717s
string-similarity-['jaro_winkler'] Time: 0.10075807571411133s
string-similarity-['levenshtein'] Time: 0.295029878616333s
string-similarity-['jaccard:tokenizer=word'] Time: 0.047930002212524414s
normalize-scores-des_cont_jaccard Time: 0.016872167587280273s
smallest-qnode-number Time: 0.4847078323364258s
mosaic-features Time: 0.0072247982025146484s
creat-singleton-feature Time: 0.09618401527404785s
vote-by-classifier Time: 1.5636038780212402s
Qnodes to lookup: 2733
Qnodes from file: 2692
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 14.428880214691162s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13928985595703125s
compute-tf-idf-class_count Time: 15.787214040756226s
compute-tf-idf-property_count Time: 15.936691045761108s
context-mat

64it [1:18:27, 64.21s/it]

align-page-rank Time: 0.10994505882263184s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.437739133834839s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.210663080215454s
string-similarity-['jaro_winkler'] Time: 0.4600048065185547s
string-similarity-['levenshtein'] Time: 3.0500638484954834s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07273292541503906s
normalize-scores-des_cont_jaccard Time: 0.028235912322998047s
smallest-qnode-number Time: 0.22592735290527344s
mosaic-features Time: 0.014989137649536133s
creat-singleton-feature Time: 0.3768491744995117s
vote-by-classifier Time: 0.7752349376678467s
Qnodes to lookup: 4107
Qnodes from file: 4053
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 22.741421937942505s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.27062034606933594s
compute-tf-idf-class_count Time: 23.599693775177002s
compute-tf-idf-proper

65it [1:19:29, 63.65s/it]

align-page-rank Time: 0.36138486862182617s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.676276922225952s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.408460855484009s
string-similarity-['jaro_winkler'] Time: 0.8117260932922363s
string-similarity-['levenshtein'] Time: 4.727930068969727s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20426511764526367s
normalize-scores-des_cont_jaccard Time: 0.04845309257507324s
smallest-qnode-number Time: 0.5612869262695312s
mosaic-features Time: 0.025954008102416992s
creat-singleton-feature Time: 0.29187893867492676s
vote-by-classifier Time: 0.5072999000549316s
Qnodes to lookup: 10809
Qnodes from file: 10539
Outlier removal generates 111 lof-voted candidates
score-using-embedding Time: 36.99315667152405s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5728282928466797s
compute-tf-idf-class_count Time: 38.83451581001282s
compute-tf-idf-property_count Time: 39.50213694572449s
context-match Tim

66it [1:21:15, 76.41s/it]

align-page-rank Time: 0.8439188003540039s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.543318748474121s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.748165130615234s
string-similarity-['jaro_winkler'] Time: 0.8775699138641357s
string-similarity-['levenshtein'] Time: 4.630217790603638s
string-similarity-['jaccard:tokenizer=word'] Time: 0.23171496391296387s
normalize-scores-des_cont_jaccard Time: 0.05403304100036621s
smallest-qnode-number Time: 0.6238887310028076s
mosaic-features Time: 0.028557777404785156s
creat-singleton-feature Time: 0.330136775970459s
vote-by-classifier Time: 0.5209739208221436s
Qnodes to lookup: 13137
Qnodes from file: 12727
Outlier removal generates 114 lof-voted candidates
score-using-embedding Time: 34.714035987854004s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6592650413513184s
compute-tf-idf-class_count Time: 36.81241202354431s
compute-tf-idf-property_count Time: 37.48990201950073s
context-match Time:

67it [1:23:03, 85.66s/it]

align-page-rank Time: 0.11737585067749023s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7019720077514648s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.161725759506226s
string-similarity-['jaro_winkler'] Time: 0.42176103591918945s
string-similarity-['levenshtein'] Time: 1.6033248901367188s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07283210754394531s
normalize-scores-des_cont_jaccard Time: 0.028049230575561523s
smallest-qnode-number Time: 0.6359319686889648s
mosaic-features Time: 0.016234159469604492s
creat-singleton-feature Time: 0.578218936920166s
vote-by-classifier Time: 1.0006730556488037s
Qnodes to lookup: 3699
Qnodes from file: 3664
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 20.83644723892212s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3076610565185547s
compute-tf-idf-class_count Time: 22.034964084625244s
compute-tf-idf-property

68it [1:24:21, 83.40s/it]

align-page-rank Time: 0.17105817794799805s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6277251243591309s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5811660289764404s
string-similarity-['jaro_winkler'] Time: 0.4642627239227295s
string-similarity-['levenshtein'] Time: 1.722588062286377s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1248939037322998s
normalize-scores-des_cont_jaccard Time: 0.032225847244262695s
smallest-qnode-number Time: 0.24224114418029785s
mosaic-features Time: 0.015263795852661133s
creat-singleton-feature Time: 0.17203283309936523s
vote-by-classifier Time: 0.4987788200378418s
Qnodes to lookup: 6121
Qnodes from file: 6053
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 18.552871227264404s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3286631107330322s
compute-tf-idf-class_coun

69it [1:25:25, 77.79s/it]

align-page-rank Time: 0.1542799472808838s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6316850185394287s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3332748413085938s
string-similarity-['jaro_winkler'] Time: 0.16371798515319824s
string-similarity-['levenshtein'] Time: 1.0017709732055664s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027811288833618164s
normalize-scores-des_cont_jaccard Time: 0.013470888137817383s
smallest-qnode-number Time: 0.14257502555847168s
mosaic-features Time: 0.005764007568359375s
creat-singleton-feature Time: 0.056150197982788086s
vote-by-classifier Time: 1.4262378215789795s
Qnodes to lookup: 1523
Qnodes from file: 1502
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 16.069877862930298s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1271679401397705s
compute-tf-idf-class_count Time: 17.38022494316101s
compute-tf-idf-property_count Time: 17.539663076400757s
context-matc

70it [1:25:55, 63.24s/it]

align-page-rank Time: 0.15837407112121582s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6138238906860352s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3636338710784912s
string-similarity-['jaro_winkler'] Time: 0.14959311485290527s
string-similarity-['levenshtein'] Time: 0.6922028064727783s
string-similarity-['jaccard:tokenizer=word'] Time: 0.030821800231933594s
normalize-scores-des_cont_jaccard Time: 0.013448953628540039s
smallest-qnode-number Time: 0.23905682563781738s
mosaic-features Time: 0.006028890609741211s
creat-singleton-feature Time: 0.06417489051818848s
vote-by-classifier Time: 0.8668889999389648s
Qnodes to lookup: 1669
Qnodes from file: 1618
Outlier removal generates 9 lof-voted candidates
score-using-embedding Time: 14.374362230300903s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13726305961608887s
compute-tf-idf-class_count Time: 16.300790071487427s
compute-tf-idf-property_count Time: 16.049305200576782s
context-mat

71it [1:26:30, 54.83s/it]

align-page-rank Time: 0.3670217990875244s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6439900398254395s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.100013017654419s
string-similarity-['jaro_winkler'] Time: 0.3960092067718506s
string-similarity-['levenshtein'] Time: 2.2658300399780273s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05701708793640137s
normalize-scores-des_cont_jaccard Time: 0.023942947387695312s
smallest-qnode-number Time: 0.1626579761505127s
mosaic-features Time: 0.01126408576965332s
creat-singleton-feature Time: 0.12273478507995605s
vote-by-classifier Time: 0.5095980167388916s
Qnodes to lookup: 3414
Qnodes from file: 3376
Outlier removal generates 49 lof-voted candidates
Outlier removal generates 48 lof-voted candidates
score-using-embedding Time: 18.333820104599s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.22606801986694336s
compute-tf-idf-class_count Time: 19.83764100074768s
compute-tf-idf-property_co

72it [1:27:12, 50.95s/it]

align-page-rank Time: 0.34915781021118164s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.6125669479370117s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.38919186592102s
string-similarity-['jaro_winkler'] Time: 1.0656418800354004s
string-similarity-['levenshtein'] Time: 3.888307809829712s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13597798347473145s
normalize-scores-des_cont_jaccard Time: 0.04528188705444336s
smallest-qnode-number Time: 0.4108290672302246s
mosaic-features Time: 0.021973848342895508s
creat-singleton-feature Time: 0.25452709197998047s
vote-by-classifier Time: 1.774630069732666s
Qnodes to lookup: 5372
Qnodes from file: 5290
Outlier removal generates 24 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
score-using-embedding Time: 30.555796146392822s
generate-reciprocal-rank-lof-graph-embedding-score Time

73it [1:30:16, 90.93s/it]

Command: align-page-rank
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl/cli/align-page-rank.py", line 29, in run
    df = pd.read_csv(kwargs['input_file'], dtype=object)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 610, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 462, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 819, in __init__
    self._engine = self._make_engine(self.engine)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 1050, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/Users/amandeep/Github/table-linker/tl_env/lib/

74it [1:30:36, 69.58s/it]

align-page-rank Time: 0.16811585426330566s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1984589099884033s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.71149206161499s
string-similarity-['jaro_winkler'] Time: 0.5311250686645508s
string-similarity-['levenshtein'] Time: 3.7622439861297607s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06910586357116699s
normalize-scores-des_cont_jaccard Time: 0.031579017639160156s
smallest-qnode-number Time: 0.23838400840759277s
mosaic-features Time: 0.0156857967376709s
creat-singleton-feature Time: 0.16890192031860352s
vote-by-classifier Time: 0.8913280963897705s
Qnodes to lookup: 4202
Qnodes from file: 4086
Outlier removal generates 23 lof-voted candidates
_centroid_of_lof: Missing 1 of 99
Outlier removal generates 59 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 24.797508001327515s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3199958801

75it [1:32:09, 76.82s/it]

align-page-rank Time: 0.12235188484191895s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.30791783332824707s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4874696731567383s
string-similarity-['jaro_winkler'] Time: 0.09856820106506348s
string-similarity-['levenshtein'] Time: 0.2880880832672119s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02982497215270996s
normalize-scores-des_cont_jaccard Time: 0.014221906661987305s
smallest-qnode-number Time: 0.09316706657409668s
mosaic-features Time: 0.006021976470947266s
creat-singleton-feature Time: 0.0662841796875s
vote-by-classifier Time: 0.5122652053833008s
Qnodes to lookup: 2239
Qnodes from file: 2198
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 11.159744024276733s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13574600219726562s
compute-tf-idf-class_count Time: 12.448137044906616s
compute-tf-idf-property_count Time: 12.66657280921936s
context-match Ti

76it [1:32:32, 60.43s/it]

align-page-rank Time: 0.18224430084228516s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.603790044784546s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.461230754852295s
string-similarity-['jaro_winkler'] Time: 0.6500370502471924s
string-similarity-['levenshtein'] Time: 4.184568881988525s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06621384620666504s
normalize-scores-des_cont_jaccard Time: 0.02761101722717285s
smallest-qnode-number Time: 0.16584992408752441s
mosaic-features Time: 0.016993045806884766s
creat-singleton-feature Time: 0.15050101280212402s
vote-by-classifier Time: 0.5420310497283936s
Qnodes to lookup: 3248
Qnodes from file: 3112
Outlier removal generates 7 lof-voted candidates
Outlier removal generates 33 lof-voted candidates
score-using-embedding Time: 26.839451789855957s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26623106002807617s
compute-tf-idf-class_count Time: 27.809172868728638s
compute-tf-idf-property

77it [1:34:06, 70.52s/it]

align-page-rank Time: 0.22892427444458008s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.432208776473999s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7298951148986816s
string-similarity-['jaro_winkler'] Time: 0.12109613418579102s
string-similarity-['levenshtein'] Time: 0.41403794288635254s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0458371639251709s
normalize-scores-des_cont_jaccard Time: 0.015579938888549805s
smallest-qnode-number Time: 0.09921598434448242s
mosaic-features Time: 0.0077130794525146484s
creat-singleton-feature Time: 0.08587217330932617s
vote-by-classifier Time: 0.7998321056365967s
Qnodes to lookup: 1566
Qnodes from file: 1543
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 13.38284182548523s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.44359803199768066s
compute-tf-idf-class_count Time: 15.09986686706543s
compute-tf-idf-property_count Time: 14.40239691734314s
context-match 

78it [1:34:36, 58.45s/it]

align-page-rank Time: 0.1325380802154541s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4077880382537842s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3213529586791992s
string-similarity-['jaro_winkler'] Time: 0.11690497398376465s
string-similarity-['levenshtein'] Time: 0.5417380332946777s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04813790321350098s
normalize-scores-des_cont_jaccard Time: 0.012665987014770508s
smallest-qnode-number Time: 0.09469318389892578s
mosaic-features Time: 0.0055999755859375s
creat-singleton-feature Time: 0.06234478950500488s
vote-by-classifier Time: 0.8763391971588135s
Qnodes to lookup: 1806
Qnodes from file: 1789
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-embedding Time: 10.546016931533813s
Command: genera

79it [1:34:57, 47.26s/it]

align-page-rank Time: 0.05304408073425293s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.21627426147461s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.094101905822754s
string-similarity-['jaro_winkler'] Time: 1.1179399490356445s
string-similarity-['levenshtein'] Time: 17.13623309135437s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028532981872558594s
normalize-scores-des_cont_jaccard Time: 0.012972831726074219s
smallest-qnode-number Time: 0.09139823913574219s
mosaic-features Time: 0.010571956634521484s
creat-singleton-feature Time: 0.0589449405670166s
vote-by-classifier Time: 1.1132893562316895s
Qnodes to lookup: 1597
Qnodes from file: 1543
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 56.85623502731323s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11454963684082031s
compute-tf-idf-class_count Time: 57.42284417152405s
compute-tf-idf-property_count Time: 57.603384017944336s
context-match Tim

80it [1:36:10, 55.09s/it]

align-page-rank Time: 0.16315603256225586s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.43700718879699707s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2856309413909912s
string-similarity-['jaro_winkler'] Time: 0.15019798278808594s
string-similarity-['levenshtein'] Time: 0.705225944519043s
string-similarity-['jaccard:tokenizer=word'] Time: 0.035037994384765625s
normalize-scores-des_cont_jaccard Time: 0.014536380767822266s
smallest-qnode-number Time: 0.5239028930664062s
mosaic-features Time: 0.0060498714447021484s
creat-singleton-feature Time: 0.06461477279663086s
vote-by-classifier Time: 0.8149838447570801s
Qnodes to lookup: 855
Qnodes from file: 843
Outlier removal generates 27 lof-voted candidates
score-using-embedding Time: 15.247038841247559s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11118602752685547s
compute-tf-idf-class_count Time: 15.776194095611572s
compute-tf-idf-property_count Time: 15.952385902404785s
context-matc

81it [1:36:42, 48.08s/it]

align-page-rank Time: 0.1114192008972168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0461928844451904s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5839710235595703s
string-similarity-['jaro_winkler'] Time: 0.2955482006072998s
string-similarity-['levenshtein'] Time: 1.0902941226959229s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0586702823638916s
normalize-scores-des_cont_jaccard Time: 0.02428913116455078s
smallest-qnode-number Time: 0.15997529029846191s
mosaic-features Time: 0.011214017868041992s
creat-singleton-feature Time: 0.12389612197875977s
vote-by-classifier Time: 0.37441110610961914s
Qnodes to lookup: 1883
Qnodes from file: 1818
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 15.170583963394165s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2349071502685547s
compute-tf-idf-class_count Time: 15.983407020568848s
compute-tf-idf-proper

82it [1:37:19, 44.82s/it]

align-page-rank Time: 0.21092987060546875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4190857410430908s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.46212100982666016s
string-similarity-['jaro_winkler'] Time: 0.09837794303894043s
string-similarity-['levenshtein'] Time: 0.3799161911010742s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03130793571472168s
normalize-scores-des_cont_jaccard Time: 0.01148676872253418s
smallest-qnode-number Time: 0.5592310428619385s
mosaic-features Time: 0.005469322204589844s
creat-singleton-feature Time: 0.06003308296203613s
vote-by-classifier Time: 0.9109630584716797s
Qnodes to lookup: 1640
Qnodes from file: 1632
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 13.407691955566406s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5156071186065674s
compute-tf-idf-class_count Time: 14.26249098777771s
compute-tf-idf-property_count Time: 15.275696754455566s
context-match Tim

83it [1:37:45, 38.92s/it]

align-page-rank Time: 0.2813727855682373s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.124910831451416s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.1390557289123535s
string-similarity-['jaro_winkler'] Time: 1.5854370594024658s
string-similarity-['levenshtein'] Time: 20.80668330192566s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04578900337219238s
normalize-scores-des_cont_jaccard Time: 0.014101028442382812s
smallest-qnode-number Time: 0.1439497470855713s
mosaic-features Time: 0.010153055191040039s
creat-singleton-feature Time: 0.09301900863647461s
vote-by-classifier Time: 0.37548184394836426s
Qnodes to lookup: 642
Qnodes from file: 634
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 100 lof-voted candidates
score-using-embedding Time: 50.546459913253784s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.21213412284851074s
compute-tf-idf-class_count Time: 52.5689640045166s
compute-tf-idf-property

84it [1:38:52, 47.40s/it]

align-page-rank Time: 0.2995109558105469s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5096890926361084s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.3575990200042725s
string-similarity-['jaro_winkler'] Time: 0.40923380851745605s
string-similarity-['levenshtein'] Time: 2.3267247676849365s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0776059627532959s
normalize-scores-des_cont_jaccard Time: 0.03345084190368652s
smallest-qnode-number Time: 0.31162595748901367s
mosaic-features Time: 0.014566898345947266s
creat-singleton-feature Time: 0.18710994720458984s
vote-by-classifier Time: 1.537959098815918s
Qnodes to lookup: 5207
Qnodes from file: 5146
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 22.5954749584198s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3122577667236328s
compute-tf-idf-class_count T

85it [1:40:21, 60.00s/it]

align-page-rank Time: 0.09095001220703125s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.48188281059265137s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9849438667297363s
string-similarity-['jaro_winkler'] Time: 0.14453506469726562s
string-similarity-['levenshtein'] Time: 0.5279562473297119s
string-similarity-['jaccard:tokenizer=word'] Time: 0.036588191986083984s
normalize-scores-des_cont_jaccard Time: 0.05421710014343262s
smallest-qnode-number Time: 0.13930606842041016s
mosaic-features Time: 0.00823664665222168s
creat-singleton-feature Time: 0.08564138412475586s
vote-by-classifier Time: 0.5745420455932617s
Qnodes to lookup: 3104
Qnodes from file: 3022
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 14.534401178359985s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4722900390625s
compute-tf-idf-class_count Time: 16.328272104263306s
compute-tf-idf-property_count Time: 16.527672052383423s
context-match T

86it [1:40:50, 50.58s/it]

align-page-rank Time: 0.15448808670043945s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.053575992584229s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.500870227813721s
string-similarity-['jaro_winkler'] Time: 0.8809750080108643s
string-similarity-['levenshtein'] Time: 9.672203063964844s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08492088317871094s
normalize-scores-des_cont_jaccard Time: 0.02818894386291504s
smallest-qnode-number Time: 0.23562216758728027s
mosaic-features Time: 0.01437520980834961s
creat-singleton-feature Time: 0.1534731388092041s
vote-by-classifier Time: 0.5415198802947998s
Qnodes to lookup: 344
Qnodes from file: 341
Outlier removal generates 1920 lof-voted candidates
Outlier removal generates 37 lof-voted candidates
Outlier removal generates 80 lof-voted candidates
score-using-embedding Time: 35.51144099235535s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2986311912536621s
compute-tf-idf-class_count Ti

87it [1:41:43, 51.48s/it]

align-page-rank Time: 0.1712949275970459s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.185287952423096s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.962983846664429s
string-similarity-['jaro_winkler'] Time: 0.7054400444030762s
string-similarity-['levenshtein'] Time: 4.195804834365845s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09589195251464844s
normalize-scores-des_cont_jaccard Time: 0.03643989562988281s
smallest-qnode-number Time: 0.24774599075317383s
mosaic-features Time: 0.020651817321777344s
creat-singleton-feature Time: 0.20354318618774414s
vote-by-classifier Time: 0.3904702663421631s
Qnodes to lookup: 3830
Qnodes from file: 3803
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
score-using-embedding Time: 28.818964958190918s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.37061500549316406s
compute-tf-idf-class_count

88it [1:44:48, 91.45s/it]

align-page-rank Time: 0.18491387367248535s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.677907943725586s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.178278923034668s
string-similarity-['jaro_winkler'] Time: 1.1834547519683838s
string-similarity-['levenshtein'] Time: 7.6871302127838135s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09815597534179688s
normalize-scores-des_cont_jaccard Time: 0.0414426326751709s
smallest-qnode-number Time: 0.302462100982666s
mosaic-features Time: 0.023747920989990234s
creat-singleton-feature Time: 0.2622358798980713s
vote-by-classifier Time: 1.696146011352539s
Qnodes to lookup: 5162
Qnodes from file: 5046
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 40.07560205459595s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3993949890136719s
compute-tf-idf-class_count Tim

89it [1:48:13, 125.40s/it]

align-page-rank Time: 0.17623615264892578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4337809085845947s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.895503044128418s
string-similarity-['jaro_winkler'] Time: 0.13029909133911133s
string-similarity-['levenshtein'] Time: 0.47485899925231934s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03341794013977051s
normalize-scores-des_cont_jaccard Time: 0.015465974807739258s
smallest-qnode-number Time: 0.7283401489257812s
mosaic-features Time: 0.006896018981933594s
creat-singleton-feature Time: 0.0799708366394043s
vote-by-classifier Time: 1.5287489891052246s
Qnodes to lookup: 2516
Qnodes from file: 2468
Outlier removal generates 3 lof-voted candidates
score-using-embedding Time: 17.189350843429565s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5776462554931641s
compute-tf-idf-class_count Time: 17.27912712097168s
compute-tf-idf-property_count Time: 17.46254014968872s
context-match Tim

90it [1:48:40, 96.02s/it] 

align-page-rank Time: 0.1626732349395752s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7045860290527344s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8504600524902344s
string-similarity-['jaro_winkler'] Time: 0.18341803550720215s
string-similarity-['levenshtein'] Time: 0.6991150379180908s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06084108352661133s
normalize-scores-des_cont_jaccard Time: 0.017355918884277344s
smallest-qnode-number Time: 0.14409375190734863s
mosaic-features Time: 0.009116888046264648s
creat-singleton-feature Time: 0.10659408569335938s
vote-by-classifier Time: 1.0808250904083252s
Qnodes to lookup: 2234
Qnodes from file: 2205
Outlier removal generates 9 lof-voted candidates
score-using-embedding Time: 15.720799922943115s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2196509838104248s
compute-tf-idf-class_count Time: 16.912559032440186s
compute-tf-idf-property_count Time: 17.9358069896698s
context-match Ti

91it [1:49:11, 76.38s/it]

align-page-rank Time: 0.17113590240478516s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.233382940292358s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.455204010009766s
string-similarity-['jaro_winkler'] Time: 0.6903939247131348s
string-similarity-['levenshtein'] Time: 4.893839120864868s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09914898872375488s
normalize-scores-des_cont_jaccard Time: 0.03618979454040527s
smallest-qnode-number Time: 0.2712991237640381s
mosaic-features Time: 0.01879596710205078s
creat-singleton-feature Time: 0.23002386093139648s
vote-by-classifier Time: 1.4429781436920166s
Qnodes to lookup: 3714
Qnodes from file: 3658
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 36 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 30.886070013046265s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3738100528717041s
compute-tf-idf-class_count T

92it [1:51:40, 98.35s/it]

align-page-rank Time: 0.19393324851989746s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2332358360290527s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.4627530574798584s
string-similarity-['jaro_winkler'] Time: 0.2692699432373047s
string-similarity-['levenshtein'] Time: 1.2693800926208496s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06929802894592285s
normalize-scores-des_cont_jaccard Time: 0.019989013671875s
smallest-qnode-number Time: 0.22880101203918457s
mosaic-features Time: 0.01275014877319336s
creat-singleton-feature Time: 0.12523293495178223s
vote-by-classifier Time: 1.0989511013031006s
Qnodes to lookup: 3823
Qnodes from file: 3797
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 17.417189836502075s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2410120964050293s
compute-tf-idf-class_count Time: 18.32545804977417s
compute-tf-idf-property_count Time: 18.563810110092163s
context-match Time:

93it [1:52:11, 77.91s/it]

align-page-rank Time: 0.26140618324279785s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4276108741760254s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7913129329681396s
string-similarity-['jaro_winkler'] Time: 0.38129115104675293s
string-similarity-['levenshtein'] Time: 1.3430452346801758s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05602288246154785s
normalize-scores-des_cont_jaccard Time: 0.021796703338623047s
smallest-qnode-number Time: 0.16968083381652832s
mosaic-features Time: 0.010288000106811523s
creat-singleton-feature Time: 0.12109184265136719s
vote-by-classifier Time: 0.39211201667785645s
Qnodes to lookup: 476
Qnodes from file: 452
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 13.864470958709717s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.225449800491333s
compute-tf-idf-class_count Time: 14.604729890823364s
compute-tf-idf-prope

94it [1:52:50, 66.41s/it]

align-page-rank Time: 0.0703880786895752s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3752419948577881s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2776782512664795s
string-similarity-['jaro_winkler'] Time: 0.08496570587158203s
string-similarity-['levenshtein'] Time: 0.2926509380340576s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04117178916931152s
normalize-scores-des_cont_jaccard Time: 0.013949155807495117s
smallest-qnode-number Time: 0.10075902938842773s
mosaic-features Time: 0.006215095520019531s
creat-singleton-feature Time: 0.0663909912109375s
vote-by-classifier Time: 1.5061290264129639s
Qnodes to lookup: 1898
Qnodes from file: 1885
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 15.218066215515137s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12627029418945312s
compute-tf-idf-class_count Time: 14.638818740844727s
compute-tf-idf-property_count Time: 16.6049382686615s
context-match Ti

95it [1:53:21, 55.67s/it]

align-page-rank Time: 0.0872797966003418s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.6645100116729736s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8291311264038086s
string-similarity-['jaro_winkler'] Time: 0.49228882789611816s
string-similarity-['levenshtein'] Time: 2.828523874282837s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02821183204650879s
normalize-scores-des_cont_jaccard Time: 0.013734817504882812s
smallest-qnode-number Time: 0.08992123603820801s
mosaic-features Time: 0.0064449310302734375s
creat-singleton-feature Time: 0.06344413757324219s
vote-by-classifier Time: 0.4089169502258301s
Qnodes to lookup: 832
Qnodes from file: 820
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 18.516896963119507s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12060332298278809s
compute-tf-idf-class_count Time: 19.144119024276733s
compute-tf-idf-property_count Time: 20.216213941574097s
context-match 

96it [1:53:53, 48.68s/it]

align-page-rank Time: 0.11588001251220703s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9506540298461914s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.001273155212402s
string-similarity-['jaro_winkler'] Time: 0.4199790954589844s
string-similarity-['levenshtein'] Time: 2.548480749130249s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06769371032714844s
normalize-scores-des_cont_jaccard Time: 0.028423786163330078s
smallest-qnode-number Time: 0.1717996597290039s
mosaic-features Time: 0.015185117721557617s
creat-singleton-feature Time: 0.15023374557495117s
vote-by-classifier Time: 0.39045000076293945s
Qnodes to lookup: 3377
Qnodes from file: 3317
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 20.221383094787598s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3128478527069092s
compute-tf-idf-class_count Time: 23.024675846099854s
compute-tf-idf-proper

97it [1:55:06, 55.95s/it]

align-page-rank Time: 0.24324297904968262s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.1305198669433594s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.0489132404327393s
string-similarity-['jaro_winkler'] Time: 0.452070951461792s
string-similarity-['levenshtein'] Time: 2.4657089710235596s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07434415817260742s
normalize-scores-des_cont_jaccard Time: 0.03036808967590332s
smallest-qnode-number Time: 0.28588199615478516s
mosaic-features Time: 0.015067338943481445s
creat-singleton-feature Time: 0.16017794609069824s
vote-by-classifier Time: 0.6051650047302246s
Qnodes to lookup: 4384
Qnodes from file: 4211
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 18.747506856918335s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.32582902908325195s
compute-tf-idf-class_co

98it [1:56:13, 59.13s/it]

align-page-rank Time: 0.27542901039123535s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.111649990081787s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.52138090133667s
string-similarity-['jaro_winkler'] Time: 0.6906461715698242s
string-similarity-['levenshtein'] Time: 4.756540060043335s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07463192939758301s
normalize-scores-des_cont_jaccard Time: 0.029298782348632812s
smallest-qnode-number Time: 0.2361140251159668s
mosaic-features Time: 0.015078067779541016s
creat-singleton-feature Time: 0.14942002296447754s
vote-by-classifier Time: 0.898529052734375s
Qnodes to lookup: 2572
Qnodes from file: 2555
Outlier removal generates 26 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 60 lof-voted candidates
score-using-embedding Time: 26.96370792388916s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.29695987701416016s
compute-tf-idf-class_count T

99it [1:57:25, 63.15s/it]

align-page-rank Time: 0.21152377128601074s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.833936214447021s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.786595821380615s
string-similarity-['jaro_winkler'] Time: 1.726801872253418s
string-similarity-['levenshtein'] Time: 18.502671241760254s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08458375930786133s
normalize-scores-des_cont_jaccard Time: 0.03492093086242676s
smallest-qnode-number Time: 0.2511630058288574s
mosaic-features Time: 0.02388310432434082s
creat-singleton-feature Time: 0.1774301528930664s
vote-by-classifier Time: 0.40871214866638184s
Qnodes to lookup: 3444
Qnodes from file: 3345
Outlier removal generates 41 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 25 lof-voted candidates
score-using-embedding Time: 60.47026014328003s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3586149215698242s
compute-tf-idf-class_count 

100it [1:59:23, 79.70s/it]

align-page-rank Time: 0.12732481956481934s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3411378860473633s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4284050464630127s
string-similarity-['jaro_winkler'] Time: 0.09997081756591797s
string-similarity-['levenshtein'] Time: 0.3340790271759033s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05711483955383301s
normalize-scores-des_cont_jaccard Time: 0.014907360076904297s
smallest-qnode-number Time: 0.09946393966674805s
mosaic-features Time: 0.006387948989868164s
creat-singleton-feature Time: 0.08429217338562012s
vote-by-classifier Time: 0.9781250953674316s
Qnodes to lookup: 2055
Qnodes from file: 2022
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 14.46452283859253s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3874642848968506s
compute-tf-idf-class_count Time: 15.2187979221344s
compute-tf-idf-property_count Time: 15.389459133148193s
context-match Ti

101it [1:59:57, 65.80s/it]

align-page-rank Time: 0.052459001541137695s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5180847644805908s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.186121940612793s
string-similarity-['jaro_winkler'] Time: 0.12336397171020508s
string-similarity-['levenshtein'] Time: 0.5383191108703613s
string-similarity-['jaccard:tokenizer=word'] Time: 0.029117822647094727s
normalize-scores-des_cont_jaccard Time: 0.013965845108032227s
smallest-qnode-number Time: 0.09016895294189453s
mosaic-features Time: 0.004561901092529297s
creat-singleton-feature Time: 0.06490182876586914s
vote-by-classifier Time: 1.4572629928588867s
Qnodes to lookup: 1942
Qnodes from file: 1912
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 15.037811279296875s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12311506271362305s
compute-tf-idf-class_count Time: 15.611795902252197s
compute-tf-idf-property_count Time: 15.844987869262695s
context-match

102it [2:00:25, 54.60s/it]

align-page-rank Time: 0.13372492790222168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1235368251800537s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3911359310150146s
string-similarity-['jaro_winkler'] Time: 0.24721670150756836s
string-similarity-['levenshtein'] Time: 1.6700029373168945s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05067586898803711s
normalize-scores-des_cont_jaccard Time: 0.023984909057617188s
smallest-qnode-number Time: 0.15718889236450195s
mosaic-features Time: 0.009488105773925781s
creat-singleton-feature Time: 0.1039121150970459s
vote-by-classifier Time: 0.39600372314453125s
Qnodes to lookup: 3530
Qnodes from file: 3457
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 14.86236023902893s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2149190902709961s
compute-tf-idf-class_count Time: 15.622000932693481s
compute-tf-idf-prop

103it [2:01:07, 50.69s/it]

align-page-rank Time: 0.11085391044616699s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7406349182128906s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.384286880493164s
string-similarity-['jaro_winkler'] Time: 0.13129186630249023s
string-similarity-['levenshtein'] Time: 0.7971169948577881s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02829909324645996s
normalize-scores-des_cont_jaccard Time: 0.0137939453125s
smallest-qnode-number Time: 0.0885629653930664s
mosaic-features Time: 0.005920886993408203s
creat-singleton-feature Time: 0.062193870544433594s
vote-by-classifier Time: 0.40202975273132324s
Qnodes to lookup: 1762
Qnodes from file: 1749
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 13.471421003341675s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5072391033172607s
compute-tf-idf-class_count Time: 15.297701120376587s
compute-tf-idf-property_count Time: 16.006216764450073s
context-match Tim

104it [2:01:35, 43.89s/it]

align-page-rank Time: 0.05993986129760742s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.49915289878845215s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1691367626190186s
string-similarity-['jaro_winkler'] Time: 0.1363677978515625s
string-similarity-['levenshtein'] Time: 0.6111822128295898s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02598094940185547s
normalize-scores-des_cont_jaccard Time: 0.013051033020019531s
smallest-qnode-number Time: 0.08482098579406738s
mosaic-features Time: 0.0055999755859375s
creat-singleton-feature Time: 0.06040787696838379s
vote-by-classifier Time: 0.6513040065765381s
Qnodes to lookup: 1822
Qnodes from file: 1768
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 15.237374067306519s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1127007007598877s
compute-tf-idf-class_count Time: 16.61138081550598s
compute-tf-idf-property_count Time: 15.990272998809814s
context-match T

105it [2:02:03, 39.28s/it]

align-page-rank Time: 0.2752830982208252s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.176396131515503s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.059811115264893s
string-similarity-['jaro_winkler'] Time: 1.08915376663208s
string-similarity-['levenshtein'] Time: 7.1183648109436035s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1586148738861084s
normalize-scores-des_cont_jaccard Time: 0.04589104652404785s
smallest-qnode-number Time: 0.35668396949768066s
mosaic-features Time: 0.02426004409790039s
creat-singleton-feature Time: 0.3354358673095703s
vote-by-classifier Time: 0.5140118598937988s
Qnodes to lookup: 2142
Qnodes from file: 2121
Outlier removal generates 33 lof-voted candidates
Outlier removal generates 256 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 200 lof-voted candidates
score-using-embedding Time: 41.13499402999878s
generate-reciprocal-rank-lof-graph-embedding-score Time:

106it [2:06:22, 105.00s/it]

align-page-rank Time: 0.20243310928344727s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.2425782680511475s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 28.389214992523193s
string-similarity-['jaro_winkler'] Time: 0.7685539722442627s
string-similarity-['levenshtein'] Time: 5.664712905883789s
string-similarity-['jaccard:tokenizer=word'] Time: 0.10293889045715332s
normalize-scores-des_cont_jaccard Time: 0.03907895088195801s
smallest-qnode-number Time: 0.2732200622558594s
mosaic-features Time: 0.019188880920410156s
creat-singleton-feature Time: 0.2942028045654297s
vote-by-classifier Time: 0.8867838382720947s
Qnodes to lookup: 3828
Qnodes from file: 3801
Outlier removal generates 44 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 44 lof-voted candidates
score-using-embedding Time: 54.54340410232544s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.42558717727661133s
compute-tf-idf-class_count

107it [2:08:14, 107.18s/it]

align-page-rank Time: 0.21327781677246094s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.436732769012451s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.49084997177124s
string-similarity-['jaro_winkler'] Time: 1.012113332748413s
string-similarity-['levenshtein'] Time: 7.833695888519287s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08311986923217773s
normalize-scores-des_cont_jaccard Time: 0.03630399703979492s
smallest-qnode-number Time: 0.7241599559783936s
mosaic-features Time: 0.018706798553466797s
creat-singleton-feature Time: 0.17502689361572266s
vote-by-classifier Time: 0.7710587978363037s
Qnodes to lookup: 4549
Qnodes from file: 4188
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
_centroid_of_lof: Missing 1 of 30
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 43.48613119125366s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3463568687438

108it [2:10:12, 110.51s/it]

align-page-rank Time: 0.15725922584533691s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4299468994140625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8313627243041992s
string-similarity-['jaro_winkler'] Time: 0.14239192008972168s
string-similarity-['levenshtein'] Time: 0.47425007820129395s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03904891014099121s
normalize-scores-des_cont_jaccard Time: 0.016080856323242188s
smallest-qnode-number Time: 0.09081912040710449s
mosaic-features Time: 0.007714033126831055s
creat-singleton-feature Time: 0.0892181396484375s
vote-by-classifier Time: 0.4383249282836914s
Qnodes to lookup: 2899
Qnodes from file: 2867
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 10.519605875015259s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14196395874023438s
compute-tf-idf-class_count Time: 12.01374077796936s
compute-tf-idf-property_count Time: 12.04897403717041s
context-match

109it [2:10:33, 83.69s/it] 

align-page-rank Time: 0.12134289741516113s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5810210704803467s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2086207866668701s
string-similarity-['jaro_winkler'] Time: 0.19303083419799805s
string-similarity-['levenshtein'] Time: 0.7439150810241699s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03323006629943848s
normalize-scores-des_cont_jaccard Time: 0.015311717987060547s
smallest-qnode-number Time: 0.5298020839691162s
mosaic-features Time: 0.006758928298950195s
creat-singleton-feature Time: 0.07877826690673828s
vote-by-classifier Time: 1.4424550533294678s
Qnodes to lookup: 1728
Qnodes from file: 1688
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 16.0815532207489s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1449871063232422s
compute-tf-idf-class_count Time: 16.78470492362976s
compute-tf-idf-property_count Time: 16.963881254196167s
context-match Time:

110it [2:11:03, 67.43s/it]

align-page-rank Time: 0.1140439510345459s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3624849319458008s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3062441349029541s
string-similarity-['jaro_winkler'] Time: 0.09718489646911621s
string-similarity-['levenshtein'] Time: 0.35468125343322754s
string-similarity-['jaccard:tokenizer=word'] Time: 0.054064273834228516s
normalize-scores-des_cont_jaccard Time: 0.01412200927734375s
smallest-qnode-number Time: 0.09551811218261719s
mosaic-features Time: 0.006403207778930664s
creat-singleton-feature Time: 0.07362794876098633s
vote-by-classifier Time: 0.39740896224975586s
Qnodes to lookup: 2103
Qnodes from file: 2092
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 13.528717279434204s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.15226292610168457s
compute-tf-idf-class_count Time: 15.469113111495972s
compute-tf-idf-property_count Time: 16.07233500480652s
context-matc

111it [2:11:37, 57.31s/it]

align-page-rank Time: 0.1657238006591797s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0017409324645996s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8042597770690918s
string-similarity-['jaro_winkler'] Time: 0.22705698013305664s
string-similarity-['levenshtein'] Time: 0.8509199619293213s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05396318435668945s
normalize-scores-des_cont_jaccard Time: 0.02138209342956543s
smallest-qnode-number Time: 0.15767121315002441s
mosaic-features Time: 0.010413885116577148s
creat-singleton-feature Time: 0.11334681510925293s
vote-by-classifier Time: 0.45667195320129395s
Qnodes to lookup: 2710
Qnodes from file: 2682
Outlier removal generates 8 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-

112it [2:12:06, 48.99s/it]

align-page-rank Time: 0.23915314674377441s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.45704102516174316s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.134232997894287s
string-similarity-['jaro_winkler'] Time: 0.14767217636108398s
string-similarity-['levenshtein'] Time: 0.5099480152130127s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03812813758850098s
normalize-scores-des_cont_jaccard Time: 0.01314091682434082s
smallest-qnode-number Time: 0.10314798355102539s
mosaic-features Time: 0.006114006042480469s
creat-singleton-feature Time: 0.059660911560058594s
vote-by-classifier Time: 0.9581701755523682s
Qnodes to lookup: 1676
Qnodes from file: 1658
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 14.34406304359436s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.535193920135498s
compute-tf-idf-class_count Time: 15.319626092910767s
compute-tf-idf-property_count Time: 15.433011293411255s
context-match Tim

113it [2:12:34, 42.63s/it]

align-page-rank Time: 0.1370859146118164s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7195630073547363s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.079071998596191s
string-similarity-['jaro_winkler'] Time: 0.26445698738098145s
string-similarity-['levenshtein'] Time: 3.090226888656616s
string-similarity-['jaccard:tokenizer=word'] Time: 0.029471158981323242s
normalize-scores-des_cont_jaccard Time: 0.013097047805786133s
smallest-qnode-number Time: 0.09032177925109863s
mosaic-features Time: 0.006225109100341797s
creat-singleton-feature Time: 0.062146902084350586s
vote-by-classifier Time: 0.38570165634155273s
Qnodes to lookup: 1799
Qnodes from file: 1716
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 31.353094816207886s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11563777923583984s
compute-tf-idf-class_count Time: 32.680846214294434s
compute-tf-idf-property_count Time: 32.861873149871826s
context-mat

114it [2:13:21, 44.00s/it]

align-page-rank Time: 0.14992380142211914s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5259511470794678s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7154181003570557s
string-similarity-['jaro_winkler'] Time: 0.5991790294647217s
string-similarity-['levenshtein'] Time: 2.7466068267822266s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04702305793762207s
normalize-scores-des_cont_jaccard Time: 0.020851850509643555s
smallest-qnode-number Time: 0.1645350456237793s
mosaic-features Time: 0.011435985565185547s
creat-singleton-feature Time: 0.11377787590026855s
vote-by-classifier Time: 0.982224702835083s
Qnodes to lookup: 335
Qnodes from file: 331
Outlier removal generates 31 lof-voted candidates
Outlier removal generates 31 lof-voted candidates
score-using-embedding Time: 19.51345729827881s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.24751806259155273s
compute-tf-idf-class_count Time: 21.807719945907593s
compute-tf-idf-property

115it [2:13:59, 42.13s/it]

align-page-rank Time: 0.10514378547668457s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0909678936004639s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.659174919128418s
string-similarity-['jaro_winkler'] Time: 0.3930690288543701s
string-similarity-['levenshtein'] Time: 1.5651531219482422s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04778313636779785s
normalize-scores-des_cont_jaccard Time: 0.02304387092590332s
smallest-qnode-number Time: 0.208604097366333s
mosaic-features Time: 0.01141214370727539s
creat-singleton-feature Time: 0.33089423179626465s
vote-by-classifier Time: 1.0095880031585693s
Qnodes to lookup: 3959
Qnodes from file: 3892
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 17.785353183746338s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26727795600891113s
compute-tf-idf-class_count Time: 19.465215921401978s
compute-tf-idf-property

116it [2:14:40, 41.91s/it]

align-page-rank Time: 0.23749995231628418s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.572453737258911s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.177573919296265s
string-similarity-['jaro_winkler'] Time: 0.5350401401519775s
string-similarity-['levenshtein'] Time: 2.5478010177612305s
string-similarity-['jaccard:tokenizer=word'] Time: 0.21105694770812988s
normalize-scores-des_cont_jaccard Time: 0.04874086380004883s
smallest-qnode-number Time: 0.3474156856536865s
mosaic-features Time: 0.02303600311279297s
creat-singleton-feature Time: 0.2625908851623535s
vote-by-classifier Time: 1.312849998474121s
Qnodes to lookup: 2442
Qnodes from file: 2395
Outlier removal generates 3 lof-voted candidates
Outlier removal generates 3 lof-voted candidates
Outlier removal generates 3 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 24.096287965774536s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.

117it [2:18:44, 102.58s/it]

align-page-rank Time: 0.173004150390625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.409714937210083s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7852509021759033s
string-similarity-['jaro_winkler'] Time: 0.13564419746398926s
string-similarity-['levenshtein'] Time: 0.4568362236022949s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05222916603088379s
normalize-scores-des_cont_jaccard Time: 0.015573978424072266s
smallest-qnode-number Time: 0.10668802261352539s
mosaic-features Time: 0.008012056350708008s
creat-singleton-feature Time: 0.5974521636962891s
vote-by-classifier Time: 1.03721022605896s
Qnodes to lookup: 2826
Qnodes from file: 2782
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 13.424022197723389s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13908696174621582s
compute-tf-idf-class_count Time: 14.488840818405151s
compute-tf-idf-property_count Time: 14.678973913192749s
context-match Time

118it [2:19:09, 79.08s/it] 

align-page-rank Time: 0.24111199378967285s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.76816725730896s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1456620693206787s
string-similarity-['jaro_winkler'] Time: 0.404033899307251s
string-similarity-['levenshtein'] Time: 2.0800130367279053s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06587505340576172s
normalize-scores-des_cont_jaccard Time: 0.025870084762573242s
smallest-qnode-number Time: 0.16521525382995605s
mosaic-features Time: 0.013081073760986328s
creat-singleton-feature Time: 0.13068604469299316s
vote-by-classifier Time: 0.3920478820800781s
Qnodes to lookup: 4393
Qnodes from file: 4268
Outlier removal generates 10 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
score-using-embedding Time: 19.0485360622406s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.242081880569458s
compute-tf-idf-class_count Time: 20.774999141693115s
compute-tf-idf-property_c

119it [2:20:11, 73.93s/it]

align-page-rank Time: 0.23989391326904297s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6598348617553711s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7147469520568848s
string-similarity-['jaro_winkler'] Time: 0.22112107276916504s
string-similarity-['levenshtein'] Time: 0.8584146499633789s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05645489692687988s
normalize-scores-des_cont_jaccard Time: 0.023251771926879883s
smallest-qnode-number Time: 0.17884111404418945s
mosaic-features Time: 0.010719060897827148s
creat-singleton-feature Time: 0.13510727882385254s
vote-by-classifier Time: 0.39136195182800293s
Qnodes to lookup: 4698
Qnodes from file: 4643
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 12.831680059432983s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.23663878440856934s
compute-tf-idf-class_count Time: 13.754362106323242s
compute-tf-idf-property_count Time: 14.042715787887573s
context-ma

120it [2:20:34, 58.79s/it]

align-page-rank Time: 0.15980291366577148s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8902170658111572s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.2848219871521s
string-similarity-['jaro_winkler'] Time: 0.49294590950012207s
string-similarity-['levenshtein'] Time: 3.266710042953491s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07430672645568848s
normalize-scores-des_cont_jaccard Time: 0.03115224838256836s
smallest-qnode-number Time: 0.24418258666992188s
mosaic-features Time: 0.014313936233520508s
creat-singleton-feature Time: 0.18671083450317383s
vote-by-classifier Time: 1.628047227859497s
Qnodes to lookup: 3902
Qnodes from file: 3868
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 24.336436986923218s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.31813931465148926s
compute-tf-idf-class_count

121it [2:22:34, 77.28s/it]

align-page-rank Time: 3.7777230739593506s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 19.002207040786743s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 46.524985790252686s
string-similarity-['jaro_winkler'] Time: 2.8668529987335205s
string-similarity-['levenshtein'] Time: 19.715524911880493s
string-similarity-['jaccard:tokenizer=word'] Time: 0.6720750331878662s
normalize-scores-des_cont_jaccard Time: 0.14695286750793457s
smallest-qnode-number Time: 3.8176090717315674s
mosaic-features Time: 0.08831620216369629s
creat-singleton-feature Time: 1.1254560947418213s
vote-by-classifier Time: 0.5093870162963867s
Qnodes to lookup: 23425
Qnodes from file: 22795
Outlier removal generates 98 lof-voted candidates
score-using-embedding Time: 114.81900191307068s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.044363021850586s
compute-tf-idf-class_count Time: 120.80511713027954s
compute-tf-idf-property_count Time: 121.69296288490295s
context-match Time

122it [2:26:20, 121.71s/it]

align-page-rank Time: 0.16692805290222168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3227500915527344s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.40475010871887207s
string-similarity-['jaro_winkler'] Time: 0.10072898864746094s
string-similarity-['levenshtein'] Time: 0.4555063247680664s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05864763259887695s
normalize-scores-des_cont_jaccard Time: 0.014173030853271484s
smallest-qnode-number Time: 0.14937901496887207s
mosaic-features Time: 0.006287813186645508s
creat-singleton-feature Time: 0.06772613525390625s
vote-by-classifier Time: 0.39850807189941406s
Qnodes to lookup: 1913
Qnodes from file: 1892
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 13.32751727104187s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13240599632263184s
compute-tf-idf-class_count Time: 14.96882939338684s
compute-tf-idf-property_count Time: 15.158230066299438s
context-match 

123it [2:26:51, 94.63s/it] 

align-page-rank Time: 0.18681597709655762s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.38605713844299316s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8029932975769043s
string-similarity-['jaro_winkler'] Time: 0.12481093406677246s
string-similarity-['levenshtein'] Time: 0.3675801753997803s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03441476821899414s
normalize-scores-des_cont_jaccard Time: 0.012833118438720703s
smallest-qnode-number Time: 0.5141119956970215s
mosaic-features Time: 0.00745701789855957s
creat-singleton-feature Time: 0.0785219669342041s
vote-by-classifier Time: 0.9401650428771973s
Qnodes to lookup: 2708
Qnodes from file: 2668
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 14.31815505027771s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.416165828704834s
compute-tf-idf-class_count Time: 15.264094829559326s
compute-tf-idf-property_count Time: 15.438646078109741s
context-match Time:

124it [2:27:20, 74.83s/it]

align-page-rank Time: 0.20446205139160156s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8858602046966553s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.0922718048095703s
string-similarity-['jaro_winkler'] Time: 0.4926290512084961s
string-similarity-['levenshtein'] Time: 2.0839407444000244s
string-similarity-['jaccard:tokenizer=word'] Time: 0.18489480018615723s
normalize-scores-des_cont_jaccard Time: 0.04077577590942383s
smallest-qnode-number Time: 0.5982940196990967s
mosaic-features Time: 0.020851850509643555s
creat-singleton-feature Time: 0.2728567123413086s
vote-by-classifier Time: 1.3960981369018555s
Qnodes to lookup: 7121
Qnodes from file: 7030
Outlier removal generates 49 lof-voted candidates
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 55 lof-voted candidates
score-using-embedding Time: 20.981395959854126s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.440814733505249s
compute-tf-idf-class_count

125it [2:28:54, 80.65s/it]

align-page-rank Time: 0.16492390632629395s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5259549617767334s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.05070424079895s
string-similarity-['jaro_winkler'] Time: 0.39885830879211426s
string-similarity-['levenshtein'] Time: 1.851613998413086s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09044718742370605s
normalize-scores-des_cont_jaccard Time: 0.030657052993774414s
smallest-qnode-number Time: 0.24669599533081055s
mosaic-features Time: 0.014237165451049805s
creat-singleton-feature Time: 0.16390275955200195s
vote-by-classifier Time: 0.40248966217041016s
Qnodes to lookup: 2878
Qnodes from file: 2819
Outlier removal generates 55 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
Outlier remo

126it [2:29:23, 65.20s/it]

align-page-rank Time: 0.2016141414642334s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5642170906066895s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.051419258117676s
string-similarity-['jaro_winkler'] Time: 0.38879895210266113s
string-similarity-['levenshtein'] Time: 2.693004846572876s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0351710319519043s
normalize-scores-des_cont_jaccard Time: 0.014319181442260742s
smallest-qnode-number Time: 0.14450716972351074s
mosaic-features Time: 0.0068149566650390625s
creat-singleton-feature Time: 0.08774280548095703s
vote-by-classifier Time: 0.7735743522644043s
Qnodes to lookup: 920
Qnodes from file: 913
Outlier removal generates 14 lof-voted candidates
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 19.015977144241333s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6001198291778564s
compute-tf-idf-class_count Time: 20.90275526046753s
compute-tf-idf-property_cou

127it [2:30:08, 58.99s/it]

align-page-rank Time: 0.19025897979736328s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8793268203735352s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.269634962081909s
string-similarity-['jaro_winkler'] Time: 0.40181398391723633s
string-similarity-['levenshtein'] Time: 1.873878002166748s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08954024314880371s
normalize-scores-des_cont_jaccard Time: 0.029368877410888672s
smallest-qnode-number Time: 0.16087102890014648s
mosaic-features Time: 0.014734983444213867s
creat-singleton-feature Time: 0.14455890655517578s
vote-by-classifier Time: 0.37559008598327637s
Qnodes to lookup: 3312
Qnodes from file: 3264
Outlier removal generates 2 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
score-using-embedding Time: 17.252128839492798s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2684638500213623s
compute-tf-idf-class_count Time: 18.233371019363403s
compute-tf-idf-prope

128it [2:31:02, 57.64s/it]

align-page-rank Time: 0.09638309478759766s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.22402215003967285s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4434208869934082s
string-similarity-['jaro_winkler'] Time: 0.06474494934082031s
string-similarity-['levenshtein'] Time: 0.2934610843658447s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03996109962463379s
normalize-scores-des_cont_jaccard Time: 0.010902881622314453s
smallest-qnode-number Time: 0.09043216705322266s
mosaic-features Time: 0.003982067108154297s
creat-singleton-feature Time: 0.04127001762390137s
vote-by-classifier Time: 0.8500649929046631s
Qnodes to lookup: 1104
Qnodes from file: 1091
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 15.24315333366394s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.10016703605651855s
compute-tf-idf-class_count Time: 15.936866044998169s
compute-tf-idf-property_count Time: 16.095162868499756s
context-mat

129it [2:31:30, 48.74s/it]

align-page-rank Time: 0.15185999870300293s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.105325698852539s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7013020515441895s
string-similarity-['jaro_winkler'] Time: 0.23320603370666504s
string-similarity-['levenshtein'] Time: 1.3722381591796875s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03234219551086426s
normalize-scores-des_cont_jaccard Time: 0.015637874603271484s
smallest-qnode-number Time: 0.4239232540130615s
mosaic-features Time: 0.007053852081298828s
creat-singleton-feature Time: 0.07247781753540039s
vote-by-classifier Time: 0.5994808673858643s
Qnodes to lookup: 1891
Qnodes from file: 1877
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 17.029778957366943s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12952899932861328s
compute-tf-idf-class_count Time: 17.652312994003296s
compute-tf-idf-property_count Time: 17.82256531715393s
context-match T

130it [2:32:00, 42.94s/it]

align-page-rank Time: 0.12771296501159668s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.24797606468200684s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.43423891067504883s
string-similarity-['jaro_winkler'] Time: 0.08397698402404785s
string-similarity-['levenshtein'] Time: 0.27832508087158203s
string-similarity-['jaccard:tokenizer=word'] Time: 0.024714946746826172s
normalize-scores-des_cont_jaccard Time: 0.013155221939086914s
smallest-qnode-number Time: 0.08633017539978027s
mosaic-features Time: 0.005242109298706055s
creat-singleton-feature Time: 0.061248064041137695s
vote-by-classifier Time: 0.6735110282897949s
Qnodes to lookup: 1771
Qnodes from file: 1737
Outlier removal generates 3 lof-voted candidates
score-using-embedding Time: 13.998162031173706s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1294419765472412s
compute-tf-idf-class_count Time: 14.988127946853638s
compute-tf-idf-property_count Time: 15.012115955352783s
context-

131it [2:32:29, 38.73s/it]

align-page-rank Time: 0.05223798751831055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4122040271759033s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8895509243011475s
string-similarity-['jaro_winkler'] Time: 0.140822172164917s
string-similarity-['levenshtein'] Time: 0.3989901542663574s
string-similarity-['jaccard:tokenizer=word'] Time: 0.036508798599243164s
normalize-scores-des_cont_jaccard Time: 0.014642000198364258s
smallest-qnode-number Time: 0.10804462432861328s
mosaic-features Time: 0.006930112838745117s
creat-singleton-feature Time: 0.08188486099243164s
vote-by-classifier Time: 0.7542130947113037s
Qnodes to lookup: 2599
Qnodes from file: 2521
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 13.51057505607605s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.44394755363464355s
compute-tf-idf-class_count Time: 15.223567962646484s
compute-tf-idf-property_count Time: 15.404003143310547s
context-match 

132it [2:32:55, 35.18s/it]

align-page-rank Time: 0.0957801342010498s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8408780097961426s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6091961860656738s
string-similarity-['jaro_winkler'] Time: 0.18236017227172852s
string-similarity-['levenshtein'] Time: 1.136167287826538s
string-similarity-['jaccard:tokenizer=word'] Time: 0.034998178482055664s
normalize-scores-des_cont_jaccard Time: 0.014179706573486328s
smallest-qnode-number Time: 0.09789514541625977s
mosaic-features Time: 0.005751848220825195s
creat-singleton-feature Time: 0.06465625762939453s
vote-by-classifier Time: 0.9858670234680176s
Qnodes to lookup: 1603
Qnodes from file: 1584
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 15.402467727661133s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14770293235778809s
compute-tf-idf-class_count Time: 17.03733992576599s
compute-tf-idf-property_count Time: 17.188974142074585s
context-match

133it [2:33:26, 33.87s/it]

align-page-rank Time: 0.32216715812683105s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.2763447761535645s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.806335926055908s
string-similarity-['jaro_winkler'] Time: 1.191230058670044s
string-similarity-['levenshtein'] Time: 7.498449802398682s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06374907493591309s
normalize-scores-des_cont_jaccard Time: 0.028873920440673828s
smallest-qnode-number Time: 0.2404038906097412s
mosaic-features Time: 0.017447948455810547s
creat-singleton-feature Time: 0.15370488166809082s
vote-by-classifier Time: 0.4082460403442383s
Qnodes to lookup: 366
Qnodes from file: 365
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 30.83075213432312s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5080227851867676s
compute-tf-idf-class_count Time:

134it [2:34:13, 37.72s/it]

align-page-rank Time: 0.17079401016235352s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.210878133773804s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.51653504371643s
string-similarity-['jaro_winkler'] Time: 0.7749340534210205s
string-similarity-['levenshtein'] Time: 8.975060939788818s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0869300365447998s
normalize-scores-des_cont_jaccard Time: 0.029227018356323242s
smallest-qnode-number Time: 0.23014283180236816s
mosaic-features Time: 0.010575056076049805s
creat-singleton-feature Time: 0.15133309364318848s
vote-by-classifier Time: 0.4683349132537842s
Qnodes to lookup: 3087
Qnodes from file: 3066
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 37.78818368911743s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5193147659301758s
compute-tf-idf-class_count T

135it [2:36:09, 61.19s/it]

align-page-rank Time: 0.2629101276397705s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.36519885063171387s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3637676239013672s
string-similarity-['jaro_winkler'] Time: 0.11901187896728516s
string-similarity-['levenshtein'] Time: 0.30866408348083496s
string-similarity-['jaccard:tokenizer=word'] Time: 0.034956932067871094s
normalize-scores-des_cont_jaccard Time: 0.015506982803344727s
smallest-qnode-number Time: 0.10704207420349121s
mosaic-features Time: 0.007252931594848633s
creat-singleton-feature Time: 0.08945083618164062s
vote-by-classifier Time: 1.43440580368042s
Qnodes to lookup: 2321
Qnodes from file: 2314
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 12.236825942993164s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14638400077819824s
compute-tf-idf-class_count Time: 13.498739957809448s
compute-tf-idf-property_count Time: 13.034005165100098s
context-mat

136it [2:36:34, 50.33s/it]

align-page-rank Time: 0.16255688667297363s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8152060508728027s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1938741207122803s
string-similarity-['jaro_winkler'] Time: 0.2463212013244629s
string-similarity-['levenshtein'] Time: 0.9115321636199951s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08906197547912598s
normalize-scores-des_cont_jaccard Time: 0.02401566505432129s
smallest-qnode-number Time: 0.21288180351257324s
mosaic-features Time: 0.012892007827758789s
creat-singleton-feature Time: 0.14519476890563965s
vote-by-classifier Time: 0.7526988983154297s
Qnodes to lookup: 4823
Qnodes from file: 4750
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 15.499333143234253s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.28149890899658203s
compute-tf-idf-class_count Time: 16.365429162979126s
compute-tf-idf-property_count Time: 16.72980523109436s
context-match 

137it [2:37:12, 46.68s/it]

align-page-rank Time: 0.17143607139587402s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.32413530349731445s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7014951705932617s
string-similarity-['jaro_winkler'] Time: 0.09848976135253906s
string-similarity-['levenshtein'] Time: 0.3706529140472412s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03499603271484375s
normalize-scores-des_cont_jaccard Time: 0.013850927352905273s
smallest-qnode-number Time: 0.5171608924865723s
mosaic-features Time: 0.005615949630737305s
creat-singleton-feature Time: 0.05802798271179199s
vote-by-classifier Time: 0.7850611209869385s
Qnodes to lookup: 2007
Qnodes from file: 1967
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 14.34656310081482s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12180304527282715s
compute-tf-idf-class_count Time: 15.694612979888916s
compute-tf-idf-property_count Time: 15.940097093582153s
context-matc

138it [2:37:39, 40.87s/it]

align-page-rank Time: 0.10727119445800781s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7482211589813232s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7592599391937256s
string-similarity-['jaro_winkler'] Time: 0.3268930912017822s
string-similarity-['levenshtein'] Time: 2.494216203689575s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03245997428894043s
normalize-scores-des_cont_jaccard Time: 0.017552852630615234s
smallest-qnode-number Time: 0.08566999435424805s
mosaic-features Time: 0.00728297233581543s
creat-singleton-feature Time: 0.06795072555541992s
vote-by-classifier Time: 0.8937711715698242s
Qnodes to lookup: 723
Qnodes from file: 709
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 17.1949622631073s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13550114631652832s
compute-tf-idf-class_count Time: 17.581827878952026s
compute-tf-idf-property_count Time: 17.78091812133789s
context-match Time: 

139it [2:38:12, 38.38s/it]

align-page-rank Time: 0.15268778800964355s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.468886137008667s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.21090102195739746s
string-similarity-['jaro_winkler'] Time: 0.12103390693664551s
string-similarity-['levenshtein'] Time: 0.6641702651977539s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0402369499206543s
normalize-scores-des_cont_jaccard Time: 0.012858867645263672s
smallest-qnode-number Time: 0.08443188667297363s
mosaic-features Time: 0.0054662227630615234s
creat-singleton-feature Time: 0.05983614921569824s
vote-by-classifier Time: 0.4012291431427002s
Qnodes to lookup: 1549
Qnodes from file: 1538
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 11.352466821670532s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5166640281677246s
compute-tf-idf-class_count Time: 13.195519924163818s
compute-tf-idf-property_count Time: 13.175057888031006s
context-match

140it [2:38:38, 34.60s/it]

align-page-rank Time: 0.22280120849609375s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.536510944366455s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.202125072479248s
string-similarity-['jaro_winkler'] Time: 0.6701350212097168s
string-similarity-['levenshtein'] Time: 3.907607078552246s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07932901382446289s
normalize-scores-des_cont_jaccard Time: 0.03310203552246094s
smallest-qnode-number Time: 0.2482438087463379s
mosaic-features Time: 0.017020225524902344s
creat-singleton-feature Time: 0.1789100170135498s
vote-by-classifier Time: 0.45807623863220215s
Qnodes to lookup: 4676
Qnodes from file: 4598
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 26.211936950683594s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.43413519859313965s
compute-tf-idf-class_count

141it [2:41:05, 68.26s/it]

align-page-rank Time: 0.43819475173950195s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8443899154663086s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.175450086593628s
string-similarity-['jaro_winkler'] Time: 0.7806739807128906s
string-similarity-['levenshtein'] Time: 3.942554235458374s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20467495918273926s
normalize-scores-des_cont_jaccard Time: 0.0526890754699707s
smallest-qnode-number Time: 0.48925185203552246s
mosaic-features Time: 0.02492809295654297s
creat-singleton-feature Time: 0.2958028316497803s
vote-by-classifier Time: 0.4256570339202881s
Qnodes to lookup: 11733
Qnodes from file: 11468
Outlier removal generates 103 lof-voted candidates
score-using-embedding Time: 28.140923023223877s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5675048828125s
compute-tf-idf-class_count Time: 30.535450220108032s
compute-tf-idf-property_count Time: 30.365219831466675s
context-match Time:

142it [2:42:44, 77.74s/it]

align-page-rank Time: 0.19525814056396484s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.760380983352661s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.394672870635986s
string-similarity-['jaro_winkler'] Time: 0.5742049217224121s
string-similarity-['levenshtein'] Time: 3.532417058944702s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06238603591918945s
normalize-scores-des_cont_jaccard Time: 0.02799391746520996s
smallest-qnode-number Time: 0.3083813190460205s
mosaic-features Time: 0.01361989974975586s
creat-singleton-feature Time: 0.19202804565429688s
vote-by-classifier Time: 1.5548601150512695s
Qnodes to lookup: 3416
Qnodes from file: 3367
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 8 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 25.674909830093384s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.29392480850219727s
compute-tf-idf-class_count T

143it [2:44:01, 77.37s/it]

align-page-rank Time: 0.10661196708679199s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3320748805999756s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7842578887939453s
string-similarity-['jaro_winkler'] Time: 0.11338090896606445s
string-similarity-['levenshtein'] Time: 0.36055994033813477s
string-similarity-['jaccard:tokenizer=word'] Time: 0.038928985595703125s
normalize-scores-des_cont_jaccard Time: 0.01663517951965332s
smallest-qnode-number Time: 0.09219503402709961s
mosaic-features Time: 0.007853031158447266s
creat-singleton-feature Time: 0.08612728118896484s
vote-by-classifier Time: 0.6296670436859131s
Qnodes to lookup: 2850
Qnodes from file: 2814
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 13.121810913085938s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.15605998039245605s
compute-tf-idf-class_count Time: 14.01876711845398s
compute-tf-idf-property_count Time: 14.186047077178955s
context-matc

144it [2:44:25, 61.36s/it]

align-page-rank Time: 0.2042551040649414s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.697519063949585s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.354259967803955s
string-similarity-['jaro_winkler'] Time: 0.6113388538360596s
string-similarity-['levenshtein'] Time: 3.2811529636383057s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12018203735351562s
normalize-scores-des_cont_jaccard Time: 0.04138016700744629s
smallest-qnode-number Time: 0.3043029308319092s
mosaic-features Time: 0.021610260009765625s
creat-singleton-feature Time: 0.22404885292053223s
vote-by-classifier Time: 0.5337920188903809s
Qnodes to lookup: 7025
Qnodes from file: 6900
Outlier removal generates 24 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 24.903941869735718s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.38444995880126953s
compute-tf-idf-class_count

145it [2:46:28, 79.78s/it]

align-page-rank Time: 0.1420001983642578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7534761428833008s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.239776134490967s
string-similarity-['jaro_winkler'] Time: 0.31714510917663574s
string-similarity-['levenshtein'] Time: 1.6472558975219727s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06289315223693848s
normalize-scores-des_cont_jaccard Time: 0.02325606346130371s
smallest-qnode-number Time: 0.23346805572509766s
mosaic-features Time: 0.011475801467895508s
creat-singleton-feature Time: 0.12827110290527344s
vote-by-classifier Time: 0.7507319450378418s
Qnodes to lookup: 2712
Qnodes from file: 2695
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 17.032766103744507s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2487781047821045s
compute-tf-idf-class_count Time: 17.690385341644287s
compute-tf-idf-property_count Time: 17.941795110702515s
context-match T

146it [2:46:56, 64.49s/it]

align-page-rank Time: 0.1513838768005371s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5369970798492432s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3007771968841553s
string-similarity-['jaro_winkler'] Time: 0.13747501373291016s
string-similarity-['levenshtein'] Time: 0.6471219062805176s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027428865432739258s
normalize-scores-des_cont_jaccard Time: 0.013080120086669922s
smallest-qnode-number Time: 0.08697986602783203s
mosaic-features Time: 0.00551915168762207s
creat-singleton-feature Time: 0.2035672664642334s
vote-by-classifier Time: 0.3939659595489502s
Qnodes to lookup: 1415
Qnodes from file: 1400
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 14.388715028762817s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.48233699798583984s
compute-tf-idf-class_count Time: 15.27831506729126s
compute-tf-idf-property_count Time: 15.336812019348145s
context-match 

147it [2:47:25, 53.63s/it]

align-page-rank Time: 0.276303768157959s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.077942848205566s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.185353994369507s
string-similarity-['jaro_winkler'] Time: 0.9783987998962402s
string-similarity-['levenshtein'] Time: 5.998267889022827s
string-similarity-['jaccard:tokenizer=word'] Time: 0.17264413833618164s
normalize-scores-des_cont_jaccard Time: 0.056854963302612305s
smallest-qnode-number Time: 0.4210021495819092s
mosaic-features Time: 0.029603958129882812s
creat-singleton-feature Time: 0.3186049461364746s
vote-by-classifier Time: 0.8164570331573486s
Qnodes to lookup: 5133
Qnodes from file: 5080
Outlier removal generates 39 lof-voted candidates
Outlier removal generates 30 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 33.389660358428955s
generate-reciprocal-rank-lof-graph-embedding-score Time:

148it [2:51:06, 103.96s/it]

align-page-rank Time: 0.13689613342285156s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.39769673347473145s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7693731784820557s
string-similarity-['jaro_winkler'] Time: 0.11261534690856934s
string-similarity-['levenshtein'] Time: 0.5227401256561279s
string-similarity-['jaccard:tokenizer=word'] Time: 0.026392698287963867s
normalize-scores-des_cont_jaccard Time: 0.012222051620483398s
smallest-qnode-number Time: 0.09318780899047852s
mosaic-features Time: 0.005391120910644531s
creat-singleton-feature Time: 0.059783935546875s
vote-by-classifier Time: 1.0162432193756104s
Qnodes to lookup: 1822
Qnodes from file: 1796
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 10.362465620040894s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11018896102905273s
compute-tf-idf-class_count Time: 11.501537084579468s
compute-tf-idf-property_count Time: 11.674855947494507s
context-match 

149it [2:51:27, 79.12s/it] 

align-page-rank Time: 0.2288219928741455s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8151519298553467s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.823108911514282s
string-similarity-['jaro_winkler'] Time: 0.5637800693511963s
string-similarity-['levenshtein'] Time: 3.311196804046631s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15793704986572266s
normalize-scores-des_cont_jaccard Time: 0.045145273208618164s
smallest-qnode-number Time: 0.34468889236450195s
mosaic-features Time: 0.021607160568237305s
creat-singleton-feature Time: 0.2347698211669922s
vote-by-classifier Time: 0.4127180576324463s
Qnodes to lookup: 6268
Qnodes from file: 6166
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 10 lof-voted candidates
Outlier removal generates 9 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
score-using-embedding Time: 24.02840518951416s
generate-reciprocal-rank-lof-graph-embedding-score Time:

150it [2:54:05, 102.70s/it]

align-page-rank Time: 0.2285170555114746s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1372308731079102s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.4595420360565186s
string-similarity-['jaro_winkler'] Time: 0.19383811950683594s
string-similarity-['levenshtein'] Time: 1.4637141227722168s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028727054595947266s
normalize-scores-des_cont_jaccard Time: 0.013193130493164062s
smallest-qnode-number Time: 0.08913111686706543s
mosaic-features Time: 0.006006956100463867s
creat-singleton-feature Time: 0.07147789001464844s
vote-by-classifier Time: 0.4117720127105713s
Qnodes to lookup: 1852
Qnodes from file: 1776
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 13.829141855239868s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.40845394134521484s
compute-tf-idf-class_count Time: 14.691519021987915s
compute-tf-idf-property_count Time: 14.81185531616211s
context-matc

151it [2:54:29, 79.21s/it] 

align-page-rank Time: 0.20606684684753418s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.4025752544403076s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.388460159301758s
string-similarity-['jaro_winkler'] Time: 0.5612607002258301s
string-similarity-['levenshtein'] Time: 3.361314058303833s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09572219848632812s
normalize-scores-des_cont_jaccard Time: 0.039617061614990234s
smallest-qnode-number Time: 0.2893369197845459s
mosaic-features Time: 0.017757177352905273s
creat-singleton-feature Time: 0.21203994750976562s
vote-by-classifier Time: 0.4100968837738037s
Qnodes to lookup: 3575
Qnodes from file: 3453
Outlier removal generates 8 lof-voted candidates
Outlier removal generates 9 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 25.311400890350342s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3847360610961914s
compute-tf-idf-class_count 

152it [2:55:45, 78.26s/it]

align-page-rank Time: 0.171464204788208s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.3015296459198s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.476516962051392s
string-similarity-['jaro_winkler'] Time: 0.8632180690765381s
string-similarity-['levenshtein'] Time: 4.9671759605407715s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08026909828186035s
normalize-scores-des_cont_jaccard Time: 0.03409910202026367s
smallest-qnode-number Time: 0.29736328125s
mosaic-features Time: 0.018825054168701172s
creat-singleton-feature Time: 0.1983330249786377s
vote-by-classifier Time: 0.8372926712036133s
Qnodes to lookup: 5096
Qnodes from file: 4885
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 53 lof-voted candidates
_centroid_of_lof: Missing 1 of 61
Outlier removal generates 36 lof-voted candidates
score-using-embedding Time: 28.252427101135254s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3476071357727051s
co

153it [2:57:21, 83.32s/it]

align-page-rank Time: 0.09160017967224121s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4593048095703125s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.298133134841919s
string-similarity-['jaro_winkler'] Time: 0.13212108612060547s
string-similarity-['levenshtein'] Time: 0.7808699607849121s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028528213500976562s
normalize-scores-des_cont_jaccard Time: 0.01280832290649414s
smallest-qnode-number Time: 0.09254002571105957s
mosaic-features Time: 0.005612850189208984s
creat-singleton-feature Time: 0.06438922882080078s
vote-by-classifier Time: 0.4085569381713867s
Qnodes to lookup: 1169
Qnodes from file: 1145
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 13.42793083190918s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11887526512145996s
compute-tf-idf-class_count Time: 14.941563844680786s
compute-tf-idf-property_count Time: 14.131136178970337s
context-match

154it [2:57:46, 66.07s/it]

align-page-rank Time: 0.15723705291748047s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.749826192855835s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 16.41466999053955s
string-similarity-['jaro_winkler'] Time: 1.387618064880371s
string-similarity-['levenshtein'] Time: 13.56185507774353s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07907891273498535s
normalize-scores-des_cont_jaccard Time: 0.03064703941345215s
smallest-qnode-number Time: 0.2743349075317383s
mosaic-features Time: 0.020257949829101562s
creat-singleton-feature Time: 0.1624908447265625s
vote-by-classifier Time: 0.4324049949645996s
Qnodes to lookup: 1514
Qnodes from file: 1478
Outlier removal generates 538 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
Outlier removal generates 131 lof-voted candidates
score-using-embedding Time: 53.890536308288574s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.31349992752075195s
compute-tf-idf-class_coun

155it [2:59:27, 76.56s/it]

align-page-rank Time: 0.16283082962036133s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.45485401153564453s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7253730297088623s
string-similarity-['jaro_winkler'] Time: 0.1187748908996582s
string-similarity-['levenshtein'] Time: 0.5081160068511963s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03162074089050293s
normalize-scores-des_cont_jaccard Time: 0.014361858367919922s
smallest-qnode-number Time: 0.0935521125793457s
mosaic-features Time: 0.005951881408691406s
creat-singleton-feature Time: 0.06773781776428223s
vote-by-classifier Time: 0.836482048034668s
Qnodes to lookup: 1698
Qnodes from file: 1666
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 13.403165102005005s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4641537666320801s
compute-tf-idf-class_count Time: 14.310471773147583s
compute-tf-idf-property_count Time: 14.467748880386353s
context-match T

156it [2:59:53, 61.27s/it]

align-page-rank Time: 0.1411440372467041s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9834601879119873s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.334406852722168s
string-similarity-['jaro_winkler'] Time: 0.19771862030029297s
string-similarity-['levenshtein'] Time: 0.9351229667663574s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05171394348144531s
normalize-scores-des_cont_jaccard Time: 0.01651930809020996s
smallest-qnode-number Time: 0.1841590404510498s
mosaic-features Time: 0.01056981086730957s
creat-singleton-feature Time: 0.12618803977966309s
vote-by-classifier Time: 1.5084478855133057s
Qnodes to lookup: 3722
Qnodes from file: 3687
Outlier removal generates 5 lof-voted candidates
score-using-embedding Time: 16.17604422569275s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.24124383926391602s
compute-tf-idf-class_count Time: 16.963276147842407s
compute-tf-idf-property_count Time: 17.188692092895508s
context-match Time

157it [3:00:23, 51.96s/it]

align-page-rank Time: 0.17024016380310059s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.049274206161499s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.015500068664551s
string-similarity-['jaro_winkler'] Time: 0.22686123847961426s
string-similarity-['levenshtein'] Time: 0.975938081741333s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05518913269042969s
normalize-scores-des_cont_jaccard Time: 0.02232980728149414s
smallest-qnode-number Time: 0.16327881813049316s
mosaic-features Time: 0.010654926300048828s
creat-singleton-feature Time: 0.11287498474121094s
vote-by-classifier Time: 0.3901069164276123s
Qnodes to lookup: 3007
Qnodes from file: 2962
Outlier removal generates 15 lof-voted candidates
_centroid_of_lof: Missing 1 of 46
Outlier removal generates 27 lof-voted candidates
score-using-embedding Time: 15.805922985076904s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.25737500190734863s
compute-tf-idf-class_count Time: 17.0928

158it [3:01:08, 49.79s/it]

align-page-rank Time: 0.31526708602905273s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.296190977096558s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.198735237121582s
string-similarity-['jaro_winkler'] Time: 0.8441610336303711s
string-similarity-['levenshtein'] Time: 4.446259021759033s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12744498252868652s
normalize-scores-des_cont_jaccard Time: 0.03804206848144531s
smallest-qnode-number Time: 0.2912921905517578s
mosaic-features Time: 0.020287275314331055s
creat-singleton-feature Time: 0.20912981033325195s
vote-by-classifier Time: 0.4555220603942871s
Qnodes to lookup: 4671
Qnodes from file: 4502
Outlier removal generates 10 lof-voted candidates
_centroid_of_lof: Missing 1 of 40
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 37 lof-voted candidates
score-using-embedding Time: 31.346611976623535s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3928050994

159it [3:04:21, 92.88s/it]

align-page-rank Time: 0.275317907333374s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8987069129943848s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.3869829177856445s
string-similarity-['jaro_winkler'] Time: 0.7243750095367432s
string-similarity-['levenshtein'] Time: 3.575350046157837s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08061909675598145s
normalize-scores-des_cont_jaccard Time: 0.03419899940490723s
smallest-qnode-number Time: 0.3014090061187744s
mosaic-features Time: 0.020336151123046875s
creat-singleton-feature Time: 0.19472694396972656s
vote-by-classifier Time: 1.4865269660949707s
Qnodes to lookup: 2602
Qnodes from file: 2575
Outlier removal generates 49 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
score-using-embedding Time: 24.432048797607422s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.39339494705200195s
compute-tf-idf-class_count

160it [3:06:36, 105.44s/it]

align-page-rank Time: 0.16669321060180664s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.791534185409546s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.874647855758667s
string-similarity-['jaro_winkler'] Time: 0.42121386528015137s
string-similarity-['levenshtein'] Time: 2.0257091522216797s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07851696014404297s
normalize-scores-des_cont_jaccard Time: 0.034317970275878906s
smallest-qnode-number Time: 0.2587289810180664s
mosaic-features Time: 0.016483068466186523s
creat-singleton-feature Time: 0.17327284812927246s
vote-by-classifier Time: 0.5918078422546387s
Qnodes to lookup: 3856
Qnodes from file: 3799
Outlier removal generates 2 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 16.938154935836792s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3331310749053955s
compute-tf-idf-class_coun

161it [3:07:37, 92.19s/it] 

align-page-rank Time: 0.17508602142333984s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.177363157272339s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.118607044219971s
string-similarity-['jaro_winkler'] Time: 0.287463903427124s
string-similarity-['levenshtein'] Time: 1.8360118865966797s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05266284942626953s
normalize-scores-des_cont_jaccard Time: 0.021641016006469727s
smallest-qnode-number Time: 0.15998125076293945s
mosaic-features Time: 0.009477853775024414s
creat-singleton-feature Time: 0.10466575622558594s
vote-by-classifier Time: 0.8515689373016357s
Qnodes to lookup: 2004
Qnodes from file: 1977
Outlier removal generates 3 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-emb

162it [3:08:12, 74.97s/it]

align-page-rank Time: 0.15883088111877441s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2709758281707764s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.4758472442626953s
string-similarity-['jaro_winkler'] Time: 0.34370923042297363s
string-similarity-['levenshtein'] Time: 1.195188045501709s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07195591926574707s
normalize-scores-des_cont_jaccard Time: 0.029886245727539062s
smallest-qnode-number Time: 0.32164573669433594s
mosaic-features Time: 0.01417398452758789s
creat-singleton-feature Time: 0.16195988655090332s
vote-by-classifier Time: 0.5516109466552734s
Qnodes to lookup: 3221
Qnodes from file: 3167
Outlier removal generates 10 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
Outlier remo

163it [3:08:42, 61.48s/it]

align-page-rank Time: 0.15517830848693848s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5986690521240234s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8252100944519043s
string-similarity-['jaro_winkler'] Time: 0.18571805953979492s
string-similarity-['levenshtein'] Time: 0.5753600597381592s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11551094055175781s
normalize-scores-des_cont_jaccard Time: 0.025121212005615234s
smallest-qnode-number Time: 0.202301025390625s
mosaic-features Time: 0.011534690856933594s
creat-singleton-feature Time: 0.13340210914611816s
vote-by-classifier Time: 0.9547240734100342s
Qnodes to lookup: 1516
Qnodes from file: 1490
Outlier removal generates 7 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 116
Outlier removal generates 58 lof-voted candidates
score-using-embedding Time: 15.798074007034302s
generate-reciprocal-rank-lof-graph-embedding-scor

164it [3:09:22, 54.86s/it]

align-page-rank Time: 0.13378310203552246s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.33646607398986816s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2532789707183838s
string-similarity-['jaro_winkler'] Time: 0.1373281478881836s
string-similarity-['levenshtein'] Time: 0.5114419460296631s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02815699577331543s
normalize-scores-des_cont_jaccard Time: 0.014475107192993164s
smallest-qnode-number Time: 0.0941309928894043s
mosaic-features Time: 0.005816221237182617s
creat-singleton-feature Time: 0.22188639640808105s
vote-by-classifier Time: 0.8599438667297363s
Qnodes to lookup: 615
Qnodes from file: 603
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 14.035248041152954s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14024615287780762s
compute-tf-idf-class_count Time: 15.067290782928467s
compute-tf-idf-property_count Time: 15.074383974075317s
context-match 

165it [3:09:50, 46.86s/it]

Command: align-page-rank
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl/cli/align-page-rank.py", line 29, in run
    df = pd.read_csv(kwargs['input_file'], dtype=object)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 610, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 462, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 819, in __init__
    self._engine = self._make_engine(self.engine)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 1050, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/Users/amandeep/Github/table-linker/tl_env/lib/

166it [3:10:13, 39.72s/it]

align-page-rank Time: 0.343641996383667s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.938534259796143s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.035402059555054s
string-similarity-['jaro_winkler'] Time: 1.1513171195983887s
string-similarity-['levenshtein'] Time: 12.736163139343262s
string-similarity-['jaccard:tokenizer=word'] Time: 0.21752405166625977s
normalize-scores-des_cont_jaccard Time: 0.047676801681518555s
smallest-qnode-number Time: 0.45404911041259766s
mosaic-features Time: 0.02290487289428711s
creat-singleton-feature Time: 0.2684650421142578s
vote-by-classifier Time: 0.5821287631988525s
Qnodes to lookup: 3556
Qnodes from file: 3532
Outlier removal generates 5 lof-voted candidates
_centroid_of_lof: Missing 2 of 76
Outlier removal generates 60 lof-voted candidates
Outlier removal generates 57 lof-voted candidates
Outlier removal generates 30 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
score-using-embeddi

167it [3:16:43, 144.87s/it]

align-page-rank Time: 0.17903780937194824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.6351101398468018s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.768462896347046s
string-similarity-['jaro_winkler'] Time: 0.712597131729126s
string-similarity-['levenshtein'] Time: 4.893054962158203s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09760308265686035s
normalize-scores-des_cont_jaccard Time: 0.03214001655578613s
smallest-qnode-number Time: 0.4169652462005615s
mosaic-features Time: 0.015113115310668945s
creat-singleton-feature Time: 0.501763105392456s
vote-by-classifier Time: 1.0478458404541016s
Qnodes to lookup: 3154
Qnodes from file: 3120
Outlier removal generates 32 lof-voted candidates
Outlier removal generates 35 lof-voted candidates
Outlier removal generates 47 lof-voted candidates
score-using-embedding Time: 25.85238814353943s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3078117370605469s
compute-tf-idf-class_count Tim

168it [3:18:19, 130.12s/it]

align-page-rank Time: 0.194976806640625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3117499351501465s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5130071640014648s
string-similarity-['jaro_winkler'] Time: 0.1129617691040039s
string-similarity-['levenshtein'] Time: 0.44089484214782715s
string-similarity-['jaccard:tokenizer=word'] Time: 0.029827117919921875s
normalize-scores-des_cont_jaccard Time: 0.014058828353881836s
smallest-qnode-number Time: 0.09565210342407227s
mosaic-features Time: 0.005779743194580078s
creat-singleton-feature Time: 0.0666038990020752s
vote-by-classifier Time: 0.45929503440856934s
Qnodes to lookup: 2120
Qnodes from file: 2025
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 12.929828882217407s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1220700740814209s
compute-tf-idf-class_count Time: 14.383757829666138s
compute-tf-idf-property_count Time: 13.5333571434021s
context-match T

169it [3:18:45, 98.90s/it] 

align-page-rank Time: 0.18004798889160156s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4301738739013672s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5034370422363281s
string-similarity-['jaro_winkler'] Time: 0.1270298957824707s
string-similarity-['levenshtein'] Time: 0.359342098236084s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04494428634643555s
normalize-scores-des_cont_jaccard Time: 0.017601966857910156s
smallest-qnode-number Time: 0.09804701805114746s
mosaic-features Time: 0.008163928985595703s
creat-singleton-feature Time: 0.1005561351776123s
vote-by-classifier Time: 0.7509338855743408s
Qnodes to lookup: 2590
Qnodes from file: 2570
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 12.59117317199707s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.32122302055358887s
compute-tf-idf-class_count Time: 14.28846025466919s
compute-tf-idf-property_count Time: 14.345246076583862s
context-match Ti

170it [3:19:11, 77.14s/it]

align-page-rank Time: 0.12043118476867676s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.48713088035583496s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9559569358825684s
string-similarity-['jaro_winkler'] Time: 0.14664196968078613s
string-similarity-['levenshtein'] Time: 0.5448689460754395s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03737330436706543s
normalize-scores-des_cont_jaccard Time: 0.016223907470703125s
smallest-qnode-number Time: 0.47854089736938477s
mosaic-features Time: 0.007678031921386719s
creat-singleton-feature Time: 0.08225584030151367s
vote-by-classifier Time: 1.4735050201416016s
Qnodes to lookup: 2726
Qnodes from file: 2689
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 15.097523927688599s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14588212966918945s
compute-tf-idf-class_count Time: 15.804926872253418s
compute-tf-idf-property_count Time: 16.78021502494812s
context-mat

171it [3:19:40, 62.53s/it]

align-page-rank Time: 0.17729997634887695s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.791132926940918s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5106921195983887s
string-similarity-['jaro_winkler'] Time: 0.5146470069885254s
string-similarity-['levenshtein'] Time: 3.2543318271636963s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06737279891967773s
normalize-scores-des_cont_jaccard Time: 0.028996944427490234s
smallest-qnode-number Time: 0.23851704597473145s
mosaic-features Time: 0.01364898681640625s
creat-singleton-feature Time: 0.1501140594482422s
vote-by-classifier Time: 0.4429659843444824s
Qnodes to lookup: 2712
Qnodes from file: 2532
Outlier removal generates 128 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
Outlier removal generates 133 lof-voted candidates
score-using-embedding Time: 22.89085602760315s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4175889492034912s
compute-tf-idf-class_cou

172it [3:20:17, 55.04s/it]

align-page-rank Time: 0.4057741165161133s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.460202932357788s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.327927827835083s
string-similarity-['jaro_winkler'] Time: 0.8301181793212891s
string-similarity-['levenshtein'] Time: 4.007498025894165s
string-similarity-['jaccard:tokenizer=word'] Time: 0.21857905387878418s
normalize-scores-des_cont_jaccard Time: 0.051557064056396484s
smallest-qnode-number Time: 0.585496187210083s
mosaic-features Time: 0.027256250381469727s
creat-singleton-feature Time: 0.3314518928527832s
vote-by-classifier Time: 0.44821882247924805s
Qnodes to lookup: 12686
Qnodes from file: 12320
_centroid_of_lof: Missing 1 of 171
Outlier removal generates 102 lof-voted candidates
score-using-embedding Time: 32.464595794677734s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6871469020843506s
compute-tf-idf-class_count Time: 35.31721496582031s
compute-tf-idf-property_count Time: 35

173it [3:22:04, 70.50s/it]

align-page-rank Time: 0.23008990287780762s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1352148056030273s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.380339860916138s
string-similarity-['jaro_winkler'] Time: 0.42783093452453613s
string-similarity-['levenshtein'] Time: 4.0432820320129395s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07356882095336914s
normalize-scores-des_cont_jaccard Time: 0.029272079467773438s
smallest-qnode-number Time: 0.24042201042175293s
mosaic-features Time: 0.013318061828613281s
creat-singleton-feature Time: 0.15228533744812012s
vote-by-classifier Time: 0.44748902320861816s
Qnodes to lookup: 3206
Qnodes from file: 3172
Outlier removal generates 2 lof-voted candidates
Outlier removal generates 27 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 29.47947406768799s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2924528121948242s
compute-tf-idf-class_c

174it [3:23:40, 78.17s/it]

align-page-rank Time: 0.14748001098632812s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6493170261383057s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3057630062103271s
string-similarity-['jaro_winkler'] Time: 0.15012311935424805s
string-similarity-['levenshtein'] Time: 0.5664627552032471s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04095911979675293s
normalize-scores-des_cont_jaccard Time: 0.01679825782775879s
smallest-qnode-number Time: 0.0919189453125s
mosaic-features Time: 0.008012056350708008s
creat-singleton-feature Time: 0.08237528800964355s
vote-by-classifier Time: 0.42352986335754395s
Qnodes to lookup: 2918
Qnodes from file: 2872
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 11.958269834518433s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14541220664978027s
compute-tf-idf-class_count Time: 12.697541952133179s
compute-tf-idf-property_count Time: 12.907531023025513s
context-match Ti

175it [3:24:03, 61.55s/it]

align-page-rank Time: 0.13436007499694824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.424582004547119s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.431117057800293s
string-similarity-['jaro_winkler'] Time: 0.5548691749572754s
string-similarity-['levenshtein'] Time: 4.339847087860107s
string-similarity-['jaccard:tokenizer=word'] Time: 0.036379098892211914s
normalize-scores-des_cont_jaccard Time: 0.01285099983215332s
smallest-qnode-number Time: 0.09015178680419922s
mosaic-features Time: 0.007210969924926758s
creat-singleton-feature Time: 0.06482887268066406s
vote-by-classifier Time: 0.400723934173584s
Qnodes to lookup: 218
Qnodes from file: 212
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 24.09455180168152s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12777233123779297s
compute-tf-idf-class_count Time: 23.930460214614868s
compute-tf-idf-property_count Time: 23.996196031570435s
context-match Time:

176it [3:24:39, 53.84s/it]

align-page-rank Time: 0.10524106025695801s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3056950569152832s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.202075958251953s
string-similarity-['jaro_winkler'] Time: 0.2618241310119629s
string-similarity-['levenshtein'] Time: 1.2597579956054688s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06495189666748047s
normalize-scores-des_cont_jaccard Time: 0.02566695213317871s
smallest-qnode-number Time: 0.16242194175720215s
mosaic-features Time: 0.011416912078857422s
creat-singleton-feature Time: 0.13091015815734863s
vote-by-classifier Time: 0.43280601501464844s
Qnodes to lookup: 4489
Qnodes from file: 4385
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 31 lof-voted candidates
score-using-embedding Time: 16.554747819900513s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6536099910736084s
compute-tf-idf-class_count Time: 19.08965301513672s
compute-tf-idf-proper

177it [3:25:34, 54.29s/it]

align-page-rank Time: 0.4016141891479492s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.2977640628814697s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.9983229637146s
string-similarity-['jaro_winkler'] Time: 0.9085321426391602s
string-similarity-['levenshtein'] Time: 5.22006893157959s
string-similarity-['jaccard:tokenizer=word'] Time: 0.159440279006958s
normalize-scores-des_cont_jaccard Time: 0.04307198524475098s
smallest-qnode-number Time: 0.4065673351287842s
mosaic-features Time: 0.021061182022094727s
creat-singleton-feature Time: 0.2429790496826172s
vote-by-classifier Time: 0.5687248706817627s
Qnodes to lookup: 2013
Qnodes from file: 1996
Outlier removal generates 28 lof-voted candidates
Outlier removal generates 27 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
score-using-embedding Time: 28.665493965148926s
generate-r

178it [3:27:27, 72.03s/it]

align-page-rank Time: 0.28951287269592285s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8721020221710205s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.301616907119751s
string-similarity-['jaro_winkler'] Time: 0.49674081802368164s
string-similarity-['levenshtein'] Time: 2.345095157623291s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07569599151611328s
normalize-scores-des_cont_jaccard Time: 0.03270220756530762s
smallest-qnode-number Time: 0.32920074462890625s
mosaic-features Time: 0.015733957290649414s
creat-singleton-feature Time: 0.17080116271972656s
vote-by-classifier Time: 0.4545271396636963s
Qnodes to lookup: 4289
Qnodes from file: 4012
_centroid_of_lof: Missing 1 of 118
Outlier removal generates 70 lof-voted candidates
_centroid_of_lof: Missing 1 of 133
Outlier removal generates 79 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 19.982013940811157s
generate-reciprocal-rank-lof-gr

179it [3:28:30, 69.26s/it]

align-page-rank Time: 0.17399215698242188s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4440939426422119s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8786191940307617s
string-similarity-['jaro_winkler'] Time: 0.13113689422607422s
string-similarity-['levenshtein'] Time: 0.39929795265197754s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03629732131958008s
normalize-scores-des_cont_jaccard Time: 0.016119956970214844s
smallest-qnode-number Time: 0.09721207618713379s
mosaic-features Time: 0.007288932800292969s
creat-singleton-feature Time: 0.08680605888366699s
vote-by-classifier Time: 0.9707961082458496s
Qnodes to lookup: 2751
Qnodes from file: 2714
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 14.23936414718628s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13125920295715332s
compute-tf-idf-class_count Time: 14.872995138168335s
compute-tf-idf-property_count Time: 15.0690758228302s
context-match 

180it [3:28:58, 56.80s/it]

align-page-rank Time: 0.33341503143310547s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.860250949859619s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.760763883590698s
string-similarity-['jaro_winkler'] Time: 0.47745585441589355s
string-similarity-['levenshtein'] Time: 3.0209038257598877s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08712911605834961s
normalize-scores-des_cont_jaccard Time: 0.036174774169921875s
smallest-qnode-number Time: 0.29189300537109375s
mosaic-features Time: 0.018706798553466797s
creat-singleton-feature Time: 0.1958620548248291s
vote-by-classifier Time: 0.45195603370666504s
Qnodes to lookup: 3605
Qnodes from file: 3450
Outlier removal generates 5 lof-voted candidates
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 20.955622911453247s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3780670166015625s
compute-tf-idf-class_coun

181it [3:30:00, 58.41s/it]

align-page-rank Time: 0.22236895561218262s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.808774948120117s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.392124891281128s
string-similarity-['jaro_winkler'] Time: 0.4498131275177002s
string-similarity-['levenshtein'] Time: 3.116112232208252s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0757288932800293s
normalize-scores-des_cont_jaccard Time: 0.03226804733276367s
smallest-qnode-number Time: 0.2625548839569092s
mosaic-features Time: 0.014059782028198242s
creat-singleton-feature Time: 0.16186308860778809s
vote-by-classifier Time: 0.46820998191833496s
Qnodes to lookup: 2525
Qnodes from file: 2450
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 22.404959201812744s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.36813902854919434s
compute-tf-idf-class_count

182it [3:31:45, 72.30s/it]

align-page-rank Time: 0.19676899909973145s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5702497959136963s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.665038108825684s
string-similarity-['jaro_winkler'] Time: 0.6529231071472168s
string-similarity-['levenshtein'] Time: 4.827535152435303s
string-similarity-['jaccard:tokenizer=word'] Time: 0.049386024475097656s
normalize-scores-des_cont_jaccard Time: 0.021631956100463867s
smallest-qnode-number Time: 0.16283917427062988s
mosaic-features Time: 0.013624906539916992s
creat-singleton-feature Time: 0.11669087409973145s
vote-by-classifier Time: 0.7496697902679443s
Qnodes to lookup: 1918
Qnodes from file: 1900
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 29.31270408630371s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2156977653503418s
compute-tf-idf-class_count Time: 30.953124046325684s
compute-tf-idf-prope

183it [3:33:04, 74.52s/it]

align-page-rank Time: 0.1815192699432373s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3597848415374756s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.35851287841796875s
string-similarity-['jaro_winkler'] Time: 0.1307682991027832s
string-similarity-['levenshtein'] Time: 0.478971004486084s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05526900291442871s
normalize-scores-des_cont_jaccard Time: 0.017267227172851562s
smallest-qnode-number Time: 0.09599113464355469s
mosaic-features Time: 0.008591175079345703s
creat-singleton-feature Time: 0.08879280090332031s
vote-by-classifier Time: 0.4841482639312744s
Qnodes to lookup: 2349
Qnodes from file: 2339
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 14.104345798492432s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.15043210983276367s
compute-tf-idf-class_count Time: 14.75362515449524s
compute-tf-idf-property_count Time: 14.899172067642212s
context-match 

184it [3:33:34, 61.18s/it]

align-page-rank Time: 0.062200069427490234s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5498230457305908s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.268167972564697s
string-similarity-['jaro_winkler'] Time: 0.18996882438659668s
string-similarity-['levenshtein'] Time: 1.8911490440368652s
string-similarity-['jaccard:tokenizer=word'] Time: 0.030395030975341797s
normalize-scores-des_cont_jaccard Time: 0.014746904373168945s
smallest-qnode-number Time: 0.08638715744018555s
mosaic-features Time: 0.006372690200805664s
creat-singleton-feature Time: 0.06854009628295898s
vote-by-classifier Time: 0.46819090843200684s
Qnodes to lookup: 2133
Qnodes from file: 2107
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 26.275620698928833s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12905192375183105s
compute-tf-idf-class_count Time: 27.660558938980103s
compute-tf-idf-property_count Time: 27.83171319961548s
context-matc

185it [3:34:14, 54.62s/it]

align-page-rank Time: 0.19201374053955078s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1743531227111816s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3663220405578613s
string-similarity-['jaro_winkler'] Time: 0.3056461811065674s
string-similarity-['levenshtein'] Time: 1.2722752094268799s
string-similarity-['jaccard:tokenizer=word'] Time: 0.051506996154785156s
normalize-scores-des_cont_jaccard Time: 0.022054195404052734s
smallest-qnode-number Time: 0.16208577156066895s
mosaic-features Time: 0.009723901748657227s
creat-singleton-feature Time: 0.10716080665588379s
vote-by-classifier Time: 0.4232339859008789s
Qnodes to lookup: 2696
Qnodes from file: 2668
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 16.71182894706726s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.255878210067749s
compute-tf-idf-class_count Time: 17.60280203819275s
compute-tf-idf-proper

186it [3:35:00, 52.00s/it]

align-page-rank Time: 0.2159280776977539s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.764711856842041s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.720595121383667s
string-similarity-['jaro_winkler'] Time: 0.27265191078186035s
string-similarity-['levenshtein'] Time: 0.8622682094573975s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13048601150512695s
normalize-scores-des_cont_jaccard Time: 0.02463817596435547s
smallest-qnode-number Time: 0.1830430030822754s
mosaic-features Time: 0.012198209762573242s
creat-singleton-feature Time: 0.14069676399230957s
vote-by-classifier Time: 0.53969407081604s
Qnodes to lookup: 1029
Qnodes from file: 1016
Outlier removal generates 829 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
score-using-embedding Time: 14.067012071609497s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.25611114501953125s
compute-tf-idf-class_count Time: 15.840334177017212s
compute-tf-idf-property

187it [3:35:42, 48.97s/it]

align-page-rank Time: 0.22126102447509766s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8386759757995605s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.328630685806274s
string-similarity-['jaro_winkler'] Time: 0.5652751922607422s
string-similarity-['levenshtein'] Time: 3.276711940765381s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08517622947692871s
normalize-scores-des_cont_jaccard Time: 0.03419208526611328s
smallest-qnode-number Time: 0.24817299842834473s
mosaic-features Time: 0.01741170883178711s
creat-singleton-feature Time: 0.1847999095916748s
vote-by-classifier Time: 0.46758460998535156s
Qnodes to lookup: 5754
Qnodes from file: 5711
Outlier removal generates 5 lof-voted candidates
Outlier removal generates 25 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 23.318545818328857s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.47611498832702637s
compute-tf-idf-class_count

188it [3:37:29, 66.42s/it]

align-page-rank Time: 0.31771421432495117s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.181536912918091s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.755922794342041s
string-similarity-['jaro_winkler'] Time: 0.5641758441925049s
string-similarity-['levenshtein'] Time: 2.6129140853881836s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13268780708312988s
normalize-scores-des_cont_jaccard Time: 0.04424118995666504s
smallest-qnode-number Time: 0.32558631896972656s
mosaic-features Time: 0.020503997802734375s
creat-singleton-feature Time: 0.23997902870178223s
vote-by-classifier Time: 1.0274591445922852s
Qnodes to lookup: 3760
Qnodes from file: 3627
Outlier removal generates 19 lof-voted candidates
_centroid_of_lof: Missing 3 of 29
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 23.073922157287598s
generate-reciproca

189it [3:39:31, 83.15s/it]

align-page-rank Time: 0.2521531581878662s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.7442259788513184s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.75767707824707s
string-similarity-['jaro_winkler'] Time: 0.626655101776123s
string-similarity-['levenshtein'] Time: 4.356600761413574s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07415890693664551s
normalize-scores-des_cont_jaccard Time: 0.03131294250488281s
smallest-qnode-number Time: 0.25789403915405273s
mosaic-features Time: 0.01656818389892578s
creat-singleton-feature Time: 0.16247296333312988s
vote-by-classifier Time: 1.0375988483428955s
Qnodes to lookup: 5499
Qnodes from file: 5252
Outlier removal generates 8 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 25.03740906715393s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5404632091522217s
compute-tf-idf-class_count Time

190it [3:40:45, 80.49s/it]

align-page-rank Time: 0.1985788345336914s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4230468273162842s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.19458580017089844s
string-similarity-['jaro_winkler'] Time: 0.1340479850769043s
string-similarity-['levenshtein'] Time: 0.7790300846099854s
string-similarity-['jaccard:tokenizer=word'] Time: 0.039839982986450195s
normalize-scores-des_cont_jaccard Time: 0.013090848922729492s
smallest-qnode-number Time: 0.16001415252685547s
mosaic-features Time: 0.0057489871978759766s
creat-singleton-feature Time: 0.0636589527130127s
vote-by-classifier Time: 0.9188947677612305s
Qnodes to lookup: 1352
Qnodes from file: 1349
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 14.41221308708191s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11695504188537598s
compute-tf-idf-class_count Time: 15.721819162368774s
compute-tf-idf-property_count Time: 14.91608190536499s
context-match

191it [3:41:16, 65.67s/it]

align-page-rank Time: 0.1677398681640625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.242808103561401s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.127202987670898s
string-similarity-['jaro_winkler'] Time: 0.8034372329711914s
string-similarity-['levenshtein'] Time: 6.180491924285889s
string-similarity-['jaccard:tokenizer=word'] Time: 0.135084867477417s
normalize-scores-des_cont_jaccard Time: 0.025246858596801758s
smallest-qnode-number Time: 0.22671794891357422s
mosaic-features Time: 0.014670133590698242s
creat-singleton-feature Time: 0.18106818199157715s
vote-by-classifier Time: 0.42838287353515625s
Qnodes to lookup: 662
Qnodes from file: 648
Outlier removal generates 1920 lof-voted candidates
Outlier removal generates 78 lof-voted candidates
Outlier removal generates 50 lof-voted candidates
score-using-embedding Time: 28.143616914749146s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.36222124099731445s
compute-tf-idf-class_count

192it [3:43:35, 87.45s/it]

align-page-rank Time: 0.13094592094421387s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.38269805908203125s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.885728120803833s
string-similarity-['jaro_winkler'] Time: 0.3977208137512207s
string-similarity-['levenshtein'] Time: 0.4219069480895996s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0388798713684082s
normalize-scores-des_cont_jaccard Time: 0.0170900821685791s
smallest-qnode-number Time: 0.4721109867095947s
mosaic-features Time: 0.007305145263671875s
creat-singleton-feature Time: 0.08924603462219238s
vote-by-classifier Time: 0.8505411148071289s
Qnodes to lookup: 2922
Qnodes from file: 2870
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 15.26381802558899s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14115405082702637s
compute-tf-idf-class_count Time: 16.4501371383667s
compute-tf-idf-property_count Time: 15.948989868164062s
context-match Time: 

193it [3:44:03, 69.71s/it]

align-page-rank Time: 0.053788185119628906s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6691710948944092s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.239583969116211s
string-similarity-['jaro_winkler'] Time: 0.15015387535095215s
string-similarity-['levenshtein'] Time: 0.8411052227020264s
string-similarity-['jaccard:tokenizer=word'] Time: 0.019701004028320312s
normalize-scores-des_cont_jaccard Time: 0.013283729553222656s
smallest-qnode-number Time: 0.09559512138366699s
mosaic-features Time: 0.005553245544433594s
creat-singleton-feature Time: 0.4248640537261963s
vote-by-classifier Time: 0.8976550102233887s
Qnodes to lookup: 862
Qnodes from file: 840
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 15.220389127731323s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11854696273803711s
compute-tf-idf-class_count Time: 15.633365869522095s
compute-tf-idf-property_count Time: 16.940422296524048s
context-match

194it [3:44:33, 57.76s/it]

align-page-rank Time: 0.15542817115783691s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7714428901672363s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4630990028381348s
string-similarity-['jaro_winkler'] Time: 0.16266965866088867s
string-similarity-['levenshtein'] Time: 1.1256630420684814s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02699899673461914s
normalize-scores-des_cont_jaccard Time: 0.013401985168457031s
smallest-qnode-number Time: 0.08903980255126953s
mosaic-features Time: 0.005722999572753906s
creat-singleton-feature Time: 0.06595206260681152s
vote-by-classifier Time: 0.4317760467529297s
Qnodes to lookup: 1444
Qnodes from file: 1404
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 14.525428056716919s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1281900405883789s
compute-tf-idf-class_count Time: 16.211637020111084s
compute-tf-idf-property_count Time: 16.45053195953369s
context-match

195it [3:45:00, 48.73s/it]

align-page-rank Time: 0.1237039566040039s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6662039756774902s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.354938983917236s
string-similarity-['jaro_winkler'] Time: 0.21565580368041992s
string-similarity-['levenshtein'] Time: 1.9474232196807861s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02045893669128418s
normalize-scores-des_cont_jaccard Time: 0.008943796157836914s
smallest-qnode-number Time: 0.0741429328918457s
mosaic-features Time: 0.0038340091705322266s
creat-singleton-feature Time: 0.04730820655822754s
vote-by-classifier Time: 0.44094324111938477s
Qnodes to lookup: 1764
Qnodes from file: 1649
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-embedding Time: 20.31561303138733s
Command: gener

196it [3:45:35, 44.43s/it]

align-page-rank Time: 0.20871329307556152s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3957979679107666s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.46672987937927246s
string-similarity-['jaro_winkler'] Time: 0.12217593193054199s
string-similarity-['levenshtein'] Time: 0.41615915298461914s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03171801567077637s
normalize-scores-des_cont_jaccard Time: 0.014893054962158203s
smallest-qnode-number Time: 0.5099992752075195s
mosaic-features Time: 0.0066258907318115234s
creat-singleton-feature Time: 0.07642006874084473s
vote-by-classifier Time: 0.7949492931365967s
Qnodes to lookup: 2233
Qnodes from file: 2203
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 15.266185998916626s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12817621231079102s
compute-tf-idf-class_count Time: 15.864619970321655s
compute-tf-idf-property_count Time: 16.04563808441162s
context-ma

197it [3:46:04, 39.88s/it]

align-page-rank Time: 0.10905098915100098s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.1827738285064697s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.55344295501709s
string-similarity-['jaro_winkler'] Time: 0.3756709098815918s
string-similarity-['levenshtein'] Time: 2.336199998855591s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04665994644165039s
normalize-scores-des_cont_jaccard Time: 0.0218508243560791s
smallest-qnode-number Time: 0.3102128505706787s
mosaic-features Time: 0.010960102081298828s
creat-singleton-feature Time: 0.12644720077514648s
vote-by-classifier Time: 0.6499857902526855s
Qnodes to lookup: 2247
Qnodes from file: 2199
Outlier removal generates 3 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 20.42690420150757s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26392292976379395s
compute-tf-idf-class_count Time: 22.068243265151978s
compute-tf-idf-property_co

198it [3:47:05, 46.23s/it]

align-page-rank Time: 0.17747902870178223s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.6855721473693848s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.254611015319824s
string-similarity-['jaro_winkler'] Time: 0.5852787494659424s
string-similarity-['levenshtein'] Time: 3.096130132675171s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08750414848327637s
normalize-scores-des_cont_jaccard Time: 0.03717613220214844s
smallest-qnode-number Time: 0.2805519104003906s
mosaic-features Time: 0.018199920654296875s
creat-singleton-feature Time: 0.21132302284240723s
vote-by-classifier Time: 0.41733813285827637s
Qnodes to lookup: 5386
Qnodes from file: 5293
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 50 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 20.7416410446167s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3718400001525879s
compute-tf-idf-class_count Tim

199it [3:48:55, 65.47s/it]

align-page-rank Time: 0.2414231300354004s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.34397315979003906s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.40349793434143066s
string-similarity-['jaro_winkler'] Time: 0.15577483177185059s
string-similarity-['levenshtein'] Time: 0.5061278343200684s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04704117774963379s
normalize-scores-des_cont_jaccard Time: 0.014631986618041992s
smallest-qnode-number Time: 0.08855414390563965s
mosaic-features Time: 0.006281852722167969s
creat-singleton-feature Time: 0.07071590423583984s
vote-by-classifier Time: 0.7918548583984375s
Qnodes to lookup: 756
Qnodes from file: 742
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 15.21327018737793s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.48185014724731445s
compute-tf-idf-class_count Time: 16.055155038833618s
compute-tf-idf-property_count Time: 15.438183784484863s
context-match

200it [3:49:28, 55.49s/it]

align-page-rank Time: 0.17595601081848145s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.31351900100708s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 11.657377004623413s
string-similarity-['jaro_winkler'] Time: 1.1873111724853516s
string-similarity-['levenshtein'] Time: 9.41706109046936s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13502979278564453s
normalize-scores-des_cont_jaccard Time: 0.043029069900512695s
smallest-qnode-number Time: 0.3189582824707031s
mosaic-features Time: 0.02572178840637207s
creat-singleton-feature Time: 0.24719882011413574s
vote-by-classifier Time: 0.4420022964477539s
Qnodes to lookup: 3897
Qnodes from file: 3809
Outlier removal generates 10 lof-voted candidates
Outlier removal generates 31 lof-voted candidates
Outlier removal generates 31 lof-voted candidates
score-using-embedding Time: 42.16187405586243s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4095778465270996s
compute-tf-idf-class_count Ti

201it [3:51:59, 84.17s/it]

align-page-rank Time: 0.23743104934692383s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.229802131652832s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.7325313091278076s
string-similarity-['jaro_winkler'] Time: 0.5101320743560791s
string-similarity-['levenshtein'] Time: 7.192936182022095s
string-similarity-['jaccard:tokenizer=word'] Time: 0.055056095123291016s
normalize-scores-des_cont_jaccard Time: 0.013199806213378906s
smallest-qnode-number Time: 0.09136319160461426s
mosaic-features Time: 0.0069789886474609375s
creat-singleton-feature Time: 0.06437182426452637s
vote-by-classifier Time: 0.42126893997192383s
Qnodes to lookup: 181
Qnodes from file: 181
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 25.506333827972412s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11747908592224121s
compute-tf-idf-class_count Time: 25.86938500404358s
compute-tf-idf-property_count Time: 26.019834756851196s
context-match 

202it [3:52:38, 70.58s/it]

align-page-rank Time: 0.2064528465270996s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7909541130065918s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7422049045562744s
string-similarity-['jaro_winkler'] Time: 0.23017597198486328s
string-similarity-['levenshtein'] Time: 0.9205927848815918s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06340909004211426s
normalize-scores-des_cont_jaccard Time: 0.026794910430908203s
smallest-qnode-number Time: 0.167266845703125s
mosaic-features Time: 0.012414932250976562s
creat-singleton-feature Time: 0.12869811058044434s
vote-by-classifier Time: 0.45505690574645996s
Qnodes to lookup: 4955
Qnodes from file: 4899
Outlier removal generates 2 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 14.745387077331543s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2911667823791504s
compute-tf-idf-class_count Time: 16.496749877929688s
compute-tf-idf-propert

203it [3:53:25, 63.52s/it]

align-page-rank Time: 0.2727477550506592s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5995640754699707s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5065908432006836s
string-similarity-['jaro_winkler'] Time: 0.3827517032623291s
string-similarity-['levenshtein'] Time: 1.5910100936889648s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08092904090881348s
normalize-scores-des_cont_jaccard Time: 0.035428762435913086s
smallest-qnode-number Time: 0.30287694931030273s
mosaic-features Time: 0.016710996627807617s
creat-singleton-feature Time: 0.18698811531066895s
vote-by-classifier Time: 1.157444715499878s
Qnodes to lookup: 3966
Qnodes from file: 3879
Outlier removal generates 46 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
score-using-embedding Time: 18.991033792495728s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3505263328552246s
compute-tf-idf-class_cou

204it [3:54:29, 63.67s/it]

align-page-rank Time: 0.29903197288513184s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.858093976974487s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.303961992263794s
string-similarity-['jaro_winkler'] Time: 0.8923330307006836s
string-similarity-['levenshtein'] Time: 5.6860032081604s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06335186958312988s
normalize-scores-des_cont_jaccard Time: 0.029150962829589844s
smallest-qnode-number Time: 0.23552799224853516s
mosaic-features Time: 0.014283895492553711s
creat-singleton-feature Time: 0.14569401741027832s
vote-by-classifier Time: 0.4639401435852051s
Qnodes to lookup: 297
Qnodes from file: 290
Outlier removal generates 62 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 56 lof-voted candidates
score-using-embedding Time: 22.751111030578613s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2949991226196289s
compute-tf-idf-class_count Ti

205it [3:55:37, 64.99s/it]

align-page-rank Time: 0.16581225395202637s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.345685958862305s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.764259099960327s
string-similarity-['jaro_winkler'] Time: 0.6934068202972412s
string-similarity-['levenshtein'] Time: 4.60190224647522s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05943489074707031s
normalize-scores-des_cont_jaccard Time: 0.020620107650756836s
smallest-qnode-number Time: 0.4262571334838867s
mosaic-features Time: 0.012816905975341797s
creat-singleton-feature Time: 0.10691618919372559s
vote-by-classifier Time: 0.5582880973815918s
Qnodes to lookup: 327
Qnodes from file: 317
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
score-using-embedding Time: 25.385473012924194s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.19372272491455078s
compute-tf-idf-class_count Time: 26.05670189857483s
compute-tf-idf-property_co

206it [3:57:34, 80.52s/it]

align-page-rank Time: 0.15006804466247559s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.47083115577697754s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.638063907623291s
string-similarity-['jaro_winkler'] Time: 0.14336109161376953s
string-similarity-['levenshtein'] Time: 0.6141371726989746s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03184199333190918s
normalize-scores-des_cont_jaccard Time: 0.015011072158813477s
smallest-qnode-number Time: 0.09131312370300293s
mosaic-features Time: 0.006515979766845703s
creat-singleton-feature Time: 0.07238292694091797s
vote-by-classifier Time: 0.8437778949737549s
Qnodes to lookup: 2292
Qnodes from file: 2248
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 13.377920866012573s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.157850980758667s
compute-tf-idf-class_count Time: 14.50791883468628s
compute-tf-idf-property_count Time: 15.13361382484436s
context-match T

207it [3:57:59, 63.98s/it]

align-page-rank Time: 0.06055092811584473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.41118288040161133s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3871281147003174s
string-similarity-['jaro_winkler'] Time: 0.0871739387512207s
string-similarity-['levenshtein'] Time: 0.30950188636779785s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03493213653564453s
normalize-scores-des_cont_jaccard Time: 0.014132022857666016s
smallest-qnode-number Time: 0.5286290645599365s
mosaic-features Time: 0.00638580322265625s
creat-singleton-feature Time: 0.06794595718383789s
vote-by-classifier Time: 0.852806806564331s
Qnodes to lookup: 1634
Qnodes from file: 1631
Outlier removal generates 5 lof-voted candidates
score-using-embedding Time: 13.479550123214722s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4935269355773926s
compute-tf-idf-class_count Time: 15.26513671875s
compute-tf-idf-property_count Time: 15.361241102218628s
context-match Time:

208it [3:58:27, 53.18s/it]

align-page-rank Time: 0.18604588508605957s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9521288871765137s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.402943849563599s
string-similarity-['jaro_winkler'] Time: 0.426271915435791s
string-similarity-['levenshtein'] Time: 2.2111079692840576s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07834386825561523s
normalize-scores-des_cont_jaccard Time: 0.032913923263549805s
smallest-qnode-number Time: 0.24224281311035156s
mosaic-features Time: 0.015915870666503906s
creat-singleton-feature Time: 0.16877412796020508s
vote-by-classifier Time: 0.3938469886779785s
Qnodes to lookup: 5797
Qnodes from file: 5724
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 20.178301095962524s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3345308303833008s
compute-tf-idf-class_cou

209it [3:59:50, 62.30s/it]

align-page-rank Time: 3.0261449813842773s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.664588928222656s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 18.017184019088745s
string-similarity-['jaro_winkler'] Time: 1.7954108715057373s
string-similarity-['levenshtein'] Time: 8.473796129226685s
string-similarity-['jaccard:tokenizer=word'] Time: 0.8426229953765869s
normalize-scores-des_cont_jaccard Time: 0.11170792579650879s
smallest-qnode-number Time: 1.8754539489746094s
mosaic-features Time: 0.05519294738769531s
creat-singleton-feature Time: 0.6895391941070557s
vote-by-classifier Time: 0.4317600727081299s
Qnodes to lookup: 25743
Qnodes from file: 24887
Outlier removal generates 70 lof-voted candidates
score-using-embedding Time: 57.211024045944214s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.6190557479858398s
compute-tf-idf-class_count Time: 61.97682785987854s
compute-tf-idf-property_count Time: 63.564395904541016s
context-match Time: 

210it [4:04:55, 135.10s/it]

align-page-rank Time: 0.18390512466430664s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5568687915802s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.196784257888794s
string-similarity-['jaro_winkler'] Time: 0.363584041595459s
string-similarity-['levenshtein'] Time: 2.3449862003326416s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05536484718322754s
normalize-scores-des_cont_jaccard Time: 0.023057937622070312s
smallest-qnode-number Time: 0.16405296325683594s
mosaic-features Time: 0.01080322265625s
creat-singleton-feature Time: 0.10941505432128906s
vote-by-classifier Time: 0.46781110763549805s
Qnodes to lookup: 3274
Qnodes from file: 3169
Outlier removal generates 5 lof-voted candidates
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 22.175859928131104s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5703630447387695s
compute-tf-idf-class_count Time: 24.601688146591187s
compute-tf-idf-property_coun

211it [4:06:08, 116.26s/it]

align-page-rank Time: 0.11117386817932129s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.570066213607788s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.8491051197052s
string-similarity-['jaro_winkler'] Time: 0.39565396308898926s
string-similarity-['levenshtein'] Time: 1.630295991897583s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07143282890319824s
normalize-scores-des_cont_jaccard Time: 0.027055978775024414s
smallest-qnode-number Time: 0.16435885429382324s
mosaic-features Time: 0.015578746795654297s
creat-singleton-feature Time: 0.15525507926940918s
vote-by-classifier Time: 0.3961930274963379s
Qnodes to lookup: 1843
Qnodes from file: 1827
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
score-using-embedding Time: 21.74791979789734s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2808399200439453s
compute-tf-idf-class_count Time: 22.690600156784058s
compute-tf-idf-property_

212it [4:07:33, 106.82s/it]

align-page-rank Time: 0.2744598388671875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7190961837768555s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1004416942596436s
string-similarity-['jaro_winkler'] Time: 0.3319559097290039s
string-similarity-['levenshtein'] Time: 1.9182422161102295s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04304838180541992s
normalize-scores-des_cont_jaccard Time: 0.018369674682617188s
smallest-qnode-number Time: 0.11451101303100586s
mosaic-features Time: 0.010081052780151367s
creat-singleton-feature Time: 0.09720706939697266s
vote-by-classifier Time: 0.9427049160003662s
Qnodes to lookup: 1504
Qnodes from file: 1490
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 16.368441820144653s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6635909080505371s
compute-tf-idf-class_count Time: 18.29951500892639s
compute-tf-idf-property_count Time: 17.69044804573059s
context-match Ti

213it [4:08:02, 83.60s/it] 

align-page-rank Time: 0.1406421661376953s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4683711528778076s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.800755023956299s
string-similarity-['jaro_winkler'] Time: 0.3234710693359375s
string-similarity-['levenshtein'] Time: 1.872136116027832s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0588231086730957s
normalize-scores-des_cont_jaccard Time: 0.021512269973754883s
smallest-qnode-number Time: 0.15944910049438477s
mosaic-features Time: 0.009872198104858398s
creat-singleton-feature Time: 0.10321593284606934s
vote-by-classifier Time: 0.3900618553161621s
Qnodes to lookup: 1867
Qnodes from file: 1836
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 20.60279607772827s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2013249397277832s
compute-tf-idf-class_count Time: 21.48411798477173s
compute-tf-idf-property_c

214it [4:08:48, 72.39s/it]

align-page-rank Time: 1.0910968780517578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.935873031616211s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.38921594619751s
string-similarity-['jaro_winkler'] Time: 1.1728870868682861s
string-similarity-['levenshtein'] Time: 6.698659181594849s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20873332023620605s
normalize-scores-des_cont_jaccard Time: 0.07549166679382324s
smallest-qnode-number Time: 1.2834312915802002s
mosaic-features Time: 0.04059600830078125s
creat-singleton-feature Time: 0.5552551746368408s
vote-by-classifier Time: 1.0639638900756836s
Qnodes to lookup: 12097
Qnodes from file: 11969
Outlier removal generates 50 lof-voted candidates
score-using-embedding Time: 46.05753493309021s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.146697998046875s
compute-tf-idf-class_count Time: 48.88716697692871s
compute-tf-idf-property_count Time: 49.61031699180603s
context-match Time: 50.

215it [4:10:42, 84.71s/it]

align-page-rank Time: 0.26218414306640625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2044968605041504s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.4186348915100098s
string-similarity-['jaro_winkler'] Time: 0.5375542640686035s
string-similarity-['levenshtein'] Time: 2.053049087524414s
string-similarity-['jaccard:tokenizer=word'] Time: 0.10854721069335938s
normalize-scores-des_cont_jaccard Time: 0.034571170806884766s
smallest-qnode-number Time: 0.24909591674804688s
mosaic-features Time: 0.018363237380981445s
creat-singleton-feature Time: 0.20879316329956055s
vote-by-classifier Time: 0.6493251323699951s
Qnodes to lookup: 2025
Qnodes from file: 1995
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 28 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 21.890058040618896s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5222108364105225s
compute-tf-idf-class_count

216it [4:11:32, 74.27s/it]

align-page-rank Time: 0.11492490768432617s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1643199920654297s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7253971099853516s
string-similarity-['jaro_winkler'] Time: 0.3305981159210205s
string-similarity-['levenshtein'] Time: 1.3168320655822754s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07550406455993652s
normalize-scores-des_cont_jaccard Time: 0.025831937789916992s
smallest-qnode-number Time: 0.18582987785339355s
mosaic-features Time: 0.013497114181518555s
creat-singleton-feature Time: 0.15087580680847168s
vote-by-classifier Time: 0.40886807441711426s
Qnodes to lookup: 5089
Qnodes from file: 4984
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
score-using-embedding Time: 16.95177698135376s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.26162004470825195s
compute-tf-idf-class_count Time: 18.67497420310974s
compute-tf-idf-prope

217it [4:12:22, 67.06s/it]

align-page-rank Time: 0.2784879207611084s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.676613092422485s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.002255916595459s
string-similarity-['jaro_winkler'] Time: 0.8243958950042725s
string-similarity-['levenshtein'] Time: 5.857533931732178s
string-similarity-['jaccard:tokenizer=word'] Time: 0.14345788955688477s
normalize-scores-des_cont_jaccard Time: 0.04617595672607422s
smallest-qnode-number Time: 0.33037495613098145s
mosaic-features Time: 0.023450136184692383s
creat-singleton-feature Time: 0.25699377059936523s
vote-by-classifier Time: 0.39629578590393066s
Qnodes to lookup: 4529
Qnodes from file: 4449
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 34 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 44 lof-voted candidates
score-using-embedding Time: 31.842038869857788s
generate-reciprocal-rank-lof-graph-embedding-score T

218it [4:16:12, 116.00s/it]

align-page-rank Time: 0.23311734199523926s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3214521408081055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.319072008132935s
string-similarity-['jaro_winkler'] Time: 0.5082061290740967s
string-similarity-['levenshtein'] Time: 2.4343230724334717s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12115192413330078s
normalize-scores-des_cont_jaccard Time: 0.04034900665283203s
smallest-qnode-number Time: 0.32714104652404785s
mosaic-features Time: 0.01819014549255371s
creat-singleton-feature Time: 0.23917794227600098s
vote-by-classifier Time: 0.8749539852142334s
Qnodes to lookup: 7906
Qnodes from file: 7721
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 23.79033374786377s
generate-reciprocal-rank-lof-graph-embedding-score Ti

219it [4:18:21, 120.04s/it]

align-page-rank Time: 0.18729782104492188s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.6960818767547607s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.861300945281982s
string-similarity-['jaro_winkler'] Time: 0.6359968185424805s
string-similarity-['levenshtein'] Time: 4.302377939224243s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09807825088500977s
normalize-scores-des_cont_jaccard Time: 0.03881382942199707s
smallest-qnode-number Time: 0.25743699073791504s
mosaic-features Time: 0.019621849060058594s
creat-singleton-feature Time: 0.17880702018737793s
vote-by-classifier Time: 0.8092858791351318s
Qnodes to lookup: 2988
Qnodes from file: 2909
_centroid_of_lof: Missing 1 of 39
Outlier removal generates 24 lof-voted candidates
Outlier removal generates 31 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 28.760099172592163s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.349409103

220it [4:19:46, 109.38s/it]

align-page-rank Time: 0.05245089530944824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9692111015319824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5289068222045898s
string-similarity-['jaro_winkler'] Time: 0.1363999843597412s
string-similarity-['levenshtein'] Time: 0.4344203472137451s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06752800941467285s
normalize-scores-des_cont_jaccard Time: 0.016930103302001953s
smallest-qnode-number Time: 0.09097123146057129s
mosaic-features Time: 0.008249759674072266s
creat-singleton-feature Time: 0.08870720863342285s
vote-by-classifier Time: 0.39880919456481934s
Qnodes to lookup: 2138
Qnodes from file: 2118
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 11.339951038360596s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.17921018600463867s
compute-tf-idf-class_count Time: 13.016861915588379s
compute-tf-idf-property_count Time: 13.234570980072021s
context-mat

221it [4:20:13, 84.70s/it] 

align-page-rank Time: 0.1659562587738037s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8355960845947266s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.5214269161224365s
string-similarity-['jaro_winkler'] Time: 0.563143253326416s
string-similarity-['levenshtein'] Time: 4.503078937530518s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0672309398651123s
normalize-scores-des_cont_jaccard Time: 0.030200958251953125s
smallest-qnode-number Time: 0.2818470001220703s
mosaic-features Time: 0.014338016510009766s
creat-singleton-feature Time: 0.18376803398132324s
vote-by-classifier Time: 1.527501106262207s
Qnodes to lookup: 4452
Qnodes from file: 4399
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 28.972835063934326s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3464210033416748s
compute-tf-idf-class_count T

222it [4:21:55, 89.80s/it]

align-page-rank Time: 0.25849199295043945s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.809464931488037s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.538464069366455s
string-similarity-['jaro_winkler'] Time: 0.5706768035888672s
string-similarity-['levenshtein'] Time: 3.3170928955078125s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07942724227905273s
normalize-scores-des_cont_jaccard Time: 0.03465008735656738s
smallest-qnode-number Time: 0.2511632442474365s
mosaic-features Time: 0.019860029220581055s
creat-singleton-feature Time: 0.19850587844848633s
vote-by-classifier Time: 0.567274808883667s
Qnodes to lookup: 5816
Qnodes from file: 5659
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 21.10002303123474s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3646101951599121s
compute-tf-idf-class_count T

223it [4:23:25, 89.85s/it]

align-page-rank Time: 0.11357903480529785s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.23621010780334473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4489579200744629s
string-similarity-['jaro_winkler'] Time: 0.08134984970092773s
string-similarity-['levenshtein'] Time: 0.25618577003479004s
string-similarity-['jaccard:tokenizer=word'] Time: 0.031327009201049805s
normalize-scores-des_cont_jaccard Time: 0.012867927551269531s
smallest-qnode-number Time: 0.09316396713256836s
mosaic-features Time: 0.005884885787963867s
creat-singleton-feature Time: 0.06659984588623047s
vote-by-classifier Time: 1.268986701965332s
Qnodes to lookup: 2032
Qnodes from file: 2006
Outlier removal generates 7 lof-voted candidates
score-using-embedding Time: 15.044259786605835s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13428306579589844s
compute-tf-idf-class_count Time: 17.00465202331543s
compute-tf-idf-property_count Time: 16.314432621002197s
context-mat

224it [4:23:53, 71.43s/it]

align-page-rank Time: 0.06532526016235352s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3517570495605469s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7547519207000732s
string-similarity-['jaro_winkler'] Time: 0.13370108604431152s
string-similarity-['levenshtein'] Time: 0.4228689670562744s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0365750789642334s
normalize-scores-des_cont_jaccard Time: 0.017277956008911133s
smallest-qnode-number Time: 0.09571003913879395s
mosaic-features Time: 0.0074770450592041016s
creat-singleton-feature Time: 0.08067512512207031s
vote-by-classifier Time: 1.390209674835205s
Qnodes to lookup: 2954
Qnodes from file: 2910
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 16.03893494606018s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13954591751098633s
compute-tf-idf-class_count Time: 15.834707975387573s
compute-tf-idf-property_count Time: 16.0051691532135s
context-match Ti

225it [4:24:22, 58.60s/it]

align-page-rank Time: 0.22361397743225098s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5020110607147217s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.712578058242798s
string-similarity-['jaro_winkler'] Time: 0.44570398330688477s
string-similarity-['levenshtein'] Time: 1.5441169738769531s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15258383750915527s
normalize-scores-des_cont_jaccard Time: 0.04157376289367676s
smallest-qnode-number Time: 0.2661769390106201s
mosaic-features Time: 0.02041792869567871s
creat-singleton-feature Time: 0.22651886940002441s
vote-by-classifier Time: 0.43808507919311523s
Qnodes to lookup: 7216
Qnodes from file: 7070
_centroid_of_lof: Missing 1 of 5
Outlier removal generates 4 lof-voted candidates
_centroid_of_lof: Missing 5 of 37
Outlier removal generates 19 lof-voted candidates
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 18.855837106704712s
generate-reciprocal-rank-lof-graph-e

226it [4:26:37, 81.60s/it]

align-page-rank Time: 0.2533729076385498s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8860230445861816s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.379251003265381s
string-similarity-['jaro_winkler'] Time: 0.5045568943023682s
string-similarity-['levenshtein'] Time: 2.4489500522613525s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1219792366027832s
normalize-scores-des_cont_jaccard Time: 0.04261016845703125s
smallest-qnode-number Time: 0.35555601119995117s
mosaic-features Time: 0.01849222183227539s
creat-singleton-feature Time: 0.2238919734954834s
vote-by-classifier Time: 0.4426717758178711s
Qnodes to lookup: 7060
Qnodes from file: 6972
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 24.15262508392334s
generate-reciprocal-rank-lof-graph-embedding-score Time:

227it [4:29:29, 108.63s/it]

align-page-rank Time: 0.15388178825378418s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.39087986946105957s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.48580121994018555s
string-similarity-['jaro_winkler'] Time: 0.11603188514709473s
string-similarity-['levenshtein'] Time: 0.5303237438201904s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0469210147857666s
normalize-scores-des_cont_jaccard Time: 0.014005899429321289s
smallest-qnode-number Time: 0.10086297988891602s
mosaic-features Time: 0.007401227951049805s
creat-singleton-feature Time: 0.05951285362243652s
vote-by-classifier Time: 0.7331328392028809s
Qnodes to lookup: 1574
Qnodes from file: 1563
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-embedding Time: 14.295314073562622s
Command: ge

228it [4:29:54, 83.46s/it] 

align-page-rank Time: 0.14888811111450195s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9925167560577393s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.941851854324341s
string-similarity-['jaro_winkler'] Time: 0.4329380989074707s
string-similarity-['levenshtein'] Time: 2.223335027694702s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07692313194274902s
normalize-scores-des_cont_jaccard Time: 0.021965742111206055s
smallest-qnode-number Time: 0.22158598899841309s
mosaic-features Time: 0.012848854064941406s
creat-singleton-feature Time: 0.13488101959228516s
vote-by-classifier Time: 0.4215359687805176s
Qnodes to lookup: 4593
Qnodes from file: 4479
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 18.631067037582397s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.28582119941711426s
compute-tf-idf-class_count Time: 19.65160298347473s
compute-tf-idf-property_count Time: 20.892725944519043s
context-match Ti

229it [4:30:27, 68.57s/it]

align-page-rank Time: 0.2154862880706787s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.9208319187164307s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.385953187942505s
string-similarity-['jaro_winkler'] Time: 0.5792393684387207s
string-similarity-['levenshtein'] Time: 3.3383498191833496s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12323212623596191s
normalize-scores-des_cont_jaccard Time: 0.03930497169494629s
smallest-qnode-number Time: 0.7367649078369141s
mosaic-features Time: 0.020968914031982422s
creat-singleton-feature Time: 0.23027610778808594s
vote-by-classifier Time: 0.40198588371276855s
Qnodes to lookup: 2727
Qnodes from file: 2667
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 25.77085280418396s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4410717487335205s
compute-tf-idf-class_count Time: 27.168426990509033s
compute-tf-idf-property

230it [4:32:12, 79.39s/it]

align-page-rank Time: 0.20685601234436035s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.450589895248413s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.561712741851807s
string-similarity-['jaro_winkler'] Time: 0.8195199966430664s
string-similarity-['levenshtein'] Time: 5.40462589263916s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0837392807006836s
normalize-scores-des_cont_jaccard Time: 0.040160179138183594s
smallest-qnode-number Time: 0.23854613304138184s
mosaic-features Time: 0.019491910934448242s
creat-singleton-feature Time: 0.1935720443725586s
vote-by-classifier Time: 0.45972204208374023s
Qnodes to lookup: 5788
Qnodes from file: 5730
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 33 lof-voted candidates
score-using-embedding Time: 28.26453709602356s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3555941581726074s
compute-tf-idf-class_count T

231it [4:34:36, 98.74s/it]

Command: align-page-rank
Error Message:  Traceback (most recent call last):
  File "/Users/amandeep/Github/table-linker/tl/cli/align-page-rank.py", line 29, in run
    df = pd.read_csv(kwargs['input_file'], dtype=object)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 610, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 462, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 819, in __init__
    self._engine = self._make_engine(self.engine)
  File "/Users/amandeep/Github/table-linker/tl_env/lib/python3.9/site-packages/pandas/io/parsers.py", line 1050, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/Users/amandeep/Github/table-linker/tl_env/lib/

232it [4:35:00, 76.31s/it]

align-page-rank Time: 0.23639702796936035s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.418881893157959s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.973256826400757s
string-similarity-['jaro_winkler'] Time: 0.44955897331237793s
string-similarity-['levenshtein'] Time: 2.437164068222046s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06669282913208008s
normalize-scores-des_cont_jaccard Time: 0.03286409378051758s
smallest-qnode-number Time: 0.25218987464904785s
mosaic-features Time: 0.016062259674072266s
creat-singleton-feature Time: 0.15772390365600586s
vote-by-classifier Time: 0.9313070774078369s
Qnodes to lookup: 5603
Qnodes from file: 5467
Outlier removal generates 33 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 24.31308913230896s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.30957508087158203s
compute-tf-idf-class_coun

233it [4:36:47, 85.63s/it]

align-page-rank Time: 0.19214797019958496s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4755580425262451s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7678179740905762s
string-similarity-['jaro_winkler'] Time: 0.11895513534545898s
string-similarity-['levenshtein'] Time: 0.5651888847351074s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02766108512878418s
normalize-scores-des_cont_jaccard Time: 0.014262199401855469s
smallest-qnode-number Time: 0.24342584609985352s
mosaic-features Time: 0.006031036376953125s
creat-singleton-feature Time: 0.06606602668762207s
vote-by-classifier Time: 0.820436954498291s
Qnodes to lookup: 1977
Qnodes from file: 1939
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 14.207133769989014s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1187889575958252s
compute-tf-idf-class_count Time: 15.01311707496643s
compute-tf-idf-property_count Time: 15.17135500907898s
context-match T

234it [4:37:15, 68.27s/it]

align-page-rank Time: 0.0790548324584961s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.30463385581970215s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6140520572662354s
string-similarity-['jaro_winkler'] Time: 0.09432506561279297s
string-similarity-['levenshtein'] Time: 0.29555702209472656s
string-similarity-['jaccard:tokenizer=word'] Time: 0.022068023681640625s
normalize-scores-des_cont_jaccard Time: 0.0120849609375s
smallest-qnode-number Time: 0.06850790977478027s
mosaic-features Time: 0.004709005355834961s
creat-singleton-feature Time: 0.05916023254394531s
vote-by-classifier Time: 0.3892648220062256s
Qnodes to lookup: 1270
Qnodes from file: 1261
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 11.959941864013672s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.09668302536010742s
compute-tf-idf-class_count Time: 13.153327941894531s
compute-tf-idf-property_count Time: 12.464843034744263s
context-match 

235it [4:37:40, 55.41s/it]

align-page-rank Time: 0.05017685890197754s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9019308090209961s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6029129028320312s
string-similarity-['jaro_winkler'] Time: 0.20161199569702148s
string-similarity-['levenshtein'] Time: 1.1482429504394531s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0301971435546875s
normalize-scores-des_cont_jaccard Time: 0.014186859130859375s
smallest-qnode-number Time: 0.09047508239746094s
mosaic-features Time: 0.0061969757080078125s
creat-singleton-feature Time: 0.07061100006103516s
vote-by-classifier Time: 0.7619011402130127s
Qnodes to lookup: 1673
Qnodes from file: 1642
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 15.40809416770935s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13581585884094238s
compute-tf-idf-class_count Time: 16.400799989700317s
compute-tf-idf-property_count Time: 16.56009817123413s
context-match 

236it [4:38:07, 46.73s/it]

align-page-rank Time: 0.2608621120452881s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6794838905334473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.9690358638763428s
string-similarity-['jaro_winkler'] Time: 0.349484920501709s
string-similarity-['levenshtein'] Time: 1.6684679985046387s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1058187484741211s
normalize-scores-des_cont_jaccard Time: 0.023782730102539062s
smallest-qnode-number Time: 0.2904670238494873s
mosaic-features Time: 0.01578497886657715s
creat-singleton-feature Time: 0.3991689682006836s
vote-by-classifier Time: 1.6198320388793945s
Qnodes to lookup: 3332
Qnodes from file: 3304
Outlier removal generates 38 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 20.894189834594727s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3585968017578125s
compute-tf-idf-class_count Ti

237it [4:39:12, 52.25s/it]

align-page-rank Time: 0.11033105850219727s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7975540161132812s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.523648023605347s
string-similarity-['jaro_winkler'] Time: 0.5097548961639404s
string-similarity-['levenshtein'] Time: 3.5064809322357178s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07129192352294922s
normalize-scores-des_cont_jaccard Time: 0.023756027221679688s
smallest-qnode-number Time: 0.16692709922790527s
mosaic-features Time: 0.012471199035644531s
creat-singleton-feature Time: 0.11625504493713379s
vote-by-classifier Time: 0.4311857223510742s
Qnodes to lookup: 3533
Qnodes from file: 3336
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 23.14812397956848s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.33067798614501953s
compute-tf-idf-class_count Time: 23.176185846328735s
compute-tf-idf-prope

238it [4:40:24, 58.15s/it]

align-page-rank Time: 0.2778801918029785s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.557192802429199s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.021345138549805s
string-similarity-['jaro_winkler'] Time: 0.5354843139648438s
string-similarity-['levenshtein'] Time: 2.7517619132995605s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08035707473754883s
normalize-scores-des_cont_jaccard Time: 0.036826133728027344s
smallest-qnode-number Time: 0.2931180000305176s
mosaic-features Time: 0.017394065856933594s
creat-singleton-feature Time: 0.18871092796325684s
vote-by-classifier Time: 0.9595038890838623s
Qnodes to lookup: 6020
Qnodes from file: 5955
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 23.478229999542236s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3569059371948242s
compute-tf-idf-class_count

239it [4:41:57, 68.62s/it]

align-page-rank Time: 0.3773970603942871s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1868457794189453s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.09687614440918s
string-similarity-['jaro_winkler'] Time: 0.6329178810119629s
string-similarity-['levenshtein'] Time: 3.922455072402954s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12781476974487305s
normalize-scores-des_cont_jaccard Time: 0.03631329536437988s
smallest-qnode-number Time: 0.34263086318969727s
mosaic-features Time: 0.014535903930664062s
creat-singleton-feature Time: 0.25334811210632324s
vote-by-classifier Time: 0.7269251346588135s
Qnodes to lookup: 3736
Qnodes from file: 3673
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 24.024866104125977s
generate-reciprocal-rank-lof-graph-embedding-score Tim

240it [4:44:12, 88.57s/it]

align-page-rank Time: 0.33887314796447754s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5689730644226074s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.732997179031372s
string-similarity-['jaro_winkler'] Time: 0.7920289039611816s
string-similarity-['levenshtein'] Time: 4.634001016616821s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1569690704345703s
normalize-scores-des_cont_jaccard Time: 0.03232908248901367s
smallest-qnode-number Time: 0.34542179107666016s
mosaic-features Time: 0.016571998596191406s
creat-singleton-feature Time: 0.23225688934326172s
vote-by-classifier Time: 1.0231599807739258s
Qnodes to lookup: 5579
Qnodes from file: 5443
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 42 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
_centroid_of_lof: Missing 1 of 31
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 29.023658990859985s
generate-reciprocal

241it [4:47:08, 114.91s/it]

align-page-rank Time: 0.23230600357055664s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.4612627029418945s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.397442102432251s
string-similarity-['jaro_winkler'] Time: 0.9805848598480225s
string-similarity-['levenshtein'] Time: 9.85358190536499s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13083386421203613s
normalize-scores-des_cont_jaccard Time: 0.036287784576416016s
smallest-qnode-number Time: 0.3613016605377197s
mosaic-features Time: 0.018064022064208984s
creat-singleton-feature Time: 0.22128891944885254s
vote-by-classifier Time: 1.484757900238037s
Qnodes to lookup: 1141
Qnodes from file: 1141
Outlier removal generates 1008 lof-voted candidates
Outlier removal generates 1038 lof-voted candidates
Outlier removal generates 1052 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 37.47725582122803s
generate-reciprocal-rank-lof-graph-embedding-scor

242it [4:48:47, 110.09s/it]

align-page-rank Time: 0.5032601356506348s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.9442172050476074s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.4539899826049805s
string-similarity-['jaro_winkler'] Time: 0.5878026485443115s
string-similarity-['levenshtein'] Time: 3.0926151275634766s
string-similarity-['jaccard:tokenizer=word'] Time: 0.055151939392089844s
normalize-scores-des_cont_jaccard Time: 0.02063584327697754s
smallest-qnode-number Time: 0.22573494911193848s
mosaic-features Time: 0.01007699966430664s
creat-singleton-feature Time: 0.13708114624023438s
vote-by-classifier Time: 0.4499058723449707s
Qnodes to lookup: 1065
Qnodes from file: 1045
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 28 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 19.220705032348633s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.36844301223754883s
compute-tf-idf-class_cou

243it [4:49:51, 96.17s/it] 

align-page-rank Time: 0.1758098602294922s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.0828940868377686s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.889949321746826s
string-similarity-['jaro_winkler'] Time: 0.5117671489715576s
string-similarity-['levenshtein'] Time: 3.203503131866455s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07789897918701172s
normalize-scores-des_cont_jaccard Time: 0.030098915100097656s
smallest-qnode-number Time: 0.23917317390441895s
mosaic-features Time: 0.015673160552978516s
creat-singleton-feature Time: 0.15786218643188477s
vote-by-classifier Time: 0.40819287300109863s
Qnodes to lookup: 3577
Qnodes from file: 3511
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 30 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
score-using-embedding Time: 26.718590021133423s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3252410888671875s
compute-tf-idf-class_cou

244it [4:51:22, 94.50s/it]

align-page-rank Time: 0.2550790309906006s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.039462089538574s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.298555850982666s
string-similarity-['jaro_winkler'] Time: 0.7438299655914307s
string-similarity-['levenshtein'] Time: 5.17547607421875s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07228708267211914s
normalize-scores-des_cont_jaccard Time: 0.03146195411682129s
smallest-qnode-number Time: 0.2618598937988281s
mosaic-features Time: 0.015469074249267578s
creat-singleton-feature Time: 0.16831207275390625s
vote-by-classifier Time: 0.4263629913330078s
Qnodes to lookup: 2193
Qnodes from file: 2170
Outlier removal generates 72 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 23.958417177200317s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.33014678955078125s
compute-tf-idf-class_count T

245it [4:52:06, 79.46s/it]

align-page-rank Time: 0.2607688903808594s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9226799011230469s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5397439002990723s
string-similarity-['jaro_winkler'] Time: 0.26548004150390625s
string-similarity-['levenshtein'] Time: 0.8223917484283447s
string-similarity-['jaccard:tokenizer=word'] Time: 0.054685115814208984s
normalize-scores-des_cont_jaccard Time: 0.024884939193725586s
smallest-qnode-number Time: 0.4937558174133301s
mosaic-features Time: 0.011320114135742188s
creat-singleton-feature Time: 0.6524946689605713s
vote-by-classifier Time: 0.9953887462615967s
Qnodes to lookup: 1038
Qnodes from file: 1015
Outlier removal generates 335 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 15.557198762893677s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.22890329360961914s
compute-tf-idf-class_count Time: 17.618154048919678s
compute-tf-idf-pro

246it [4:52:43, 66.68s/it]

align-page-rank Time: 0.0898289680480957s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5034101009368896s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2606539726257324s
string-similarity-['jaro_winkler'] Time: 0.09814572334289551s
string-similarity-['levenshtein'] Time: 0.39925622940063477s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04001617431640625s
normalize-scores-des_cont_jaccard Time: 0.013603925704956055s
smallest-qnode-number Time: 0.09405207633972168s
mosaic-features Time: 0.006525993347167969s
creat-singleton-feature Time: 0.0692129135131836s
vote-by-classifier Time: 1.5325062274932861s
Qnodes to lookup: 1150
Qnodes from file: 1150
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 15.289183139801025s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12441205978393555s
compute-tf-idf-class_count Time: 16.875592947006226s
compute-tf-idf-property_count Time: 16.035022974014282s
context-match

247it [4:53:15, 56.34s/it]

align-page-rank Time: 0.1463639736175537s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.33794403076171875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6026840209960938s
string-similarity-['jaro_winkler'] Time: 0.12239313125610352s
string-similarity-['levenshtein'] Time: 0.3711361885070801s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03017401695251465s
normalize-scores-des_cont_jaccard Time: 0.014152288436889648s
smallest-qnode-number Time: 0.5149240493774414s
mosaic-features Time: 0.0059812068939208984s
creat-singleton-feature Time: 0.07197713851928711s
vote-by-classifier Time: 0.8953070640563965s
Qnodes to lookup: 2238
Qnodes from file: 2184
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 14.647101879119873s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12232208251953125s
compute-tf-idf-class_count Time: 16.248600959777832s
compute-tf-idf-property_count Time: 16.42157292366028s
context-matc

248it [4:53:43, 47.69s/it]

align-page-rank Time: 0.21445107460021973s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8971481323242188s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9060680866241455s
string-similarity-['jaro_winkler'] Time: 0.22596192359924316s
string-similarity-['levenshtein'] Time: 0.811262845993042s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04149985313415527s
normalize-scores-des_cont_jaccard Time: 0.015108108520507812s
smallest-qnode-number Time: 0.1508491039276123s
mosaic-features Time: 0.007047176361083984s
creat-singleton-feature Time: 0.11456513404846191s
vote-by-classifier Time: 0.39641308784484863s
Qnodes to lookup: 2655
Qnodes from file: 2614
Outlier removal generates 8 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
score-using-

249it [4:54:09, 41.25s/it]

align-page-rank Time: 0.13717865943908691s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2031610012054443s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.24122190475463867s
string-similarity-['jaro_winkler'] Time: 0.22333121299743652s
string-similarity-['levenshtein'] Time: 1.9434471130371094s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027547121047973633s
normalize-scores-des_cont_jaccard Time: 0.013562202453613281s
smallest-qnode-number Time: 0.09862780570983887s
mosaic-features Time: 0.005772113800048828s
creat-singleton-feature Time: 0.0646662712097168s
vote-by-classifier Time: 0.40538501739501953s
Qnodes to lookup: 262
Qnodes from file: 262
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 15.451227903366089s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4857041835784912s
compute-tf-idf-class_count Time: 16.99570894241333s
compute-tf-idf-property_count Time: 16.341061115264893s
context-match

250it [4:54:41, 38.62s/it]

align-page-rank Time: 0.23737406730651855s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.5275559425354s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.542292356491089s
string-similarity-['jaro_winkler'] Time: 1.314465045928955s
string-similarity-['levenshtein'] Time: 8.395809173583984s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12345504760742188s
normalize-scores-des_cont_jaccard Time: 0.039381980895996094s
smallest-qnode-number Time: 0.7487118244171143s
mosaic-features Time: 0.022922992706298828s
creat-singleton-feature Time: 0.23693513870239258s
vote-by-classifier Time: 0.45618486404418945s
Qnodes to lookup: 4499
Qnodes from file: 4398
Outlier removal generates 40 lof-voted candidates
Outlier removal generates 46 lof-voted candidates
Outlier removal generates 5 lof-voted candidates
score-using-embedding Time: 43.64747881889343s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5347721576690674s
compute-tf-idf-class_count Ti

251it [4:58:24, 93.72s/it]

align-page-rank Time: 0.39192819595336914s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.293267011642456s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.983755111694336s
string-similarity-['jaro_winkler'] Time: 0.4878883361816406s
string-similarity-['levenshtein'] Time: 1.7020258903503418s
string-similarity-['jaccard:tokenizer=word'] Time: 0.22018194198608398s
normalize-scores-des_cont_jaccard Time: 0.035588979721069336s
smallest-qnode-number Time: 0.5487971305847168s
mosaic-features Time: 0.019284963607788086s
creat-singleton-feature Time: 0.24266719818115234s
vote-by-classifier Time: 0.4207491874694824s
Qnodes to lookup: 7513
Qnodes from file: 7434
Outlier removal generates 42 lof-voted candidates
score-using-embedding Time: 16.406389951705933s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5163731575012207s
compute-tf-idf-class_count Time: 18.812413930892944s
compute-tf-idf-property_count Time: 19.035832166671753s
context-match Ti

252it [4:59:32, 86.12s/it]

align-page-rank Time: 0.11113190650939941s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.345270872116089s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7262039184570312s
string-similarity-['jaro_winkler'] Time: 0.7291131019592285s
string-similarity-['levenshtein'] Time: 4.673785209655762s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06487512588500977s
normalize-scores-des_cont_jaccard Time: 0.02030181884765625s
smallest-qnode-number Time: 0.17454099655151367s
mosaic-features Time: 0.010296106338500977s
creat-singleton-feature Time: 0.10152602195739746s
vote-by-classifier Time: 0.3947019577026367s
Qnodes to lookup: 265
Qnodes from file: 258
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 20.49338984489441s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.39824604988098145s
compute-tf-idf-class_count Time: 21.450059175491333s
compute-tf-idf-property_

253it [5:00:27, 76.90s/it]

align-page-rank Time: 0.1727607250213623s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8168559074401855s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.156047821044922s
string-similarity-['jaro_winkler'] Time: 0.27257418632507324s
string-similarity-['levenshtein'] Time: 1.8674449920654297s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03460979461669922s
normalize-scores-des_cont_jaccard Time: 0.014895200729370117s
smallest-qnode-number Time: 0.09600996971130371s
mosaic-features Time: 0.007547855377197266s
creat-singleton-feature Time: 0.07338523864746094s
vote-by-classifier Time: 0.5479609966278076s
Qnodes to lookup: 1760
Qnodes from file: 1740
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 18.2173912525177s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13591599464416504s
compute-tf-idf-class_count Time: 18.86795711517334s
compute-tf-idf-property_count Time: 18.96092963218689s
context-match Tim

254it [5:00:58, 63.03s/it]

align-page-rank Time: 6.779977798461914s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.958400011062622s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.033483982086182s
string-similarity-['jaro_winkler'] Time: 3.477020263671875s
string-similarity-['levenshtein'] Time: 18.48689889907837s
string-similarity-['jaccard:tokenizer=word'] Time: 0.6485381126403809s
normalize-scores-des_cont_jaccard Time: 0.17571783065795898s
smallest-qnode-number Time: 6.094740867614746s
mosaic-features Time: 0.0901939868927002s
creat-singleton-feature Time: 1.7185900211334229s
vote-by-classifier Time: 0.4753148555755615s
Qnodes to lookup: 42944
Qnodes from file: 42678
Outlier removal generates 363 lof-voted candidates
score-using-embedding Time: 81.63600492477417s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.8978700637817383s
compute-tf-idf-class_count Time: 87.74970483779907s
compute-tf-idf-property_count Time: 90.30502891540527s
context-match Time: 241.

255it [5:06:43, 147.65s/it]

align-page-rank Time: 0.1060478687286377s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.431187391281128s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.6726651191711426s
string-similarity-['jaro_winkler'] Time: 0.3039700984954834s
string-similarity-['levenshtein'] Time: 1.7730677127838135s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05593299865722656s
normalize-scores-des_cont_jaccard Time: 0.023840904235839844s
smallest-qnode-number Time: 0.4323289394378662s
mosaic-features Time: 0.013365030288696289s
creat-singleton-feature Time: 0.11999106407165527s
vote-by-classifier Time: 0.4088270664215088s
Qnodes to lookup: 4117
Qnodes from file: 4028
Column_vector_stragtegy centroid_of_lof failed
Outlier removal generates 18 lof-voted candidates
score-using-embedding Time: 18.348910093307495s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6975739002227783s
compute-tf-idf-class_count Time: 20.08515691757202s
compute-tf-idf-property_co

256it [5:07:39, 120.03s/it]

align-page-rank Time: 0.22019195556640625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.248337984085083s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2949669361114502s
string-similarity-['jaro_winkler'] Time: 0.08622431755065918s
string-similarity-['levenshtein'] Time: 0.25675177574157715s
string-similarity-['jaccard:tokenizer=word'] Time: 0.042122840881347656s
normalize-scores-des_cont_jaccard Time: 0.013263940811157227s
smallest-qnode-number Time: 0.08521413803100586s
mosaic-features Time: 0.0052471160888671875s
creat-singleton-feature Time: 0.06762886047363281s
vote-by-classifier Time: 0.8299698829650879s
Qnodes to lookup: 1966
Qnodes from file: 1928
Outlier removal generates 9 lof-voted candidates
score-using-embedding Time: 13.21235704421997s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12253713607788086s
compute-tf-idf-class_count Time: 15.04805588722229s
compute-tf-idf-property_count Time: 16.028002738952637s
context-matc

257it [5:08:07, 92.42s/it] 

align-page-rank Time: 0.11197590827941895s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4436910152435303s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.41675519943237305s
string-similarity-['jaro_winkler'] Time: 0.11941409111022949s
string-similarity-['levenshtein'] Time: 0.516822099685669s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06099081039428711s
normalize-scores-des_cont_jaccard Time: 0.015153169631958008s
smallest-qnode-number Time: 0.10164785385131836s
mosaic-features Time: 0.007737874984741211s
creat-singleton-feature Time: 0.08468818664550781s
vote-by-classifier Time: 1.1032030582427979s
Qnodes to lookup: 2340
Qnodes from file: 2323
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 15.474208116531372s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.48185133934020996s
compute-tf-idf-class_count Time: 17.261303186416626s
compute-tf-idf-property_count Time: 18.39839792251587s
context-matc

258it [5:08:41, 74.90s/it]

align-page-rank Time: 0.19228291511535645s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6144828796386719s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.08624005317688s
string-similarity-['jaro_winkler'] Time: 0.4182770252227783s
string-similarity-['levenshtein'] Time: 1.8511910438537598s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07877421379089355s
normalize-scores-des_cont_jaccard Time: 0.034093618392944336s
smallest-qnode-number Time: 0.2526540756225586s
mosaic-features Time: 0.015804052352905273s
creat-singleton-feature Time: 0.181779146194458s
vote-by-classifier Time: 0.8411509990692139s
Qnodes to lookup: 4077
Qnodes from file: 4015
Outlier removal generates 19 lof-voted candidates
No pseudo GT available, using all exact matches as high precision
_centroid_of_lof: Missing 20 of 20
Column_vector_stragtegy centroid_of_lof failed
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
Outlier removal 

259it [5:09:13, 62.25s/it]

align-page-rank Time: 0.15882182121276855s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2028229236602783s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8910927772521973s
string-similarity-['jaro_winkler'] Time: 0.4939239025115967s
string-similarity-['levenshtein'] Time: 2.696467161178589s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06761384010314941s
normalize-scores-des_cont_jaccard Time: 0.02598285675048828s
smallest-qnode-number Time: 0.16846394538879395s
mosaic-features Time: 0.01274418830871582s
creat-singleton-feature Time: 0.14333009719848633s
vote-by-classifier Time: 0.39354801177978516s
Qnodes to lookup: 3593
Qnodes from file: 3543
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 18.93057894706726s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.29770612716674805s
compute-tf-idf-class_count Time: 19.778486013412476s
compute-tf-idf-proper

260it [5:10:14, 61.65s/it]

align-page-rank Time: 0.16939997673034668s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4013230800628662s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5161380767822266s
string-similarity-['jaro_winkler'] Time: 0.37606382369995117s
string-similarity-['levenshtein'] Time: 1.3760371208190918s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08141398429870605s
normalize-scores-des_cont_jaccard Time: 0.033937931060791016s
smallest-qnode-number Time: 0.24167203903198242s
mosaic-features Time: 0.025148868560791016s
creat-singleton-feature Time: 0.18866372108459473s
vote-by-classifier Time: 0.41097283363342285s
Qnodes to lookup: 4531
Qnodes from file: 4459
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
score-using-embedding Time: 18.105886936187744s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.35106778144836426s
compute-tf-idf-clas

261it [5:11:28, 65.41s/it]

align-page-rank Time: 0.25073885917663574s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.5088489055633545s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.8223960399627686s
string-similarity-['jaro_winkler'] Time: 0.5785379409790039s
string-similarity-['levenshtein'] Time: 3.0042009353637695s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08602309226989746s
normalize-scores-des_cont_jaccard Time: 0.035418033599853516s
smallest-qnode-number Time: 0.2552640438079834s
mosaic-features Time: 0.019276857376098633s
creat-singleton-feature Time: 0.1927807331085205s
vote-by-classifier Time: 0.38941216468811035s
Qnodes to lookup: 6573
Qnodes from file: 6458
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
score-using-embedding Time: 23.973586797714233s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.40950870513916016s
compute-tf-idf-class_c

262it [5:13:12, 76.90s/it]

align-page-rank Time: 0.14589905738830566s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5425848960876465s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9591388702392578s
string-similarity-['jaro_winkler'] Time: 0.14734387397766113s
string-similarity-['levenshtein'] Time: 0.7046830654144287s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02978682518005371s
normalize-scores-des_cont_jaccard Time: 0.01390981674194336s
smallest-qnode-number Time: 0.0916590690612793s
mosaic-features Time: 0.006211042404174805s
creat-singleton-feature Time: 0.06777787208557129s
vote-by-classifier Time: 1.4738428592681885s
Qnodes to lookup: 1884
Qnodes from file: 1842
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 15.148549795150757s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12196087837219238s
compute-tf-idf-class_count Time: 16.48134970664978s
compute-tf-idf-property_count Time: 16.64103603363037s
context-match T

263it [5:13:40, 62.30s/it]

align-page-rank Time: 0.12532401084899902s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3384711742401123s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5196290016174316s
string-similarity-['jaro_winkler'] Time: 0.1071310043334961s
string-similarity-['levenshtein'] Time: 0.3529531955718994s
string-similarity-['jaccard:tokenizer=word'] Time: 0.038902997970581055s
normalize-scores-des_cont_jaccard Time: 0.014017105102539062s
smallest-qnode-number Time: 0.09305071830749512s
mosaic-features Time: 0.00693202018737793s
creat-singleton-feature Time: 0.0708918571472168s
vote-by-classifier Time: 0.822098970413208s
Qnodes to lookup: 1874
Qnodes from file: 1860
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 13.320424318313599s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.47615909576416016s
compute-tf-idf-class_count Time: 14.240204095840454s
compute-tf-idf-property_count Time: 14.451914072036743s
context-match 

264it [5:14:09, 52.51s/it]

align-page-rank Time: 0.16371488571166992s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5380511283874512s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3179051876068115s
string-similarity-['jaro_winkler'] Time: 0.14799284934997559s
string-similarity-['levenshtein'] Time: 0.6184308528900146s
string-similarity-['jaccard:tokenizer=word'] Time: 0.052375078201293945s
normalize-scores-des_cont_jaccard Time: 0.011923074722290039s
smallest-qnode-number Time: 0.5424349308013916s
mosaic-features Time: 0.006407976150512695s
creat-singleton-feature Time: 0.07116127014160156s
vote-by-classifier Time: 0.857032299041748s
Qnodes to lookup: 935
Qnodes from file: 925
Outlier removal generates 27 lof-voted candidates
score-using-embedding Time: 15.451089859008789s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12458610534667969s
compute-tf-idf-class_count Time: 15.756209373474121s
compute-tf-idf-property_count Time: 15.048249006271362s
context-match 

265it [5:14:42, 46.42s/it]

align-page-rank Time: 0.27048182487487793s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.8462836742401123s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.0875320434570312s
string-similarity-['jaro_winkler'] Time: 0.47731995582580566s
string-similarity-['levenshtein'] Time: 1.8309900760650635s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08251500129699707s
normalize-scores-des_cont_jaccard Time: 0.0348970890045166s
smallest-qnode-number Time: 0.2630641460418701s
mosaic-features Time: 0.017403841018676758s
creat-singleton-feature Time: 0.19063806533813477s
vote-by-classifier Time: 0.671076774597168s
Qnodes to lookup: 3991
Qnodes from file: 3901
Outlier removal generates 175 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
Outlier removal generates 230 lof-voted candidates
score-using-embedding Time: 18.95900583267212s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4077901840209961s
compute-tf-idf-class_cou

266it [5:16:13, 59.87s/it]

align-page-rank Time: 0.0689852237701416s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6757149696350098s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6294922828674316s
string-similarity-['jaro_winkler'] Time: 0.14021587371826172s
string-similarity-['levenshtein'] Time: 0.5102348327636719s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05646705627441406s
normalize-scores-des_cont_jaccard Time: 0.016646862030029297s
smallest-qnode-number Time: 0.7097330093383789s
mosaic-features Time: 0.008265018463134766s
creat-singleton-feature Time: 0.09336113929748535s
vote-by-classifier Time: 0.7673208713531494s
Qnodes to lookup: 1714
Qnodes from file: 1676
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 14.464396715164185s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1469099521636963s
compute-tf-idf-class_count Time: 15.510531902313232s
compute-tf-idf-property_count Time: 16.70331311225891s
context-match T

267it [5:16:38, 49.55s/it]

align-page-rank Time: 0.09834694862365723s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2499430179595947s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.8151211738586426s
string-similarity-['jaro_winkler'] Time: 0.15121006965637207s
string-similarity-['levenshtein'] Time: 0.9642558097839355s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02655506134033203s
normalize-scores-des_cont_jaccard Time: 0.008939743041992188s
smallest-qnode-number Time: 0.058541059494018555s
mosaic-features Time: 0.003416776657104492s
creat-singleton-feature Time: 0.037072181701660156s
vote-by-classifier Time: 0.3772399425506592s
Qnodes to lookup: 1135
Qnodes from file: 1085
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 15.460190773010254s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.09832596778869629s
compute-tf-idf-class_count Time: 16.813756942749023s
compute-tf-idf-property_count Time: 16.945706129074097s
context-ma

268it [5:17:29, 49.87s/it]

align-page-rank Time: 0.3455798625946045s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.201807975769043s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.743687868118286s
string-similarity-['jaro_winkler'] Time: 0.8091120719909668s
string-similarity-['levenshtein'] Time: 3.8123459815979004s
string-similarity-['jaccard:tokenizer=word'] Time: 0.19301509857177734s
normalize-scores-des_cont_jaccard Time: 0.054997920989990234s
smallest-qnode-number Time: 0.8740661144256592s
mosaic-features Time: 0.02383589744567871s
creat-singleton-feature Time: 0.28415894508361816s
vote-by-classifier Time: 0.426893949508667s
Qnodes to lookup: 4968
Qnodes from file: 4745
Outlier removal generates 21 lof-voted candidates
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 26.62884497642517s
genera

269it [5:19:53, 77.97s/it]

align-page-rank Time: 0.14787006378173828s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3832077980041504s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3061492443084717s
string-similarity-['jaro_winkler'] Time: 0.11974883079528809s
string-similarity-['levenshtein'] Time: 0.359022855758667s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03278994560241699s
normalize-scores-des_cont_jaccard Time: 0.015231847763061523s
smallest-qnode-number Time: 0.09286189079284668s
mosaic-features Time: 0.006867885589599609s
creat-singleton-feature Time: 0.07670903205871582s
vote-by-classifier Time: 0.3946568965911865s
Qnodes to lookup: 2626
Qnodes from file: 2593
Outlier removal generates 3 lof-voted candidates
score-using-embedding Time: 13.484605073928833s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.15483307838439941s
compute-tf-idf-class_count Time: 15.120137929916382s
compute-tf-idf-property_count Time: 15.333375215530396s
context-match

270it [5:20:21, 63.16s/it]

align-page-rank Time: 0.19574284553527832s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.35631608963012695s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7345578670501709s
string-similarity-['jaro_winkler'] Time: 0.12597298622131348s
string-similarity-['levenshtein'] Time: 0.610846757888794s
string-similarity-['jaccard:tokenizer=word'] Time: 0.025216102600097656s
normalize-scores-des_cont_jaccard Time: 0.012843847274780273s
smallest-qnode-number Time: 0.09402203559875488s
mosaic-features Time: 0.005285024642944336s
creat-singleton-feature Time: 0.0635230541229248s
vote-by-classifier Time: 0.797792911529541s
Qnodes to lookup: 1593
Qnodes from file: 1559
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 13.733940839767456s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11136603355407715s
compute-tf-idf-class_count Time: 13.336781024932861s
compute-tf-idf-property_count Time: 14.432904958724976s
context-matc

271it [5:20:48, 52.17s/it]

align-page-rank Time: 0.17780804634094238s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5922200679779053s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7388782501220703s
string-similarity-['jaro_winkler'] Time: 0.4397411346435547s
string-similarity-['levenshtein'] Time: 2.060947895050049s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09847235679626465s
normalize-scores-des_cont_jaccard Time: 0.02252793312072754s
smallest-qnode-number Time: 0.22718596458435059s
mosaic-features Time: 0.015218973159790039s
creat-singleton-feature Time: 0.2906198501586914s
vote-by-classifier Time: 0.4376199245452881s
Qnodes to lookup: 2646
Qnodes from file: 2596
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 6 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 15.717830181121826s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3678920269012451s
compute-tf-idf-class_count

272it [5:21:52, 55.85s/it]

align-page-rank Time: 0.20432400703430176s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4340548515319824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9060938358306885s
string-similarity-['jaro_winkler'] Time: 0.12961888313293457s
string-similarity-['levenshtein'] Time: 0.38790011405944824s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04886317253112793s
normalize-scores-des_cont_jaccard Time: 0.015501022338867188s
smallest-qnode-number Time: 0.12341022491455078s
mosaic-features Time: 0.007423877716064453s
creat-singleton-feature Time: 0.09019279479980469s
vote-by-classifier Time: 0.9408519268035889s
Qnodes to lookup: 1028
Qnodes from file: 1017
Outlier removal generates 6 lof-voted candidates
score-using-embedding Time: 14.38416314125061s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1873009204864502s
compute-tf-idf-class_count Time: 15.559462785720825s
compute-tf-idf-property_count Time: 16.008560180664062s
context-match

273it [5:22:21, 47.79s/it]

align-page-rank Time: 0.4097592830657959s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.135011196136475s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.034082889556885s
string-similarity-['jaro_winkler'] Time: 1.1563599109649658s
string-similarity-['levenshtein'] Time: 10.814824342727661s
string-similarity-['jaccard:tokenizer=word'] Time: 0.3039219379425049s
normalize-scores-des_cont_jaccard Time: 0.05892777442932129s
smallest-qnode-number Time: 0.9194297790527344s
mosaic-features Time: 0.02908492088317871s
creat-singleton-feature Time: 0.3445456027984619s
vote-by-classifier Time: 0.9061169624328613s
Qnodes to lookup: 2123
Qnodes from file: 2035
Outlier removal generates 25 lof-voted candidates
Outlier removal generates 23 lof-voted candidates
_centroid_of_lof: Missing 5 of 40
Outlier removal generates 23 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
_centroid_of_lof: Missing 3 of 54
Outlier removal generates 31 lof-vote

274it [5:29:41, 165.34s/it]

align-page-rank Time: 0.1676311492919922s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.552670955657959s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6399719715118408s
string-similarity-['jaro_winkler'] Time: 0.18151283264160156s
string-similarity-['levenshtein'] Time: 0.7110278606414795s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05362200736999512s
normalize-scores-des_cont_jaccard Time: 0.021591901779174805s
smallest-qnode-number Time: 0.18730998039245605s
mosaic-features Time: 0.010342121124267578s
creat-singleton-feature Time: 0.10665202140808105s
vote-by-classifier Time: 0.41988301277160645s
Qnodes to lookup: 2378
Qnodes from file: 2348
Outlier removal generates 11 lof-voted candidates
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 15.607625961303711s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.19597697257995605s
compute-tf-idf-class_count Time: 16.225852966308594s
compute-tf-idf-pro

275it [5:30:17, 126.66s/it]

align-page-rank Time: 0.08275294303894043s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.29805898666381836s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.693199872970581s
string-similarity-['jaro_winkler'] Time: 0.1018218994140625s
string-similarity-['levenshtein'] Time: 0.3862321376800537s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03232121467590332s
normalize-scores-des_cont_jaccard Time: 0.014724969863891602s
smallest-qnode-number Time: 0.09816336631774902s
mosaic-features Time: 0.006473064422607422s
creat-singleton-feature Time: 0.06854391098022461s
vote-by-classifier Time: 0.4518120288848877s
Qnodes to lookup: 2236
Qnodes from file: 2183
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 13.475030183792114s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1331470012664795s
compute-tf-idf-class_count Time: 15.329699039459229s
compute-tf-idf-property_count Time: 15.468814849853516s
context-match 

276it [5:30:44, 96.77s/it] 

align-page-rank Time: 0.20531725883483887s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.40226006507873535s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4251542091369629s
string-similarity-['jaro_winkler'] Time: 0.14055109024047852s
string-similarity-['levenshtein'] Time: 0.5555520057678223s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04138302803039551s
normalize-scores-des_cont_jaccard Time: 0.014274120330810547s
smallest-qnode-number Time: 0.08899188041687012s
mosaic-features Time: 0.006504058837890625s
creat-singleton-feature Time: 0.07090187072753906s
vote-by-classifier Time: 0.4246981143951416s
Qnodes to lookup: 1063
Qnodes from file: 1046
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 12.154382705688477s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.15174126625061035s
compute-tf-idf-class_count Time: 13.830802202224731s
compute-tf-idf-property_count Time: 13.978029012680054s
context-ma

277it [5:31:08, 74.99s/it]

align-page-rank Time: 0.21555519104003906s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.30649805068969727s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7777371406555176s
string-similarity-['jaro_winkler'] Time: 0.10880208015441895s
string-similarity-['levenshtein'] Time: 0.40306591987609863s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03772902488708496s
normalize-scores-des_cont_jaccard Time: 0.01584005355834961s
smallest-qnode-number Time: 0.7316832542419434s
mosaic-features Time: 0.00680994987487793s
creat-singleton-feature Time: 0.08141613006591797s
vote-by-classifier Time: 1.6935629844665527s
Qnodes to lookup: 2378
Qnodes from file: 2353
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 15.50611400604248s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1292262077331543s
compute-tf-idf-class_count Time: 16.97418189048767s
compute-tf-idf-property_count Time: 17.153386116027832s
context-match T

278it [5:31:36, 60.83s/it]

align-page-rank Time: 0.4022848606109619s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.3935911655426025s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.342243194580078s
string-similarity-['jaro_winkler'] Time: 1.1303789615631104s
string-similarity-['levenshtein'] Time: 5.272313117980957s
string-similarity-['jaccard:tokenizer=word'] Time: 0.29571104049682617s
normalize-scores-des_cont_jaccard Time: 0.06116199493408203s
smallest-qnode-number Time: 0.6098580360412598s
mosaic-features Time: 0.035067081451416016s
creat-singleton-feature Time: 0.42483091354370117s
vote-by-classifier Time: 1.156447172164917s
Qnodes to lookup: 5086
Qnodes from file: 4960
Outlier removal generates 20 lof-voted candidates
_centroid_of_lof: Missing 1 of 44
Outlier removal generates 26 lof-voted candidates
Outlier removal generates 56 lof-voted candidates
Outlier removal generates 41 lof-voted candidates
Outlier removal generates 41 lof-voted candidates
Outlier removal gen

279it [5:36:44, 135.00s/it]

align-page-rank Time: 0.1611649990081787s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.5614190101623535s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.6906819343566895s
string-similarity-['jaro_winkler'] Time: 0.8424358367919922s
string-similarity-['levenshtein'] Time: 6.047505140304565s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06185793876647949s
normalize-scores-des_cont_jaccard Time: 0.02397894859313965s
smallest-qnode-number Time: 0.21174192428588867s
mosaic-features Time: 0.014487028121948242s
creat-singleton-feature Time: 0.12956809997558594s
vote-by-classifier Time: 0.5994529724121094s
Qnodes to lookup: 1577
Qnodes from file: 1553
Outlier removal generates 37 lof-voted candidates
Outlier removal generates 40 lof-voted candidates
score-using-embedding Time: 29.827450037002563s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5903909206390381s
compute-tf-idf-class_count Time: 31.112159967422485s
compute-tf-idf-propert

280it [5:37:43, 112.26s/it]

align-page-rank Time: 0.22608709335327148s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.44121384620666504s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8421850204467773s
string-similarity-['jaro_winkler'] Time: 0.12986087799072266s
string-similarity-['levenshtein'] Time: 0.5625240802764893s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027518033981323242s
normalize-scores-des_cont_jaccard Time: 0.01298213005065918s
smallest-qnode-number Time: 0.0967860221862793s
mosaic-features Time: 0.005789756774902344s
creat-singleton-feature Time: 0.06437087059020996s
vote-by-classifier Time: 1.526939868927002s
Qnodes to lookup: 1288
Qnodes from file: 1269
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 14.312211275100708s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11944866180419922s
compute-tf-idf-class_count Time: 15.727173089981079s
compute-tf-idf-property_count Time: 15.707933902740479s
context-matc

281it [5:38:10, 86.53s/it] 

align-page-rank Time: 0.33217501640319824s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.6530280113220215s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 13.959993124008179s
string-similarity-['jaro_winkler'] Time: 0.7665231227874756s
string-similarity-['levenshtein'] Time: 5.608527898788452s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07267093658447266s
normalize-scores-des_cont_jaccard Time: 0.027843236923217773s
smallest-qnode-number Time: 0.2358410358428955s
mosaic-features Time: 0.014342069625854492s
creat-singleton-feature Time: 0.14765334129333496s
vote-by-classifier Time: 0.40558314323425293s
Qnodes to lookup: 1476
Qnodes from file: 1425
Outlier removal generates 13 lof-voted candidates
Outlier removal generates 127 lof-voted candidates
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 35.0935423374176s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3190891742706299s
compute-tf-idf-class_count 

282it [5:39:47, 89.69s/it]

align-page-rank Time: 0.06627607345581055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.4198501110076904s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6651418209075928s
string-similarity-['jaro_winkler'] Time: 0.27808380126953125s
string-similarity-['levenshtein'] Time: 2.173701763153076s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027638673782348633s
normalize-scores-des_cont_jaccard Time: 0.01318979263305664s
smallest-qnode-number Time: 0.08873987197875977s
mosaic-features Time: 0.0057773590087890625s
creat-singleton-feature Time: 0.06199288368225098s
vote-by-classifier Time: 0.6150250434875488s
Qnodes to lookup: 1049
Qnodes from file: 1040
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 16.611612796783447s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4576282501220703s
compute-tf-idf-class_count Time: 17.457413911819458s
compute-tf-idf-property_count Time: 18.41392993927002s
context-match

283it [5:40:18, 72.01s/it]

align-page-rank Time: 0.19881391525268555s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7258787155151367s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9639420509338379s
string-similarity-['jaro_winkler'] Time: 0.17232489585876465s
string-similarity-['levenshtein'] Time: 0.8600518703460693s
string-similarity-['jaccard:tokenizer=word'] Time: 0.041890621185302734s
normalize-scores-des_cont_jaccard Time: 0.014009952545166016s
smallest-qnode-number Time: 0.10299873352050781s
mosaic-features Time: 0.006319284439086914s
creat-singleton-feature Time: 0.07285404205322266s
vote-by-classifier Time: 0.6887180805206299s
Qnodes to lookup: 1834
Qnodes from file: 1804
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 13.784332990646362s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12527012825012207s
compute-tf-idf-class_count Time: 14.130126237869263s
compute-tf-idf-property_count Time: 14.33020806312561s
context-mat

284it [5:40:45, 58.56s/it]

align-page-rank Time: 0.1367502212524414s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.5459558963775635s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.941145896911621s
string-similarity-['jaro_winkler'] Time: 0.7324600219726562s
string-similarity-['levenshtein'] Time: 5.180184841156006s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06880307197570801s
normalize-scores-des_cont_jaccard Time: 0.024077653884887695s
smallest-qnode-number Time: 0.16301298141479492s
mosaic-features Time: 0.014968156814575195s
creat-singleton-feature Time: 0.13898491859436035s
vote-by-classifier Time: 0.43631505966186523s
Qnodes to lookup: 3109
Qnodes from file: 2998
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 19 lof-voted candidates
score-using-embedding Time: 28.138073205947876s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2964589595794678s
compute-tf-idf-class_count Time: 30.558940887451172s
compute-tf-idf-proper

285it [5:42:34, 73.63s/it]

align-page-rank Time: 0.19978594779968262s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2014548778533936s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.254354000091553s
string-similarity-['jaro_winkler'] Time: 0.25291895866394043s
string-similarity-['levenshtein'] Time: 1.1721141338348389s
string-similarity-['jaccard:tokenizer=word'] Time: 0.11842203140258789s
normalize-scores-des_cont_jaccard Time: 0.02583599090576172s
smallest-qnode-number Time: 0.1646270751953125s
mosaic-features Time: 0.012207984924316406s
creat-singleton-feature Time: 0.14289498329162598s
vote-by-classifier Time: 0.40950703620910645s
Qnodes to lookup: 3690
Qnodes from file: 3670
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 17.675854206085205s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.24530386924743652s
compute-tf-idf-class_count Time: 19.378518104553223s
compute-tf-idf-prop

286it [5:43:27, 67.59s/it]

align-page-rank Time: 0.16283273696899414s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.087418079376221s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.971729040145874s
string-similarity-['jaro_winkler'] Time: 1.0257642269134521s
string-similarity-['levenshtein'] Time: 8.17090916633606s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0718841552734375s
normalize-scores-des_cont_jaccard Time: 0.03334403038024902s
smallest-qnode-number Time: 0.24651503562927246s
mosaic-features Time: 0.01816701889038086s
creat-singleton-feature Time: 0.15808820724487305s
vote-by-classifier Time: 0.40427374839782715s
Qnodes to lookup: 3116
Qnodes from file: 3090
Outlier removal generates 98 lof-voted candidates
Outlier removal generates 72 lof-voted candidates
Outlier removal generates 57 lof-voted candidates
score-using-embedding Time: 31.94030523300171s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3533799648284912s
compute-tf-idf-class_count Ti

287it [5:45:09, 77.77s/it]

align-page-rank Time: 0.18840694427490234s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4338710308074951s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.21419811248779297s
string-similarity-['jaro_winkler'] Time: 0.13927292823791504s
string-similarity-['levenshtein'] Time: 0.706355094909668s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0396878719329834s
normalize-scores-des_cont_jaccard Time: 0.014592647552490234s
smallest-qnode-number Time: 0.08804106712341309s
mosaic-features Time: 0.0053369998931884766s
creat-singleton-feature Time: 0.19587397575378418s
vote-by-classifier Time: 0.902012825012207s
Qnodes to lookup: 1504
Qnodes from file: 1500
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 14.195269107818604s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12572216987609863s
compute-tf-idf-class_count Time: 14.571531057357788s
compute-tf-idf-property_count Time: 15.073833227157593s
context-matc

288it [5:45:40, 63.89s/it]

align-page-rank Time: 0.23425698280334473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5982940196990967s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.67034125328064s
string-similarity-['jaro_winkler'] Time: 0.8828308582305908s
string-similarity-['levenshtein'] Time: 4.428938150405884s
string-similarity-['jaccard:tokenizer=word'] Time: 0.13829994201660156s
normalize-scores-des_cont_jaccard Time: 0.04749274253845215s
smallest-qnode-number Time: 0.35229992866516113s
mosaic-features Time: 0.024227142333984375s
creat-singleton-feature Time: 0.2486732006072998s
vote-by-classifier Time: 0.46663498878479004s
Qnodes to lookup: 4270
Qnodes from file: 4219
Outlier removal generates 22 lof-voted candidates
Outlier removal generates 37 lof-voted candidates
_centroid_of_lof: Missing 1 of 83
Outlier removal generates 49 lof-voted candidates
Outlier removal generates 43 lof-voted candidates
score-using-embedding Time: 26.279310941696167s
generate-reciprocal

289it [5:48:01, 87.01s/it]

align-page-rank Time: 0.09775114059448242s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3109269142150879s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4588289260864258s
string-similarity-['jaro_winkler'] Time: 0.09571504592895508s
string-similarity-['levenshtein'] Time: 0.4000132083892822s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028992652893066406s
normalize-scores-des_cont_jaccard Time: 0.012544870376586914s
smallest-qnode-number Time: 0.0948479175567627s
mosaic-features Time: 0.004967212677001953s
creat-singleton-feature Time: 0.05406689643859863s
vote-by-classifier Time: 0.41750001907348633s
Qnodes to lookup: 1330
Qnodes from file: 1304
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 10.854311227798462s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.103759765625s
compute-tf-idf-class_count Time: 12.158891916275024s
compute-tf-idf-property_count Time: 11.496008157730103s
context-match T

290it [5:48:26, 68.42s/it]

align-page-rank Time: 0.23663067817687988s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.316749811172485s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.656789064407349s
string-similarity-['jaro_winkler'] Time: 0.9278807640075684s
string-similarity-['levenshtein'] Time: 5.826282024383545s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1942002773284912s
normalize-scores-des_cont_jaccard Time: 0.04423332214355469s
smallest-qnode-number Time: 0.3937242031097412s
mosaic-features Time: 0.024108171463012695s
creat-singleton-feature Time: 0.7201778888702393s
vote-by-classifier Time: 0.81416916847229s
Qnodes to lookup: 4296
Qnodes from file: 4216
Outlier removal generates 29 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 35.60056710243225s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.

291it [5:52:01, 112.45s/it]

align-page-rank Time: 0.17204809188842773s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4521300792694092s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0064270496368408s
string-similarity-['jaro_winkler'] Time: 0.12642407417297363s
string-similarity-['levenshtein'] Time: 0.4083709716796875s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03613090515136719s
normalize-scores-des_cont_jaccard Time: 0.014847993850708008s
smallest-qnode-number Time: 0.12688493728637695s
mosaic-features Time: 0.005929231643676758s
creat-singleton-feature Time: 0.06977176666259766s
vote-by-classifier Time: 0.7612650394439697s
Qnodes to lookup: 1516
Qnodes from file: 1503
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 15.192075967788696s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6743221282958984s
compute-tf-idf-class_count Time: 17.167864084243774s
compute-tf-idf-property_count Time: 16.97352886199951s
context-match

292it [5:52:30, 87.26s/it] 

align-page-rank Time: 0.19260120391845703s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.9203987121582031s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.6078577041625977s
string-similarity-['jaro_winkler'] Time: 0.18697285652160645s
string-similarity-['levenshtein'] Time: 1.102849006652832s
string-similarity-['jaccard:tokenizer=word'] Time: 0.027636051177978516s
normalize-scores-des_cont_jaccard Time: 0.012613296508789062s
smallest-qnode-number Time: 0.18072009086608887s
mosaic-features Time: 0.00592494010925293s
creat-singleton-feature Time: 0.05995607376098633s
vote-by-classifier Time: 1.4957139492034912s
Qnodes to lookup: 1241
Qnodes from file: 1228
Column_vector_stragtegy centroid_of_lof failed
score-using-embedding Time: 16.2017662525177s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11420202255249023s
compute-tf-idf-class_count Time: 17.012370824813843s
compute-tf-idf-property_count Time: 17.99145197868347s
context-match Time

293it [5:52:59, 69.95s/it]

align-page-rank Time: 0.27377796173095703s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.802138090133667s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.257385969161987s
string-similarity-['jaro_winkler'] Time: 0.47745800018310547s
string-similarity-['levenshtein'] Time: 2.647275924682617s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07741189002990723s
normalize-scores-des_cont_jaccard Time: 0.03589892387390137s
smallest-qnode-number Time: 0.2526078224182129s
mosaic-features Time: 0.020061731338500977s
creat-singleton-feature Time: 0.18193817138671875s
vote-by-classifier Time: 1.1526131629943848s
Qnodes to lookup: 5047
Qnodes from file: 4964
Outlier removal generates 12 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 15 lof-voted candidates
score-using-embedding Time: 24.620833158493042s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.34039783477783203s
compute-tf-idf-class_coun

294it [5:54:33, 77.05s/it]

align-page-rank Time: 0.2131500244140625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.482662916183472s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 23.534703016281128s
string-similarity-['jaro_winkler'] Time: 0.7973449230194092s
string-similarity-['levenshtein'] Time: 7.668368101119995s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07768821716308594s
normalize-scores-des_cont_jaccard Time: 0.03053116798400879s
smallest-qnode-number Time: 0.2901420593261719s
mosaic-features Time: 0.014538049697875977s
creat-singleton-feature Time: 0.15490198135375977s
vote-by-classifier Time: 0.49877190589904785s
Qnodes to lookup: 2286
Qnodes from file: 2208
Outlier removal generates 9 lof-voted candidates
Outlier removal generates 38 lof-voted candidates
Outlier removal generates 24 lof-voted candidates
score-using-embedding Time: 49.429336071014404s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.40921497344970703s
compute-tf-idf-class_count

295it [5:57:35, 108.41s/it]

align-page-rank Time: 0.1792137622833252s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.27811408042907715s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5431711673736572s
string-similarity-['jaro_winkler'] Time: 0.21948790550231934s
string-similarity-['levenshtein'] Time: 0.4989049434661865s
string-similarity-['jaccard:tokenizer=word'] Time: 0.028309106826782227s
normalize-scores-des_cont_jaccard Time: 0.014129161834716797s
smallest-qnode-number Time: 0.0958402156829834s
mosaic-features Time: 0.005323886871337891s
creat-singleton-feature Time: 0.06305384635925293s
vote-by-classifier Time: 1.4886767864227295s
Qnodes to lookup: 1911
Qnodes from file: 1888
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 14.196063041687012s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12054896354675293s
compute-tf-idf-class_count Time: 14.685222148895264s
compute-tf-idf-property_count Time: 15.930391073226929s
context-mat

296it [5:58:03, 84.38s/it] 

align-page-rank Time: 0.18502497673034668s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0128161907196045s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1145641803741455s
string-similarity-['jaro_winkler'] Time: 0.2855820655822754s
string-similarity-['levenshtein'] Time: 1.0900499820709229s
string-similarity-['jaccard:tokenizer=word'] Time: 0.09576892852783203s
normalize-scores-des_cont_jaccard Time: 0.030465126037597656s
smallest-qnode-number Time: 0.24283814430236816s
mosaic-features Time: 0.016416072845458984s
creat-singleton-feature Time: 0.4286646842956543s
vote-by-classifier Time: 0.8877010345458984s
Qnodes to lookup: 5642
Qnodes from file: 5621
Outlier removal generates 35 lof-voted candidates
score-using-embedding Time: 17.57704496383667s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.35262322425842285s
compute-tf-idf-class_count Time: 19.904879093170166s
compute-tf-idf-property_count Time: 19.43596887588501s
context-match T

297it [5:58:44, 71.41s/it]

align-page-rank Time: 0.24433326721191406s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1654348373413086s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.203015089035034s
string-similarity-['jaro_winkler'] Time: 0.33212780952453613s
string-similarity-['levenshtein'] Time: 1.3979661464691162s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0557551383972168s
normalize-scores-des_cont_jaccard Time: 0.017805814743041992s
smallest-qnode-number Time: 0.15557193756103516s
mosaic-features Time: 0.008800983428955078s
creat-singleton-feature Time: 0.09827423095703125s
vote-by-classifier Time: 0.4489319324493408s
Qnodes to lookup: 3110
Qnodes from file: 3074
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 15.967578887939453s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2800920009613037s
compute-tf-idf-class_count Time: 16.676480054855347s
compute-tf-idf-property_count Time: 16.899441957473755s
context-match T

298it [5:59:15, 59.20s/it]

align-page-rank Time: 0.17719197273254395s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3340089321136475s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.9422547817230225s
string-similarity-['jaro_winkler'] Time: 0.3099651336669922s
string-similarity-['levenshtein'] Time: 1.5733838081359863s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06641387939453125s
normalize-scores-des_cont_jaccard Time: 0.02133798599243164s
smallest-qnode-number Time: 0.16454792022705078s
mosaic-features Time: 0.009857177734375s
creat-singleton-feature Time: 0.11006593704223633s
vote-by-classifier Time: 0.9038000106811523s
Qnodes to lookup: 1571
Qnodes from file: 1528
Outlier removal generates 36 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 16.734214782714844s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.20761489868164062s
compute-tf-idf-class_count Time: 17.611860990524292s
compute-tf-idf-propert

299it [5:59:51, 52.24s/it]

align-page-rank Time: 0.08076882362365723s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.508979320526123s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5953350067138672s
string-similarity-['jaro_winkler'] Time: 0.15745782852172852s
string-similarity-['levenshtein'] Time: 0.4707908630371094s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04926490783691406s
normalize-scores-des_cont_jaccard Time: 0.018742084503173828s
smallest-qnode-number Time: 0.12141203880310059s
mosaic-features Time: 0.042327165603637695s
creat-singleton-feature Time: 0.12367510795593262s
vote-by-classifier Time: 0.42565011978149414s
Qnodes to lookup: 2920
Qnodes from file: 2898
Outlier removal generates 10 lof-voted candidates
score-using-embedding Time: 12.750287055969238s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.46549010276794434s
compute-tf-idf-class_count Time: 14.657822370529175s
compute-tf-idf-property_count Time: 14.777633905410767s
context-mat

300it [6:00:19, 44.98s/it]

align-page-rank Time: 0.16505026817321777s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.868373155593872s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.0099780559539795s
string-similarity-['jaro_winkler'] Time: 0.3829770088195801s
string-similarity-['levenshtein'] Time: 1.5427541732788086s
string-similarity-['jaccard:tokenizer=word'] Time: 0.08045792579650879s
normalize-scores-des_cont_jaccard Time: 0.035347938537597656s
smallest-qnode-number Time: 0.3609890937805176s
mosaic-features Time: 0.01677703857421875s
creat-singleton-feature Time: 0.1949000358581543s
vote-by-classifier Time: 0.9281561374664307s
Qnodes to lookup: 5442
Qnodes from file: 5373
Outlier removal generates 5 lof-voted candidates
_centroid_of_lof: Missing 1 of 47
Outlier removal generates 28 lof-voted candidates
Outlier removal generates 17 lof-voted candidates
score-using-embedding Time: 21.431586980819702s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.35774898529

301it [6:01:52, 59.50s/it]

align-page-rank Time: 0.13509607315063477s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.3970789909362793s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.0451011657714844s
string-similarity-['jaro_winkler'] Time: 0.5032069683074951s
string-similarity-['levenshtein'] Time: 3.890069007873535s
string-similarity-['jaccard:tokenizer=word'] Time: 0.02868199348449707s
normalize-scores-des_cont_jaccard Time: 0.013137340545654297s
smallest-qnode-number Time: 0.08854079246520996s
mosaic-features Time: 0.006553173065185547s
creat-singleton-feature Time: 0.06636285781860352s
vote-by-classifier Time: 0.45589423179626465s
Qnodes to lookup: 723
Qnodes from file: 720
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 17.986212015151978s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5120439529418945s
compute-tf-idf-class_count Time: 19.52847194671631s
compute-tf-idf-property_count Time: 19.685374975204468s
context-match Ti

302it [6:02:26, 51.74s/it]

align-page-rank Time: 0.10388565063476562s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.501300096511841s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.690959930419922s
string-similarity-['jaro_winkler'] Time: 0.4862840175628662s
string-similarity-['levenshtein'] Time: 2.7609429359436035s
string-similarity-['jaccard:tokenizer=word'] Time: 0.058763980865478516s
normalize-scores-des_cont_jaccard Time: 0.021197795867919922s
smallest-qnode-number Time: 0.18927812576293945s
mosaic-features Time: 0.015060901641845703s
creat-singleton-feature Time: 0.13489294052124023s
vote-by-classifier Time: 0.6483631134033203s
Qnodes to lookup: 3963
Qnodes from file: 3911
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 21.002183198928833s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.25657200813293457s
compute-tf-idf-class_count Time: 23.627129793167114s
compute-tf-idf-prope

303it [6:03:44, 59.64s/it]

align-page-rank Time: 0.21389293670654297s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6284208297729492s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5645997524261475s
string-similarity-['jaro_winkler'] Time: 0.2555577754974365s
string-similarity-['levenshtein'] Time: 0.8174729347229004s
string-similarity-['jaccard:tokenizer=word'] Time: 0.049285173416137695s
normalize-scores-des_cont_jaccard Time: 0.016966819763183594s
smallest-qnode-number Time: 0.18592476844787598s
mosaic-features Time: 0.01110696792602539s
creat-singleton-feature Time: 0.524094820022583s
vote-by-classifier Time: 0.8448061943054199s
Qnodes to lookup: 1132
Qnodes from file: 1111
Outlier removal generates 290 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 15.133600950241089s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.21455097198486328s
compute-tf-idf-class_count Time: 16.60793709754944s
compute-tf-idf-prope

304it [6:04:17, 51.56s/it]

align-page-rank Time: 0.2389240264892578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.01777982711792s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.1047980785369873s
string-similarity-['jaro_winkler'] Time: 0.2680830955505371s
string-similarity-['levenshtein'] Time: 0.9813277721405029s
string-similarity-['jaccard:tokenizer=word'] Time: 0.24785184860229492s
normalize-scores-des_cont_jaccard Time: 0.026769161224365234s
smallest-qnode-number Time: 0.21786117553710938s
mosaic-features Time: 0.014116048812866211s
creat-singleton-feature Time: 0.16311383247375488s
vote-by-classifier Time: 0.39973974227905273s
Qnodes to lookup: 5746
Qnodes from file: 5625
Outlier removal generates 26 lof-voted candidates
score-using-embedding Time: 15.673242092132568s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3615241050720215s
compute-tf-idf-class_count Time: 17.636245012283325s
compute-tf-idf-property_count Time: 18.380005836486816s
context-match T

305it [6:04:49, 45.89s/it]

align-page-rank Time: 0.10662388801574707s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.1396667957305908s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7207047939300537s
string-similarity-['jaro_winkler'] Time: 0.2819938659667969s
string-similarity-['levenshtein'] Time: 1.340928077697754s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05267024040222168s
normalize-scores-des_cont_jaccard Time: 0.023159027099609375s
smallest-qnode-number Time: 0.1571352481842041s
mosaic-features Time: 0.01017904281616211s
creat-singleton-feature Time: 0.10881614685058594s
vote-by-classifier Time: 0.4642341136932373s
Qnodes to lookup: 3229
Qnodes from file: 3114
Outlier removal generates 4 lof-voted candidates
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 16.314797163009644s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2433338165283203s
compute-tf-idf-class_count Time: 19.23847508430481s
compute-tf-idf-property_c

306it [6:05:33, 45.37s/it]

align-page-rank Time: 0.20709013938903809s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.463535785675049s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.865108966827393s
string-similarity-['jaro_winkler'] Time: 0.4707789421081543s
string-similarity-['levenshtein'] Time: 4.356525182723999s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05073904991149902s
normalize-scores-des_cont_jaccard Time: 0.0213010311126709s
smallest-qnode-number Time: 0.1610569953918457s
mosaic-features Time: 0.010110855102539062s
creat-singleton-feature Time: 0.11789965629577637s
vote-by-classifier Time: 1.0338497161865234s
Qnodes to lookup: 927
Qnodes from file: 862
Outlier removal generates 14 lof-voted candidates
Outlier removal generates 20 lof-voted candidates
score-using-embedding Time: 23.32463002204895s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.20010781288146973s
compute-tf-idf-class_count Time: 24.865764141082764s
compute-tf-idf-property_cou

307it [6:06:35, 50.14s/it]

align-page-rank Time: 0.15229010581970215s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5356850624084473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7309238910675049s
string-similarity-['jaro_winkler'] Time: 0.14333891868591309s
string-similarity-['levenshtein'] Time: 0.623507022857666s
string-similarity-['jaccard:tokenizer=word'] Time: 0.04104208946228027s
normalize-scores-des_cont_jaccard Time: 0.016307353973388672s
smallest-qnode-number Time: 0.10316181182861328s
mosaic-features Time: 0.007908105850219727s
creat-singleton-feature Time: 0.0842599868774414s
vote-by-classifier Time: 0.7997839450836182s
Qnodes to lookup: 2410
Qnodes from file: 2385
Outlier removal generates 12 lof-voted candidates
score-using-embedding Time: 14.573246955871582s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1460268497467041s
compute-tf-idf-class_count Time: 16.349504947662354s
compute-tf-idf-property_count Time: 15.704328060150146s
context-match 

308it [6:07:04, 43.83s/it]

align-page-rank Time: 0.18359017372131348s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.40161800384521484s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.22593402862548828s
string-similarity-['jaro_winkler'] Time: 0.23662710189819336s
string-similarity-['levenshtein'] Time: 0.7176921367645264s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05428600311279297s
normalize-scores-des_cont_jaccard Time: 0.013786077499389648s
smallest-qnode-number Time: 0.09895086288452148s
mosaic-features Time: 0.00596308708190918s
creat-singleton-feature Time: 0.06493687629699707s
vote-by-classifier Time: 1.501749038696289s
Qnodes to lookup: 1578
Qnodes from file: 1577
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 14.323153257369995s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.12349629402160645s
compute-tf-idf-class_count Time: 16.630346059799194s
compute-tf-idf-property_count Time: 16.671750783920288s
context-mat

309it [6:07:37, 40.54s/it]

align-page-rank Time: 0.1104130744934082s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.385715007781982s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.1047239303588867s
string-similarity-['jaro_winkler'] Time: 0.6691160202026367s
string-similarity-['levenshtein'] Time: 8.77901005744934s
string-similarity-['jaccard:tokenizer=word'] Time: 0.05328202247619629s
normalize-scores-des_cont_jaccard Time: 0.024775981903076172s
smallest-qnode-number Time: 0.1973130702972412s
mosaic-features Time: 0.011478662490844727s
creat-singleton-feature Time: 0.11834597587585449s
vote-by-classifier Time: 0.9872190952301025s
Qnodes to lookup: 1931
Qnodes from file: 1885
Outlier removal generates 83 lof-voted candidates
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 31.641285181045532s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.2171039581298828s
compute-tf-idf-class_count Time: 32.35832190513611s
compute-tf-idf-property_c

310it [6:09:14, 57.66s/it]

align-page-rank Time: 0.20636415481567383s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2087550163269043s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.9649410247802734s
string-similarity-['jaro_winkler'] Time: 0.46933603286743164s
string-similarity-['levenshtein'] Time: 2.685605764389038s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06490707397460938s
normalize-scores-des_cont_jaccard Time: 0.02725505828857422s
smallest-qnode-number Time: 0.1645488739013672s
mosaic-features Time: 0.015038251876831055s
creat-singleton-feature Time: 0.14259099960327148s
vote-by-classifier Time: 0.7938580513000488s
Qnodes to lookup: 3407
Qnodes from file: 3342
Outlier removal generates 18 lof-voted candidates
Outlier removal generates 29 lof-voted candidates
score-using-embedding Time: 22.033207654953003s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.29181981086730957s
compute-tf-idf-class_count Time: 24.322211742401123s
compute-tf-idf-prope

311it [6:10:19, 59.79s/it]

align-page-rank Time: 0.23038697242736816s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.8334898948669434s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.064118146896362s
string-similarity-['jaro_winkler'] Time: 0.5450379848480225s
string-similarity-['levenshtein'] Time: 3.3914711475372314s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07959818840026855s
normalize-scores-des_cont_jaccard Time: 0.032837867736816406s
smallest-qnode-number Time: 0.23663806915283203s
mosaic-features Time: 0.016006946563720703s
creat-singleton-feature Time: 0.16812491416931152s
vote-by-classifier Time: 0.412384033203125s
Qnodes to lookup: 5133
Qnodes from file: 5032
Outlier removal generates 10 lof-voted candidates
Outlier removal generates 16 lof-voted candidates
Outlier removal generates 11 lof-voted candidates
score-using-embedding Time: 22.500959873199463s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.3311879634857178s
compute-tf-idf-class_cou

312it [6:11:40, 66.20s/it]

align-page-rank Time: 0.06197500228881836s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.29501891136169434s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.5998578071594238s
string-similarity-['jaro_winkler'] Time: 0.10470008850097656s
string-similarity-['levenshtein'] Time: 0.27469706535339355s
string-similarity-['jaccard:tokenizer=word'] Time: 0.036540985107421875s
normalize-scores-des_cont_jaccard Time: 0.015295267105102539s
smallest-qnode-number Time: 0.1002950668334961s
mosaic-features Time: 0.0071752071380615234s
creat-singleton-feature Time: 0.5803380012512207s
vote-by-classifier Time: 0.901587963104248s
Qnodes to lookup: 2690
Qnodes from file: 2648
Outlier removal generates 3 lof-voted candidates
score-using-embedding Time: 15.205726861953735s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1347198486328125s
compute-tf-idf-class_count Time: 15.780189037322998s
compute-tf-idf-property_count Time: 15.945376873016357s
context-matc

313it [6:12:12, 55.97s/it]

align-page-rank Time: 1.24332594871521s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.534785032272339s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.502162218093872s
string-similarity-['jaro_winkler'] Time: 0.9646060466766357s
string-similarity-['levenshtein'] Time: 3.605649948120117s
string-similarity-['jaccard:tokenizer=word'] Time: 0.5635361671447754s
normalize-scores-des_cont_jaccard Time: 0.08282899856567383s
smallest-qnode-number Time: 1.3473758697509766s
mosaic-features Time: 0.04227614402770996s
creat-singleton-feature Time: 0.6790478229522705s
vote-by-classifier Time: 1.565173864364624s
Qnodes to lookup: 13836
Qnodes from file: 13590
_centroid_of_lof: Missing 1 of 174
Outlier removal generates 104 lof-voted candidates
score-using-embedding Time: 35.91355800628662s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.1569058895111084s
compute-tf-idf-class_count Time: 37.91162586212158s
compute-tf-idf-property_count Time: 38.792949

314it [6:15:05, 91.10s/it]

align-page-rank Time: 0.11862587928771973s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.7495839595794678s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.7130701541900635s
string-similarity-['jaro_winkler'] Time: 0.20651602745056152s
string-similarity-['levenshtein'] Time: 0.9235358238220215s
string-similarity-['jaccard:tokenizer=word'] Time: 0.058750152587890625s
normalize-scores-des_cont_jaccard Time: 0.01579117774963379s
smallest-qnode-number Time: 0.1168980598449707s
mosaic-features Time: 0.007248878479003906s
creat-singleton-feature Time: 0.07593679428100586s
vote-by-classifier Time: 0.5775859355926514s
Qnodes to lookup: 2769
Qnodes from file: 2739
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 15.275460958480835s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.45173025131225586s
compute-tf-idf-class_count Time: 16.30448293685913s
compute-tf-idf-property_count Time: 17.183736085891724s
context-match

315it [6:16:45, 93.66s/it]

align-page-rank Time: 1.0259759426116943s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.262356996536255s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.73491907119751s
string-similarity-['jaro_winkler'] Time: 1.429368019104004s
string-similarity-['levenshtein'] Time: 8.241701126098633s
string-similarity-['jaccard:tokenizer=word'] Time: 0.24846601486206055s
normalize-scores-des_cont_jaccard Time: 0.06970381736755371s
smallest-qnode-number Time: 1.3543994426727295s
mosaic-features Time: 0.03735208511352539s
creat-singleton-feature Time: 0.7890360355377197s
vote-by-classifier Time: 0.5593011379241943s
Qnodes to lookup: 16438
Qnodes from file: 15872
Outlier removal generates 125 lof-voted candidates
score-using-embedding Time: 49.98639178276062s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.0139641761779785s
compute-tf-idf-class_count Time: 53.86527490615845s
compute-tf-idf-property_count Time: 54.72060012817383s
context-match Time: 11

316it [6:19:48, 120.61s/it]

align-page-rank Time: 0.22618913650512695s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6648428440093994s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0000269412994385s
string-similarity-['jaro_winkler'] Time: 0.15690207481384277s
string-similarity-['levenshtein'] Time: 0.5778298377990723s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0605921745300293s
normalize-scores-des_cont_jaccard Time: 0.018210887908935547s
smallest-qnode-number Time: 0.2559049129486084s
mosaic-features Time: 0.008040904998779297s
creat-singleton-feature Time: 0.20832109451293945s
vote-by-classifier Time: 1.071349859237671s
Qnodes to lookup: 2219
Qnodes from file: 2199
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 12.462282180786133s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1522507667541504s
compute-tf-idf-class_count Time: 14.938864946365356s
compute-tf-idf-property_count Time: 14.368910789489746s
context-match Ti

317it [6:20:26, 95.60s/it] 

align-page-rank Time: 0.4665508270263672s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.73881983757019s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.84269905090332s
string-similarity-['jaro_winkler'] Time: 0.9000520706176758s
string-similarity-['levenshtein'] Time: 5.280369997024536s
string-similarity-['jaccard:tokenizer=word'] Time: 0.2088310718536377s
normalize-scores-des_cont_jaccard Time: 0.049932003021240234s
smallest-qnode-number Time: 0.5079121589660645s
mosaic-features Time: 0.02593994140625s
creat-singleton-feature Time: 0.2986588478088379s
vote-by-classifier Time: 0.4530961513519287s
Qnodes to lookup: 10679
Qnodes from file: 10380
_centroid_of_lof: Missing 1 of 159
Outlier removal generates 95 lof-voted candidates
score-using-embedding Time: 40.20350098609924s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6012890338897705s
compute-tf-idf-class_count Time: 42.02075219154358s
compute-tf-idf-property_count Time: 42.6911568

318it [6:22:20, 101.31s/it]

align-page-rank Time: 3.0854909420013428s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 23.397165060043335s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 46.53506088256836s
string-similarity-['jaro_winkler'] Time: 3.422541856765747s
string-similarity-['levenshtein'] Time: 27.78504705429077s
string-similarity-['jaccard:tokenizer=word'] Time: 0.4379758834838867s
normalize-scores-des_cont_jaccard Time: 0.1451249122619629s
smallest-qnode-number Time: 3.324831962585449s
mosaic-features Time: 0.06892180442810059s
creat-singleton-feature Time: 0.9616687297821045s
vote-by-classifier Time: 0.57871413230896s
Qnodes to lookup: 22944
Qnodes from file: 22651
Outlier removal generates 185 lof-voted candidates
score-using-embedding Time: 124.28863883018494s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.039897918701172s
compute-tf-idf-class_count Time: 129.3760221004486s
compute-tf-idf-property_count Time: 130.01231694221497s
context-match Time: 94.27

319it [6:26:17, 141.78s/it]

align-page-rank Time: 0.9998469352722168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.579556226730347s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.935553789138794s
string-similarity-['jaro_winkler'] Time: 1.2198669910430908s
string-similarity-['levenshtein'] Time: 6.197146892547607s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1766669750213623s
normalize-scores-des_cont_jaccard Time: 0.07322096824645996s
smallest-qnode-number Time: 1.295496940612793s
mosaic-features Time: 0.036607980728149414s
creat-singleton-feature Time: 0.7910001277923584s
vote-by-classifier Time: 0.6084010601043701s
Qnodes to lookup: 13955
Qnodes from file: 13879
Outlier removal generates 118 lof-voted candidates
score-using-embedding Time: 34.04626512527466s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.9506890773773193s
compute-tf-idf-class_count Time: 36.89197397232056s
compute-tf-idf-property_count Time: 38.71378207206726s
context-match Time: 37

320it [6:27:43, 125.32s/it]

align-page-rank Time: 0.027544260025024414s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.2271897792816162s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.43682289123535156s
string-similarity-['jaro_winkler'] Time: 0.08067011833190918s
string-similarity-['levenshtein'] Time: 0.2642989158630371s
string-similarity-['jaccard:tokenizer=word'] Time: 0.030461788177490234s
normalize-scores-des_cont_jaccard Time: 0.011816978454589844s
smallest-qnode-number Time: 0.04553389549255371s
mosaic-features Time: 0.004736900329589844s
creat-singleton-feature Time: 0.26259922981262207s
vote-by-classifier Time: 1.101491928100586s
Qnodes to lookup: 1314
Qnodes from file: 1254
Outlier removal generates 4 lof-voted candidates
score-using-embedding Time: 14.995434045791626s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.07635188102722168s
compute-tf-idf-class_count Time: 16.705048084259033s
compute-tf-idf-property_count Time: 16.126714944839478s
context-ma

321it [6:28:11, 95.88s/it] 

align-page-rank Time: 3.2913148403167725s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 16.65488600730896s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 29.884846210479736s
string-similarity-['jaro_winkler'] Time: 2.729506731033325s
string-similarity-['levenshtein'] Time: 17.55770707130432s
string-similarity-['jaccard:tokenizer=word'] Time: 0.8133602142333984s
normalize-scores-des_cont_jaccard Time: 0.1431899070739746s
smallest-qnode-number Time: 3.379117965698242s
mosaic-features Time: 0.07187318801879883s
creat-singleton-feature Time: 1.0166869163513184s
vote-by-classifier Time: 0.44287776947021484s
Qnodes to lookup: 30704
Qnodes from file: 29605
Outlier removal generates 115 lof-voted candidates
score-using-embedding Time: 91.59076595306396s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.9841206073760986s
compute-tf-idf-class_count Time: 97.75871109962463s
compute-tf-idf-property_count Time: 98.4323787689209s
context-match Time: 198.

322it [6:33:21, 160.14s/it]

align-page-rank Time: 4.828068733215332s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.011897087097168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 29.084489822387695s
string-similarity-['jaro_winkler'] Time: 3.3316121101379395s
string-similarity-['levenshtein'] Time: 14.869563817977905s
string-similarity-['jaccard:tokenizer=word'] Time: 0.6407327651977539s
normalize-scores-des_cont_jaccard Time: 0.19229388236999512s
smallest-qnode-number Time: 4.535001039505005s
mosaic-features Time: 0.08492898941040039s
creat-singleton-feature Time: 1.2971651554107666s
vote-by-classifier Time: 0.5661540031433105s
Qnodes to lookup: 45925
Qnodes from file: 44566
Outlier removal generates 129 lof-voted candidates
score-using-embedding Time: 91.41584014892578s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.424812078475952s
compute-tf-idf-class_count Time: 98.93270897865295s
compute-tf-idf-property_count Time: 101.07260799407959s
context-match Time: 2

323it [6:40:03, 232.74s/it]

align-page-rank Time: 3.088320016860962s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 17.44926619529724s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 23.01480793952942s
string-similarity-['jaro_winkler'] Time: 3.8840389251708984s
string-similarity-['levenshtein'] Time: 19.77751398086548s
string-similarity-['jaccard:tokenizer=word'] Time: 0.32231593132019043s
normalize-scores-des_cont_jaccard Time: 0.1769261360168457s
smallest-qnode-number Time: 2.781765937805176s
mosaic-features Time: 0.09453272819519043s
creat-singleton-feature Time: 1.0960910320281982s
vote-by-classifier Time: 0.5100111961364746s
Qnodes to lookup: 44252
Qnodes from file: 43001
Outlier removal generates 293 lof-voted candidates
score-using-embedding Time: 89.93774914741516s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.223248243331909s
compute-tf-idf-class_count Time: 96.81811714172363s
compute-tf-idf-property_count Time: 97.9615581035614s
context-match Time: 41.354

324it [6:42:36, 208.72s/it]

align-page-rank Time: 2.823047161102295s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 16.768746852874756s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 22.041790008544922s
string-similarity-['jaro_winkler'] Time: 3.516195058822632s
string-similarity-['levenshtein'] Time: 19.170963048934937s
string-similarity-['jaccard:tokenizer=word'] Time: 0.45706892013549805s
normalize-scores-des_cont_jaccard Time: 0.14278578758239746s
smallest-qnode-number Time: 2.4384748935699463s
mosaic-features Time: 0.08502888679504395s
creat-singleton-feature Time: 0.9048380851745605s
vote-by-classifier Time: 0.441986083984375s
Qnodes to lookup: 36000
Qnodes from file: 34891
Outlier removal generates 215 lof-voted candidates
score-using-embedding Time: 83.93960976600647s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.8091671466827393s
compute-tf-idf-class_count Time: 90.02619099617004s
compute-tf-idf-property_count Time: 91.68599009513855s
context-match Time: 2

325it [6:47:54, 241.74s/it]

align-page-rank Time: 0.06107497215270996s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.5568037033081055s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.812893867492676s
string-similarity-['jaro_winkler'] Time: 0.21309113502502441s
string-similarity-['levenshtein'] Time: 1.4511380195617676s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0518949031829834s
normalize-scores-des_cont_jaccard Time: 0.018702030181884766s
smallest-qnode-number Time: 0.22973012924194336s
mosaic-features Time: 0.006966114044189453s
creat-singleton-feature Time: 0.07097220420837402s
vote-by-classifier Time: 0.79520583152771s
Qnodes to lookup: 1906
Qnodes from file: 1829
Outlier removal generates 13 lof-voted candidates
score-using-embedding Time: 22.246617078781128s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.13881516456604004s
compute-tf-idf-class_count Time: 24.145623207092285s
compute-tf-idf-property_count Time: 23.426051139831543s
context-match T

326it [6:49:13, 192.69s/it]

align-page-rank Time: 1.8621089458465576s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 16.64081621170044s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 21.2902889251709s
string-similarity-['jaro_winkler'] Time: 3.6306087970733643s
string-similarity-['levenshtein'] Time: 18.977754831314087s
string-similarity-['jaccard:tokenizer=word'] Time: 0.5615179538726807s
normalize-scores-des_cont_jaccard Time: 0.14274001121520996s
smallest-qnode-number Time: 2.4079232215881348s
mosaic-features Time: 0.08044719696044922s
creat-singleton-feature Time: 1.086150884628296s
vote-by-classifier Time: 0.5593039989471436s
Qnodes to lookup: 37301
Qnodes from file: 36172
Outlier removal generates 230 lof-voted candidates
score-using-embedding Time: 84.78019905090332s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.6647627353668213s
compute-tf-idf-class_count Time: 89.81293487548828s
compute-tf-idf-property_count Time: 91.4331750869751s
context-match Time: 9.52

327it [6:51:06, 168.83s/it]

align-page-rank Time: 0.02146005630493164s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.17537617683410645s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.39573097229003906s
string-similarity-['jaro_winkler'] Time: 0.05470585823059082s
string-similarity-['levenshtein'] Time: 0.15347981452941895s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03862190246582031s
normalize-scores-des_cont_jaccard Time: 0.01152801513671875s
smallest-qnode-number Time: 0.036795854568481445s
mosaic-features Time: 0.004119873046875s
creat-singleton-feature Time: 0.046399831771850586s
vote-by-classifier Time: 0.5061659812927246s
Qnodes to lookup: 1152
Qnodes from file: 1146
Outlier removal generates 2 lof-voted candidates
score-using-embedding Time: 12.199305295944214s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5519881248474121s
compute-tf-idf-class_count Time: 14.180079936981201s
compute-tf-idf-property_count Time: 13.29517674446106s
context-match

328it [6:51:30, 125.44s/it]

align-page-rank Time: 0.19091796875s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2266738414764404s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.2265727519989014s
string-similarity-['jaro_winkler'] Time: 0.32384204864501953s
string-similarity-['levenshtein'] Time: 1.3663380146026611s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06879401206970215s
normalize-scores-des_cont_jaccard Time: 0.04133009910583496s
smallest-qnode-number Time: 0.19115304946899414s
mosaic-features Time: 0.013657093048095703s
creat-singleton-feature Time: 0.1386258602142334s
vote-by-classifier Time: 0.3881042003631592s
Qnodes to lookup: 5253
Qnodes from file: 5125
Outlier removal generates 22 lof-voted candidates
score-using-embedding Time: 18.719610691070557s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6011769771575928s
compute-tf-idf-class_count Time: 19.67918109893799s
compute-tf-idf-property_count Time: 20.90433621406555s
context-match Time: 28

329it [6:52:30, 105.76s/it]

align-page-rank Time: 0.28858280181884766s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.3061020374298096s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.038195848464966s
string-similarity-['jaro_winkler'] Time: 0.627526044845581s
string-similarity-['levenshtein'] Time: 3.2957558631896973s
string-similarity-['jaccard:tokenizer=word'] Time: 0.19179797172546387s
normalize-scores-des_cont_jaccard Time: 0.03934621810913086s
smallest-qnode-number Time: 0.40489888191223145s
mosaic-features Time: 0.021849870681762695s
creat-singleton-feature Time: 0.24077486991882324s
vote-by-classifier Time: 0.7860608100891113s
Qnodes to lookup: 9352
Qnodes from file: 9132
Outlier removal generates 90 lof-voted candidates
score-using-embedding Time: 27.85635995864868s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4780459403991699s
compute-tf-idf-class_count Time: 29.256216049194336s
compute-tf-idf-property_count Time: 29.174468994140625s
context-match Tim

330it [6:53:58, 100.40s/it]

align-page-rank Time: 0.33878087997436523s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.7489449977874756s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.490836143493652s
string-similarity-['jaro_winkler'] Time: 0.7450401782989502s
string-similarity-['levenshtein'] Time: 3.6231460571289062s
string-similarity-['jaccard:tokenizer=word'] Time: 0.18925690650939941s
normalize-scores-des_cont_jaccard Time: 0.04612016677856445s
smallest-qnode-number Time: 0.4298238754272461s
mosaic-features Time: 0.0249788761138916s
creat-singleton-feature Time: 0.2685999870300293s
vote-by-classifier Time: 0.4588298797607422s
Qnodes to lookup: 11126
Qnodes from file: 10808
Outlier removal generates 81 lof-voted candidates
score-using-embedding Time: 29.395726919174194s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5462281703948975s
compute-tf-idf-class_count Time: 31.654292106628418s
compute-tf-idf-property_count Time: 32.91178607940674s
context-match Time

331it [6:55:30, 98.02s/it] 

align-page-rank Time: 0.18137788772583008s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3788580894470215s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0016469955444336s
string-similarity-['jaro_winkler'] Time: 0.1095731258392334s
string-similarity-['levenshtein'] Time: 0.5008389949798584s
string-similarity-['jaccard:tokenizer=word'] Time: 0.029356002807617188s
normalize-scores-des_cont_jaccard Time: 0.014268875122070312s
smallest-qnode-number Time: 0.0867457389831543s
mosaic-features Time: 0.00575709342956543s
creat-singleton-feature Time: 0.06760287284851074s
vote-by-classifier Time: 0.8570258617401123s
Qnodes to lookup: 2040
Qnodes from file: 1995
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 15.394913673400879s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11986994743347168s
compute-tf-idf-class_count Time: 16.91682720184326s
compute-tf-idf-property_count Time: 16.278565883636475s
context-match T

332it [6:55:58, 76.87s/it]

align-page-rank Time: 0.3945128917694092s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.0760231018066406s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.1243510246276855s
string-similarity-['jaro_winkler'] Time: 0.643531084060669s
string-similarity-['levenshtein'] Time: 2.179633855819702s
string-similarity-['jaccard:tokenizer=word'] Time: 0.2096261978149414s
normalize-scores-des_cont_jaccard Time: 0.05781197547912598s
smallest-qnode-number Time: 0.5441341400146484s
mosaic-features Time: 0.030457019805908203s
creat-singleton-feature Time: 0.37044405937194824s
vote-by-classifier Time: 0.6204731464385986s
Qnodes to lookup: 15306
Qnodes from file: 14717
Outlier removal generates 41 lof-voted candidates
score-using-embedding Time: 23.570206880569458s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.9527480602264404s
compute-tf-idf-class_count Time: 26.903207778930664s
compute-tf-idf-property_count Time: 27.573906898498535s
context-match Tim

333it [6:57:10, 75.48s/it]

align-page-rank Time: 0.24614191055297852s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.048717737197876s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.237496852874756s
string-similarity-['jaro_winkler'] Time: 0.19246697425842285s
string-similarity-['levenshtein'] Time: 0.6972379684448242s
string-similarity-['jaccard:tokenizer=word'] Time: 0.06481289863586426s
normalize-scores-des_cont_jaccard Time: 0.022418975830078125s
smallest-qnode-number Time: 0.16460704803466797s
mosaic-features Time: 0.010514020919799805s
creat-singleton-feature Time: 0.10939717292785645s
vote-by-classifier Time: 0.8554422855377197s
Qnodes to lookup: 4346
Qnodes from file: 4282
Outlier removal generates 23 lof-voted candidates
score-using-embedding Time: 15.736521005630493s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.22438812255859375s
compute-tf-idf-class_count Time: 16.4844970703125s
compute-tf-idf-property_count Time: 16.705471992492676s
context-match T

334it [6:57:38, 61.21s/it]

align-page-rank Time: 3.9557290077209473s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 11.742668867111206s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 21.2659912109375s
string-similarity-['jaro_winkler'] Time: 3.0906591415405273s
string-similarity-['levenshtein'] Time: 15.376164674758911s
string-similarity-['jaccard:tokenizer=word'] Time: 0.4451131820678711s
normalize-scores-des_cont_jaccard Time: 0.15317916870117188s
smallest-qnode-number Time: 3.900982141494751s
mosaic-features Time: 0.06980490684509277s
creat-singleton-feature Time: 1.1714630126953125s
vote-by-classifier Time: 0.4931509494781494s
Qnodes to lookup: 23028
Qnodes from file: 22633
Outlier removal generates 150 lof-voted candidates
score-using-embedding Time: 77.32317614555359s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.266000986099243s
compute-tf-idf-class_count Time: 82.60248231887817s
compute-tf-idf-property_count Time: 83.32394790649414s
context-match Time: 192

335it [7:02:27, 129.66s/it]

align-page-rank Time: 0.3332672119140625s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 5.76692008972168s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 8.141891956329346s
string-similarity-['jaro_winkler'] Time: 1.258486032485962s
string-similarity-['levenshtein'] Time: 6.3441197872161865s
string-similarity-['jaccard:tokenizer=word'] Time: 0.3419930934906006s
normalize-scores-des_cont_jaccard Time: 0.05712008476257324s
smallest-qnode-number Time: 0.4644930362701416s
mosaic-features Time: 0.15593910217285156s
creat-singleton-feature Time: 0.336575984954834s
vote-by-classifier Time: 0.426347017288208s
Qnodes to lookup: 14176
Qnodes from file: 13801
Outlier removal generates 127 lof-voted candidates
score-using-embedding Time: 35.529906272888184s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.6077878475189209s
compute-tf-idf-class_count Time: 38.414422035217285s
compute-tf-idf-property_count Time: 38.523212909698486s
context-match Time: 17

336it [7:06:09, 157.38s/it]

align-page-rank Time: 0.1561441421508789s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.4325129985809326s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.3788788318634033s
string-similarity-['jaro_winkler'] Time: 0.1166987419128418s
string-similarity-['levenshtein'] Time: 0.4263498783111572s
string-similarity-['jaccard:tokenizer=word'] Time: 0.030144929885864258s
normalize-scores-des_cont_jaccard Time: 0.014872074127197266s
smallest-qnode-number Time: 0.09111380577087402s
mosaic-features Time: 0.006247758865356445s
creat-singleton-feature Time: 0.066741943359375s
vote-by-classifier Time: 0.40001416206359863s
Qnodes to lookup: 2152
Qnodes from file: 2088
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 12.438958883285522s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.1352686882019043s
compute-tf-idf-class_count Time: 13.076521158218384s
compute-tf-idf-property_count Time: 13.185236930847168s
context-match T

337it [7:06:32, 116.88s/it]

align-page-rank Time: 1.514153242111206s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.551455020904541s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 33.03552603721619s
string-similarity-['jaro_winkler'] Time: 2.05450701713562s
string-similarity-['levenshtein'] Time: 10.234035015106201s
string-similarity-['jaccard:tokenizer=word'] Time: 0.48865675926208496s
normalize-scores-des_cont_jaccard Time: 0.0969991683959961s
smallest-qnode-number Time: 1.6099812984466553s
mosaic-features Time: 0.056617021560668945s
creat-singleton-feature Time: 1.2019860744476318s
vote-by-classifier Time: 0.7941570281982422s
Qnodes to lookup: 21314
Qnodes from file: 21012
Outlier removal generates 90 lof-voted candidates
score-using-embedding Time: 76.34121131896973s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.289025068283081s
compute-tf-idf-class_count Time: 80.11666083335876s
compute-tf-idf-property_count Time: 81.89128589630127s
context-match Time: 219.2

338it [7:11:46, 176.08s/it]

align-page-rank Time: 0.41921401023864746s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.46515417098999s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 12.52771806716919s
string-similarity-['jaro_winkler'] Time: 0.8465409278869629s
string-similarity-['levenshtein'] Time: 4.18412184715271s
string-similarity-['jaccard:tokenizer=word'] Time: 0.20126104354858398s
normalize-scores-des_cont_jaccard Time: 0.052475929260253906s
smallest-qnode-number Time: 0.5885879993438721s
mosaic-features Time: 0.024625778198242188s
creat-singleton-feature Time: 0.6381509304046631s
vote-by-classifier Time: 1.3972628116607666s
Qnodes to lookup: 11340
Qnodes from file: 10966
Outlier removal generates 98 lof-voted candidates
score-using-embedding Time: 35.38410186767578s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5646312236785889s
compute-tf-idf-class_count Time: 37.847129106521606s
compute-tf-idf-property_count Time: 37.72049689292908s
context-match Time: 

339it [7:13:27, 153.53s/it]

align-page-rank Time: 0.15169000625610352s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.3960390090942383s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8255980014801025s
string-similarity-['jaro_winkler'] Time: 0.12698101997375488s
string-similarity-['levenshtein'] Time: 0.43778085708618164s
string-similarity-['jaccard:tokenizer=word'] Time: 0.03637337684631348s
normalize-scores-des_cont_jaccard Time: 0.013297796249389648s
smallest-qnode-number Time: 0.0926821231842041s
mosaic-features Time: 0.009131908416748047s
creat-singleton-feature Time: 0.06321382522583008s
vote-by-classifier Time: 0.40155696868896484s
Qnodes to lookup: 1403
Qnodes from file: 1387
Outlier removal generates 5 lof-voted candidates
score-using-embedding Time: 9.314464092254639s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.11962318420410156s
compute-tf-idf-class_count Time: 10.550337076187134s
compute-tf-idf-property_count Time: 10.708681106567383s
context-matc

340it [7:14:05, 118.81s/it]

align-page-rank Time: 0.23582792282104492s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 4.604219198226929s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.693008184432983s
string-similarity-['jaro_winkler'] Time: 0.7320289611816406s
string-similarity-['levenshtein'] Time: 6.406642913818359s
string-similarity-['jaccard:tokenizer=word'] Time: 0.1324169635772705s
normalize-scores-des_cont_jaccard Time: 0.02198505401611328s
smallest-qnode-number Time: 0.22907590866088867s
mosaic-features Time: 0.012861013412475586s
creat-singleton-feature Time: 0.12227487564086914s
vote-by-classifier Time: 0.862541913986206s
Qnodes to lookup: 3642
Qnodes from file: 3567
Outlier removal generates 55 lof-voted candidates
score-using-embedding Time: 31.502750158309937s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4205770492553711s
compute-tf-idf-class_count Time: 33.457934856414795s
compute-tf-idf-property_count Time: 33.86403775215149s
context-match Time: 

341it [7:15:50, 114.86s/it]

align-page-rank Time: 2.310871124267578s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 14.1688871383667s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 27.21897006034851s
string-similarity-['jaro_winkler'] Time: 3.1521499156951904s
string-similarity-['levenshtein'] Time: 18.026950359344482s
string-similarity-['jaccard:tokenizer=word'] Time: 0.9855029582977295s
normalize-scores-des_cont_jaccard Time: 0.1484520435333252s
smallest-qnode-number Time: 2.767651081085205s
mosaic-features Time: 0.07256507873535156s
creat-singleton-feature Time: 1.0267040729522705s
vote-by-classifier Time: 1.1113629341125488s
Qnodes to lookup: 27602
Qnodes from file: 26841
_centroid_of_lof: Missing 1 of 266
Outlier removal generates 160 lof-voted candidates
score-using-embedding Time: 87.06353497505188s
generate-reciprocal-rank-lof-graph-embedding-score Time: 2.1906330585479736s
compute-tf-idf-class_count Time: 93.73657393455505s
compute-tf-idf-property_count Time: 95.557877

In [11]:
feature_generation(dev_candidate_path, dev_graph_embedding, dev_class_count, dev_prop_count, dev_context_path, dev_feature_path)

0it [00:00, ?it/s]

align-page-rank Time: 0.44028282165527344s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 7.408203125s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 15.187237977981567s
string-similarity-['jaro_winkler'] Time: 1.039217233657837s
string-similarity-['levenshtein'] Time: 6.074228048324585s
string-similarity-['jaccard:tokenizer=word'] Time: 0.23963499069213867s
normalize-scores-des_cont_jaccard Time: 0.0382227897644043s
smallest-qnode-number Time: 0.49535703659057617s
mosaic-features Time: 0.022897720336914062s
creat-singleton-feature Time: 0.32657718658447266s
vote-by-classifier Time: 1.6364359855651855s
Qnodes to lookup: 10982
Qnodes from file: 10654
Outlier removal generates 99 lof-voted candidates
score-using-embedding Time: 44.015238761901855s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5955820083618164s
compute-tf-idf-class_count Time: 45.96125888824463s
compute-tf-idf-property_count Time: 46.41690492630005s
context-match Time: 59.2

1it [01:56, 116.16s/it]

align-page-rank Time: 0.08647489547729492s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.6216990947723389s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.7880198955535889s
string-similarity-['jaro_winkler'] Time: 0.17881107330322266s
string-similarity-['levenshtein'] Time: 0.6029279232025146s
string-similarity-['jaccard:tokenizer=word'] Time: 0.0405580997467041s
normalize-scores-des_cont_jaccard Time: 0.016739845275878906s
smallest-qnode-number Time: 0.1318068504333496s
mosaic-features Time: 0.007908821105957031s
creat-singleton-feature Time: 0.08887195587158203s
vote-by-classifier Time: 0.4225149154663086s
Qnodes to lookup: 3254
Qnodes from file: 3227
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 11.774867057800293s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.290255069732666s
compute-tf-idf-class_count Time: 12.75142765045166s
compute-tf-idf-property_count Time: 13.729729652404785s
context-match Tim

2it [02:18, 60.79s/it] 

align-page-rank Time: 0.20038819313049316s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.2148289680480957s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.7442500591278076s
string-similarity-['jaro_winkler'] Time: 0.18785595893859863s
string-similarity-['levenshtein'] Time: 1.1664979457855225s
string-similarity-['jaccard:tokenizer=word'] Time: 0.052812814712524414s
normalize-scores-des_cont_jaccard Time: 0.014535903930664062s
smallest-qnode-number Time: 0.11955094337463379s
mosaic-features Time: 0.006815433502197266s
creat-singleton-feature Time: 0.07467293739318848s
vote-by-classifier Time: 0.9493081569671631s
Qnodes to lookup: 2227
Qnodes from file: 2142
Outlier removal generates 21 lof-voted candidates
score-using-embedding Time: 19.525466203689575s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14049792289733887s
compute-tf-idf-class_count Time: 20.176828145980835s
compute-tf-idf-property_count Time: 20.388978958129883s
context-ma

3it [03:15, 59.25s/it]

align-page-rank Time: 0.3281738758087158s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.4075868129730225s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 11.870534896850586s
string-similarity-['jaro_winkler'] Time: 0.9921231269836426s
string-similarity-['levenshtein'] Time: 8.375239133834839s
string-similarity-['jaccard:tokenizer=word'] Time: 0.12506890296936035s
normalize-scores-des_cont_jaccard Time: 0.04180312156677246s
smallest-qnode-number Time: 0.47780489921569824s
mosaic-features Time: 0.02324366569519043s
creat-singleton-feature Time: 0.27151989936828613s
vote-by-classifier Time: 1.0651960372924805s
Qnodes to lookup: 8839
Qnodes from file: 8354
Outlier removal generates 45 lof-voted candidates
score-using-embedding Time: 42.215062856674194s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.5857341289520264s
compute-tf-idf-class_count Time: 44.77003788948059s
compute-tf-idf-property_count Time: 46.47589087486267s
context-match Time:

4it [04:16, 59.72s/it]

align-page-rank Time: 0.1080019474029541s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 0.8045012950897217s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 1.0618367195129395s
string-similarity-['jaro_winkler'] Time: 0.17636990547180176s
string-similarity-['levenshtein'] Time: 0.7500927448272705s
string-similarity-['jaccard:tokenizer=word'] Time: 0.07706689834594727s
normalize-scores-des_cont_jaccard Time: 0.015913963317871094s
smallest-qnode-number Time: 0.09249210357666016s
mosaic-features Time: 0.007218122482299805s
creat-singleton-feature Time: 0.08337521553039551s
vote-by-classifier Time: 0.39960503578186035s
Qnodes to lookup: 2527
Qnodes from file: 2458
Outlier removal generates 8 lof-voted candidates
score-using-embedding Time: 12.84122610092163s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.14829301834106445s
compute-tf-idf-class_count Time: 13.47645115852356s
compute-tf-idf-property_count Time: 14.628101110458374s
context-match 

5it [04:54, 52.17s/it]

align-page-rank Time: 0.4133279323577881s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 6.28534197807312s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.906824827194214s
string-similarity-['jaro_winkler'] Time: 1.4430568218231201s
string-similarity-['levenshtein'] Time: 6.849722146987915s
string-similarity-['jaccard:tokenizer=word'] Time: 0.4543900489807129s
normalize-scores-des_cont_jaccard Time: 0.06465888023376465s
smallest-qnode-number Time: 0.5703928470611572s
mosaic-features Time: 0.040132761001586914s
creat-singleton-feature Time: 0.373150110244751s
vote-by-classifier Time: 0.8728048801422119s
Qnodes to lookup: 15701
Qnodes from file: 15364
Outlier removal generates 90 lof-voted candidates
score-using-embedding Time: 40.9332549571991s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.0448276996612549s
compute-tf-idf-class_count Time: 44.815951108932495s
compute-tf-idf-property_count Time: 45.59992074966431s
context-match Time: 121

6it [07:55, 95.73s/it]

align-page-rank Time: 0.882993221282959s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 2.9521329402923584s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 10.25437593460083s
string-similarity-['jaro_winkler'] Time: 0.7549409866333008s
string-similarity-['levenshtein'] Time: 3.0077860355377197s
string-similarity-['jaccard:tokenizer=word'] Time: 0.15935277938842773s
normalize-scores-des_cont_jaccard Time: 0.06648492813110352s
smallest-qnode-number Time: 1.014944076538086s
mosaic-features Time: 0.03615093231201172s
creat-singleton-feature Time: 0.44671082496643066s
vote-by-classifier Time: 0.5021429061889648s
Qnodes to lookup: 17694
Qnodes from file: 17584
Outlier removal generates 54 lof-voted candidates
score-using-embedding Time: 30.205616235733032s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.9085738658905029s
compute-tf-idf-class_count Time: 32.79194903373718s
compute-tf-idf-property_count Time: 33.482540130615234s
context-match Time:

7it [08:41, 79.44s/it]

align-page-rank Time: 1.5963890552520752s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 9.709328174591064s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 33.90274405479431s
string-similarity-['jaro_winkler'] Time: 2.125648260116577s
string-similarity-['levenshtein'] Time: 10.14313292503357s
string-similarity-['jaccard:tokenizer=word'] Time: 0.5256590843200684s
normalize-scores-des_cont_jaccard Time: 0.09882187843322754s
smallest-qnode-number Time: 1.7649171352386475s
mosaic-features Time: 0.051434993743896484s
creat-singleton-feature Time: 0.703510046005249s
vote-by-classifier Time: 0.5056219100952148s
Qnodes to lookup: 21311
Qnodes from file: 21009
Outlier removal generates 90 lof-voted candidates
score-using-embedding Time: 75.54481887817383s
generate-reciprocal-rank-lof-graph-embedding-score Time: 1.3187470436096191s
compute-tf-idf-class_count Time: 79.4631450176239s
compute-tf-idf-property_count Time: 80.89181423187256s
context-match Time: 219.4

8it [13:50, 152.81s/it]

align-page-rank Time: 0.1799919605255127s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.5184550285339355s
string-similarity-['symmetric_monge_elkan:tokenizer=word'] Time: 3.9267590045928955s
string-similarity-['jaro_winkler'] Time: 0.7206149101257324s
string-similarity-['levenshtein'] Time: 3.9114248752593994s
string-similarity-['jaccard:tokenizer=word'] Time: 0.19081711769104004s
normalize-scores-des_cont_jaccard Time: 0.0239717960357666s
smallest-qnode-number Time: 0.21527671813964844s
mosaic-features Time: 0.015949249267578125s
creat-singleton-feature Time: 0.18722105026245117s
vote-by-classifier Time: 0.4609508514404297s
Qnodes to lookup: 7895
Qnodes from file: 7638
Outlier removal generates 59 lof-voted candidates
score-using-embedding Time: 21.252566814422607s
generate-reciprocal-rank-lof-graph-embedding-score Time: 0.4088780879974365s
compute-tf-idf-class_count Time: 22.600757122039795s
compute-tf-idf-property_count Time: 23.93340492248535s
context-match Tim

9it [15:45, 105.02s/it]


### Generate Training Data

In [12]:
def merge_files(args):
    datapath = args.train_path
    eval_file_names = []
    for (dirpath, dirnames, filenames) in os.walk(datapath):
        for fn in filenames:
            if "csv" not in fn:
                continue
            abs_fn = f"{dirpath}/{fn}"
            assert os.path.isfile(abs_fn)
            if os.path.getsize(abs_fn) == 0:
                continue
            eval_file_names.append(abs_fn)
    df_list = []
    for fn in eval_file_names:
        fid = fn.split('/')[-1].split('.csv')[0]
        df = pd.read_csv(fn)
        df['table_id'] = fid
        df_list.append(df)
    return pd.concat(df_list) 

def compute_normalization_factor(args, all_data):
    min_max_scaler_path = args.min_max_scaler_path
    all_data_features = all_data[features]
    scaler = MinMaxScaler()
    scaler.fit(all_data_features)
    pickle.dump(scaler, open(min_max_scaler_path, 'wb'))
    return scaler

def generate_train_data(args):
    scaler_path = args.min_max_scaler_path
    scaler = pickle.load(open(scaler_path, 'rb'))
    final_list = []
    sfeatures = copy.deepcopy(features) + ['evaluation_label']
    print(sfeatures)
    normalize_features = features
    evaluation_label = ['evaluation_label']
    positive_features_final = []
    negative_features_final = []
    for i,file in enumerate(glob.glob(args.train_path + '/*.csv')):
        file_name = file.split('/')[-1]
        print(file_name)
        if os.path.getsize(file) == 0:
                continue
        d_sample = pd.read_csv(file)
#         grouped_obj = d_sample.groupby(['row', 'column'])
        grouped_obj = d_sample.groupby(['column', 'row'])
        for cell in grouped_obj:
            cell[1][normalize_features] = scaler.transform(cell[1][normalize_features])
            pos_features = []
            neg_features = []
            a = cell[1][cell[1]['evaluation_label'] == 1]
            if a.empty:
                continue
            num_rows = 64
            pos_row = a[sfeatures].drop('evaluation_label',axis=1)
            negatives_filtered = cell[1][cell[1]['evaluation_label'] == -1]
            sorted_df = negatives_filtered.sort_values('lof-graph-embedding-score',ascending=False)
            sorted_df = sorted_df[sfeatures]
            if 0 in sorted_df['evaluation_label'].tolist():
                continue
            if sorted_df.empty:
                continue
            neg_list = []
            if num_rows < len(sorted_df):
                sorted_df = sorted_df[sorted_df['evaluation_label'] == -1]
                neg_list.append(sorted_df[:2])
                retrieval_score_df = sorted_df[2:].sort_values('retrieval_score',ascending=False)
                neg_list.append(retrieval_score_df[:2])
                pagerank_score_df = retrieval_score_df[2:].sort_values('pagerank', ascending=False)
                neg_list.append(pagerank_score_df[:2])
                class_count_score_df = pagerank_score_df[2:].sort_values('lof_class_count_tf_idf_score', ascending=False)
                neg_list.append(class_count_score_df[:2])
                prop_count_score_df = class_count_score_df[2:].sort_values('lof_property_count_tf_idf_score', ascending=False)
                neg_list.append(prop_count_score_df[:2])
                monge_elkan_score_df = prop_count_score_df[2:].sort_values('monge_elkan', ascending=False)
                neg_list.append(monge_elkan_score_df[:2])
                monge_elkan_alias_score_df = monge_elkan_score_df[2:].sort_values('monge_elkan_aliases', ascending=False)
                neg_list.append(monge_elkan_alias_score_df[:2])
                
                context_score_df = monge_elkan_alias_score_df[2:].sort_values('context_score', ascending=False)
                neg_list.append(context_score_df[:2])

                jaro_winkler_score_df = monge_elkan_alias_score_df[2:].sort_values('jaro_winkler', ascending=False)
                neg_list.append(jaro_winkler_score_df[:2])
                
                top_sample_df = jaro_winkler_score_df.sample(n=50)
                neg_list.append(top_sample_df)
                top_sample_df = pd.concat(neg_list)
                top_sample_df.drop('evaluation_label', inplace=True, axis=1)
                top_sample_arr = top_sample_df.to_numpy()

            for i in range(len(top_sample_arr)):
                neg_features.append(top_sample_arr[i])
            random.shuffle(neg_features)
            for i in range(len(top_sample_arr)):
                pos_row_sample = pos_row.sample(n=1)
                ar = pos_row_sample.to_numpy()
                for ps_ar in ar:
                    pos_features.append(ps_ar)
            positive_features_final.append(pos_features)
            negative_features_final.append(neg_features)
    print(len(positive_features_final), len(positive_features_final[37]))
    print(len(negative_features_final), len(negative_features_final[37]))
    pickle.dump(positive_features_final,open(args.pos_output,'wb'))
    pickle.dump(negative_features_final,open(args.neg_output,'wb'))


In [13]:
gen_training_data_args = Namespace(train_path=train_feature_path, pos_output=pos_output, neg_output=neg_output, 
                 min_max_scaler_path=min_max_scaler_path)
all_data = merge_files(gen_training_data_args)
scaler = compute_normalization_factor(gen_training_data_args, all_data)
generate_train_data(gen_training_data_args)


['pagerank', 'retrieval_score', 'monge_elkan', 'monge_elkan_aliases', 'des_cont_jaccard', 'jaro_winkler', 'levenshtein', 'singleton', 'num_char', 'num_tokens', 'lof_class_count_tf_idf_score', 'lof_property_count_tf_idf_score', 'lof-graph-embedding-score', 'lof-reciprocal-rank', 'context_score', 'evaluation_label']
LPK7ZPYF.csv
DPY34RCV.csv
C1N4OC0S.csv
1LD1MWA8.csv
YX7BRMMR.csv
58891288_0_1117541047012405958.csv
BBU25KIE.csv
ZX8GERJC.csv
8ZD74BO9.csv
DVFKEEPF.csv
YCXXPVD2.csv
D65TEZWN.csv
NPGBDBM4.csv
BMVA4ZET.csv
4Q6FAT24.csv
Z8HBXN1A.csv
W0ZNF869.csv
6SMY8KMG.csv
AM1UELOJ.csv
39173938_0_7916056990138658530.csv
5IXA0RAI.csv
8EFC5XVR.csv
2INS294X.csv
RW8EFPXB.csv
0KL64BZL.csv
G0QTILKH.csv
DPUA686B.csv
NUMBFEKZ.csv
UMMA6HQO.csv
ERPSWFMM.csv
ZDAZ5PQ5.csv
XF412HIL.csv
BQ36GYQE.csv
CKRLO13X.csv
JOQ4B8NW.csv
L5LFLQIN.csv
IL8WDV2Y.csv
J6SSKET3.csv
M6BLH4EG.csv
10579449_0_1681126353774891032.csv
N7ZYR1AT.csv
RMTEB8QZ.csv
ZL1S4IY0.csv
X0TEEJCK.csv
T8SL8HGK.csv
QAPGT6E5.csv
C9OCMYGQ.csv
24X84XD

### Model Definition

In [14]:
# Dataset
class T2DV2Dataset(Dataset):
    def __init__(self, pos_features, neg_features):
        self.pos_features = pos_features
        self.neg_features = neg_features
    
    def __len__(self):
        return len(self.pos_features)
    
    def __getitem__(self, idx):
        return self.pos_features[idx], self.neg_features[idx]

# Model
class PairwiseNetwork(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        #original 12x24, 24x12, 12x12, 12x1
        self.fc1 = nn.Linear(hidden_size, 2*hidden_size)
        self.fc2 = nn.Linear(2*hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, 1)
    
    def forward(self, pos_features, neg_features):
        # Positive pass
        x = F.relu(self.fc1(pos_features))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        pos_out = torch.sigmoid(self.fc4(x))
        
        # Negative Pass
        x = F.relu(self.fc1(neg_features))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        neg_out = torch.sigmoid(self.fc4(x))
        
        return pos_out, neg_out
    
    def predict(self, test_feat):
        x = F.relu(self.fc1(test_feat))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        test_out = torch.sigmoid(self.fc4(x))
        return test_out

# Pairwise Loss
class PairwiseLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.m = 0
    
    def forward(self, pos_out, neg_out):
        distance = (1 - pos_out) + neg_out
        loss = torch.mean(torch.max(torch.tensor(0), distance))
        return loss

### Training

In [15]:
def generate_dataloader(positive_feat_path, negative_feat_path):
    pos_features = pickle.load(open(positive_feat_path, 'rb'))
    neg_features = pickle.load(open(negative_feat_path, 'rb'))

    pos_features_flatten = list(chain.from_iterable(pos_features))
    neg_features_flatten = list(chain.from_iterable(neg_features))

    train_dataset = T2DV2Dataset(pos_features_flatten, neg_features_flatten)
    train_dataloader = DataLoader(train_dataset, batch_size=64)
    return train_dataloader

def infer_scores(min_max_scaler_path, input_table_path, output_table_path, model):
    scaler = pickle.load(open(min_max_scaler_path, 'rb'))
    normalize_features = features
    for file in glob.glob(input_table_path + '/*.csv'):
        file_name = file.split('/')[-1]
        if os.path.getsize(file) == 0:
                continue
        if file_name != '52299421_0_4473286348258170200.csv':
            print(file_name)
            d_sample = pd.read_csv(file)
            grouped_obj = d_sample.groupby(['column', 'row'])
            new_df_list = []
            pred = []
            for cell in grouped_obj:
                cell[1][normalize_features] = scaler.transform(cell[1][normalize_features])
                sorted_df = cell[1].sort_values('lof-graph-embedding-score',ascending=False)[:64]
                sorted_df_features = sorted_df[normalize_features]
                new_df_list.append(sorted_df)
                arr = sorted_df_features.to_numpy()
                test_inp = []
                for a in arr:
                    test_inp.append(a)
                test_tensor = torch.tensor(test_inp).float()
                scores = model.predict(test_tensor)
                pred.extend(torch.squeeze(scores).tolist())
            test_df = pd.concat(new_df_list)
            test_df[final_score_column] = pred
            test_df.to_csv(f"{output_table_path}/{file_name}", index=False)

def train(args):
    if torch.cuda.is_available():
        device = torch.device('cuda')
    
    else:
        device = torch.device('cpu')
    train_dataloader = generate_dataloader(args.positive_feat_path, args.negative_feat_path)
    criterion = PairwiseLoss()
    EPOCHS = args.num_epochs
    model = PairwiseNetwork(len(features)).to(device=device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    top1_max_prec = 0
    for epoch in range(EPOCHS):
        train_epoch_loss = 0
        avg_loss = 0
        model.train()
        for bid, batch in tqdm(enumerate(train_dataloader), position=0, leave=True):
            positive_feat = torch.tensor(batch[0].float())
            negative_feat = torch.tensor(batch[1].float())
            optimizer.zero_grad()
            pos_out, neg_out = model(positive_feat, negative_feat)
            loss = criterion(pos_out, neg_out)
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss
        avg_loss = train_epoch_loss / bid

        # Evaluation
        model.eval()
        infer_scores(args.min_max_scaler_path, args.dev_path, args.dev_output, model)
        eval_data = merge_eval_files(args.dev_output)
        res, candidate_eval_data = parse_eval_files_stats(eval_data, final_score_column)
        top1_precision = res['num_tasks_with_model_score_top_one_accurate']/res['num_tasks_with_gt']
        if top1_precision > top1_max_prec:
            top1_max_prec = top1_precision
            model_save_name = 'epoch_{}_loss_{}_top1_{}.pth'.format(epoch, avg_loss, top1_max_prec)
            best_model_path = os.path.join(args.model_save_path, model_save_name)
            torch.save(model.state_dict(), best_model_path)
        
        print("Epoch {}, Avg Loss is {}, epoch top1 {}, max top1 {}".format(epoch, avg_loss, top1_precision, top1_max_prec))
    return best_model_path

In [16]:
def merge_eval_files(final_score_path):
    eval_file_names = []
    df_list = []
    for (dirpath, dirnames, filenames) in os.walk(final_score_path):
        for fn in filenames:
            if fn != '52299421_0_4473286348258170200.csv':
                if "csv" not in fn:
                    continue
                abs_fn = os.path.join(dirpath, fn)
                assert os.path.isfile(abs_fn)
                if os.path.getsize(abs_fn) == 0:
                    continue
                eval_file_names.append(abs_fn)
    
    for fn in eval_file_names:
        fid = fn.split('/')[-1].split('.csv')[0]
        df = pd.read_csv(fn)
        df['table_id'] = fid
        # df = df.fillna('')
        df_list.append(df)
    return pd.concat(df_list)

def parse_eval_files_stats(eval_data, method):
    res = {}
    candidate_eval_data = eval_data.groupby(['table_id', 'column', 'row'])['table_id'].count().reset_index(name="count")
    res['num_tasks'] = len(eval_data.groupby(['table_id', 'column', 'row']))
    res['num_tasks_with_gt'] = len(eval_data[pd.notna(eval_data['GT_kg_id'])].groupby(['table_id', 'column', 'row']))
    res['num_tasks_with_gt_in_candidate'] = len(eval_data[eval_data['evaluation_label'] == 1].groupby(['table_id', 'column', 'row']))
    res['num_tasks_with_singleton_candidate'] = len(candidate_eval_data[candidate_eval_data['count'] == 1].groupby(['table_id', 'column', 'row']))
    singleton_eval_data = candidate_eval_data[candidate_eval_data['count'] == 1]
    num_tasks_with_singleton_candidate_with_gt = 0
    for i, row in singleton_eval_data.iterrows():
        table_id, row_idx, col_idx = row['table_id'], row['row'], row['column']
        c_e_data = eval_data[(eval_data['table_id'] == table_id) & (eval_data['row'] == row_idx) & (eval_data['column'] == col_idx)]
        assert len(c_e_data) == 1
        if c_e_data.iloc[0]['evaluation_label'] == 1:
            num_tasks_with_singleton_candidate_with_gt += 1
    res['num_tasks_with_singleton_candidate_with_gt'] = num_tasks_with_singleton_candidate_with_gt
    num_tasks_with_graph_top_one_accurate = []
    num_tasks_with_graph_top_five_accurate = []
    num_tasks_with_graph_top_ten_accurate = []
    num_tasks_with_model_score_top_one_accurate = []
    num_tasks_with_model_score_top_five_accurate = []
    num_tasks_with_model_score_top_ten_accurate = []
    has_gt_list = []
    has_gt_in_candidate = []
    # candidate_eval_data = candidate_eval_data[:1]
    for i, row in candidate_eval_data.iterrows():
        #print(i)
        table_id, row_idx, col_idx = row['table_id'], row['row'], row['column']
        c_e_data = eval_data[(eval_data['table_id'] == table_id) & (eval_data['row'] == row_idx) & (eval_data['column'] == col_idx)]
        assert len(c_e_data) > 0
        if np.nan not in set(c_e_data['GT_kg_id']):
            has_gt_list.append(1)
        else:
            has_gt_list.append(0)
        if 1 in set(c_e_data['evaluation_label']):
            has_gt_in_candidate.append(1)
        else:
            has_gt_in_candidate.append(0)
            
        # handle graph-embedding-score
        s_data = c_e_data.sort_values(by=['lof-graph-embedding-score'], ascending=False)
        if s_data.iloc[0]['evaluation_label'] == 1:
            num_tasks_with_graph_top_one_accurate.append(1)
        else:
            num_tasks_with_graph_top_one_accurate.append(0)
        if 1 in set(s_data.iloc[0:5]['evaluation_label']):
            num_tasks_with_graph_top_five_accurate.append(1)
        else:
            num_tasks_with_graph_top_five_accurate.append(0)
        if 1 in set(s_data.iloc[0:10]['evaluation_label']):
            num_tasks_with_graph_top_ten_accurate.append(1)
        else:
            num_tasks_with_graph_top_ten_accurate.append(0)
        
        #rank on model score
        s_data = c_e_data.sort_values(by=[method], ascending=False)
        if s_data.iloc[0]['evaluation_label'] == 1:
            num_tasks_with_model_score_top_one_accurate.append(1)
        else:
            num_tasks_with_model_score_top_one_accurate.append(0)
        if 1 in set(s_data.iloc[0:5]['evaluation_label']):
            num_tasks_with_model_score_top_five_accurate.append(1)
        else:
            num_tasks_with_model_score_top_five_accurate.append(0)
        if 1 in set(s_data.iloc[0:10]['evaluation_label']):
            num_tasks_with_model_score_top_ten_accurate.append(1)
        else:
            num_tasks_with_model_score_top_ten_accurate.append(0)
            
        cf_e_data = c_e_data.copy()
        cf_e_data['lof-graph-embedding-score'] = cf_e_data['lof-graph-embedding-score'].replace(np.nan, 0)
        cf_e_data[method] = cf_e_data[method].replace(np.nan, 0)

    candidate_eval_data['lof-graph_top_one_accurate'] = num_tasks_with_graph_top_one_accurate
    candidate_eval_data['lof-graph_top_five_accurate'] = num_tasks_with_graph_top_five_accurate
    candidate_eval_data['lof-graph_top_ten_accurate'] = num_tasks_with_graph_top_five_accurate
    candidate_eval_data['model_top_one_accurate'] = num_tasks_with_model_score_top_one_accurate
    candidate_eval_data['model_top_five_accurate'] = num_tasks_with_model_score_top_five_accurate
    candidate_eval_data['model_top_ten_accurate'] = num_tasks_with_model_score_top_ten_accurate
    candidate_eval_data['has_gt'] = has_gt_list
    candidate_eval_data['has_gt_in_candidate'] = has_gt_in_candidate
    res['num_tasks_with_graph_top_one_accurate'] = sum(num_tasks_with_graph_top_one_accurate)
    res['num_tasks_with_graph_top_five_accurate'] = sum(num_tasks_with_graph_top_five_accurate)
    res['num_tasks_with_graph_top_ten_accurate'] = sum(num_tasks_with_graph_top_ten_accurate)
    res['num_tasks_with_model_score_top_one_accurate'] = sum(num_tasks_with_model_score_top_one_accurate)
    res['num_tasks_with_model_score_top_five_accurate'] = sum(num_tasks_with_model_score_top_five_accurate)
    res['num_tasks_with_model_score_top_ten_accurate'] = sum(num_tasks_with_model_score_top_ten_accurate)
    return res, candidate_eval_data

In [17]:
training_args = Namespace(num_epochs=20, lr=0.001, positive_feat_path=pos_output, negative_feat_path=neg_output,
                         dev_path=dev_feature_path, dev_output=dev_output_predictions,
                         model_save_path=model_save_path, min_max_scaler_path=min_max_scaler_path)

In [18]:
## Call Training
best_model_path = train(training_args)

  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
18278it [00:30, 602.41it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
106it [00:00, 524.60it/s]

Epoch 0, Avg Loss is 0.12883563339710236, epoch top1 0.6200564971751412, max top1 0.6200564971751412


18278it [00:30, 601.79it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
109it [00:00, 539.72it/s]

Epoch 1, Avg Loss is 0.11121796071529388, epoch top1 0.768361581920904, max top1 0.768361581920904


18278it [00:29, 616.82it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
53it [00:00, 525.75it/s]

Epoch 2, Avg Loss is 0.11015131324529648, epoch top1 0.7443502824858758, max top1 0.768361581920904


18278it [00:30, 606.91it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
54it [00:00, 533.08it/s]

Epoch 3, Avg Loss is 0.12181942909955978, epoch top1 0.6765536723163842, max top1 0.768361581920904


18278it [00:30, 607.05it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
106it [00:00, 530.18it/s]

Epoch 4, Avg Loss is 0.1114315390586853, epoch top1 0.730225988700565, max top1 0.768361581920904


18278it [00:30, 601.70it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
50it [00:00, 497.89it/s]

Epoch 5, Avg Loss is 0.11637300997972488, epoch top1 0.7330508474576272, max top1 0.768361581920904


18278it [00:30, 591.84it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
51it [00:00, 508.06it/s]

Epoch 6, Avg Loss is 0.11358321458101273, epoch top1 0.5847457627118644, max top1 0.768361581920904


18278it [00:31, 582.02it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
106it [00:00, 528.01it/s]

Epoch 7, Avg Loss is 0.11483117192983627, epoch top1 0.5918079096045198, max top1 0.768361581920904


18278it [00:31, 589.42it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
53it [00:00, 527.35it/s]

Epoch 8, Avg Loss is 0.12005460262298584, epoch top1 0.8022598870056498, max top1 0.8022598870056498


18278it [00:30, 598.46it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
104it [00:00, 511.50it/s]

Epoch 9, Avg Loss is 0.12509234249591827, epoch top1 0.769774011299435, max top1 0.8022598870056498


18278it [00:31, 586.38it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
51it [00:00, 502.78it/s]

Epoch 10, Avg Loss is 0.13111945986747742, epoch top1 0.7556497175141242, max top1 0.8022598870056498


18278it [00:31, 584.25it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
52it [00:00, 513.43it/s]

Epoch 11, Avg Loss is 0.11873285472393036, epoch top1 0.5847457627118644, max top1 0.8022598870056498


18278it [00:32, 571.16it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
49it [00:00, 484.94it/s]

Epoch 12, Avg Loss is 0.1164768785238266, epoch top1 0.8403954802259888, max top1 0.8403954802259888


18278it [00:30, 592.74it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
49it [00:00, 481.48it/s]

Epoch 13, Avg Loss is 0.1260880082845688, epoch top1 0.6567796610169492, max top1 0.8403954802259888


18278it [00:35, 517.61it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
51it [00:00, 506.72it/s]

Epoch 14, Avg Loss is 0.12995406985282898, epoch top1 0.7584745762711864, max top1 0.8403954802259888


18278it [00:33, 551.28it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
48it [00:00, 475.80it/s]

Epoch 15, Avg Loss is 0.12205344438552856, epoch top1 0.6299435028248588, max top1 0.8403954802259888


18278it [00:32, 555.31it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
50it [00:00, 497.13it/s]

Epoch 16, Avg Loss is 0.12807875871658325, epoch top1 0.6186440677966102, max top1 0.8403954802259888


18278it [00:33, 546.82it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
52it [00:00, 514.44it/s]

Epoch 17, Avg Loss is 0.116629958152771, epoch top1 0.5677966101694916, max top1 0.8403954802259888


18278it [00:31, 581.33it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


  positive_feat = torch.tensor(batch[0].float())
  negative_feat = torch.tensor(batch[1].float())
52it [00:00, 516.10it/s]

Epoch 18, Avg Loss is 0.11798889935016632, epoch top1 0.5805084745762712, max top1 0.8403954802259888


18278it [00:31, 581.02it/s]


39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv
Epoch 19, Avg Loss is 0.11301832646131516, epoch top1 0.8248587570621468, max top1 0.8403954802259888


In [19]:
best_model_path

'/Users/amandeep/Github/table-linker/data/SemTabR4_T2dv2/table-linker/dev-output/v12/saved_models/epoch_12_loss_0.1164768785238266_top1_0.8403954802259888.pth'

## Dev Prediction

In [20]:
def dev_prediction(dev_feature_path, dev_predictions_top_k, saved_model, output_column, min_max_scaler_path, k=5):
    for file in glob.glob(dev_feature_path + '/*.csv'):
        filename = file.split("/")[-1]
        print(filename)
        feature_str =  ",".join(features)
        if os.path.getsize(file) == 0:
                    continue
        # location where the output generated by the predictions wil be stored.
        dev_output = f"{dev_predictions_top_k}/{filename}"
        !tl predict-using-model $file -o $output_column \
            --features $feature_str \
            --ranking-model $saved_model \
            --normalization-factor $min_max_scaler_path \
            / get-kg-links -c $output_column -k $k --k-rows \
            > $dev_output

In [21]:
def add_color(dev_predictions_top_k, dev_colorized_path, score_column, k=5):
    for file in glob.glob(dev_predictions_top_k + '/*.csv'):
        filename = file.split("/")[-1]
        print(filename)
        if os.path.getsize(file) == 0:
                    continue
                
        dev_color_file = f"{dev_colorized_path}/{filename.strip('.csv')}.xlsx"
        !tl add-color $file -c "$score_column,evaluation_label" -k $k --output $dev_color_file

In [22]:
def compute_metrics(dev_predictions_top_k, dev_predictions_metrics, score_column, k=5):
    df_list = []
    for file in glob.glob(dev_predictions_top_k + '/*.csv'):
        filename = file.split("/")[-1]
        print(filename)
        if os.path.getsize(file) == 0:
                    continue
                
        dev_metrics_file = f"{dev_predictions_metrics}/{filename}"
        !tl metrics $file -k $k -c $score_column --tag $filename> $dev_metrics_file
        df_list.append(pd.read_csv(dev_metrics_file))
    return pd.concat(df_list)

In [23]:
dev_prediction(dev_feature_path, dev_predictions_top_k, best_model_path, final_score_column, min_max_scaler_path, k=200)

39759273_0_1427898308030295194.csv
predict-using-model Time: 1.1223816871643066s
get-kg-links-siamese_prediction Time: 0.9894561767578125s
45073662_0_3179937335063201739.csv
predict-using-model Time: 1.8837628364562988s
get-kg-links-siamese_prediction Time: 0.2691960334777832s
29414811_2_4773219892816395776.csv
predict-using-model Time: 1.1233410835266113s
get-kg-links-siamese_prediction Time: 0.21491312980651855s
84575189_0_6365692015941409487.csv
predict-using-model Time: 1.5535120964050293s
get-kg-links-siamese_prediction Time: 1.0447988510131836s
14380604_4_3329235705746762392.csv
predict-using-model Time: 0.9142780303955078s
get-kg-links-siamese_prediction Time: 0.2114419937133789s
52299421_0_4473286348258170200.csv
predict-using-model Time: 1.4488623142242432s
get-kg-links-siamese_prediction Time: 1.1445989608764648s
50270082_0_444360818941411589.csv
predict-using-model Time: 1.6169407367706299s
get-kg-links-siamese_prediction Time: 1.736905813217163s
28086084_0_31276605309899167

In [24]:
metrics_df = compute_metrics(dev_predictions_top_k, dev_metrics_path, final_score_column, k=200)

39759273_0_1427898308030295194.csv
metrics Time: 3.344841957092285s
45073662_0_3179937335063201739.csv
metrics Time: 0.9158201217651367s
29414811_2_4773219892816395776.csv
metrics Time: 0.7286691665649414s
84575189_0_6365692015941409487.csv
metrics Time: 3.2659289836883545s
14380604_4_3329235705746762392.csv
metrics Time: 0.828848123550415s
52299421_0_4473286348258170200.csv
metrics Time: 4.336864948272705s
50270082_0_444360818941411589.csv
metrics Time: 5.679181814193726s
28086084_0_3127660530989916727.csv
metrics Time: 7.728912830352783s
14067031_0_559833072073397908.csv
metrics Time: 2.216524124145508s


In [25]:
metrics_df

Unnamed: 0,k,f1,precision,recall,tag
0,200,0.994975,0.99,1.0,39759273_0_1427898308030295194.csv
0,200,1.0,1.0,1.0,45073662_0_3179937335063201739.csv
0,200,0.976744,0.954545,1.0,29414811_2_4773219892816395776.csv
0,200,0.898876,0.816327,1.0,84575189_0_6365692015941409487.csv
0,200,1.0,1.0,1.0,14380604_4_3329235705746762392.csv
0,200,0.978022,0.978022,0.978022,52299421_0_4473286348258170200.csv
0,200,0.990991,0.982143,1.0,50270082_0_444360818941411589.csv
0,200,0.85488,0.768182,0.963636,28086084_0_3127660530989916727.csv
0,200,0.981132,0.981132,0.981132,14067031_0_559833072073397908.csv


In [26]:
metrics_df['recall'].mean()

0.9914211574588934

In [27]:
metrics_df.to_csv(f"{dev_metrics_path}/metrics_200.csv", index=False)

In [28]:
metrics_df = compute_metrics(dev_predictions_top_k, dev_metrics_path, final_score_column, k=1)

39759273_0_1427898308030295194.csv
metrics Time: 3.5537333488464355s
45073662_0_3179937335063201739.csv
metrics Time: 0.9060759544372559s
29414811_2_4773219892816395776.csv
metrics Time: 0.6573503017425537s
84575189_0_6365692015941409487.csv
metrics Time: 3.3251709938049316s
14380604_4_3329235705746762392.csv
metrics Time: 0.7085330486297607s
52299421_0_4473286348258170200.csv
metrics Time: 4.736017227172852s
50270082_0_444360818941411589.csv
metrics Time: 5.278078079223633s
28086084_0_3127660530989916727.csv
metrics Time: 7.827936887741089s
14067031_0_559833072073397908.csv
metrics Time: 2.3459389209747314s


In [29]:
metrics_df

Unnamed: 0,k,f1,precision,recall,tag
0,1,0.897017,0.99,0.82,39759273_0_1427898308030295194.csv
0,1,0.92,1.0,0.851852,45073662_0_3179937335063201739.csv
0,1,0.526646,0.954545,0.363636,29414811_2_4773219892816395776.csv
0,1,0.800728,0.816327,0.785714,84575189_0_6365692015941409487.csv
0,1,0.947368,1.0,0.9,14380604_4_3329235705746762392.csv
0,1,0.894532,0.978022,0.824176,52299421_0_4473286348258170200.csv
0,1,0.948276,0.982143,0.916667,50270082_0_444360818941411589.csv
0,1,0.756648,0.768182,0.745455,28086084_0_3127660530989916727.csv
0,1,0.941887,0.981132,0.90566,14067031_0_559833072073397908.csv


In [30]:
metrics_df['f1'].mean()

0.848122356254435

In [31]:
metrics_df.to_csv(f"{dev_metrics_path}/metrics_1.csv", index=False)

In [32]:
add_color(dev_predictions_top_k, dev_colorized_path, final_score_column)

39759273_0_1427898308030295194.csv
add-color Time: 2.080474853515625s
45073662_0_3179937335063201739.csv
add-color Time: 0.4650721549987793s
29414811_2_4773219892816395776.csv
add-color Time: 0.3892197608947754s
84575189_0_6365692015941409487.csv
add-color Time: 1.7831270694732666s
14380604_4_3329235705746762392.csv
add-color Time: 0.39830708503723145s
52299421_0_4473286348258170200.csv
add-color Time: 2.531569719314575s
50270082_0_444360818941411589.csv
add-color Time: 2.867861032485962s
28086084_0_3127660530989916727.csv
add-color Time: 4.324522018432617s
14067031_0_559833072073397908.csv
add-color Time: 1.5757498741149902s


In [33]:
def find_missing_correct_candidates(candidates_path, missing_correct_candidates_path):
     for file in tqdm(glob.glob(candidates_path + '/*.csv')):
        filename = file.split("/")[-1]
        if os.path.getsize(file) == 0:
                    continue
        missing_file = f"{missing_correct_candidates_path}/{filename}"
        !tl check-candidates "$file" > "$missing_file"

In [34]:
find_missing_correct_candidates(dev_candidate_path, dev_missing_candidates_path)

  0%|          | 0/9 [00:00<?, ?it/s]

check-candidates Time: 0.020513057708740234s


 11%|█         | 1/9 [00:02<00:23,  3.00s/it]

check-candidates Time: 0.007582902908325195s


 22%|██▏       | 2/9 [00:06<00:21,  3.01s/it]

check-candidates Time: 0.007328033447265625s


 33%|███▎      | 3/9 [00:08<00:15,  2.61s/it]

check-candidates Time: 0.018445968627929688s


 44%|████▍     | 4/9 [00:10<00:12,  2.47s/it]

check-candidates Time: 0.009742021560668945s


 56%|█████▌    | 5/9 [00:12<00:08,  2.21s/it]

check-candidates Time: 0.025377988815307617s


 67%|██████▋   | 6/9 [00:13<00:05,  1.95s/it]

check-candidates Time: 0.033286094665527344s


 78%|███████▊  | 7/9 [00:14<00:03,  1.69s/it]

check-candidates Time: 0.04585003852844238s


 89%|████████▉ | 8/9 [00:15<00:01,  1.55s/it]

check-candidates Time: 0.015259027481079102s


100%|██████████| 9/9 [00:17<00:00,  1.94s/it]


In [35]:
def concat_files(files_path):
    df_list = []
    for file in glob.glob(files_path + '/*.csv'):
        filename = file.split("/")[-1]
        print(filename)
        if os.path.getsize(file) == 0:
                    continue
        df = pd.read_csv(file)
        df['filename'] = filename
        df_list.append(df)
    return pd.concat(df_list)

In [36]:
missing_df = concat_files(dev_missing_candidates_path)

39759273_0_1427898308030295194.csv
45073662_0_3179937335063201739.csv
29414811_2_4773219892816395776.csv
84575189_0_6365692015941409487.csv
14380604_4_3329235705746762392.csv
52299421_0_4473286348258170200.csv
50270082_0_444360818941411589.csv
28086084_0_3127660530989916727.csv
14067031_0_559833072073397908.csv


In [37]:
missing_df

Unnamed: 0,column,row,label,context,GT_kg_id,GT_kg_label,filename
0,1,50,World,"51|$6,500|4.00%|$38,000.0|25.00%|5,926,466,814...",Q2277,Roman Empire,52299421_0_4473286348258170200.csv
0,0,38,BRIGID of Ireland,--|--|--|Yes|--,Q18527449,Dar Lugdach,28086084_0_3127660530989916727.csv
1,0,39,Brigid of Kildare,--|--|--|Yes|--,Q18527449,Dar Lugdach,28086084_0_3127660530989916727.csv
2,0,52,Charlie,--|--|--|Yes|--,Q1199757,Dish Network,28086084_0_3127660530989916727.csv
3,0,59,"Cyril, Apostle to the Slavs",??|??|Yes|??|--,Q239925,Saints Cyril and Methodius,28086084_0_3127660530989916727.csv
4,0,138,Louis,--|--|--|Yes|--,Q88927,Louis de Montfort,28086084_0_3127660530989916727.csv
5,0,156,Matthias,Yes|Yes|Yes|Yes|--,Q43600,Matthew the Apostle,28086084_0_3127660530989916727.csv
6,0,175,Peter of Alexandria,--|Yes|Yes|Yes|--,Q20815727,Pope Peter of Alexandria,28086084_0_3127660530989916727.csv
0,0,24,Kanya,"Swahili, English|199.4|Dec 1963|Musalia Mudava...",Q114,Kenya,14067031_0_559833072073397908.csv


In [None]:
missing_df.to_csv(f"{dev_missing_candidates_path}/missing_concatenated.csv", index=False)

In [None]:
find_missing_correct_candidates(train_candidates_path, train_missing_candidates_path)

In [None]:
train_missing_df = concat_files(train_missing_candidates_path)

In [None]:
len(train_missing_df)

In [None]:
train_missing_df

In [None]:
train_missing_df.to_csv(f"{train_missing_candidates_path}/missing_concatenated.csv", index=False)