In [3]:
import sys, time
sys.argv = ["main", "Input/data_conf_oaei_german_aml_thresh.pkl", 6, 5, 0.6075]

In [5]:
import os, itertools, time, pickle, operator, random
import subprocess
from xml.dom import minidom
from collections import Counter, OrderedDict
from operator import itemgetter
from scipy import spatial
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
import re, sys, glob
import numpy as np
import scipy.sparse as sp
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from math import ceil, exp
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

f = open(sys.argv[1], "rb")
data_conf, data_german, aml_data, emb_indexer, emb_indexer_inv, emb_vals, neighbours_dicts, max_paths, max_pathlen, max_types, ontologies_in_alignment = pickle.load(f)
max_paths = int(sys.argv[2])
max_pathlen = int(sys.argv[3])
threshold = float(sys.argv[4])
aml_data = {key: float(aml_data[key])>=threshold for key in aml_data}
flatten = lambda l: [item for sublist in l for item in sublist]
ontologies_in_alignment = [tuple(pair) for pair in ontologies_in_alignment]

direct_inputs, direct_targets = [], []

def cos_sim(a,b):
    return 1 - spatial.distance.cosine(a,b)

all_fn, all_fp = [], []

threshold_results = {}

def test():
    global batch_size, test_data_t, test_data_f, model, optimizer, emb_indexer_inv, all_metrics, direct_inputs, direct_targets, threshold_results
    all_results = OrderedDict()    
    direct_inputs, direct_targets = [], []
    with torch.no_grad():
        all_pred = []
        
        np.random.shuffle(test_data_t)
        np.random.shuffle(test_data_f)

        inputs_pos, nodes_pos, targets_pos = generate_input(test_data_t, 1)
        inputs_neg, nodes_neg, targets_neg = generate_input(test_data_f, 0)

        inputs_all = list(inputs_pos) + list(inputs_neg)
        targets_all = list(targets_pos) + list(targets_neg)
        nodes_all = list(nodes_pos) + list(nodes_neg)
        
        all_inp = list(zip(inputs_all, targets_all, nodes_all))
        all_inp_shuffled = random.sample(all_inp, len(all_inp))
        inputs_all, targets_all, nodes_all = list(zip(*all_inp_shuffled))

        batch_size = min(batch_size, len(inputs_all))
        num_batches = int(ceil(len(inputs_all)/batch_size))
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = (batch_idx+1) * batch_size

            inputs = np.array(to_feature(inputs_all[batch_start: batch_end]))
            targets = np.array(targets_all[batch_start: batch_end])
            nodes = np.array(nodes_all[batch_start: batch_end])
            
            inp_elems = torch.LongTensor(inputs).to(device)
            node_elems = torch.LongTensor(nodes).to(device)
            targ_elems = torch.DoubleTensor(targets)

            outputs = model(node_elems, inp_elems)
            outputs = [el.item() for el in outputs]
            targets = [True if el.item() else False for el in targets]

            for idx, pred_elem in enumerate(outputs):
                ent1 = emb_indexer_inv[nodes[idx][0]]
                ent2 = emb_indexer_inv[nodes[idx][1]]
                if (ent1, ent2) in all_results:
                    print ("Error: ", ent1, ent2, "already present")
                all_results[(ent1, ent2)] = (pred_elem, targets[idx])
        
        direct_targets = [True if el else False for el in direct_targets]
        
        print ("Len (direct inputs): ", len(direct_inputs))
        for idx, direct_input in enumerate(direct_inputs):
            ent1 = emb_indexer_inv[direct_input[0]]
            ent2 = emb_indexer_inv[direct_input[1]]
            sim = cos_sim(emb_vals[direct_input[0]], emb_vals[direct_input[1]])
            all_results[(ent1, ent2)] = (sim, direct_targets[idx])
    return (test_onto, all_results)

def optimize_threshold():
    global batch_size, val_data_t, val_data_f, model, optimizer, emb_indexer_inv, all_metrics, direct_inputs, direct_targets, threshold_results
    all_results = OrderedDict()
    direct_inputs, direct_targets = [], []
    with torch.no_grad():
        all_pred = []
        
        np.random.shuffle(val_data_t)
        np.random.shuffle(val_data_f)

        inputs_pos, nodes_pos, targets_pos = generate_input(val_data_t, 1)
        inputs_neg, nodes_neg, targets_neg = generate_input(val_data_f, 0)

        inputs_all = list(inputs_pos) + list(inputs_neg)
        targets_all = list(targets_pos) + list(targets_neg)
        nodes_all = list(nodes_pos) + list(nodes_neg)
        
        all_inp = list(zip(inputs_all, targets_all, nodes_all))
        all_inp_shuffled = random.sample(all_inp, len(all_inp))
        inputs_all, targets_all, nodes_all = list(zip(*all_inp_shuffled))

        batch_size = min(batch_size, len(inputs_all))
        num_batches = int(ceil(len(inputs_all)/batch_size))
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = (batch_idx+1) * batch_size

            inputs = np.array(to_feature(inputs_all[batch_start: batch_end]))
            targets = np.array(targets_all[batch_start: batch_end])
            nodes = np.array(nodes_all[batch_start: batch_end])
            
            inp_elems = torch.LongTensor(inputs).to(device)
            node_elems = torch.LongTensor(nodes).to(device)
            targ_elems = torch.DoubleTensor(targets)

            outputs = model(node_elems, inp_elems)
            outputs = [el.item() for el in outputs]
            targets = [True if el.item() else False for el in targets]

            for idx, pred_elem in enumerate(outputs):
                ent1 = emb_indexer_inv[nodes[idx][0]]
                ent2 = emb_indexer_inv[nodes[idx][1]]
                if (ent1, ent2) in all_results:
                    print ("Error: ", ent1, ent2, "already present")
                all_results[(ent1, ent2)] = (pred_elem, targets[idx])
        
        direct_targets = [True if el else False for el in direct_targets]
        
        print ("Len (direct inputs): ", len(direct_inputs))
        for idx, direct_input in enumerate(direct_inputs):
            ent1 = emb_indexer_inv[direct_input[0]]
            ent2 = emb_indexer_inv[direct_input[1]]
            sim = cos_sim(emb_vals[direct_input[0]], emb_vals[direct_input[1]])
            all_results[(ent1, ent2)] = (sim, direct_targets[idx])
        
        low_threshold = np.min([el[0] for el in all_results.values()]) - 0.02
        high_threshold = np.max([el[0] for el in all_results.values()]) + 0.02
        threshold = low_threshold
        step = 0.001
        while threshold < high_threshold:
            res = []
            for i,key in enumerate(all_results):
                if all_results[key][0] > threshold:
                    res.append(key)
            s = set(res)
            fn_list = [(key, all_results[key][0]) for key in val_data_t if key not in s and not is_valid(val_onto, key)]
            fp_list = [(elem, all_results[elem][0]) for elem in res if not all_results[elem][1]]
            tp_list = [(elem, all_results[elem][0]) for elem in res if all_results[elem][1]]
            
            tp, fn, fp = len(tp_list), len(fn_list), len(fp_list)
            exception = False
            
            try:
                precision = tp/(tp+fp)
                recall = tp/(tp+fn)
                f1score = 2 * precision * recall / (precision + recall)
                f2score = 5 * precision * recall / (4 * precision + recall)
                f0_5score = 1.25 * precision * recall / (0.25 * precision + recall)
            except Exception as e:
                print (e)
                exception = True
                step = 0.001
                threshold += step
                continue
            # print ("Threshold: ", threshold, precision, recall, f1score, f2score, f0_5score)
            if threshold in threshold_results:
                threshold_results[threshold].append([precision, recall, f1score, f2score, f0_5score])
            else:
                threshold_results[threshold] = [[precision, recall, f1score, f2score, f0_5score]]

            threshold += step 

def calculate_performance():
    global final_results
    all_metrics, all_fn, all_fp = [], [], []
    for (test_onto, all_results) in final_results:
        res = []
        for i,key in enumerate(all_results):
            if all_results[key][0] > threshold:
                res.append(key)
        s = set(res)
        fn_list = [(key, all_results[key][0]) for key in test_data_t if key not in s and not is_valid(test_onto, key)]
        fp_list = [(elem, all_results[elem][0]) for elem in res if not all_results[elem][1]]
        tp_list = [(elem, all_results[elem][0]) for elem in res if all_results[elem][1]]
        tp, fn, fp = len(tp_list), len(fn_list), len(fp_list)
        
        try:
            precision = tp/(tp+fp)
            recall = tp/(tp+fn)
            f1score = 2 * precision * recall / (precision + recall)
            f2score = 5 * precision * recall / (4 * precision + recall)
            f0_5score = 1.25 * precision * recall / (0.25 * precision + recall)
        except Exception as e:
            print (e)
            continue
        print ("Performance for", test_onto, "is :", (precision, recall, f1score, f2score, f0_5score))
        all_fn.extend(fn_list)
        all_fp.extend(fp_list)
        all_metrics.append((precision, recall, f1score, f2score, f0_5score))
    return all_metrics, all_fn, all_fp


def masked_softmax(inp):
    inp = inp.double()
    mask = ((inp != 0).double() - 1) * 9999  # for -inf
    return (inp + mask).softmax(dim=-1)

class SiameseNetwork(nn.Module):
    def __init__(self, emb_vals, threshold=0.9):
        super().__init__() 
        
        self.n_neighbours = max_types
        self.max_paths = max_paths
        self.max_pathlen = max_pathlen
        self.embedding_dim = np.array(emb_vals).shape[1]
        
        self.threshold = threshold

        self.name_embedding = nn.Embedding(len(emb_vals), self.embedding_dim)
        self.name_embedding.load_state_dict({'weight': torch.from_numpy(np.array(emb_vals))})
        self.name_embedding.weight.requires_grad = False

        self.dropout = dropout
        
        self.cosine_sim_layer = nn.CosineSimilarity(dim=1)
        self.output = nn.Linear(2*self.embedding_dim, 300)
        
        self.v = nn.Parameter(torch.DoubleTensor([1/(self.max_pathlen) for i in range(self.max_pathlen)]))
        self.w_rootpath = nn.Parameter(torch.DoubleTensor([0.25]))
        self.w_children = nn.Parameter(torch.DoubleTensor([0.25]))
        self.w_obj_neighbours = nn.Parameter(torch.DoubleTensor([0.25]))
 
    def forward(self, nodes, features):
        '''
        Arguments:
            - nodes: batch_size * 2
            - features: batch_size * 2 * 4 * max_paths * max_pathlen
        '''
        results = []
        nodes = nodes.permute(1,0) # 2 * batch_size
        features = features.permute(1,0,2,3,4) # 2 * batch_size * 4 * max_paths * max_pathlen
        for i in range(2):
            node_emb = self.name_embedding(nodes[i]) # batch_size * 512
            feature_emb = self.name_embedding(features[i]) #  batch_size * 4 * max_paths * max_pathlen * 512
            
            feature_emb_reshaped = feature_emb.permute(0,4,1,2,3).reshape(-1, self.embedding_dim, self.n_neighbours * self.max_paths * self.max_pathlen)
            path_weights = torch.bmm(node_emb[:, None, :], feature_emb_reshaped)
            path_weights = path_weights.squeeze(1).reshape(-1, self.n_neighbours, self.max_paths, self.max_pathlen)
            path_weights = torch.sum(path_weights, dim=-1)
            best_path_indices = torch.max(path_weights, dim=-1)[1][(..., ) + (None, ) * 3]
            best_path_indices = best_path_indices.expand(-1, -1, -1, self.max_pathlen,  self.embedding_dim)
            best_path = torch.gather(feature_emb, 2, best_path_indices).squeeze(2) # batch_size * 4 * max_pathlen * 512
            # Another way: 
            # path_weights = masked_softmax(path_weights)
            # best_path = torch.sum(path_weights[:, :, :, None, None] * feature_emb, dim=2)

            best_path_reshaped = best_path.permute(0,3,1,2).reshape(-1, self.embedding_dim, self.n_neighbours * self.max_pathlen)
            node_weights = torch.bmm(node_emb.unsqueeze(1), best_path_reshaped) # batch_size * 4 * max_pathlen
            node_weights = masked_softmax(node_weights.squeeze(1).reshape(-1, self.n_neighbours, self.max_pathlen)) # batch_size * 4 * max_pathlen
            attended_path = node_weights.unsqueeze(-1) * best_path # batch_size * 4 * max_pathlen * 512

            distance_weighted_path = torch.sum((self.v[None,None,:,None] * attended_path), dim=2) # batch_size * 4 * 512

            self.w_data_neighbours = (1-self.w_rootpath-self.w_children-self.w_obj_neighbours)
            context_emb = self.w_rootpath * distance_weighted_path[:,0,:] \
                        + self.w_children * distance_weighted_path[:,1,:] \
                        + self.w_obj_neighbours * distance_weighted_path[:,2,:] \
                        + self.w_data_neighbours * distance_weighted_path[:,3,:]

            contextual_node_emb = torch.cat((node_emb, context_emb), dim=1)
            output_node_emb = self.output(contextual_node_emb)
            results.append(output_node_emb)
        sim = self.cosine_sim_layer(results[0], results[1])
        return sim

def is_valid(test_onto, key):
    return tuple([el.split("#")[0] for el in key]) not in test_onto

def generate_data_neighbourless(elem_tuple):
    return [emb_indexer[elem] for elem in elem_tuple]

def embedify(seq, emb_indexer):
    for item in seq:
        if isinstance(item, list):
            yield list(embedify(item, emb_indexer))
        else:
            yield emb_indexer[item]

def generate_data(elem_tuple):
    return list(embedify([neighbours_dicts[elem] for elem in elem_tuple], emb_indexer))

def to_feature(inputs):
    inputs_lenpadded = [[[[path[:max_pathlen] + [0 for i in range(max_pathlen -len(path[:max_pathlen]))]
                                    for path in nbr_type[:max_paths]]
                                for nbr_type in ent[:max_types]]
                            for ent in elem]
                        for elem in inputs]
    inputs_pathpadded = [[[nbr_type + [[0 for j in range(max_pathlen)]
                             for i in range(max_paths - len(nbr_type))]
                            for nbr_type in ent] for ent in elem]
                        for elem in inputs_lenpadded]
    return inputs_pathpadded

def generate_input(elems, target):
    inputs, targets, nodes = [], [], []
    global direct_inputs, direct_targets
    for elem in list(elems):
        try:
            inputs.append(generate_data(elem))
            nodes.append(generate_data_neighbourless(elem))
            targets.append(target)
        except:
            direct_inputs.append(generate_data_neighbourless(elem))
            direct_targets.append(target)
    return inputs, nodes, targets

print("Max number of nodes in a path: " + str(sys.argv[1]))

def count_non_unk(elem):
    return len([l for l in elem if l!="<UNK>"])

torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

torch.set_default_dtype(torch.float64)
data_items = data_conf.items()
np.random.shuffle(list(data_items))
data_conf = OrderedDict(data_items)

print ("Number of entities:", len(data_conf))
lr = 0.001
num_epochs = 1
weight_decay = 0.001
batch_size = 32
dropout = 0.3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
all_metrics = []
final_results = []

model = SiameseNetwork(emb_vals).to(device)
print (model.threshold)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

train_data_t = [key for key in data_conf if data_conf[key]]
train_data_f = [key for key in data_conf if not data_conf[key]]
train_data_t = np.repeat(train_data_t, ceil(len(train_data_f)/len(train_data_t)), axis=0)
train_data_t = train_data_t[:len(train_data_f)].tolist()
np.random.shuffle(train_data_f)

for epoch in range(num_epochs):
    inputs_pos, nodes_pos, targets_pos = generate_input(train_data_t, 1)
    inputs_neg, nodes_neg, targets_neg = generate_input(train_data_f, 0)
    inputs_all = list(inputs_pos) + list(inputs_neg)
    targets_all = list(targets_pos) + list(targets_neg)
    nodes_all = list(nodes_pos) + list(nodes_neg)
    
    all_inp = list(zip(inputs_all, targets_all, nodes_all))
    all_inp_shuffled = random.sample(all_inp, len(all_inp))
    inputs_all, targets_all, nodes_all = list(zip(*all_inp_shuffled[:100]))

    batch_size = min(batch_size, len(inputs_all))
    num_batches = int(ceil(len(inputs_all)/batch_size))

    for batch_idx in range(num_batches):
        batch_start = batch_idx * batch_size
        batch_end = (batch_idx+1) * batch_size
        
        inputs = np.array(to_feature(inputs_all[batch_start: batch_end]))
        targets = np.array(targets_all[batch_start: batch_end])
        nodes = np.array(nodes_all[batch_start: batch_end])
        
        inp_elems = torch.LongTensor(inputs).to(device)
        node_elems = torch.LongTensor(nodes).to(device)
        targ_elems = torch.DoubleTensor(targets).to(device)

        optimizer.zero_grad()
        outputs = model(node_elems, inp_elems)

        loss = F.mse_loss(outputs, targ_elems)
        loss.backward()
        optimizer.step()

        if batch_idx%500 == 0:
            print ("Epoch: {} Idx: {} Loss: {}".format(epoch, batch_idx, loss.item()))


val_data_t = [key for key in data_conf if data_conf[key]]
val_data_f = [key for key in data_conf if not data_conf[key]]
np.random.shuffle(val_data_f)
val_onto = ontologies_in_alignment

optimize_threshold()

threshold_results_mean = {el: np.mean(threshold_results[el], axis=0) for el in threshold_results}    
threshold = max(threshold_results_mean.keys(), key=(lambda key: threshold_results_mean[key][2]))

model.threshold = threshold

# def check_best_performance():
#     output_file = "Results/Output_att*" + "_".join(sys.argv[6].split("/")[1].split("_")[:4]) + ".txt"
#     results_lines = [[l for l in open(file).read().split("\n") if "Final Results:" in l] for file in glob.glob(output_file)]
#     results_lines = [line[0] for line in results_lines if line]
#     results_lines = [line.split("[")[1].split("]")[0].split(" ") for line in results_lines]
#     results_lines = [float([value for value in line if value][2]) for line in results_lines]
#     return max(results_lines)
model.eval()

test_onto = ontologies_in_alignment
test_data = {elem: data_conf[elem] for elem in data_conf if tuple([el.split("#")[0] for el in elem]) in test_onto}

test_data_t = [key for key in test_data if test_data[key]]
test_data_f = [key for key in test_data if not test_data[key]]

final_results.append(test())

all_metrics, all_fn, all_fp = calculate_performance()
final_results = np.mean(all_metrics, axis=0)

# if float(final_results[2]) > check_best_performance():
# Remove unneccessary models
# _ = [os.remove(file) for file in glob.glob("_".join(sys.argv[6].split("_")[:4]) + "*.pt")]
# Remove unneccessary error files
# _ = [os.remove(file) for file in glob.glob("_".join(sys.argv[5].split("_")[:5]) + "*.pkl")]
# Save model
# torch.save(model.state_dict(), sys.argv[6])
#Save error file
# f1 = open(sys.argv[5], "wb")
# pickle.dump([all_fn, all_fp], f1)

print ("Final Results: ", final_results)
print ("Threshold: ", threshold)



Max number of nodes in a path: Input/data_conf_oaei_german_aml_thresh.pkl
Number of entities: 122893
0.9
Epoch: 0 Idx: 0 Loss: 0.1697372857382614
Len (direct inputs):  27800
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
division by zero
Len (direct inputs):  27800
Performance for [('confOf', 'sigkdd'), ('iasted', 'sigkdd'), ('cmt', 'ekaw'), ('confOf', 'iasted'), ('conference', 'edas'), ('cmt', 'sigkdd'), ('ekaw', 'sigkdd'), ('conference', 'confOf'), ('conference', 'sigkdd'), ('confOf', 'edas'), ('cmt', 'conference'), ('edas', 'iasted'), ('conference', 'iasted'), ('edas', 'sigkdd'), ('ekaw', 'iasted'), ('cmt', 'edas'), ('edas', 'ekaw'), ('cmt', 'confOf'), ('confOf', 'ekaw'), ('conference', 'ekaw'), ('cmt', 'iasted'

In [9]:
nbr_dict_orig = pickle.load(open("Input/data_conf_oaei_german_aml_thresh.pkl", "rb"))[6]
nbr_dict_new = {el: nbr_dict_orig[el] for el in nbr_dict_orig if el.split("#")[0] not in list(set(flatten(ontologies_in_alignment)))}
data_ent, data_prop, aml_data_ent, aml_data_prop, data_german, _, _, _, neighbours_dicts, neighbours_dicts_prop, ontologies_in_alignment = pickle.load(open("Input/data_conf_oaei_german_aml_prop_thresh.pkl", "rb"))



In [8]:
emb_indexer, emb_indexer_inv, emb_vals = pickle.load(open("Input/data_conf_oaei_german_aml_thresh.pkl", "rb"))[3:6]

In [8]:
len(pickle.load(open("Input/data_conf_oaei_german_aml_prop_thresh.pkl", "rb"))[3])

24205

In [12]:
pickle.load(open("Input/data_conf_oaei_german_aml_thresh.pkl", "rb"))[1] == pickle.load(open("Input/data_conf_oaei_german_aml_prop_thresh.pkl", "rb"))[4]

True

In [31]:
results = """Output_att13_5_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.78600823 0.62622951 0.69708029 0.65276828 0.74784652]
Output_att13_5_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.80257511 0.61311475 0.69516729 0.64349621 0.75586095]
Output_att13_5_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.764      0.62622951 0.68828829 0.64965986 0.73180077]
Output_att13_5_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.81415929 0.60327869 0.69303202 0.6362379  0.76095947]
Output_att13_5_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.82272727 0.59344262 0.68952381 0.62847222 0.76371308]
Output_att13_5_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.81497797 0.60655738 0.69548872 0.63925363 0.76257214]
Output_att13_5_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.79906542 0.56065574 0.65895954 0.59623431 0.73643411]
Output_att13_5_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.81447964 0.59016393 0.68441065 0.62456627 0.7569386 ]
Output_att13_5_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.7920354  0.58688525 0.67419962 0.61894882 0.74028122]
Output_att13_5_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [0.76171875 0.63934426 0.69518717 0.66056911 0.73363431]
Output_att13_5_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [0.81081081 0.59016393 0.68311195 0.62413315 0.75440067]
Output_att13_5_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [0.77682403 0.59344262 0.67286245 0.62284928 0.73160873]
Output_att13_5_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [0.78571429 0.61311475 0.68876611 0.64128944 0.74383453]
Output_att13_5_0.6075_1_0.5_fn_prop.txt:Final Results:  [0.79166667 0.56065574 0.65642994 0.5954039  0.73139435]
Output_att13_8_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.80088496 0.59344262 0.68173258 0.62586445 0.74855252]
Output_att13_8_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.75984252 0.63278689 0.69051878 0.65468114 0.73050719]
Output_att13_8_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.82407407 0.58360656 0.68330134 0.61977716 0.76133447]
Output_att13_8_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.8226601  0.54754098 0.65748031 0.58678848 0.74753805]
Output_att13_8_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.79324895 0.61639344 0.69372694 0.64516129 0.75019952]
Output_att13_8_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.81222707 0.60983607 0.69662921 0.64182195 0.76167076]
Output_att13_8_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.75630252 0.59016393 0.66298343 0.61728395 0.71599045]
Output_att13_8_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.78723404 0.60655738 0.68518519 0.63573883 0.74297189]
Output_att13_8_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.76890756 0.6        0.67403315 0.62757202 0.72792363]
Output_att13_8_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att13_8_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att13_8_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att13_8_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att13_8_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_5_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.77729258 0.58360656 0.66666667 0.6142167  0.72891073]
Output_att21_5_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.78389831 0.60655738 0.68391867 0.6353022  0.74059247]
Output_att21_5_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.80357143 0.59016393 0.6805293  0.6232687  0.74937552]
Output_att21_5_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.7887931  0.6        0.68156425 0.63016529 0.74209246]
Output_att21_5_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.8008658  0.60655738 0.69029851 0.63749139 0.75264443]
Output_att21_5_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.80084746 0.61967213 0.6987061  0.64903846 0.75660528]
Output_att21_5_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.75107296 0.57377049 0.65055762 0.60220234 0.70735651]
Output_att21_5_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.78540773 0.6        0.6802974  0.62973159 0.73969281]
Output_att21_5_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.77731092 0.60655738 0.68139963 0.63443073 0.73587908]
Output_att21_5_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_5_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_5_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_5_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_5_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_8_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.79555556 0.58688525 0.6754717  0.61937716 0.74273859]
Output_att21_8_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.75294118 0.6295082  0.68571429 0.65084746 0.7245283 ]
Output_att21_8_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.76284585 0.63278689 0.69175627 0.65512559 0.73272589]
Output_att21_8_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.81105991 0.57704918 0.6743295  0.61238692 0.75021313]
Output_att21_8_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.82272727 0.59344262 0.68952381 0.62847222 0.76371308]
Output_att21_8_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.79411765 0.61967213 0.6961326  0.64814815 0.75178998]
Output_att21_8_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.7920354  0.58688525 0.67419962 0.61894882 0.74028122]
Output_att21_8_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.7721519  0.6        0.67527675 0.62800275 0.73024741]
Output_att21_8_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.8        0.57704918 0.67047619 0.61111111 0.74261603]
Output_att21_8_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_8_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_8_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_8_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att21_8_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_5_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.79646018 0.59016393 0.6779661  0.62240664 0.74441687]
Output_att26_5_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.78389831 0.60655738 0.68391867 0.6353022  0.74059247]
Output_att26_5_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.80357143 0.59016393 0.6805293  0.6232687  0.74937552]
Output_att26_5_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.7887931  0.6        0.68156425 0.63016529 0.74209246]
Output_att26_5_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.8008658  0.60655738 0.69029851 0.63749139 0.75264443]
Output_att26_5_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.80084746 0.61967213 0.6987061  0.64903846 0.75660528]
Output_att26_5_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.75107296 0.57377049 0.65055762 0.60220234 0.70735651]
Output_att26_5_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.77405858 0.60655738 0.68014706 0.63399589 0.73354481]
Output_att26_5_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.77731092 0.60655738 0.68139963 0.63443073 0.73587908]
Output_att26_5_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_5_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_5_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_5_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_5_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_8_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.79555556 0.58688525 0.6754717  0.61937716 0.74273859]
Output_att26_8_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.75294118 0.6295082  0.68571429 0.65084746 0.7245283 ]
Output_att26_8_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.76284585 0.63278689 0.69175627 0.65512559 0.73272589]
Output_att26_8_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.81105991 0.57704918 0.6743295  0.61238692 0.75021313]
Output_att26_8_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.82272727 0.59344262 0.68952381 0.62847222 0.76371308]
Output_att26_8_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.79411765 0.61967213 0.6961326  0.64814815 0.75178998]
Output_att26_8_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.79816514 0.5704918  0.66539197 0.60500695 0.73916737]
Output_att26_8_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.78297872 0.60327869 0.68148148 0.63230241 0.73895582]
Output_att26_8_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.8        0.57704918 0.67047619 0.61111111 0.74261603]
Output_att26_8_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_8_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_8_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_8_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att26_8_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att3_5_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.78761062 0.58360656 0.67043315 0.61549101 0.73614557]
Output_att3_5_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.8061674  0.6        0.68796992 0.63234278 0.75432811]
Output_att3_5_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.76890756 0.6        0.67403315 0.62757202 0.72792363]
Output_att3_5_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.80717489 0.59016393 0.68181818 0.62370062 0.7518797 ]
Output_att3_5_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.81818182 0.59016393 0.68571429 0.625      0.75949367]
Output_att3_5_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.77822581 0.63278689 0.69801085 0.65735695 0.74402467]
Output_att3_5_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.82692308 0.56393443 0.6705653  0.6022409  0.75637643]
Output_att3_5_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.78695652 0.59344262 0.67663551 0.62413793 0.73877551]
Output_att3_5_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.78508772 0.58688525 0.67166979 0.61809392 0.73541495]
Output_att3_5_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [0.73846154 0.6295082  0.67964602 0.64864865 0.71375465]
Output_att3_5_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [0.76518219 0.61967213 0.68478261 0.64417178 0.73085847]
Output_att3_5_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [0.79908676 0.57377049 0.66793893 0.60806115 0.74089754]
Output_att3_5_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [0.7768595  0.61639344 0.68738574 0.64295486 0.7384132 ]
Output_att3_5_0.6075_1_0.5_fn_prop.txt:Final Results:  [0.77142857 0.61967213 0.68727273 0.64505119 0.73540856]
Output_att3_8_0.6075_0.35_0.5_fn_prop.txt:Final Results:  [0.7689243  0.63278689 0.6942446  0.65601632 0.73720397]
Output_att3_8_0.6075_0.43_0.5_fn_prop.txt:Final Results:  [0.73584906 0.63934426 0.68421053 0.65656566 0.71428571]
Output_att3_8_0.6075_0.45_0.5_fn_prop.txt:Final Results:  [0.82142857 0.60327869 0.69565217 0.63711911 0.76602831]
Output_att3_8_0.6075_0.4_0.5_fn_prop.txt:Final Results:  [0.8125     0.55409836 0.6588694  0.59173669 0.74318382]
Output_att3_8_0.6075_0.5_0.5_fn_prop.txt:Final Results:  [0.83962264 0.58360656 0.68858801 0.62150838 0.77189939]
Output_att3_8_0.6075_0.62_0.5_fn_prop.txt:Final Results:  [0.78838174 0.62295082 0.6959707  0.65023956 0.74862096]
Output_att3_8_0.6075_0.65_0.5_fn_prop.txt:Final Results:  [0.82439024 0.55409836 0.6627451  0.59298246 0.75111111]
Output_att3_8_0.6075_0.6_0.5_fn_prop.txt:Final Results:  [0.79130435 0.59672131 0.68037383 0.62758621 0.74285714]
Output_att3_8_0.6075_0.7_0.5_fn_prop.txt:Final Results:  [0.7920354  0.58688525 0.67419962 0.61894882 0.74028122]
Output_att3_8_0.6075_0.85_0.5_fn_prop.txt:Final Results:  [0.80701754 0.60327869 0.69043152 0.63535912 0.75595727]
Output_att3_8_0.6075_0.87_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att3_8_0.6075_0.8_0.5_fn_prop.txt:Final Results:  [0.77637131 0.60327869 0.67896679 0.63143445 0.73423783]
Output_att3_8_0.6075_0.9_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]
Output_att3_8_0.6075_1_0.5_fn_prop.txt:Final Results:  [N/A N/A N/A N/A N/A]"""

records = [line.split("_") for line in results.split("\n")]
rownames, colnames, vals = list(zip(*[(line[1][3:] + "-" + line[2], line[4], line[-1].split("[")[-1].split("]")[0].split()[2])  for line in records]))

In [32]:
from orderedset import OrderedSet
import pandas as pd
colnames, rownames = list(OrderedSet(colnames)), list(OrderedSet(rownames))
pd.DataFrame(np.reshape(vals, (len(rownames), len(colnames))).tolist(), columns=colnames, index=rownames).to_csv("results_ent.tsv", sep="\t")

In [34]:
import time
import tensorflow_text
import tensorflow_hub as hub
t = time.time()
hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3?tf-hub-format=compressed")
print (time.time() - t)
t = time.time()
hub.load("/home/vlead/USE_multilingual/")
print (time.time() - t)

2.7454731464385986
1.9681594371795654
