In [1]:
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Latex
import numpy as np
import json
import pandas as pd
from scipy import stats
import os

# Experiments and Functions

In [2]:
experiments = [
            #{'id': '1', 'source':'imdb', 'target':'uwcse', 'predicate':'workedunder', 'to_predicate':'advisedby', 'arity': 2},
            #{'id': '2', 'source':'uwcse', 'target':'imdb', 'predicate':'advisedby', 'to_predicate':'workedunder', 'arity': 2},
            #{'id': '3', 'source':'imdb', 'target':'cora', 'predicate':'workedunder', 'to_predicate':'samevenue', 'arity': 2},
            #{'id': '4', 'source':'cora', 'target':'imdb', 'predicate':'samevenue', 'to_predicate':'workedunder', 'arity': 2},
            ##{'id': '5', 'source':'uwcse', 'target':'cora', 'predicate':'advisedby', 'to_predicate':'samevenue', 'arity': 2},
            ##{'id': '6', 'source':'cora', 'target':'uwcse', 'predicate':'samevenue', 'to_predicate':'advisedby', 'arity': 2},
            #{'id': '7', 'source':'yeast', 'target':'twitter', 'predicate':'proteinclass', 'to_predicate':'accounttype', 'arity': 2},
            #{'id': '8', 'source':'twitter', 'target':'yeast', 'predicate':'accounttype', 'to_predicate':'proteinclass', 'arity': 2},
            #{'id': '9', 'source':'nell_sports', 'target':'nell_finances', 'predicate':'teamplayssport', 'to_predicate':'companyeconomicsector', 'arity': 2},
            {'id': '10', 'source':'nell_finances', 'target':'nell_sports', 'predicate':'companyeconomicsector', 'to_predicate':'teamplayssport', 'arity': 2}
]

In [3]:
def load_data(experiments, metric, directory):
    data = {}
    for item in experiments:
        experiment_title = item['id'] + '_' + item['source'] + '_' + item['target']
        
        if(metric != ''):
            file_path = os.getcwd() + directory + experiment_title + '/' + experiment_title + '_fasttext_{}.json'.format(metric)
        else:
            file_path = os.getcwd() + directory + experiment_title + '/' + experiment_title + '.json'
            
        #if(os.path.isfile(file_path)):
        with open(file_path, 'r') as fp:
            results = json.load(fp)

            data[experiment_title] = results
    return data

# Estudo dos Mapeamentos

## Depth-First Mapping

In [12]:
# Setting paths parameters
mapping_order = 'most_similar'
k = 1
compare_stopwords = False

#Removing Yeast and Twitter pair of experiments because there is no stopwords in predicates
#exp = experiments.copy()
#exp.remove({'id': '7', 'source':'yeast', 'target':'twitter', 'predicate':'proteinclass', 'to_predicate':'accounttype', 'arity': 2})
#exp.remove({'id': '8', 'source':'twitter', 'target':'yeast', 'predicate':'accounttype', 'to_predicate':'proteinclass', 'arity': 2})

In [15]:
path = f'/results/transfer/{mapping_order}/w_stopwords/k_{k}/'
data_softcosine = load_data(experiments, 'softcosine', path)
data_euclidean = load_data(experiments, 'euclidean', path)
data_wmd = load_data(experiments, 'wmd', path)
#data_rwmd = load_data(experiments, 'relax-wmd', path)

# path = f'/results/transfer/{mapping_order}/w_stopwords/k_{k}_no_revision/'
# data_softcosine_no_rev = load_data(experiments, 'softcosine', path)
# data_euclidean_no_rev = load_data(experiments, 'euclidean', path)
# data_wmd_no_rev = load_data(experiments, 'wmd', path)
# data_rwmd_no_rev = load_data(experiments, 'relax-wmd', path)

baseline_path = f'/results/baselines/transfer/'
#treeboostler_data = load_data(experiments, '', baseline_path)
rdnb_data = load_data(experiments, '', baseline_path + 'rdn_b/')

for j in range(len(experiments)):
    dataset = experiments[j]['id'] + '_' + experiments[j]['source'] + '_' + experiments[j]['target']
    #if dataset in data['results']: 
    display(Markdown('# Results for ' + dataset))
    table = []
    for metric in ['CLL', 'AUC ROC', 'AUC PR']: #, 'Learning and Revision time', 'Inference time']:
        display(Markdown('## ' + metric))
        
        trans_softcosine = np.zeros(len(data_softcosine[str(dataset)][0]))
        trans_softcosine_no_rev = np.zeros(len(data_softcosine[str(dataset)][0]))
        
        trans_euclidean = np.zeros(len(data_euclidean[str(dataset)][0]))
        trans_euclidean_no_rev = np.zeros(len(data_euclidean[str(dataset)][0]))
        
        trans_wmd = np.zeros(len(data_wmd[str(dataset)][0]))
        trans_wmd_no_rev = np.zeros(len(data_wmd[str(dataset)][0]))
        
#         trans_relax_wmd = np.zeros(len(data_rwmd[str(dataset)][0]))
#         trans_relax_wmd_no_rev = np.zeros(len(data_rwmd[str(dataset)][0]))
        
#         tree_no_rev = np.zeros(len(treeboostler_data[str(dataset)][0]))
#         tree = np.zeros(len(treeboostler_data[str(dataset)][0]))
        
        rdnb_no_rev = np.zeros(len(rdnb_data[str(dataset)][0]))
        rdnb = np.zeros(len(rdnb_data[str(dataset)][0]))
        
        m = 0
        for m in range(len(data_softcosine[str(dataset)][0])):
            trans_softcosine += np.array([item['transfer'][metric] for item in data_softcosine[str(dataset)][m]])
            trans_softcosine_no_rev += np.array([item['transfer']['parameter'][metric] for item in data_softcosine[str(dataset)][m]])

            trans_euclidean += np.array([item['transfer'][metric] for item in data_euclidean[str(dataset)][m]])
            trans_euclidean_no_rev += np.array([item['transfer']['parameter'][metric] for item in data_euclidean[str(dataset)][m]])

            trans_wmd += np.array([item['transfer'][metric] for item in data_wmd[str(dataset)][m]])
            trans_wmd_no_rev += np.array([item['transfer']['parameter'][metric] for item in data_wmd[str(dataset)][m]])
            
#             trans_relax_wmd += np.array([item['transfer'][metric] for item in data_rwmd[str(dataset)][m]])
#             trans_relax_wmd_no_rev += np.array([item['transfer']['parameter'][metric] for item in data_rwmd[str(dataset)][m]])

#             tree_no_rev += np.array([item['transfer']['parameter'][metric] for item in treeboostler_data[str(dataset)][m]])
#             tree += np.array([item['transfer'][metric] for item in treeboostler_data[str(dataset)][m]])
            break
            
        rdnb_no_rev += np.array([item['rdn-b'][metric] for item in rdnb_data[str(dataset)][0]]) 
        rdnb += np.array([item['rdn-b'][metric] for item in rdnb_data[str(dataset)][0]])
        
        # Fora do FOR
            
#         trans_softcosine /= len(data_softcosine[str(dataset)])
#         trans_softcosine_no_rev /= len(data_softcosine_no_rev[str(dataset)])
        
#         trans_euclidean /= len(data_euclidean[str(dataset)])
#         trans_euclidean_no_rev /= len(data_euclidean_no_rev[str(dataset)])
        
#         trans_wmd /= len(data_wmd[str(dataset)])
#         trans_wmd_no_rev /= len(data_wmd_no_rev[str(dataset)])
        
        #trans_relax_wmd /= len(data_rwmd[str(dataset)])
        #trans_relax_wmd_no_rev /= len(data_rwmd_no_rev[str(dataset)])
        
        #tree /= len(baseline_data[str(dataset)])
        #tree_no_rev /= len(baseline_data[str(dataset)])
        
        #rdnb /= len(baseline_data[str(dataset)])
        #rdnb_no_rev /= len(baseline_data[str(dataset)])
        
        pvalue_table = []
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_softcosine,tree)
        tvalue2, pvalue2 = stats.ttest_rel(trans_softcosine, rdnb)
        pvalue_table.append(['TransBoostler Soft Cosine', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_euclidean,tree)
        tvalue2, pvalue2 = stats.ttest_rel(trans_euclidean,rdnb)
        pvalue_table.append(['TransBoostler Euclidean', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_wmd,tree)
        tvalue2, pvalue2 = stats.ttest_rel(trans_wmd,rdnb)
        pvalue_table.append(['TransBoostler WMD', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_relax_wmd,tree)
        #tvalue2, pvalue2 = stats.ttest_rel(trans_relax_wmd,rdnb)
        pvalue_table.append(['TransBoostler Relax-WMD', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        # No Revision
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_softcosine_no_rev,tree_no_rev)
        tvalue2, pvalue2 = stats.ttest_rel(trans_softcosine_no_rev,rdnb_no_rev)
        pvalue_table.append(['TransBoostler* Soft Cosine', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_euclidean_no_rev,tree_no_rev)
        tavlue2, pvalue2 = stats.ttest_rel(trans_euclidean_no_rev,rdnb_no_rev)
        pvalue_table.append(['TransBoostler* Euclidean', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_wmd_no_rev,tree_no_rev)
        tvalue, pvalue2 = stats.ttest_rel(trans_wmd_no_rev,rdnb_no_rev)
        pvalue_table.append(['TransBoostler* WMD', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        pvalue,pvalue2 = 0,0
        #tvalue, pvalue = stats.ttest_rel(trans_relax_wmd,tree_no_rev)
        tvalue, pvalue2 = stats.ttest_rel(trans_relax_wmd_no_rev,rdnb_no_rev)
        pvalue_table.append(['TransBoostler* Relax-WMD', '%.3f' % (pvalue), '%.3f' % (pvalue2)])
        del pvalue,pvalue2
        
        display(pd.DataFrame(pvalue_table, columns=['p-value', 'TreeBoostler', 'RDN-B']))
        print('TransBoostler Soft Cosine', ','.join(str(v) for v in trans_softcosine))
        
        print('TransBoostler Euclidean', ','.join(str(v) for v in trans_euclidean))
        
        print('TransBoostler WMD', ','.join(str(v) for v in trans_wmd))
        
        #print('TransBoostler Relax-WMD', ','.join(str(v) for v in trans_relax_wmd))
        
        #print('TreeBoostler', ','.join(str(v) for v in tree))
        
        print('TransBoostler* Soft Cosine', ','.join(str(v) for v in trans_softcosine_no_rev))
        print('TransBoostler* Euclidean', ','.join(str(v) for v in trans_euclidean_no_rev))
        print('TransBoostler* WMD', ','.join(str(v) for v in trans_wmd_no_rev))
        #print('TransBoostler* RWMD', ','.join(str(v) for v in trans_relax_wmd_no_rev))
        
        #print('TreeBoostler*', ','.join(str(v) for v in tree_no_rev))
        
        print('RDN-B', ','.join(str(v) for v in rdnb))
       

# Results for 10_nell_finances_nell_sports

## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.401
1,TransBoostler Euclidean,0.0,0.19
2,TransBoostler WMD,0.0,0.422
3,TransBoostler Relax-WMD,0.0,0.0
4,TransBoostler* Soft Cosine,0.0,0.0
5,TransBoostler* Euclidean,0.0,0.0
6,TransBoostler* WMD,0.0,0.0
7,TransBoostler* Relax-WMD,0.0,0.0


TransBoostler Soft Cosine -0.21596,-0.086234,-0.086928
TransBoostler Euclidean -0.084389,-0.151503,-0.176692
TransBoostler WMD -0.08201,-0.201711,-0.086279
TransBoostler* Soft Cosine -0.367616,-0.373628,-0.373518
TransBoostler* Euclidean -0.377068,-0.375331,-0.359935
TransBoostler* WMD -0.372202,-0.373274,-0.379374
RDN-B -0.083776,-0.083762,-0.084288


## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.127
1,TransBoostler Euclidean,0.0,0.136
2,TransBoostler WMD,0.0,0.116
3,TransBoostler Relax-WMD,0.0,0.0
4,TransBoostler* Soft Cosine,0.0,0.0
5,TransBoostler* Euclidean,0.0,0.0
6,TransBoostler* WMD,0.0,0.0
7,TransBoostler* Relax-WMD,0.0,0.0


TransBoostler Soft Cosine 0.987725,0.991999,0.989733
TransBoostler Euclidean 0.994776,0.990367,0.990686
TransBoostler WMD 0.9938,0.986866,0.992015
TransBoostler* Soft Cosine 0.491348,0.496419,0.486674
TransBoostler* Euclidean 0.477895,0.485139,0.485135
TransBoostler* WMD 0.478833,0.487175,0.488324
RDN-B 0.995456,0.993248,0.995169


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.604
1,TransBoostler Euclidean,0.0,0.527
2,TransBoostler WMD,0.0,0.858
3,TransBoostler Relax-WMD,0.0,0.0
4,TransBoostler* Soft Cosine,0.0,0.005
5,TransBoostler* Euclidean,0.0,0.005
6,TransBoostler* WMD,0.0,0.005
7,TransBoostler* Relax-WMD,0.0,0.001


TransBoostler Soft Cosine 0.183443,0.409249,0.191109
TransBoostler Euclidean 0.26857,0.323195,0.27712
TransBoostler WMD 0.403347,0.17995,0.329318
TransBoostler* Soft Cosine 0.002393,0.002448,0.00242
TransBoostler* Euclidean 0.002346,0.002334,0.002335
TransBoostler* WMD 0.002315,0.002375,0.002394
RDN-B 0.302024,0.285712,0.361852
