In [1]:
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Latex
import numpy as np
import json
import pandas as pd
from scipy import stats
import os

In [2]:
experiments = [
            {'id': '1', 'source':'imdb', 'target':'uwcse', 'predicate':'workedunder', 'to_predicate':'advisedby', 'arity': 2},
            #{'id': '2', 'source':'uwcse', 'target':'imdb', 'predicate':'advisedby', 'to_predicate':'workedunder', 'arity': 2},
            {'id': '3', 'source':'imdb', 'target':'cora', 'predicate':'workedunder', 'to_predicate':'samevenue', 'arity': 2},
            {'id': '4', 'source':'cora', 'target':'imdb', 'predicate':'samevenue', 'to_predicate':'workedunder', 'arity': 2},
            ##{'id': '5', 'source':'uwcse', 'target':'cora', 'predicate':'advisedby', 'to_predicate':'samevenue', 'arity': 2},
            ##{'id': '6', 'source':'cora', 'target':'uwcse', 'predicate':'samevenue', 'to_predicate':'advisedby', 'arity': 2},
            {'id': '7', 'source':'yeast', 'target':'twitter', 'predicate':'proteinclass', 'to_predicate':'accounttype', 'arity': 2},
            {'id': '8', 'source':'twitter', 'target':'yeast', 'predicate':'accounttype', 'to_predicate':'proteinclass', 'arity': 2},
            {'id': '9', 'source':'nell_sports', 'target':'nell_finances', 'predicate':'teamplayssport', 'to_predicate':'companyeconomicsector', 'arity': 2},
            #{'id': '10', 'source':'nell_finances', 'target':'nell_sports', 'predicate':'companyeconomicsector', 'to_predicate':'teamplayssport', 'arity': 2}
]

In [3]:
def load_data(experiments, metric, treeboostler=False):
    data = {}
    for item in experiments:
        experiment_title = item['id'] + '_' + item['source'] + '_' + item['target']
        
        if(treeboostler):
            file_path = os.path.split(os.getcwd())[0] + '/TreeBoostler/experiments-transfer/' + experiment_title + '/' + experiment_title + '.json'
        else:
            file_path = os.getcwd() + '/experiments-transfer-k-1/' + experiment_title + '/' + experiment_title + '_fasttext_{}.json'.format(metric)
        
        #if(os.path.isfile(file_path)):
        with open(file_path, 'r') as fp:
            results = json.load(fp)

            data[experiment_title] = results
    return data

def load_data_no_rev(experiments, metric, treeboostler=False):
    data = {}
    for item in experiments:
        experiment_title = item['id'] + '_' + item['source'] + '_' + item['target']
        
        if(treeboostler):
            file_path = os.path.split(os.getcwd())[0] + '/TreeBoostler/experiments-transfer/' + experiment_title + '/' + experiment_title + '.json'
        else:
            file_path = os.getcwd() + '/experiments-no-revision/' + experiment_title + '/' + experiment_title + '_fasttext_{}.json'.format(metric)
        
        #if(os.path.isfile(file_path)):
        with open(file_path, 'r') as fp:
            results = json.load(fp)

            data[experiment_title] = results
    return data

In [4]:
data_softcosine = load_data(experiments, 'softcosine')
data_euclidean = load_data(experiments, 'euclidean')
data_wmd = load_data(experiments, 'wmd')

data_softcosine_no_rev = load_data_no_rev(experiments, 'softcosine')
data_euclidean_no_rev = load_data_no_rev(experiments, 'euclidean')
data_wmd_no_rev = load_data_no_rev(experiments, 'wmd')

treeboostler_data = load_data(experiments, '', True)

for j in range(len(experiments)):
    dataset = experiments[j]['id'] + '_' + experiments[j]['source'] + '_' + experiments[j]['target']
    #if dataset in data['results']: 
    display(Markdown('# Results for ' + dataset))
    table = []
    for metric in ['AUC ROC', 'AUC PR', 'CLL']: #, 'Learning and Revision time', 'Inference time']:
        display(Markdown('## ' + metric))
        
        trans_softcosine = np.zeros(len(data_softcosine[str(dataset)][0]))
        trans_softcosine_no_rev = np.zeros(len(data_softcosine_no_rev[str(dataset)][0]))
        
        trans_euclidean = np.zeros(len(data_euclidean[str(dataset)][0]))
        trans_euclidean_no_rev = np.zeros(len(data_euclidean_no_rev[str(dataset)][0]))
        
        trans_wmd = np.zeros(len(data_wmd[str(dataset)][0]))
        trans_wmd_no_rev = np.zeros(len(data_wmd_no_rev[str(dataset)][0]))
        
        tree_no_rev = np.zeros(len(treeboostler_data[str(dataset)][0]))
        tree = np.zeros(len(treeboostler_data[str(dataset)][0]))
        
        for m in range(len(treeboostler_data[str(dataset)][0])):
            trans_softcosine += np.array([item['transfer'][metric] for item in data_softcosine[str(dataset)][m]])
            trans_softcosine_no_rev += np.array([item['transfer'][metric] for item in data_softcosine[str(dataset)][m]])
            
            trans_euclidean += np.array([item['transfer'][metric] for item in data_euclidean[str(dataset)][m]])
            trans_euclidean_no_rev += np.array([item['transfer'][metric] for item in data_euclidean[str(dataset)][m]])
            
            trans_wmd += np.array([item['transfer'][metric] for item in data_wmd[str(dataset)][m]])
            trans_wmd_no_rev += np.array([item['transfer'][metric] for item in data_wmd[str(dataset)][m]])
            
            tree_no_rev += np.array([item['transfer']['parameter'][metric] for item in treeboostler_data[str(dataset)][m]])
            tree += np.array([item['transfer'][metric] for item in treeboostler_data[str(dataset)][m]])
            
        trans_softcosine /= len(data_softcosine[str(dataset)])
        trans_softcosine_no_rev /= len(data_softcosine_no_rev[str(dataset)])
        
        trans_euclidean /= len(data_euclidean[str(dataset)])
        trans_euclidean_no_rev /= len(data_euclidean_no_rev[str(dataset)])
        
        trans_wmd /= len(data_wmd[str(dataset)])
        trans_wmd_no_rev /= len(data_wmd_no_rev[str(dataset)])
        
        tree /= len(treeboostler_data[str(dataset)])
        tree_no_rev /= len(treeboostler_data[str(dataset)])
        
        pvalue_table = []
        tvalue, pvalue = stats.ttest_rel(trans_softcosine,tree)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler Soft Cosine', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        tvalue, pvalue = stats.ttest_rel(trans_euclidean,tree)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler Euclidean', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        tvalue, pvalue = stats.ttest_rel(trans_wmd,tree)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler WMD', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        # No Revision
        
        tvalue, pvalue = stats.ttest_rel(trans_softcosine_no_rev,tree_no_rev)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler* Soft Cosine', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        tvalue, pvalue = stats.ttest_rel(trans_euclidean_no_rev,tree_no_rev)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler* Euclidean', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        tvalue, pvalue = stats.ttest_rel(trans_wmd_no_rev,tree_no_rev)
        #pvalue2 = stats.ttest_rel(tr_sum,tree_no_rev)
        #print(pvalue)
        pvalue_table.append(['TransBoostler* WMD', '%.3f' % (pvalue), '%.3f' % (tvalue)])
        
        display(pd.DataFrame(pvalue_table, columns=['TreeBoostler', 'p-value', 't-stat']))

        #for m in range(len(data['results'][str(dataset)])):
        #    print('Experiment %s' % (m+1))
        #    sc = [item['rdn_b'][metric] for item in data['results'][str(dataset)][m]]
        #    sc_rdn = [item['rdn'][metric] for item in data['results'][str(dataset)][m]]
        #    pr = [item['transfer']['parameter'][metric] for item in data['results'][str(dataset)][m]]
        #    tr = [item['transfer'][metric] for item in data['results'][str(dataset)][m]] 
        #    print('Scratch RDN-B: %s' % sc)
        #    print('Scratch RDN: %s' % sc_rdn)
        #    print('Transfer: %s' % pr)
        #    print('Transfer & Revision: %s' % tr) 
        #    print('\n')


# Results for 1_imdb_uwcse

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.542,-0.666
1,TransBoostler Euclidean,0.567,-0.622
2,TransBoostler WMD,0.526,-0.693
3,TransBoostler* Soft Cosine,0.052,2.745
4,TransBoostler* Euclidean,0.078,2.361
5,TransBoostler* WMD,0.125,1.935


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.433,-0.871
1,TransBoostler Euclidean,0.238,-1.387
2,TransBoostler WMD,0.318,-1.14
3,TransBoostler* Soft Cosine,0.71,-0.4
4,TransBoostler* Euclidean,0.437,-0.863
5,TransBoostler* WMD,0.509,-0.724


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.336,-1.093
1,TransBoostler Euclidean,0.155,-1.747
2,TransBoostler WMD,0.975,-0.033
3,TransBoostler* Soft Cosine,0.023,3.564
4,TransBoostler* Euclidean,0.053,2.724
5,TransBoostler* WMD,0.063,2.556


# Results for 3_imdb_cora

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.898,-0.136
1,TransBoostler Euclidean,0.657,-0.479
2,TransBoostler WMD,0.915,-0.114
3,TransBoostler* Soft Cosine,0.606,0.558
4,TransBoostler* Euclidean,0.707,0.405
5,TransBoostler* WMD,0.608,0.556


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.228,-1.421
1,TransBoostler Euclidean,0.121,-1.966
2,TransBoostler WMD,0.225,-1.432
3,TransBoostler* Soft Cosine,0.352,-1.053
4,TransBoostler* Euclidean,0.215,-1.471
5,TransBoostler* WMD,0.346,-1.067


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.512,-0.718
1,TransBoostler Euclidean,0.417,-0.905
2,TransBoostler WMD,0.767,-0.318
3,TransBoostler* Soft Cosine,0.567,-0.622
4,TransBoostler* Euclidean,0.486,-0.767
5,TransBoostler* WMD,0.784,-0.293


# Results for 4_cora_imdb

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.017,3.926
1,TransBoostler Euclidean,0.749,-0.343
2,TransBoostler WMD,0.423,0.893
3,TransBoostler* Soft Cosine,0.004,5.97
4,TransBoostler* Euclidean,0.003,6.304
5,TransBoostler* WMD,0.004,5.956


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.04,3.002
1,TransBoostler Euclidean,0.371,-1.006
2,TransBoostler WMD,0.754,-0.336
3,TransBoostler* Soft Cosine,0.004,5.922
4,TransBoostler* Euclidean,0.091,2.216
5,TransBoostler* WMD,0.057,2.648


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.746,0.347
1,TransBoostler Euclidean,0.502,-0.737
2,TransBoostler WMD,0.118,-1.99
3,TransBoostler* Soft Cosine,0.147,1.793
4,TransBoostler* Euclidean,0.179,1.629
5,TransBoostler* WMD,0.182,1.614


# Results for 7_yeast_twitter

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.0,-3273.89
1,TransBoostler Euclidean,0.0,-43240.891
2,TransBoostler WMD,0.0,-17839.96
3,TransBoostler* Soft Cosine,0.672,0.565
4,TransBoostler* Euclidean,0.28,2.123
5,TransBoostler* WMD,0.269,2.221


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.001,-894.584
1,TransBoostler Euclidean,0.025,-25.035
2,TransBoostler WMD,0.015,-41.869
3,TransBoostler* Soft Cosine,0.42,1.291
4,TransBoostler* Euclidean,0.206,2.987
5,TransBoostler* WMD,0.186,3.332


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.09,7.021
1,TransBoostler Euclidean,0.011,58.368
2,TransBoostler WMD,0.015,43.84
3,TransBoostler* Soft Cosine,0.055,-11.649
4,TransBoostler* Euclidean,0.053,-12.05
5,TransBoostler* WMD,0.108,-5.856


# Results for 8_twitter_yeast

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.0,-219.954
1,TransBoostler Euclidean,0.0,-248.849
2,TransBoostler WMD,0.011,-5.731
3,TransBoostler* Soft Cosine,0.0,-22.994
4,TransBoostler* Euclidean,0.0,-32.127
5,TransBoostler* WMD,0.011,-5.731


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.001,-12.596
1,TransBoostler Euclidean,0.014,-5.22
2,TransBoostler WMD,0.56,0.653
3,TransBoostler* Soft Cosine,0.003,-9.068
4,TransBoostler* Euclidean,0.163,1.84
5,TransBoostler* WMD,0.56,0.653


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.327,-1.169
1,TransBoostler Euclidean,0.61,-0.568
2,TransBoostler WMD,0.0,-22.317
3,TransBoostler* Soft Cosine,0.001,-15.69
4,TransBoostler* Euclidean,0.0,-30.881
5,TransBoostler* WMD,0.0,-22.317


# Results for 9_nell_sports_nell_finances

## AUC ROC

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.002,-20.417
1,TransBoostler Euclidean,0.004,-15.643
2,TransBoostler WMD,0.003,-18.656
3,TransBoostler* Soft Cosine,0.003,-19.55
4,TransBoostler* Euclidean,0.004,-15.04
5,TransBoostler* WMD,0.003,-18.17


## AUC PR

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.405,1.046
1,TransBoostler Euclidean,0.847,-0.218
2,TransBoostler WMD,0.296,1.402
3,TransBoostler* Soft Cosine,0.28,1.467
4,TransBoostler* Euclidean,0.934,0.093
5,TransBoostler* WMD,0.161,2.177


## CLL

Unnamed: 0,TreeBoostler,p-value,t-stat
0,TransBoostler Soft Cosine,0.0,-87.171
1,TransBoostler Euclidean,0.001,-44.141
2,TransBoostler WMD,0.001,-40.703
3,TransBoostler* Soft Cosine,0.0,-55.536
4,TransBoostler* Euclidean,0.001,-32.663
5,TransBoostler* WMD,0.0,-58.961
