In [1]:
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Latex
import numpy as np
import json
import pandas as pd
from scipy import stats
import os

In [2]:
# Experiments to analyse. Every entry is a dict with the keys listed in the
# field tuple below; 'id', 'source' and 'target' are joined with '_' elsewhere
# in this notebook to form the experiment title.
# Rows that are commented out are disabled and are not part of the list.
experiments = [
    dict(zip(('id', 'source', 'target', 'predicate', 'to_predicate', 'arity'), row))
    for row in (
        ('1', 'imdb', 'uwcse', 'workedunder', 'advisedby', 2),
        #('2', 'uwcse', 'imdb', 'advisedby', 'workedunder', 2),
        ('3', 'imdb', 'cora', 'workedunder', 'samevenue', 2),
        ('4', 'cora', 'imdb', 'samevenue', 'workedunder', 2),
        ##('5', 'uwcse', 'cora', 'advisedby', 'samevenue', 2),
        ##('6', 'cora', 'uwcse', 'samevenue', 'advisedby', 2),
        ('7', 'yeast', 'twitter', 'proteinclass', 'accounttype', 2),
        ('8', 'twitter', 'yeast', 'accounttype', 'proteinclass', 2),
        ('9', 'nell_sports', 'nell_finances', 'teamplayssport', 'companyeconomicsector', 2),
        #('10', 'nell_finances', 'nell_sports', 'companyeconomicsector', 'teamplayssport', 2),
    )
]

In [3]:
def load_data(experiments, metric, treeboostler=False, rdnb=False):
    """Read one JSON results file per experiment and return them in a dict.

    The experiment title is ``'<id>_<source>_<target>'``. Which file is read
    depends on the flags:

    * ``treeboostler`` truthy:
      ``<parent of cwd>/TreeBoostler/experiments-transfer/<title>/<title>.json``
    * otherwise, ``rdnb`` truthy:
      ``<cwd>/rdn-b-transfer-experiment/<title>/rdnb.json``
    * otherwise:
      ``<cwd>/experiments-transfer-k-1/<title>/<title>_fasttext_<metric>.json``

    Args:
        experiments: iterable of dicts providing the string keys ``'id'``,
            ``'source'`` and ``'target'``.
        metric: name inserted into the file name in the default case; it is
            not used when ``treeboostler`` or ``rdnb`` is set.
        treeboostler: select the TreeBoostler results location.
        rdnb: select the RDN-B results location (only if ``treeboostler`` is
            falsy).

    Returns:
        dict mapping each experiment title to the decoded JSON content.

    Raises:
        OSError: if a results file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: if a results file is not valid JSON.
    """
    cwd = os.getcwd()
    data = {}
    for item in experiments:
        experiment_title = item['id'] + '_' + item['source'] + '_' + item['target']

        if treeboostler:
            file_path = os.path.join(
                os.path.dirname(cwd), 'TreeBoostler', 'experiments-transfer',
                experiment_title, experiment_title + '.json')
        elif rdnb:
            file_path = os.path.join(
                cwd, 'rdn-b-transfer-experiment', experiment_title, 'rdnb.json')
        else:
            file_path = os.path.join(
                cwd, 'experiments-transfer-k-1', experiment_title,
                experiment_title + '_fasttext_{}.json'.format(metric))

        # A missing file raises instead of being skipped, so an incomplete
        # set of results cannot go unnoticed. JSON text is decoded as UTF-8
        # regardless of the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as fp:
            data[experiment_title] = json.load(fp)
    return data

def load_data_no_rev(experiments, metric, treeboostler=False, rdnb=False):
    """Load the results used for the "no revision" rows of the analysis.

    This reads exactly the same files as ``load_data`` and returns the same
    ``{experiment title: decoded JSON}`` dict, so it simply delegates to it
    instead of repeating the path-building logic.

    NOTE(review): the name suggests a separate set of results, but no
    different location or file name was ever used here -- confirm whether a
    distinct path was intended.

    Args:
        experiments: iterable of dicts with the keys ``'id'``, ``'source'``
            and ``'target'``.
        metric: name inserted into the file name in the default case.
        treeboostler: select the TreeBoostler results location.
        rdnb: select the RDN-B results location.

    Returns:
        dict mapping each experiment title to the decoded JSON content.
    """
    return load_data(experiments, metric, treeboostler, rdnb)

In [6]:
# Load the per-experiment results for every method being compared.
data_softcosine = load_data(experiments, 'softcosine')
data_euclidean = load_data(experiments, 'euclidean')
data_wmd = load_data(experiments, 'wmd')

data_softcosine_no_rev = load_data_no_rev(experiments, 'softcosine')
data_euclidean_no_rev = load_data_no_rev(experiments, 'euclidean')
data_wmd_no_rev = load_data_no_rev(experiments, 'wmd')

treeboostler_data = load_data(experiments, '', True)

rdnb_data = load_data(experiments, '', False, True)


def _mean_over_runs(runs, *keys):
    """Average one score, position by position, over every run.

    ``runs`` is a list of equally long lists of result entries (presumably
    one inner list per run and one entry per fold -- confirm against the
    scripts that write the JSON files). The score of an entry is found by
    indexing it with each of ``keys`` in turn. The returned array has one
    value per entry position, i.e. ``len(runs[0])`` values.
    """
    def score(entry):
        for key in keys:
            entry = entry[key]
        return entry

    return np.mean([[score(entry) for entry in run] for run in runs], axis=0)


def _paired_pvalue(sample, baseline):
    """Return the p-value of a paired t-test between two equally long arrays."""
    return stats.ttest_rel(sample, baseline).pvalue


for experiment in experiments:
    dataset = experiment['id'] + '_' + experiment['source'] + '_' + experiment['target']
    display(Markdown('# Results for ' + dataset))

    for metric in ['AUC ROC', 'AUC PR', 'CLL']: #, 'Learning and Revision time', 'Inference time']:
        display(Markdown('## ' + metric))

        # Every source is averaged over its own runs, so the number of runs
        # summed always matches the divisor.
        trans_softcosine = _mean_over_runs(data_softcosine[dataset], 'transfer', metric)
        trans_euclidean = _mean_over_runs(data_euclidean[dataset], 'transfer', metric)
        trans_wmd = _mean_over_runs(data_wmd[dataset], 'transfer', metric)

        # NOTE(review): the "no revision" TransBoostler rows read the same
        # key as the revised rows, and load_data_no_rev reads the same files,
        # so these arrays equal the revised ones. TreeBoostler takes its
        # no-revision score from ['transfer']['parameter']; presumably the
        # same key was intended here -- confirm it exists in these files
        # before switching.
        trans_softcosine_no_rev = _mean_over_runs(data_softcosine_no_rev[dataset], 'transfer', metric)
        trans_euclidean_no_rev = _mean_over_runs(data_euclidean_no_rev[dataset], 'transfer', metric)
        trans_wmd_no_rev = _mean_over_runs(data_wmd_no_rev[dataset], 'transfer', metric)

        tree = _mean_over_runs(treeboostler_data[dataset], 'transfer', metric)
        tree_no_rev = _mean_over_runs(treeboostler_data[dataset], 'transfer', 'parameter', metric)

        # RDN-B uses the same score for both groups of rows.
        rdnb = _mean_over_runs(rdnb_data[dataset], 'rdn-b', metric)
        rdnb_no_rev = _mean_over_runs(rdnb_data[dataset], 'rdn-b', metric)

        # (row label, TransBoostler scores, TreeBoostler baseline, RDN-B baseline)
        comparisons = [
            ('TransBoostler Soft Cosine', trans_softcosine, tree, rdnb),
            ('TransBoostler Euclidean', trans_euclidean, tree, rdnb),
            ('TransBoostler WMD', trans_wmd, tree, rdnb),
            # No Revision
            ('TransBoostler* Soft Cosine', trans_softcosine_no_rev, tree_no_rev, rdnb_no_rev),
            ('TransBoostler* Euclidean', trans_euclidean_no_rev, tree_no_rev, rdnb_no_rev),
            ('TransBoostler* WMD', trans_wmd_no_rev, tree_no_rev, rdnb_no_rev),
        ]

        pvalue_table = [
            [label,
             '%.3f' % _paired_pvalue(trans_scores, tree_scores),
             '%.3f' % _paired_pvalue(trans_scores, rdnb_scores)]
            for label, trans_scores, tree_scores, rdnb_scores in comparisons
        ]

        # The first column holds the row labels; the other two hold the
        # p-value against each baseline.
        display(pd.DataFrame(pvalue_table, columns=['p-value', 'TreeBoostler', 'RDN-B']))


# Results for 1_imdb_uwcse

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.542,0.051
1,TransBoostler Euclidean,0.567,0.045
2,TransBoostler WMD,0.526,0.077
3,TransBoostler* Soft Cosine,0.052,0.051
4,TransBoostler* Euclidean,0.078,0.045
5,TransBoostler* WMD,0.125,0.077


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.433,0.923
1,TransBoostler Euclidean,0.238,0.592
2,TransBoostler WMD,0.318,0.6
3,TransBoostler* Soft Cosine,0.71,0.923
4,TransBoostler* Euclidean,0.437,0.592
5,TransBoostler* WMD,0.509,0.6


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.336,0.907
1,TransBoostler Euclidean,0.155,0.83
2,TransBoostler WMD,0.975,0.533
3,TransBoostler* Soft Cosine,0.023,0.907
4,TransBoostler* Euclidean,0.053,0.83
5,TransBoostler* WMD,0.063,0.533


# Results for 3_imdb_cora

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.898,0.394
1,TransBoostler Euclidean,0.657,0.408
2,TransBoostler WMD,0.915,0.402
3,TransBoostler* Soft Cosine,0.606,0.394
4,TransBoostler* Euclidean,0.707,0.408
5,TransBoostler* WMD,0.608,0.402


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.228,0.408
1,TransBoostler Euclidean,0.121,0.439
2,TransBoostler WMD,0.225,0.425
3,TransBoostler* Soft Cosine,0.352,0.408
4,TransBoostler* Euclidean,0.215,0.439
5,TransBoostler* WMD,0.346,0.425


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.512,0.402
1,TransBoostler Euclidean,0.417,0.403
2,TransBoostler WMD,0.767,0.395
3,TransBoostler* Soft Cosine,0.567,0.402
4,TransBoostler* Euclidean,0.486,0.403
5,TransBoostler* WMD,0.784,0.395


# Results for 4_cora_imdb

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.017,
1,TransBoostler Euclidean,0.749,0.059
2,TransBoostler WMD,0.423,0.084
3,TransBoostler* Soft Cosine,0.004,
4,TransBoostler* Euclidean,0.003,0.059
5,TransBoostler* WMD,0.004,0.084


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.04,
1,TransBoostler Euclidean,0.371,0.053
2,TransBoostler WMD,0.754,0.088
3,TransBoostler* Soft Cosine,0.004,
4,TransBoostler* Euclidean,0.091,0.053
5,TransBoostler* WMD,0.057,0.088


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.746,0.888
1,TransBoostler Euclidean,0.502,0.745
2,TransBoostler WMD,0.118,0.771
3,TransBoostler* Soft Cosine,0.147,0.888
4,TransBoostler* Euclidean,0.179,0.745
5,TransBoostler* WMD,0.182,0.771


# Results for 7_yeast_twitter

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.005
1,TransBoostler Euclidean,0.0,0.005
2,TransBoostler WMD,0.0,0.005
3,TransBoostler* Soft Cosine,0.0,0.005
4,TransBoostler* Euclidean,0.0,0.005
5,TransBoostler* WMD,0.0,0.005


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.001,0.122
1,TransBoostler Euclidean,0.025,0.175
2,TransBoostler WMD,0.015,0.157
3,TransBoostler* Soft Cosine,0.04,0.122
4,TransBoostler* Euclidean,0.083,0.175
5,TransBoostler* WMD,0.066,0.157


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.09,0.187
1,TransBoostler Euclidean,0.011,0.145
2,TransBoostler WMD,0.015,0.141
3,TransBoostler* Soft Cosine,0.065,0.187
4,TransBoostler* Euclidean,0.01,0.145
5,TransBoostler* WMD,0.013,0.141


# Results for 8_twitter_yeast

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.001
1,TransBoostler Euclidean,0.0,0.0
2,TransBoostler WMD,0.011,0.409
3,TransBoostler* Soft Cosine,0.0,0.001
4,TransBoostler* Euclidean,0.0,0.0
5,TransBoostler* WMD,0.011,0.409


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.001,0.03
1,TransBoostler Euclidean,0.014,0.13
2,TransBoostler WMD,0.56,0.391
3,TransBoostler* Soft Cosine,0.001,0.03
4,TransBoostler* Euclidean,0.014,0.13
5,TransBoostler* WMD,0.56,0.391


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.327,0.011
1,TransBoostler Euclidean,0.61,0.007
2,TransBoostler WMD,0.0,0.412
3,TransBoostler* Soft Cosine,0.327,0.011
4,TransBoostler* Euclidean,0.61,0.007
5,TransBoostler* WMD,0.0,0.412


# Results for 9_nell_sports_nell_finances

## AUC ROC

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.002,0.253
1,TransBoostler Euclidean,0.004,0.079
2,TransBoostler WMD,0.003,0.017
3,TransBoostler* Soft Cosine,0.003,0.253
4,TransBoostler* Euclidean,0.004,0.079
5,TransBoostler* WMD,0.003,0.017


## AUC PR

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.405,0.275
1,TransBoostler Euclidean,0.847,0.501
2,TransBoostler WMD,0.296,0.472
3,TransBoostler* Soft Cosine,0.28,0.275
4,TransBoostler* Euclidean,0.934,0.501
5,TransBoostler* WMD,0.161,0.472


## CLL

Unnamed: 0,p-value,TreeBoostler,RDN-B
0,TransBoostler Soft Cosine,0.0,0.77
1,TransBoostler Euclidean,0.001,0.734
2,TransBoostler WMD,0.001,0.012
3,TransBoostler* Soft Cosine,0.0,0.77
4,TransBoostler* Euclidean,0.001,0.734
5,TransBoostler* WMD,0.0,0.012
