In [1]:
import json
import os

import numpy as np
import scikit_posthocs as sp
from IPython.display import display, Markdown, Latex
from scipy import stats

In [2]:
# Transfer-learning experiments included in the analysis. Every entry names a
# source/target dataset pair and the predicate mapped between them. Entries
# left as comments are disabled and are therefore skipped by every later cell.
experiments = [
    dict(id='1', source='imdb', target='uwcse',
         predicate='workedunder', to_predicate='advisedby', arity=2),
    # disabled: id '2', uwcse -> imdb (advisedby -> workedunder)
    dict(id='3', source='imdb', target='cora',
         predicate='workedunder', to_predicate='samevenue', arity=2),
    dict(id='4', source='cora', target='imdb',
         predicate='samevenue', to_predicate='workedunder', arity=2),
    # disabled: id '5', uwcse -> cora (advisedby -> samevenue)
    # disabled: id '6', cora -> uwcse (samevenue -> advisedby)
    dict(id='7', source='yeast', target='twitter',
         predicate='proteinclass', to_predicate='accounttype', arity=2),
    dict(id='8', source='twitter', target='yeast',
         predicate='accounttype', to_predicate='proteinclass', arity=2),
    dict(id='9', source='nell_sports', target='nell_finances',
         predicate='teamplayssport', to_predicate='companyeconomicsector', arity=2),
    # disabled: id '10', nell_finances -> nell_sports (companyeconomicsector -> teamplayssport)
]

In [3]:
def load_data(experiments, metric, directory):
    """Load the JSON results file of every experiment into a dict.

    For each experiment the file is looked up at
    ``<cwd>/<directory>/<title>/<title><suffix>`` where ``<title>`` is
    ``"<id>_<source>_<target>"`` and ``<suffix>`` is
    ``"_fasttext_<metric>.json"``, or just ``".json"`` when ``metric`` is the
    empty string.

    Parameters
    ----------
    experiments : iterable of dict
        Each dict must provide the string keys ``'id'``, ``'source'`` and
        ``'target'``.
    metric : str
        Name embedded in the file name; ``''`` selects the plain
        ``<title>.json`` file.
    directory : str
        Results directory, interpreted relative to the current working
        directory. Leading and trailing slashes are optional.

    Returns
    -------
    dict
        Maps each experiment title to the parsed JSON content of its file.

    Raises
    ------
    FileNotFoundError
        If the results file of any experiment does not exist.
    """
    # The path used to be built by plain string concatenation, which produced a
    # wrong path whenever `directory` lacked its leading or trailing slash.
    # Joining the components yields the same path for '/a/b/' and also accepts
    # 'a/b'. The leading separator is stripped so that os.path.join does not
    # treat `directory` as an absolute path and discard the working directory.
    base_dir = os.path.join(os.getcwd(), directory.lstrip('/' + os.sep))

    # The suffix depends only on `metric`, so compute it once outside the loop.
    suffix = '_fasttext_{}.json'.format(metric) if metric != '' else '.json'

    data = {}
    for item in experiments:
        experiment_title = '_'.join((item['id'], item['source'], item['target']))
        file_path = os.path.join(base_dir, experiment_title, experiment_title + suffix)

        with open(file_path, 'r') as fp:
            data[experiment_title] = json.load(fp)
    return data

In [5]:
# Transfer results stored as '<title>_fasttext_<name>.json', one set per name.
path = '/results/transfer/deep/w_stopwords/k_1/'
data_softcosine, data_euclidean, data_wmd = [
    load_data(experiments, name, path)
    for name in ('softcosine', 'euclidean', 'wmd')
]

# Results stored under the 'comites' directory, one set per combination scheme.
path = '/results/transfer/comites/'
data_majority, data_borda = [
    load_data(experiments, name, path)
    for name in ('majority_vote', 'borda_count')
]

# Baselines: an empty metric selects the plain '<title>.json' file.
baseline_path = '/results/baselines/transfer/'
treeboostler_data = load_data(experiments, '', baseline_path)
rdnb_data = load_data(experiments, '', f'{baseline_path}rdn_b/')

In [8]:
def _mean_score(results, dataset, metric, keys):
    """Return the mean of the per-sublist means of `metric` for one experiment.

    `results[dataset]` is iterated as a list of lists of records. `keys` is the
    chain of dictionary keys that leads from a record to the dict holding the
    metric values. The inner mean is taken over each sublist, the outer mean
    over the resulting values.
    """
    sublist_means = []
    for sublist in results[dataset]:
        values = []
        for record in sublist:
            for key in keys:
                record = record[key]
            values.append(record[metric])
        sublist_means.append(np.array(values).mean())
    return np.array(sublist_means).mean()


# One row per compared method:
# (label shown in the Nemenyi table, loaded results, key path to the metrics).
_METHODS = [
    ('softcosine', data_softcosine, ('transfer',)),
    ('euclidean', data_euclidean, ('transfer',)),
    ('wmd', data_wmd, ('transfer',)),
    ('majority_vote', data_majority, ('transfer', 'parameter')),
    ('borda_count', data_borda, ('transfer', 'parameter')),
    ('treeboostler', treeboostler_data, ('transfer', 'parameter')),
    ('rdn-b', rdnb_data, ('rdn-b',)),
]
_LABELS = [method[0] for method in _METHODS]

for metric in ['CLL', 'AUC ROC', 'AUC PR']:
    display(Markdown('## ' + metric))

    # One list per method, holding one mean score per experiment. The lists are
    # rebuilt for every metric, so after the loop they hold the last metric only.
    trans_softcosine, trans_euclidean, trans_wmd, trans_majority, trans_borda, tree, rdnb = [], [], [], [], [], [], []
    # Same order as _METHODS so the two can be zipped together.
    scores_per_method = [trans_softcosine, trans_euclidean, trans_wmd, trans_majority, trans_borda, tree, rdnb]

    for experiment in experiments:
        dataset = experiment['id'] + '_' + experiment['source'] + '_' + experiment['target']
        for scores, (label, results, keys) in zip(scores_per_method, _METHODS):
            scores.append(_mean_score(results, dataset, metric, keys))

    # Friedman test: experiments are the blocks, methods are the groups.
    statistic, pvalue = stats.friedmanchisquare(*scores_per_method)
    print('statistic', statistic, 'pvalue', pvalue)

    # Combine the seven groups into one (methods x experiments) array.
    data = np.array(scores_per_method)

    # Nemenyi post-hoc test expects blocks as rows and groups as columns.
    # NOTE(review): the pairwise table is shown even when the Friedman p-value
    # above is not significant - confirm that this is intended.
    nemenyi = sp.posthoc_nemenyi_friedman(data.T)
    # Replace the anonymous 0..6 labels with method names so the table is readable.
    nemenyi.index = _LABELS
    nemenyi.columns = _LABELS
    display(nemenyi)

## CLL

statistic 20.35714285714286 pvalue 0.002391700720826012
          0         1         2         3         4         5         6
0  1.000000  0.900000  0.900000  0.330001  0.499397  0.900000  0.900000
1  0.900000  1.000000  0.900000  0.145271  0.257682  0.900000  0.900000
2  0.900000  0.900000  1.000000  0.022747  0.051237  0.900000  0.735155
3  0.330001  0.145271  0.022747  1.000000  0.900000  0.022747  0.578001
4  0.499397  0.257682  0.051237  0.900000  1.000000  0.051237  0.735155
5  0.900000  0.900000  0.900000  0.022747  0.051237  1.000000  0.735155
6  0.900000  0.900000  0.735155  0.578001  0.735155  0.735155  1.000000


## AUC ROC

statistic 14.674698795180722 pvalue 0.022943462292059316
          0         1         2         3         4         5         6
0  1.000000  0.900000  0.900000  0.105254  0.330001  0.900000  0.900000
1  0.900000  1.000000  0.900000  0.088415  0.292768  0.900000  0.900000
2  0.900000  0.900000  1.000000  0.124446  0.371034  0.900000  0.900000
3  0.105254  0.088415  0.124446  1.000000  0.900000  0.371034  0.413910
4  0.330001  0.292768  0.371034  0.900000  1.000000  0.695866  0.735155
5  0.900000  0.900000  0.900000  0.371034  0.695866  1.000000  0.900000
6  0.900000  0.900000  0.900000  0.413910  0.735155  0.900000  1.000000


## AUC PR

statistic 6.578313253012048 pvalue 0.36160877835599053
          0         1         2         3         4         5         6
0  1.000000  0.900000  0.900000  0.413910  0.735155  0.900000  0.892309
1  0.900000  1.000000  0.900000  0.457177  0.774446  0.900000  0.900000
2  0.900000  0.900000  1.000000  0.774446  0.900000  0.900000  0.900000
3  0.413910  0.457177  0.774446  1.000000  0.900000  0.853023  0.900000
4  0.735155  0.774446  0.900000  0.900000  1.000000  0.900000  0.900000
5  0.900000  0.900000  0.900000  0.853023  0.900000  1.000000  0.900000
6  0.892309  0.900000  0.900000  0.900000  0.900000  0.900000  1.000000


In [9]:
# Inspect the softcosine per-experiment means left behind by the loop above.
# The list is rebuilt for every metric, so it holds the last one ('AUC PR') only.
trans_softcosine

[0.28384660000000006,
 0.4642054,
 1.0,
 0.381729,
 0.16886660714285714,
 0.08692955555555555]