In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os

In [33]:
# One entry per experiment folder under ./curves.
# glob returns entries in arbitrary order, so sort them to keep the summary
# tables and the order of the generated figures reproducible across runs.
experiments = sorted(glob.glob(os.path.join(os.getcwd(), 'curves', '*')))

## AUC ROC, AUC PR and CLL

In [None]:
# Idea (not applied): also report the spread across folds next to each mean,
# e.g. '+/-' + str(round(exp['CLL'].std(), 3))

In [34]:
# Build one summary row per experiment: the mean of each TransBoostler metric
# over the rows of <experiment>/transboostler_folds.txt.
# Rows are collected in a list and converted to a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 and copied the frame on each call.
fold_metrics = ['CLL', 'AUC ROC', 'AUC PR', 'Total Learning Time']
summary_rows = []
for experiment in experiments:
    try:
        exp = pd.read_csv(os.path.join(experiment, 'transboostler_folds.txt'))
    except FileNotFoundError as e:
        # Experiments without a folds file are reported and skipped.
        print(e)
        continue

    # os.path.basename extracts the folder name with the platform's own
    # separators (splitting on '\\' only works for Windows-style paths).
    row = {'experiment': os.path.basename(experiment)}
    for metric in fold_metrics:
        # Means are kept as strings rounded to 3 decimals.
        row[metric] = str(round(exp[metric].mean(), 3))
    summary_rows.append(row)

df = pd.DataFrame(summary_rows, columns=['experiment'] + fold_metrics)

In [35]:
# Show the summary ordered by experiment name. The order is lexicographic
# (string) order, so "10_..." is listed before "1_...".
df.sort_values(by=['experiment'], ascending=True)

Unnamed: 0,experiment,CLL,AUC ROC,AUC PR,Total Learning Time
0,10_nell_finances_nell_sports,-0.691,0.516,0.525,3.955
1,1_imdb_uwcse,-0.229,0.956,0.939,1.514
2,2_uwcse_imdb,-0.299,0.939,0.902,1.769
3,3_imdb_cora,-0.698,0.5,0.524,2.575
4,4_cora_imdb,-0.4,0.89,0.871,2.125
5,7_yeast_twitter,-0.691,0.523,0.513,3.883
6,8_twitter_yeast,-0.568,0.72,0.729,2.734
7,9_nell_sports_nell_finances,-0.688,0.532,0.55,2.676


## Confusion Matrix

In [36]:
# Build one row per experiment with the mean TP / FP / TN / FN counts over the
# rows of <experiment>/transboostler_confusion_matrix.txt.
# Rows are collected in a list and converted to a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 and copied the frame on each call.
confusion_cells = ['TP', 'FP', 'TN', 'FN']
confusion_rows = []
for experiment in experiments:
    try:
        exp = pd.read_csv(os.path.join(experiment, 'transboostler_confusion_matrix.txt'))
    except FileNotFoundError as e:
        # Experiments without a confusion-matrix file are reported and skipped.
        print(e)
        continue

    # Means are kept as strings rounded to 3 decimals.
    row = {cell: str(round(exp[cell].mean(), 3)) for cell in confusion_cells}
    # os.path.basename extracts the folder name with the platform's own
    # separators (splitting on '\\' only works for Windows-style paths).
    row['experiment'] = os.path.basename(experiment)
    confusion_rows.append(row)

# 'experiment' stays the last column, matching the table layout shown below.
cm = pd.DataFrame(confusion_rows, columns=confusion_cells + ['experiment'])

In [37]:
# Display the per-experiment confusion-matrix table built in the previous cell.
cm

Unnamed: 0,TP,FP,TN,FN,experiment
0,9.0,0.333,391.333,382.667,10_nell_finances_nell_sports
1,90.4,11.2,79.2,0.0,1_imdb_uwcse
2,198.0,30.8,167.2,0.0,2_uwcse_imdb
3,0.0,0.0,2191.0,2455.5,3_imdb_cora
4,258.4,28.0,277.6,47.2,4_cora_imdb
5,0.0,0.0,141.0,141.0,7_yeast_twitter
6,74.0,0.5,231.0,157.5,8_twitter_yeast
7,18.0,0.667,518.0,500.667,9_nell_sports_nell_finances


## Learning Curves

In [38]:
# Learning curves comparing RDN-B, TransBoostler and TreeBoostler.
# For every experiment folder, read the three curve files, print the values,
# and save one PDF per metric under ./figures/<metric>/AUC_*_<experiment>.pdf.

curve_metrics = ['AUC ROC', 'AUC PR']
curve_amounts = ['0.2', '0.4', '0.6', '0.8', '1.0']  # column labels in the curve CSVs
proportions = [float(amount) for amount in curve_amounts]  # x axis values

# Display names for the dataset tokens found in experiment folder names.
# Tokens not listed here (e.g. the numeric prefix) are left out of the title.
dataset_labels = {
    'imdb': 'IMDB', 'uwcse': 'UW-CSE',
    'cora': 'Cora', 'twitter': 'Twitter', 'yeast': 'Yeast',
    'sports': 'Sports', 'finances': 'Finances',
}

# Create every output directory on its own. exist_ok=True makes this safe to
# re-run and also covers the case where ./figures already exists but one of
# the metric sub-directories does not.
for metric in curve_metrics:
    os.makedirs(os.path.join(os.getcwd(), 'figures', metric), exist_ok=True)

for experiment in experiments:
    # Folder name of the experiment, extracted with the platform's separators.
    name = os.path.basename(experiment)

    try:
        transfer_exp = pd.read_csv(os.path.join(experiment, 'trRDNB_curves.csv')).set_index('Unnamed: 0')
        treeboostler_exp = pd.read_csv(
            os.path.join(os.getcwd(), 'TreeBoostler Results',
                         '{}_curves.csv'.format(name.split('_curves.csv')[0]))
        ).set_index('Unnamed: 0')
        rdnb_exp = pd.read_csv(os.path.join(experiment, 'RDNB_curves.csv')).set_index('Unnamed: 0')
    except FileNotFoundError as e:
        # Experiments with a missing curve file are reported and skipped.
        print(e)
        continue

    # metric -> list of values, one per training-data proportion.
    rdnb = {m: [float(rdnb_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}
    transfer_rdnb = {m: [float(transfer_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}
    treeboostler = {m: [float(treeboostler_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}

    # Build the "<source> -> <target>" title. A 'nell' token is a prefix of
    # the token that follows it (nell_sports, nell_finances), so it is joined
    # to that token with a single space instead of being its own entry.
    datasets = []
    prefix = ''
    for token in name.split('.csv')[0].replace('_', ' ').split():
        if token == 'nell':
            prefix = 'NELL '
        elif token in dataset_labels:
            datasets.append(prefix + dataset_labels[token])
            prefix = ''
    figure_title = ' -> '.join(datasets)

    print(figure_title)
    print(rdnb['AUC ROC'], transfer_rdnb['AUC ROC'], treeboostler['AUC ROC'])
    print(rdnb['AUC PR'], transfer_rdnb['AUC PR'], treeboostler['AUC PR'])

    # (legend label, values per metric, marker, line style)
    series = [
        ('RDN-B', rdnb, '^', ':'),
        ('TransBoostler', transfer_rdnb, 'D', '-.'),
        ('TreeBoostler*', treeboostler, 's', '--'),
    ]

    for metric in curve_metrics:
        fig, ax = plt.subplots()
        for label, values, marker, linestyle in series:
            # marker and linestyle are passed as keywords only; giving a line
            # style in a fmt string as well makes matplotlib warn about a
            # redundant definition.
            ax.plot(proportions, values[metric], marker=marker, linestyle=linestyle,
                    color='k', label=label)

        ax.set(xlabel='Proportion of training data', ylabel=metric, title=figure_title)
        ax.grid(linestyle=':')
        ax.legend(loc='lower right', fontsize='small', frameon=True)
        fig.savefig(os.path.join(
            os.getcwd(), 'figures', metric,
            '{}_{}.pdf'.format(metric.replace(' ', '_'), name.split('.csv')[0])))
        # Close explicitly so figures do not pile up in memory across the loop.
        plt.close(fig)

NELL  Finances -> NELL  Sports
[1.5993860000000002, 1.6560406666666665, 1.6783716666666668, 1.685331666666667, 1.668664] [1.031904333333333, 1.0319726666666666, 1.0319726666666666, 1.0319726666666666, 1.0319726666666666] [0.47472600000000004, 0.471217, 0.4610233333333333, 0.4610966666666666, 0.44921533333333336]
[1.6070826666666669, 1.6727613333333335, 1.6973056666666668, 1.6973523333333336, 1.689086] [1.049655, 1.0496733333333337, 1.0496733333333337, 1.0496733333333337, 1.0496733333333337] [0.002308000000000001, 0.0023026666666666664, 0.0021796666666666666, 0.0021856666666666674, 0.002157]
IMDB -> UW-CSE
[0.8355788000000001, 0.8991691999999999, 0.9279139999999999, 0.9306816, 0.9335898000000001] [0.9265362, 0.9485506, 0.9522384, 0.9559264, 0.9559264] [0.9367180000000002, 0.9543488000000001, 0.9544348, 0.9599488, 0.9585362]
[0.7905217999999999, 0.8669932000000001, 0.8742219999999999, 0.8776835999999999, 0.8862722] [0.8710661999999999, 0.9219828000000001, 0.9289972, 0.9388156, 0.9388156]

In [31]:
# Learning curves comparing the three embedding variants (FastText, Word2Vec,
# Word2Vec WMD). For every experiment folder, read trRDNB_curves.csv from the
# folder itself and from the sibling 'experiments-word2vec' and
# 'experiments-dist-word2vec' trees, then save one PDF per metric under
# ./figures/<metric>/modelos_AUC_*_<experiment>.pdf.

curve_metrics = ['AUC ROC', 'AUC PR']
curve_amounts = ['0.2', '0.4', '0.6', '0.8', '1.0']  # column labels in the curve CSVs
proportions = [float(amount) for amount in curve_amounts]  # x axis values

# Display names for the dataset tokens found in experiment folder names.
# Tokens not listed here (e.g. the numeric prefix) are left out of the title.
dataset_labels = {
    'imdb': 'IMDB', 'uwcse': 'UW-CSE',
    'cora': 'Cora', 'twitter': 'Twitter', 'yeast': 'Yeast',
    'sports': 'Sports', 'finances': 'Finances',
}

# NOTE(review): the AUC PR legend carries a trailing '*' that the AUC ROC
# legend lacks. Both labels are kept exactly as they were; confirm which one
# is intended.
wmd_labels = {'AUC ROC': 'Word2Vec WMD', 'AUC PR': 'Word2Vec WMD*'}

# Create every output directory on its own. exist_ok=True makes this safe to
# re-run and also covers the case where ./figures already exists but one of
# the metric sub-directories does not.
for metric in curve_metrics:
    os.makedirs(os.path.join(os.getcwd(), 'figures', metric), exist_ok=True)

for experiment in experiments:
    # Folder name of the experiment, extracted with the platform's separators.
    name = os.path.basename(experiment)

    # The other two result trees are located by swapping the
    # 'experiments-fastText' part of the path.
    word2vec_dir = experiment.replace('experiments-fastText', 'experiments-word2vec')
    word2vec_dist_dir = experiment.replace('experiments-fastText', 'experiments-dist-word2vec')

    try:
        fastText_exp = pd.read_csv(os.path.join(experiment, 'trRDNB_curves.csv')).set_index('Unnamed: 0')
        word2vec_exp = pd.read_csv(os.path.join(word2vec_dir, 'trRDNB_curves.csv')).set_index('Unnamed: 0')
        word2vec_dist_exp = pd.read_csv(os.path.join(word2vec_dist_dir, 'trRDNB_curves.csv')).set_index('Unnamed: 0')
    except FileNotFoundError as e:
        # Experiments with a missing curve file are reported and skipped.
        print(e)
        continue

    # metric -> list of values, one per training-data proportion.
    fastText = {m: [float(fastText_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}
    word2vec = {m: [float(word2vec_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}
    word2vec_dist = {m: [float(word2vec_dist_exp.loc[m, a]) for a in curve_amounts] for m in curve_metrics}

    # Build the "<source> -> <target>" title. A 'nell' token is a prefix of
    # the token that follows it (nell_sports, nell_finances), so it is joined
    # to that token with a single space instead of being its own entry.
    datasets = []
    prefix = ''
    for token in name.split('.csv')[0].replace('_', ' ').split():
        if token == 'nell':
            prefix = 'NELL '
        elif token in dataset_labels:
            datasets.append(prefix + dataset_labels[token])
            prefix = ''
    figure_title = ' -> '.join(datasets)

    for metric in curve_metrics:
        # (legend label, values per metric, marker, line style)
        series = [
            ('FastText', fastText, '^', ':'),
            ('Word2Vec', word2vec, 'D', '-.'),
            (wmd_labels[metric], word2vec_dist, 's', '--'),
        ]

        fig, ax = plt.subplots()
        for label, values, marker, linestyle in series:
            # marker and linestyle are passed as keywords only; giving a line
            # style in a fmt string as well makes matplotlib warn about a
            # redundant definition.
            ax.plot(proportions, values[metric], marker=marker, linestyle=linestyle,
                    color='k', label=label)

        ax.set(xlabel='Proportion of training data', ylabel=metric, title=figure_title)
        ax.grid(linestyle=':')
        ax.legend(loc='lower right', fontsize='small', frameon=True)
        fig.savefig(os.path.join(
            os.getcwd(), 'figures', metric,
            'modelos_{}_{}.pdf'.format(metric.replace(' ', '_'), name.split('.csv')[0])))
        # Close explicitly so figures do not pile up in memory across the loop.
        plt.close(fig)