In [1]:
import numpy as np
import pandas as pd



### Single Task Data

In [16]:
eval_results = pd.read_csv("evaluation_results.csv", sep=",")
results = eval_results[eval_results["task_id"].str.contains("MTLAS")==False]

In [21]:
def prepare_single_task(single_task_data: pd.DataFrame):
    """

    :param single_task_data:
    :return:
    """
    single_task_data["use_topic"] = single_task_data["task_id"].str.contains("_topic")
    d = {True: 'topic', False: 'no-topic'}
    results = single_task_data.replace(d)

    datasets = ['gretz', 'swanson', 'ukp', 'webis', 'toledo']
    aggregated_cols = [col for col in results.columns if any(s in col for s in datasets)]

    #  aggregate over the seeds.
    results_by_task = results.groupby(["task_id", "sampling", "use_topic"], as_index=False).agg(
        {
            d: ['mean', 'std']
            for d in aggregated_cols
        }).round(3)

    # extract the values from the hierarchical dataframe.
    task_details = results_by_task.loc[:, (slice(None), '')]
    task_details.columns = task_details.columns.get_level_values(0)

    means = results_by_task.loc[:, (slice(None), 'mean')]
    means.columns = means.columns.get_level_values(0)

    stds = results_by_task.loc[:, (slice(None), 'std')]
    stds.columns = stds.columns.get_level_values(0)

    #  separate pearson and spearman
    pearson_means = means.filter(regex='pearson')
    pearson_stds = stds.filter(regex='pearson')
    pearson_stds.columns = "d_" + pearson_stds.columns
    pearson_data = pd.concat([task_details, pearson_means, pearson_stds], axis=1)

    spearman_means = means.filter(regex='spearman')
    spearman_stds = stds.filter(regex='spearman')
    spearman_stds.columns = "d_" + spearman_stds.columns
    spearman_data = pd.concat([task_details, spearman_means, spearman_stds], axis=1)

    #  combine the mean and std values for the pearson coefficients.
    if "gretz_pearson" in pearson_data.columns:
        pearson_data["IBMArgQ"] = [
            "$\rho: " + str(pearson_data["gretz_pearson"][i]) + " \pm " + str(
                pearson_data["d_gretz_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["IBMArgQ"] = [
            "$\sigma: " + str(spearman_data["gretz_spearman"][i]) + " \pm " + str(
                spearman_data["d_gretz_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["IBMArgQ"] = None
        spearman_data["IBMArgQ"] = None

    if "toledo_pearson" in pearson_data.columns:
        pearson_data["IBMRank"] = [
            "$\rho: " + str(pearson_data["toledo_pearson"][i]) + " \pm " + str(
                pearson_data["d_toledo_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["IBMRank"] = [
            "$\sigma: " + str(spearman_data["toledo_spearman"][i]) + " \pm " + str(
                spearman_data["d_toledo_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["IBMRank"] = None
        spearman_data["IBMRank"] = None

    if "ukp_pearson" in pearson_data.columns:
        pearson_data["UKPConvArgRank"] = [
            "$\rho: " + str(pearson_data["ukp_pearson"][i]) + " \pm " + str(
                pearson_data["d_ukp_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["UKPConvArgRank"] = [
            "$\sigma: " + str(spearman_data["ukp_spearman"][i]) + " \pm " + str(
                spearman_data["d_ukp_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["UKPConvArgRank"] = None
        spearman_data["UKPConvArgRank"] = None

    if "swanson_pearson" in pearson_data.columns:
        pearson_data["SwanRank"] = [
            "$\rho: " + str(pearson_data["swanson_pearson"][i]) + " \pm " + str(
                pearson_data["d_swanson_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["SwanRank"] = [
            "$\sigma: " + str(spearman_data["swanson_spearman"][i]) + " \pm " + str(
                spearman_data["d_swanson_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["SwanRank"] = None
        spearman_data["SwanRank"] = None

    if "webis_pearson" in pearson_data.columns:
        pearson_data["Webis"] = [
            "$\rho: " + str(pearson_data["webis_pearson"][i]) + " \pm " + str(
                pearson_data["d_webis_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["Webis"] = [
            "$\sigma: " + str(spearman_data["webis_spearman"][i]) + " \pm " + str(
                spearman_data["d_webis_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["Webis"] = None
        spearman_data["Webis"] = None

    final_pearson = pearson_data[
        ["task_id", "sampling", "use_topic", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]
    final_spearman = spearman_data[
        ["task_id", "sampling", "use_topic", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]

    metrics_table = pd.concat([final_pearson, final_spearman]).sort_values(by=["task_id", "sampling", "use_topic"])

    # rename task_id to proper names.
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_gretz") is True] = "IBMArgQ"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_toledo") is True] = "IBMRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_ukp") is True] = "UKPConvArgRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_swanson") is True] = "SwanRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_webis") is True] = "Webis"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_gretz") is True] = "All except IBMArgQ"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_toledo") is True] = "All except IBMRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_ukp") is True] = \
        "All except UKPConvArgRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_swanson") is True] = "All except SwanRank"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_webis") is True] = "All except Webis"
    metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS") is True] = "All"

    metrics_table.sort_values(by=["task_id", "sampling", "use_topic"], inplace=True)
    #  Define the latex table structure.
    col1 = metrics_table["task_id"].tolist()
    col2 = metrics_table["sampling"].tolist()
    col3 = metrics_table["use_topic"].tolist()
    cidx = ["IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]
    iidx = pd.MultiIndex.from_arrays([
        col1, col2, col3
    ])
    metrics_value_table = metrics_table.iloc[:, 3:]
    values_list = metrics_value_table.values.tolist()
    latex_table = pd.DataFrame(
        values_list, columns=cidx, index=iidx)
    pd.options.display.float_format = '{:,.3f}'.format
    ltx_code = latex_table.to_latex(
        caption="Single Task Learning",
        header=cidx,
        position="H",
        escape=False,
        multirow=True,
        column_format="|lll|lllll|",
    )
    print(ltx_code)
    return ltx_code

In [20]:
prepare_single_task(results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


KeyError: 'cannot use a single bool to index into setitem'

In [3]:
# Define topic information
results["use_topic"] = results["task_id"].str.contains("_topic")
d = {True: 'topic', False: 'no-topic'}
results = results.replace(d)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [4]:
results.head()

Unnamed: 0,task_id,sampling,checkpoint,test_overall_pearson,gretz_pearson,gretz_spearman,swanson_pearson,swanson_spearman,toledo_pearson,toledo_spearman,ukp_pearson,ukp_spearman,webis_pearson,webis_spearman,test_loss,use_topic
0,STLAS_only_toledo,in-topic,experiments/version_24-02-2022--10-34-07/check...,0.4001,0.4105,0.3909,0.3103,0.3372,0.4993,0.4798,0.2575,0.3696,0.1977,0.2096,0.0726,no-topic
1,STLAS_only_swanson,in-topic,experiments/version_24-02-2022--10-32-00/check...,0.315,0.2289,0.1655,0.6374,0.6081,0.3135,0.2173,0.5696,0.4718,0.5405,0.5476,0.0449,no-topic
2,STLAS_only_gretz,in-topic,experiments/version_24-02-2022--10-26-29/check...,0.5007,0.503,0.4449,0.4549,0.4928,0.436,0.4132,0.4345,0.484,0.4265,0.3523,0.0355,no-topic
3,STLAS_only_ukp,in-topic,experiments/version_24-02-2022--10-36-24/check...,0.0585,0.0165,-0.0004,0.4435,0.3869,-0.176,-0.2124,0.3942,0.3013,0.5411,0.4703,0.0635,no-topic
4,STLAS_only_toledo,in-topic,experiments/version_24-02-2022--10-38-16/check...,0.4102,0.4195,0.3903,0.3246,0.3495,0.5001,0.4773,0.3158,0.395,0.3062,0.3179,0.0691,no-topic


In [5]:
results_by_task = results.groupby(["task_id", "sampling", "use_topic"], as_index=False).agg(
                        {
                        'gretz_pearson': ['mean', 'std'],
                        'toledo_pearson': ['mean', 'std'],
                        'ukp_pearson': ['mean', 'std'],
                        'swanson_pearson': ['mean', 'std'],
                        'webis_pearson': ['mean', 'std'], 
                        'gretz_spearman': ['mean', 'std'],
                        'toledo_spearman': ['mean', 'std'],
                        'ukp_spearman': ['mean', 'std'],
                        'swanson_spearman': ['mean', 'std'],
                        'webis_spearman': ['mean', 'std'],
                        }).round(3)

In [88]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(results_by_task.sort_values(by=["task_id", "sampling"]))

Unnamed: 0_level_0,task_id,sampling,use_topic,gretz_pearson,gretz_pearson,toledo_pearson,toledo_pearson,ukp_pearson,ukp_pearson,swanson_pearson,swanson_pearson,webis_pearson,webis_pearson,gretz_spearman,gretz_spearman,toledo_spearman,toledo_spearman,ukp_spearman,ukp_spearman,swanson_spearman,swanson_spearman,webis_spearman,webis_spearman
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
0,MTLAS,cross-topic,False,0.495,0.015,0.44,0.064,0.416,0.11,0.505,0.132,0.79,0.047,0.439,0.014,0.421,0.058,0.499,0.145,0.492,0.145,0.794,0.051
1,MTLAS,in-topic,False,0.511,0.004,0.52,0.012,0.6,0.034,0.66,0.006,0.559,0.021,0.457,0.004,0.501,0.014,0.565,0.043,0.633,0.006,0.593,0.029
2,MTLAS_topic,cross-topic,True,0.482,0.014,0.429,0.072,0.434,0.096,0.494,0.141,0.786,0.049,0.422,0.01,0.407,0.069,0.502,0.165,0.484,0.153,0.791,0.044
3,MTLAS_topic,in-topic,True,0.515,0.004,0.535,0.007,0.654,0.042,0.679,0.005,0.598,0.016,0.463,0.004,0.508,0.009,0.646,0.035,0.653,0.005,0.65,0.013
4,STLAS,cross-topic,False,0.427,0.03,0.401,0.049,0.228,0.097,0.461,0.086,0.5,0.094,0.368,0.028,0.39,0.043,0.302,0.04,0.45,0.096,0.36,0.114
5,STLAS,in-topic,False,0.403,0.004,0.356,0.01,0.41,0.016,0.643,0.007,0.493,0.016,0.335,0.003,0.334,0.012,0.424,0.015,0.617,0.009,0.492,0.014
6,STLAS_LOO_gretz,cross-topic,False,0.256,0.068,0.318,0.053,0.102,0.081,0.396,0.123,0.349,0.106,0.205,0.061,0.293,0.052,0.146,0.05,0.385,0.126,0.283,0.115
7,STLAS_LOO_gretz,in-topic,False,0.242,0.012,0.441,0.012,0.363,0.015,0.622,0.007,0.5,0.011,0.191,0.009,0.419,0.016,0.485,0.037,0.584,0.005,0.534,0.013
8,STLAS_LOO_gretz_topic,cross-topic,True,0.303,0.03,0.374,0.068,0.259,0.046,0.452,0.132,0.75,0.065,0.269,0.031,0.353,0.063,0.306,0.201,0.445,0.139,0.765,0.063
9,STLAS_LOO_gretz_topic,in-topic,True,0.359,0.012,0.528,0.007,0.328,0.01,0.675,0.009,0.605,0.021,0.329,0.011,0.505,0.012,0.46,0.057,0.653,0.01,0.678,0.016


In [10]:
metrics_table = pd.concat([final_pearson, final_spearman]).sort_values(by=["task_id", "sampling", "use_topic"])

In [11]:
metrics_table.head()

Unnamed: 0,task_id,sampling,use_topic,IBMArgQ,IBMRank,UKPConvArgRank,SwanRank,Webis
0,MTLAS,cross-topic,no-topic,$\rho: 0.495 \pm 0.015$,$\rho: 0.44 \pm 0.064$,$\rho: 0.416 \pm 0.11$,$\rho: 0.505 \pm 0.132$,$\rho: 0.79 \pm 0.047$
0,MTLAS,cross-topic,no-topic,$\sigma: 0.439 \pm 0.014$,$\sigma: 0.421 \pm 0.058$,$\sigma: 0.499 \pm 0.145$,$\sigma: 0.492 \pm 0.145$,$\sigma: 0.794 \pm 0.051$
1,MTLAS,in-topic,no-topic,$\rho: 0.511 \pm 0.004$,$\rho: 0.52 \pm 0.012$,$\rho: 0.6 \pm 0.034$,$\rho: 0.66 \pm 0.006$,$\rho: 0.559 \pm 0.021$
1,MTLAS,in-topic,no-topic,$\sigma: 0.457 \pm 0.004$,$\sigma: 0.501 \pm 0.014$,$\sigma: 0.565 \pm 0.043$,$\sigma: 0.633 \pm 0.006$,$\sigma: 0.593 \pm 0.029$
2,MTLAS_topic,cross-topic,topic,$\rho: 0.482 \pm 0.014$,$\rho: 0.429 \pm 0.072$,$\rho: 0.434 \pm 0.096$,$\rho: 0.494 \pm 0.141$,$\rho: 0.786 \pm 0.049$


In [12]:
# rename task_id to proper names.
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_gretz")==True] = "IBMArgQ"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_toledo")==True] = "IBMRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_ukp")==True] = "UKPConvArgRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_swanson")==True] = "SwanRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_only_webis")==True] = "Webis"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_gretz")==True] = "All except IBMArgQ"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_toledo")==True] = "All except IBMRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_ukp")==True] = "All except UKPConvArgRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_swanson")==True] = "All except SwanRank"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS_LOO_webis")==True] = "All except Webis"
metrics_table["task_id"][metrics_table["task_id"].str.contains("STLAS")==True] = "All"

In [13]:
col1 = metrics_table["task_id"].tolist()
col2 = metrics_table["sampling"].tolist()
col3 = metrics_table["use_topic"].tolist()
cidx = ["IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]
iidx = pd.MultiIndex.from_arrays([
    col1, col2, col3
])
metrics_value_table = metrics_table.iloc[:, 3:]
values_list = metrics_value_table.values.tolist()

In [14]:
latex_table = pd.DataFrame(
    values_list, columns=cidx, index=iidx)

In [32]:
pd.options.display.float_format = '{:,.3f}'.format
ltx_code = latex_table.to_latex(
    caption="Single Task Learning",
    position="H",
    escape=False,
    multirow=True,
)

In [33]:
print(ltx_code)

\begin{table}[H]
\centering
\caption{Single Task Learning}
\begin{tabular}{llllllll}
\toprule
    &          &       &                    IBMArgQ &                     IBMRank &             UKPConvArgRank &                   SwanRank &                      Webis \\
\midrule
\multirow{8}{*}{MTLAS} & \multirow{2}{*}{cross-topic} & no-topic &    $\rho: 0.495 \pm 0.015$ &      $\rho: 0.44 \pm 0.064$ &     $\rho: 0.416 \pm 0.11$ &    $\rho: 0.505 \pm 0.132$ &     $\rho: 0.79 \pm 0.047$ \\
    &          & no-topic &  $\sigma: 0.439 \pm 0.014$ &   $\sigma: 0.421 \pm 0.058$ &  $\sigma: 0.499 \pm 0.145$ &  $\sigma: 0.492 \pm 0.145$ &  $\sigma: 0.794 \pm 0.051$ \\
\cline{2-8}
    & \multirow{2}{*}{in-topic} & no-topic &    $\rho: 0.511 \pm 0.004$ &      $\rho: 0.52 \pm 0.012$ &      $\rho: 0.6 \pm 0.034$ &     $\rho: 0.66 \pm 0.006$ &    $\rho: 0.559 \pm 0.021$ \\
    &          & no-topic &  $\sigma: 0.457 \pm 0.004$ &   $\sigma: 0.501 \pm 0.014$ &  $\sigma: 0.565 \pm 0.043$ &  $\sigma: 0.633 

### Multi Task Learning

In [126]:
eval_results = pd.read_csv("evaluation_results.csv", sep=",")
multi_task_results = eval_results[eval_results["task_id"].isin(["MTLAS", "MTLAS_topic"])].reset_index(drop=True)
mtlas_loo_gretz = pd.read_csv("eval_mtlas_loo_gretz.csv", sep=";")
mtlas_loo_toledo = pd.read_csv("eval_mtlas_loo_toledo.csv", sep=";")
mtlas_loo_swanson = pd.read_csv("eval_mtlas_loo_swanson.csv", sep=";")
mtlas_loo_ukp = pd.read_csv("eval_mtlas_loo_ukp.csv", sep=";")
mtlas_loo_webis = pd.read_csv("eval_mtlas_loo_webis.csv", sep=";")
infer_results = pd.read_csv("infer_results1.csv", sep=";")

multi_task_data = [multi_task_results, mtlas_loo_gretz, mtlas_loo_toledo, mtlas_loo_swanson, mtlas_loo_ukp, mtlas_loo_webis]

In [127]:
infer_results

Unnamed: 0,ukp_pearson,ukp_spearman,toledo_pearson,toledo_spearman,webis_pearson,webis_spearman,swanson_pearson,swanson_spearman,gretz_pearson,gretz_spearman,task_name,sampling,aggregation_method
0,0.655,0.485,0.250,0.167,0.456,0.629,0.436,0.450,0.229,0.224,MTLAS_LOO_gretz_topic,in-topic,var
1,0.606,0.468,0.249,0.128,0.501,0.602,0.457,0.480,0.331,0.298,MTLAS_topic,in-topic,var
2,0.596,0.601,0.497,0.451,0.718,0.745,0.625,0.621,0.493,0.445,MTLAS_topic,in-topic,mean
3,0.595,0.527,0.415,0.311,0.708,0.716,0.584,0.602,0.469,0.410,MTLAS_topic,in-topic,wt-var
4,0.587,0.442,0.258,0.185,0.394,0.576,0.393,0.446,0.320,0.315,MTLAS_LOO_swanson,in-topic,var
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,0.216,0.332,0.512,0.484,0.228,0.215,0.599,0.613,0.446,0.398,MTLAS_LOO_webis_topic,cross-topic,mean
68,0.198,0.467,0.233,0.173,0.179,0.274,0.360,0.439,0.216,0.208,MTLAS_LOO_webis,cross-topic,var
69,0.111,0.326,0.157,0.110,0.373,0.448,0.286,0.298,0.242,0.198,MTLAS_LOO_swanson_topic,cross-topic,var
70,0.096,0.302,0.196,0.244,0.126,0.267,0.364,0.423,0.216,0.215,MTLAS_LOO_webis_topic,cross-topic,var


In [81]:
complete_set = pd.DataFrame(columns=["task_id", "sampling", "use_topic", "aggregation", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"])
for result_set in multi_task_data:
    
    result_set["use_topic"] = result_set["task_id"].str.contains("_topic")
    d = {True: 'topic', False: 'no-topic'}
    results = result_set.replace(d)
    datasets = ['gretz', 'swanson', 'ukp', 'webis', 'toledo']
    aggregated_cols = [col for col in results.columns if any(s in col for s in datasets)]
    results_by_task = results.groupby(["task_id", "sampling", "use_topic"], as_index=False).agg(
        {
            d: ['mean', 'std']
            for d in aggregated_cols
        }).round(3)
    
    # extract the values into a new dataframe.
    task_details = results_by_task.loc[:, (slice(None), '')]
    task_details.columns = task_details.columns.get_level_values(0)
    means = results_by_task.loc[:, (slice(None), 'mean')]
    means.columns = means.columns.get_level_values(0)
    stds = results_by_task.loc[:, (slice(None), 'std')]
    stds.columns = stds.columns.get_level_values(0)
    
    #  separate pearson and spearman
    pearson_means = means.filter(regex='pearson')
    pearson_stds = stds.filter(regex='pearson')
    pearson_stds.columns = "d_" + pearson_stds.columns
    pearson_data = pd.concat([task_details, pearson_means, pearson_stds], axis=1)
    spearman_means = means.filter(regex='spearman')
    spearman_stds = stds.filter(regex='spearman')
    spearman_stds.columns = "d_" + spearman_stds.columns
    spearman_data = pd.concat([task_details, spearman_means, spearman_stds], axis=1)
    
    #  combine the mean and std values for the pearson coeffients.

    if "gretz_pearson" in pearson_data.columns:
        pearson_data["IBMArgQ"] = [
            "$\rho: " + str(pearson_data["gretz_pearson"][i]) + " \pm " + str(pearson_data["d_gretz_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["IBMArgQ"] = [
            "$\sigma: " + str(spearman_data["gretz_spearman"][i]) + " \pm " + str(spearman_data["d_gretz_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["IBMArgQ"] = None
        spearman_data["IBMArgQ"] = None
        
    if "toledo_pearson" in pearson_data.columns:
        pearson_data["IBMRank"] = [
            "$\rho: " + str(pearson_data["toledo_pearson"][i]) + " \pm " + str(pearson_data["d_toledo_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["IBMRank"] = [
            "$\sigma: " + str(spearman_data["toledo_spearman"][i]) + " \pm " + str(spearman_data["d_toledo_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["IBMRank"] = None
        spearman_data["IBMRank"] = None
    
    if "ukp_pearson" in pearson_data.columns:
        pearson_data["UKPConvArgRank"] = [
            "$\rho: " + str(pearson_data["ukp_pearson"][i]) + " \pm " + str(pearson_data["d_ukp_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["UKPConvArgRank"] = [
            "$\sigma: " + str(spearman_data["ukp_spearman"][i]) + " \pm " + str(spearman_data["d_ukp_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["UKPConvArgRank"] = None
        spearman_data["UKPConvArgRank"] = None
    
    if "swanson_pearson" in pearson_data.columns:
        pearson_data["SwanRank"] = [
            "$\rho: " + str(pearson_data["swanson_pearson"][i]) + " \pm " + str(pearson_data["d_swanson_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["SwanRank"] = [
            "$\sigma: " + str(spearman_data["swanson_spearman"][i]) + " \pm " + str(spearman_data["d_swanson_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["SwanRank"] = None
        spearman_data["SwanRank"] = None
    
    if "webis_pearson" in pearson_data.columns:
        pearson_data["Webis"] = [
            "$\rho: " + str(pearson_data["webis_pearson"][i]) + " \pm " + str(pearson_data["d_webis_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["Webis"] = [
            "$\sigma: " + str(spearman_data["webis_spearman"][i]) + " \pm " + str(spearman_data["d_webis_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
    else:
        pearson_data["Webis"] = None
        spearman_data["Webis"] = None
    
    final_pearson = pearson_data[
        ["task_id", "sampling", "use_topic", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]
    final_spearman = spearman_data[
        ["task_id", "sampling", "use_topic", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]
    
    metrics_table = pd.concat([final_pearson, final_spearman]).sort_values(by=["task_id", "sampling", "use_topic"])
    metrics_table["aggregation"] = "None"
    complete_set = pd.concat([complete_set, metrics_table]).reset_index(drop=True)

In [103]:
complete_set.head()

Unnamed: 0,task_id,sampling,use_topic,aggregation,IBMArgQ,IBMRank,UKPConvArgRank,SwanRank,Webis
0,MTLAS,cross-topic,no-topic,,$\rho: 0.495 \pm 0.015$,$\rho: 0.44 \pm 0.064$,$\rho: 0.416 \pm 0.11$,$\rho: 0.505 \pm 0.132$,$\rho: 0.79 \pm 0.047$
1,MTLAS,cross-topic,no-topic,,$\sigma: 0.439 \pm 0.014$,$\sigma: 0.421 \pm 0.058$,$\sigma: 0.499 \pm 0.145$,$\sigma: 0.492 \pm 0.145$,$\sigma: 0.794 \pm 0.051$
2,MTLAS,in-topic,no-topic,,$\rho: 0.511 \pm 0.004$,$\rho: 0.52 \pm 0.012$,$\rho: 0.6 \pm 0.034$,$\rho: 0.66 \pm 0.006$,$\rho: 0.559 \pm 0.021$
3,MTLAS,in-topic,no-topic,,$\sigma: 0.457 \pm 0.004$,$\sigma: 0.501 \pm 0.014$,$\sigma: 0.565 \pm 0.043$,$\sigma: 0.633 \pm 0.006$,$\sigma: 0.593 \pm 0.029$
4,MTLAS_topic,cross-topic,topic,,$\rho: 0.482 \pm 0.014$,$\rho: 0.429 \pm 0.072$,$\rho: 0.434 \pm 0.096$,$\rho: 0.494 \pm 0.141$,$\rho: 0.786 \pm 0.049$


In [135]:
infer_results.rename(columns={"task_name": "task_id", "aggregation_method": "aggregation"}, inplace=True)

# define topic information
infer_results["use_topic"] = infer_results["task_id"].str.contains("_topic")
d = {True: 'topic', False: 'no-topic'}
results = infer_results.replace(d)
pearson_data = results[["task_id","sampling","use_topic",
                              "aggregation","gretz_pearson","toledo_pearson",
                              "ukp_pearson","swanson_pearson","webis_pearson"]]
spearman_data = results[["task_id","sampling","use_topic",
                              "aggregation","gretz_spearman","toledo_spearman",
                              "ukp_spearman","swanson_spearman","webis_spearman"]]

# separate out pearson and spearman and rewrite
if "gretz_pearson" in pearson_data.columns:
        pearson_data["IBMArgQ"] = [
            "$\rho: " + str(pearson_data["gretz_pearson"][i]) + "$"
            for i in range(len(pearson_data))]
        spearman_data["IBMArgQ"] = [
            "$\sigma: " + str(spearman_data["gretz_spearman"][i]) + "$"
            for i in range(len(spearman_data))]
else:
    pearson_data["IBMArgQ"] = None
    spearman_data["IBMArgQ"] = None
    
if "toledo_pearson" in pearson_data.columns:
    pearson_data["IBMRank"] = [
        "$\rho: " + str(pearson_data["toledo_pearson"][i])  + "$"
        for i in range(len(pearson_data))]
    spearman_data["IBMRank"] = [
        "$\sigma: " + str(spearman_data["toledo_spearman"][i])  + "$"
        for i in range(len(spearman_data))]
else:
    pearson_data["IBMRank"] = None
    spearman_data["IBMRank"] = None

if "ukp_pearson" in pearson_data.columns:
    pearson_data["UKPConvArgRank"] = [
        "$\rho: " + str(pearson_data["ukp_pearson"][i]) + "$"
        for i in range(len(pearson_data))]
    spearman_data["UKPConvArgRank"] = [
        "$\sigma: " + str(spearman_data["ukp_spearman"][i])  + "$"
        for i in range(len(spearman_data))]
else:
    pearson_data["UKPConvArgRank"] = None
    spearman_data["UKPConvArgRank"] = None

if "swanson_pearson" in pearson_data.columns:
    pearson_data["SwanRank"] = [
        "$\rho: " + str(pearson_data["swanson_pearson"][i]) + "$"
        for i in range(len(pearson_data))]
    spearman_data["SwanRank"] = [
        "$\sigma: " + str(spearman_data["swanson_spearman"][i]) + "$"
        for i in range(len(spearman_data))]
else:
    pearson_data["SwanRank"] = None
    spearman_data["SwanRank"] = None

if "webis_pearson" in pearson_data.columns:
    pearson_data["Webis"] = [
        "$\rho: " + str(pearson_data["webis_pearson"][i]) + "$"
        for i in range(len(pearson_data))]
    spearman_data["Webis"] = [
        "$\sigma: " + str(spearman_data["webis_spearman"][i]) + "$"
        for i in range(len(spearman_data))]
else:
    pearson_data["Webis"] = None
    spearman_data["Webis"] = None

final_pearson = pearson_data[
    ["task_id", "sampling", "use_topic", "aggregation", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]
final_spearman = spearman_data[
    ["task_id", "sampling", "use_topic", "aggregation", "IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]]

infer_table = pd.concat([final_pearson, final_spearman]).sort_values(by=["task_id", "sampling", "use_topic", "aggregation"]).reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [136]:
complete_set.head()

Unnamed: 0,task_id,sampling,use_topic,aggregation,IBMArgQ,IBMRank,UKPConvArgRank,SwanRank,Webis
0,MTLAS,cross-topic,no-topic,,$\rho: 0.495 \pm 0.015$,$\rho: 0.44 \pm 0.064$,$\rho: 0.416 \pm 0.11$,$\rho: 0.505 \pm 0.132$,$\rho: 0.79 \pm 0.047$
1,MTLAS,cross-topic,no-topic,,$\sigma: 0.439 \pm 0.014$,$\sigma: 0.421 \pm 0.058$,$\sigma: 0.499 \pm 0.145$,$\sigma: 0.492 \pm 0.145$,$\sigma: 0.794 \pm 0.051$
2,MTLAS,in-topic,no-topic,,$\rho: 0.511 \pm 0.004$,$\rho: 0.52 \pm 0.012$,$\rho: 0.6 \pm 0.034$,$\rho: 0.66 \pm 0.006$,$\rho: 0.559 \pm 0.021$
3,MTLAS,in-topic,no-topic,,$\sigma: 0.457 \pm 0.004$,$\sigma: 0.501 \pm 0.014$,$\sigma: 0.565 \pm 0.043$,$\sigma: 0.633 \pm 0.006$,$\sigma: 0.593 \pm 0.029$
4,MTLAS_topic,cross-topic,topic,,$\rho: 0.482 \pm 0.014$,$\rho: 0.429 \pm 0.072$,$\rho: 0.434 \pm 0.096$,$\rho: 0.494 \pm 0.141$,$\rho: 0.786 \pm 0.049$


In [170]:
infer_table.head()

Unnamed: 0,task_id,sampling,use_topic,aggregation,IBMArgQ,IBMRank,UKPConvArgRank,SwanRank,Webis
0,MTLAS,cross-topic,no-topic,mean,$\rho: 0.4717$,$\rho: 0.3574$,$\rho: 0.402$,$\rho: 0.6171$,$\rho: 0.7976$
1,MTLAS,cross-topic,no-topic,mean,$\sigma: 0.4101$,$\sigma: 0.3375$,$\sigma: 0.69$,$\sigma: 0.624$,$\sigma: 0.7921$
2,MTLAS,cross-topic,no-topic,var,$\rho: 0.2748$,$\rho: 0.1683$,$\rho: 0.2586$,$\rho: 0.3982$,$\rho: 0.5949$
3,MTLAS,cross-topic,no-topic,var,$\sigma: 0.1804$,$\sigma: 0.0559$,$\sigma: 0.5811$,$\sigma: 0.4466$,$\sigma: 0.5045$
4,MTLAS,cross-topic,no-topic,wt-var,$\rho: 0.449$,$\rho: 0.3422$,$\rho: 0.3987$,$\rho: 0.5932$,$\rho: 0.779$


In [171]:
mtl_table = pd.concat([complete_set, infer_table]).sort_values(by=["task_id", "sampling", "use_topic", "aggregation"]).reset_index(drop=True)

In [179]:
#  rename task names
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS_LOO_gretz")==True] = "All except IBMArgQ"
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS_LOO_toledo")==True] = "All except IBMRank"
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS_LOO_ukp")==True] = "All except UKPConvArgRank"
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS_LOO_swanson")==True] = "All except SwanRank"
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS_LOO_webis")==True] = "All except Webis"
mtl_table["task_id"][mtl_table["task_id"].str.contains("MTLAS")==True] = "All"

mtl_table.sort_values(by=["task_id", "sampling", "use_topic", "aggregation"], inplace=True)

In [180]:
#  Define the latex table structure.
col1 = mtl_table["task_id"].tolist()
col2 = mtl_table["sampling"].tolist()
col3 = mtl_table["use_topic"].tolist()
col4 = mtl_table["aggregation"].tolist()
cidx = ["IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"]
iidx = pd.MultiIndex.from_arrays([
    col1, col2, col3, col4
    ])

In [181]:
mtl_metrics_table = mtl_table.iloc[:, 4:]
values_list = mtl_metrics_table.values.tolist()
mtl_latex_table = pd.DataFrame(
    values_list, columns=cidx, index=iidx)

In [182]:
mtl_latex_table.head(30)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,IBMArgQ,IBMRank,UKPConvArgRank,SwanRank,Webis
All,cross-topic,no-topic,,$\rho: 0.495 \pm 0.015$,$\rho: 0.44 \pm 0.064$,$\rho: 0.416 \pm 0.11$,$\rho: 0.505 \pm 0.132$,$\rho: 0.79 \pm 0.047$
All,cross-topic,no-topic,,$\sigma: 0.439 \pm 0.014$,$\sigma: 0.421 \pm 0.058$,$\sigma: 0.499 \pm 0.145$,$\sigma: 0.492 \pm 0.145$,$\sigma: 0.794 \pm 0.051$
All,cross-topic,no-topic,mean,$\rho: 0.4717$,$\rho: 0.3574$,$\rho: 0.402$,$\rho: 0.6171$,$\rho: 0.7976$
All,cross-topic,no-topic,mean,$\sigma: 0.4101$,$\sigma: 0.3375$,$\sigma: 0.69$,$\sigma: 0.624$,$\sigma: 0.7921$
All,cross-topic,no-topic,var,$\rho: 0.2748$,$\rho: 0.1683$,$\rho: 0.2586$,$\rho: 0.3982$,$\rho: 0.5949$
All,cross-topic,no-topic,var,$\sigma: 0.1804$,$\sigma: 0.0559$,$\sigma: 0.5811$,$\sigma: 0.4466$,$\sigma: 0.5045$
All,cross-topic,no-topic,wt-var,$\sigma: 0.3861$,$\sigma: 0.3225$,$\sigma: 0.6965$,$\sigma: 0.5988$,$\sigma: 0.753$
All,cross-topic,no-topic,wt-var,$\rho: 0.449$,$\rho: 0.3422$,$\rho: 0.3987$,$\rho: 0.5932$,$\rho: 0.779$
All,cross-topic,topic,,$\sigma: 0.422 \pm 0.01$,$\sigma: 0.407 \pm 0.069$,$\sigma: 0.502 \pm 0.165$,$\sigma: 0.484 \pm 0.153$,$\sigma: 0.791 \pm 0.044$
All,cross-topic,topic,,$\rho: 0.482 \pm 0.014$,$\rho: 0.429 \pm 0.072$,$\rho: 0.434 \pm 0.096$,$\rho: 0.494 \pm 0.141$,$\rho: 0.786 \pm 0.049$


In [189]:
pd.options.display.float_format = '{:,.3f}'.format
ltx_code = mtl_latex_table.to_latex(
    caption="Multi Task Learning",
    longtable=False,
    header=["IBMArgQ", "IBMRank", "UKPConvArgRank", "SwanRank", "Webis"],
    position="H",
    escape=False,
    multirow=True,
    float_format="%.3f",
    column_format="|l|l|l|l|lllll|",
)

In [190]:
print(ltx_code)

\begin{table}[H]
\centering
\caption{Multi Task Learning}
\begin{tabular}{|l|l|l|l|lllll|}
\toprule
                 &          &       &        &                    IBMArgQ &                    IBMRank &             UKPConvArgRank &                   SwanRank &                      Webis \\
\midrule
\multirow{32}{*}{All} & \multirow{16}{*}{cross-topic} & \multirow{8}{*}{no-topic} & None &    $\rho: 0.495 \pm 0.015$ &     $\rho: 0.44 \pm 0.064$ &     $\rho: 0.416 \pm 0.11$ &    $\rho: 0.505 \pm 0.132$ &     $\rho: 0.79 \pm 0.047$ \\
                 &          &       & None &  $\sigma: 0.439 \pm 0.014$ &  $\sigma: 0.421 \pm 0.058$ &  $\sigma: 0.499 \pm 0.145$ &  $\sigma: 0.492 \pm 0.145$ &  $\sigma: 0.794 \pm 0.051$ \\
                 &          &       & mean &             $\rho: 0.4717$ &             $\rho: 0.3574$ &              $\rho: 0.402$ &             $\rho: 0.6171$ &             $\rho: 0.7976$ \\
                 &          &       & mean &           $\sigma: 0.4101$ &      