In [1]:
import os
import glob

import mlflow
from mlflow import log_metric, log_param, MlflowClient
from mlflow.entities import ViewType
from IPython.display import display, Markdown
import pandas as pd

# Model Wide Metrics

In [2]:
# Names of every entry found directly under ./mlruns.
# NOTE(review): presumably one sub-directory per MLflow experiment, but
# os.listdir also returns any other files/folders stored there — confirm
# before reading len(exp_ids) as an experiment count.
exp_ids = os.listdir('./mlruns')

In [3]:
len(exp_ids)

221

## Group types of models (experiment type and model type) and pick best performing in terms of f1-score per unit of analysis
 
 * Create dictionary for experiment types and model types
 * Create two dataframes (one for each of the previous groups) and pick the runs with the maximum test F1 score (`test_f1_micro_mean`)

In [4]:
# Every CSV in the working directory is treated as a results file.
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the concatenated frame (and therefore the tie-breaking
# between equally scored runs) reproducible across machines and re-runs.
df_filepaths = sorted(glob.glob('./*.csv'))

In [5]:
# Stack the per-file results into one frame keyed by (run_uuid, runName).
# Every other field (model_type, ...) stays a regular column so that later
# cells can access and group on it directly.
mw_df = pd.concat([
    pd.read_csv(csv_path, index_col=['run_uuid', 'runName'])
    for csv_path in df_filepaths
])

In [6]:
# Remove pilcrow signs from both ends of the model-type labels.
# NOTE(review): presumably heading-anchor residue from where the labels were
# collected — confirm against the CSV producer.
mw_df['model_type'] = mw_df['model_type'].str.strip('¶')

In [7]:
mw_df.shape

(3400, 48)

In [8]:
def get_best_models(df, grouping_criterion):
    """Return, for every group, the run with the highest ``test_f1_micro_mean``.

    Parameters
    ----------
    df : pd.DataFrame
        Run-level results. ``test_f1_micro_mean`` and every name listed in
        ``grouping_criterion`` must be available as columns once the index
        has been reset.
    grouping_criterion : list of str
        Column names that define a group.

    Returns
    -------
    pd.DataFrame
        One row per group, indexed by ``grouping_criterion``. The former index
        levels of ``df`` are kept as ordinary columns. Groups without any
        non-missing score are left out.
    """
    # Flatten the index once (the original did it twice); the fresh positional
    # labels are unique even when ``df`` carries repeated index entries.
    runs = df.reset_index()

    # Discard unscored rows up front instead of relying on idxmax to hand back
    # NaN for all-NaN groups, which recent pandas releases deprecate.
    scored_runs = runs.dropna(subset=['test_f1_micro_mean'])
    best_row_labels = scored_runs.groupby(grouping_criterion)['test_f1_micro_mean'].idxmax().dropna()

    return runs.loc[best_row_labels].set_index(grouping_criterion)

### Generate the tables to report

In [9]:
def display_performance_table(df, metric, index_cols=['model_type'], display_=True):
    """Build (and optionally render) a "mean plus-minus std" table for one metric.

    Rows are the combinations of ``index_cols``; columns are the units of
    analysis in a fixed reporting order. The cell(s) holding the highest mean
    are wrapped in ``**`` so they render bold in Markdown.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide ``test_<metric>_mean``, ``test_<metric>_std``,
        ``unit_of_analysis`` and every name in ``index_cols`` as columns once
        the index has been reset.
    metric : str
        Metric name used to build the ``test_<metric>_mean`` / ``_std`` column names.
    index_cols : list of str
        Columns used as the row index of the report (the default list is never mutated).
    display_ : bool
        When true, the table is rendered as Markdown in the notebook.

    Returns
    -------
    pd.DataFrame
        The formatted table (string cells; ``0`` where a combination is missing).
    """
    report_table = df.reset_index()

    mean_text = report_table[f'test_{metric}_mean'].map(lambda value: f'{value:.2f}')
    std_text = report_table[f'test_{metric}_std'].map(lambda value: f'{value:.2f}')
    # Raw string: '\p' is not a valid escape sequence and newer Python versions
    # warn about it. Object dtype lets the integer fill_value used below
    # coexist with the formatted strings.
    report_table['result'] = (mean_text + r' $\pm$ ' + std_text).astype(object)

    report_table['col_title'] = report_table.unit_of_analysis.str.split('_').str.join(' ')
    # Ordered categorical so the pivoted columns follow the reporting order
    # rather than alphabetical order.
    report_table['col_title'] = pd.Categorical(
        report_table.col_title,
        categories=['title', 'title and first paragraph', 'title and 5 sentences', 'title and 10 sentences',
                    'title and first sentence each paragraph', 'raw text'],
        ordered=True)
    report_table = report_table[index_cols + ['col_title', 'result']]\
        .pivot_table(index=index_cols, columns=['col_title'], values=['result'], aggfunc='first', fill_value=0)\
        .droplevel(0, axis=1)

    report_table.columns.names = [None]

    # Highlight best scoring models according to their average: the mean is the
    # first space-separated token of every cell.
    def _mean_of_cell(cell):
        return float(str(cell).split(' ')[0])

    # Column-wise Series.map instead of the deprecated DataFrame.applymap.
    mean_perf_arr = report_table.apply(lambda column: column.map(_mean_of_cell)).to_numpy()
    highlight_mask = mean_perf_arr == mean_perf_arr.max()

    # Work on an explicit copy and rebuild the frame. Writing through
    # ``to_numpy()`` only works while it happens to return a writable view of
    # the frame's data, which is not guaranteed.
    cells = report_table.to_numpy(dtype=object, copy=True)
    cells[highlight_mask] = [f'**{cell}**' for cell in cells[highlight_mask]]
    report_table = pd.DataFrame(cells, index=report_table.index, columns=report_table.columns)

    if display_:
        display(Markdown(report_table.to_markdown()))

    return report_table

### Generate tables for all languages

In [10]:
# Metrics reported for every language, in display order. Each entry is
# expanded into the test_<metric>_mean / test_<metric>_std column names.
metrics_to_report = [
    'f1_micro',
    'recall_micro',
    'precision_micro',
    'accuracy',
]

In [11]:
# Language code (as stored in the results' `language` column) -> display name
# used in report headings and output file names.
# NOTE(review): 'po' and 'ge' are not the ISO 639-1 codes for Polish/German;
# presumably they mirror the dataset's own codes — confirm.
language_dict = {
    'en': 'English',
    'it': 'Italian',
    'fr': 'French',
    'po': 'Polish',
    'ru': 'Russian',
    'ge': 'German',
}

In [12]:
def display_metrics_and_write_to_file(df, grouping_criterion, output_dir):
    """Render the per-language report tables and save them as Markdown and LaTeX.

    For every value of ``df['language']`` the best run per
    ``grouping_criterion`` and unit of analysis is selected with
    ``get_best_models``; one table per entry of ``metrics_to_report`` is then
    displayed and written to::

        <output_dir>/<metric>/markdown/<Language>.md
        <output_dir>/<metric>/latex/<Language>_<metric>.tex

    Parameters
    ----------
    df : pd.DataFrame
        Run-level results with a ``language`` column plus everything
        ``get_best_models`` and ``display_performance_table`` need.
    grouping_criterion : list of str
        Columns identifying a model (group); also used as the table's row index.
    output_dir : str
        Root directory for the generated files; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    for language, results_df in df.groupby('language'):
        best_models_df = get_best_models(results_df, grouping_criterion + ['unit_of_analysis'])

        # Fall back to the raw code so an unmapped language does not abort the
        # whole report with a KeyError.
        language_name = language_dict.get(language, language)
        display(Markdown(f'# {language_name}'))

        for metric in metrics_to_report:
            # makedirs creates the intermediate <output_dir>/<metric> level too.
            output_dir_markdown = os.path.join(output_dir, metric, 'markdown')
            output_dir_latex = os.path.join(output_dir, metric, 'latex')
            os.makedirs(output_dir_markdown, exist_ok=True)
            os.makedirs(output_dir_latex, exist_ok=True)

            display(Markdown(f'## {metric}'))

            report_table = display_performance_table(df=best_models_df, index_cols=grouping_criterion, metric=metric, display_=True)

            # Context managers close the files even when writing raises.
            markdown_path = os.path.join(output_dir_markdown, f"{language_name}.md")
            with open(markdown_path, "w", encoding="utf-8") as markdown_file:
                report_table.to_markdown(markdown_file)

            latex_path = os.path.join(output_dir_latex, f"{language_name}_{metric}.tex")
            with open(latex_path, "w", encoding="utf-8") as latex_file:
                report_table.to_latex(latex_file)
        

# Per model type

In [13]:
# One row per model type; the best run of each type is reported.
display_metrics_and_write_to_file(
    df=mw_df,
    grouping_criterion=['model_type'],
    output_dir='per_model_type_tables',
)

# Russian

## f1_micro

| model_type                      | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:--------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| Binary Relevance kNN            | 0.23 $\pm$ 0.02 | 0.24 $\pm$ 0.01             | 0.32 $\pm$ 0.01         | 0.33 $\pm$ 0.04          | 0.33 $\pm$ 0.01                           | 0.33 $\pm$ 0.04     |
| ComplementNB                    | 0.36 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.43 $\pm$ 0.01         | 0.49 $\pm$ 0.03          | 0.48 $\pm$ 0.02                           | 0.51 $\pm$ 0.01     |
| KNN                             | 0.36 $\pm$ 0.02 | 0.37 $\pm$ 0.01             | 0.40 $\pm$ 0.02         | 0.43 $\pm$ 0.01          | 0.44 $\pm$ 0.01                           | 0.46 $\pm$ 0.01     |
| LinearSVM                       | 0.37 $\pm$ 0.00 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.01         | 0.46 $\pm$ 0.03          | 0.46 $\pm$ 0.01                           | 0.50 $\pm$ 0.02     |
| LogisticRegression              | 0.31 $\pm$ 0.02 | 0.37 $\pm$ 0.01             | 0.36 $\pm$ 0.02         | 0.42 $\pm$ 0.01          | 0.41 $\pm$ 0.02                           | 0.45 $\pm$ 0.02     |
| LogisticRegressionElasticNet    | 0.33 $\pm$ 0.02 | 0.38 $\pm$ 0.02             | 0.40 $\pm$ 0.05         | 0.44 $\pm$ 0.01          | 0.44 $\pm$ 0.02                           | 0.49 $\pm$ 0.01     |
| LogisticRegressionLasso         | 0.34 $\pm$ 0.01 | 0.34 $\pm$ 0.02             | 0.40 $\pm$ 0.01         | 0.44 $\pm$ 0.01          | 0.44 $\pm$ 0.02                           | 0.49 $\pm$ 0.02     |
| LogisticRegressionRidge         | 0.37 $\pm$ 0.01 | 0.43 $\pm$ 0.02             | 0.45 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.50 $\pm$ 0.00                           | 0.51 $\pm$ 0.01     |
| Multi-label ARAM                | 0.15 $\pm$ 0.01 | 0.18 $\pm$ 0.09             | 0.18 $\pm$ 0.08         | 0.24 $\pm$ 0.09          | 0.22 $\pm$ 0.03                           | 0.26 $\pm$ 0.04     |
| Multilabel k Nearest Neighbours | 0.31 $\pm$ 0.02 | 0.34 $\pm$ 0.02             | 0.37 $\pm$ 0.03         | 0.38 $\pm$ 0.04          | 0.37 $\pm$ 0.03                           | 0.40 $\pm$ 0.03     |
| NaiveBayes                      | 0.37 $\pm$ 0.01 | 0.43 $\pm$ 0.03             | 0.45 $\pm$ 0.01         | 0.49 $\pm$ 0.01          | 0.50 $\pm$ 0.01                           | 0.52 $\pm$ 0.01     |
| RandomForest                    | 0.27 $\pm$ 0.02 | 0.31 $\pm$ 0.02             | 0.35 $\pm$ 0.03         | 0.40 $\pm$ 0.01          | 0.37 $\pm$ 0.04                           | 0.44 $\pm$ 0.03     |
| RidgeClassifier                 | 0.32 $\pm$ 0.01 | 0.33 $\pm$ 0.03             | 0.35 $\pm$ 0.01         | 0.44 $\pm$ 0.02          | 0.44 $\pm$ 0.01                           | 0.47 $\pm$ 0.02     |
| SVM                             | 0.35 $\pm$ 0.01 | 0.40 $\pm$ 0.01             | 0.44 $\pm$ 0.03         | 0.48 $\pm$ 0.02          | 0.48 $\pm$ 0.03                           | **0.53 $\pm$ 0.04** |
| XGBoost                         | 0.36 $\pm$ 0.02 | 0.36 $\pm$ 0.03             | 0.38 $\pm$ 0.02         | 0.43 $\pm$ 0.00          | 0.42 $\pm$ 0.03                           | 0.48 $\pm$ 0.01     |

  report_table.to_latex(latex_file)


## recall_micro

| model_type                      | title               | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:--------------------------------|:--------------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| Binary Relevance kNN            | 0.19 $\pm$ 0.02     | 0.23 $\pm$ 0.02             | 0.31 $\pm$ 0.01         | 0.29 $\pm$ 0.04          | 0.30 $\pm$ 0.01                           | 0.31 $\pm$ 0.02 |
| ComplementNB                    | 0.65 $\pm$ 0.11     | 0.56 $\pm$ 0.03             | 0.60 $\pm$ 0.08         | 0.57 $\pm$ 0.04          | 0.56 $\pm$ 0.04                           | 0.61 $\pm$ 0.05 |
| KNN                             | **0.74 $\pm$ 0.20** | 0.63 $\pm$ 0.06             | 0.58 $\pm$ 0.04         | 0.67 $\pm$ 0.03          | 0.67 $\pm$ 0.01                           | 0.60 $\pm$ 0.05 |
| LinearSVM                       | 0.49 $\pm$ 0.01     | 0.66 $\pm$ 0.06             | 0.55 $\pm$ 0.07         | 0.60 $\pm$ 0.06          | 0.67 $\pm$ 0.03                           | 0.58 $\pm$ 0.01 |
| LogisticRegression              | 0.34 $\pm$ 0.03     | 0.40 $\pm$ 0.04             | 0.34 $\pm$ 0.03         | 0.44 $\pm$ 0.03          | 0.46 $\pm$ 0.03                           | 0.49 $\pm$ 0.02 |
| LogisticRegressionElasticNet    | 0.37 $\pm$ 0.03     | 0.43 $\pm$ 0.03             | 0.45 $\pm$ 0.08         | 0.55 $\pm$ 0.06          | 0.47 $\pm$ 0.03                           | 0.60 $\pm$ 0.04 |
| LogisticRegressionLasso         | 0.38 $\pm$ 0.01     | 0.30 $\pm$ 0.02             | 0.42 $\pm$ 0.03         | 0.56 $\pm$ 0.01          | 0.52 $\pm$ 0.03                           | 0.63 $\pm$ 0.05 |
| LogisticRegressionRidge         | 0.54 $\pm$ 0.04     | 0.69 $\pm$ 0.04             | 0.68 $\pm$ 0.05         | 0.63 $\pm$ 0.04          | 0.58 $\pm$ 0.05                           | 0.55 $\pm$ 0.02 |
| Multi-label ARAM                | 0.34 $\pm$ 0.06     | 0.43 $\pm$ 0.23             | 0.39 $\pm$ 0.20         | 0.54 $\pm$ 0.20          | 0.47 $\pm$ 0.10                           | 0.38 $\pm$ 0.06 |
| Multilabel k Nearest Neighbours | 0.33 $\pm$ 0.03     | 0.34 $\pm$ 0.02             | 0.38 $\pm$ 0.04         | 0.34 $\pm$ 0.04          | 0.39 $\pm$ 0.03                           | 0.35 $\pm$ 0.03 |
| NaiveBayes                      | 0.67 $\pm$ 0.09     | 0.57 $\pm$ 0.04             | 0.52 $\pm$ 0.03         | 0.58 $\pm$ 0.05          | 0.59 $\pm$ 0.03                           | 0.58 $\pm$ 0.03 |
| RandomForest                    | 0.49 $\pm$ 0.05     | 0.43 $\pm$ 0.02             | 0.44 $\pm$ 0.04         | 0.40 $\pm$ 0.06          | 0.38 $\pm$ 0.05                           | 0.43 $\pm$ 0.05 |
| RidgeClassifier                 | 0.54 $\pm$ 0.01     | 0.29 $\pm$ 0.04             | 0.28 $\pm$ 0.01         | 0.41 $\pm$ 0.02          | 0.44 $\pm$ 0.01                           | 0.45 $\pm$ 0.03 |
| SVM                             | 0.46 $\pm$ 0.06     | 0.46 $\pm$ 0.06             | 0.47 $\pm$ 0.05         | 0.59 $\pm$ 0.02          | 0.59 $\pm$ 0.04                           | 0.62 $\pm$ 0.05 |
| XGBoost                         | 0.57 $\pm$ 0.10     | 0.57 $\pm$ 0.01             | 0.50 $\pm$ 0.01         | 0.51 $\pm$ 0.03          | 0.54 $\pm$ 0.01                           | 0.56 $\pm$ 0.03 |

  report_table.to_latex(latex_file)


## precision_micro

| model_type                      | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:--------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| Binary Relevance kNN            | 0.34 $\pm$ 0.02 | 0.34 $\pm$ 0.04             | 0.39 $\pm$ 0.01         | 0.46 $\pm$ 0.06          | 0.43 $\pm$ 0.03                           | 0.42 $\pm$ 0.08     |
| ComplementNB                    | 0.27 $\pm$ 0.02 | 0.34 $\pm$ 0.03             | 0.36 $\pm$ 0.04         | 0.46 $\pm$ 0.08          | 0.43 $\pm$ 0.03                           | 0.45 $\pm$ 0.01     |
| KNN                             | 0.27 $\pm$ 0.04 | 0.28 $\pm$ 0.02             | 0.32 $\pm$ 0.02         | 0.33 $\pm$ 0.02          | 0.35 $\pm$ 0.02                           | 0.39 $\pm$ 0.02     |
| LinearSVM                       | 0.32 $\pm$ 0.00 | 0.31 $\pm$ 0.02             | 0.38 $\pm$ 0.03         | 0.40 $\pm$ 0.06          | 0.37 $\pm$ 0.02                           | 0.45 $\pm$ 0.04     |
| LogisticRegression              | 0.31 $\pm$ 0.02 | 0.37 $\pm$ 0.02             | 0.41 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.38 $\pm$ 0.01                           | 0.42 $\pm$ 0.02     |
| LogisticRegressionElasticNet    | 0.30 $\pm$ 0.02 | 0.36 $\pm$ 0.01             | 0.38 $\pm$ 0.03         | 0.37 $\pm$ 0.04          | 0.42 $\pm$ 0.01                           | 0.44 $\pm$ 0.00     |
| LogisticRegressionLasso         | 0.33 $\pm$ 0.02 | 0.40 $\pm$ 0.06             | 0.42 $\pm$ 0.02         | 0.39 $\pm$ 0.02          | 0.39 $\pm$ 0.03                           | 0.42 $\pm$ 0.01     |
| LogisticRegressionRidge         | 0.29 $\pm$ 0.00 | 0.33 $\pm$ 0.02             | 0.35 $\pm$ 0.03         | 0.41 $\pm$ 0.03          | 0.46 $\pm$ 0.03                           | 0.50 $\pm$ 0.03     |
| Multi-label ARAM                | 0.09 $\pm$ 0.01 | 0.12 $\pm$ 0.06             | 0.12 $\pm$ 0.05         | 0.16 $\pm$ 0.06          | 0.16 $\pm$ 0.03                           | 0.23 $\pm$ 0.04     |
| Multilabel k Nearest Neighbours | 0.30 $\pm$ 0.02 | 0.37 $\pm$ 0.02             | 0.39 $\pm$ 0.03         | 0.49 $\pm$ 0.01          | 0.39 $\pm$ 0.07                           | **0.56 $\pm$ 0.05** |
| NaiveBayes                      | 0.28 $\pm$ 0.01 | 0.36 $\pm$ 0.03             | 0.41 $\pm$ 0.01         | 0.44 $\pm$ 0.04          | 0.45 $\pm$ 0.01                           | 0.48 $\pm$ 0.01     |
| RandomForest                    | 0.24 $\pm$ 0.02 | 0.26 $\pm$ 0.01             | 0.31 $\pm$ 0.02         | 0.45 $\pm$ 0.09          | 0.38 $\pm$ 0.03                           | 0.47 $\pm$ 0.02     |
| RidgeClassifier                 | 0.26 $\pm$ 0.01 | 0.44 $\pm$ 0.01             | 0.54 $\pm$ 0.02         | 0.50 $\pm$ 0.03          | 0.46 $\pm$ 0.01                           | 0.52 $\pm$ 0.03     |
| SVM                             | 0.30 $\pm$ 0.01 | 0.39 $\pm$ 0.06             | 0.45 $\pm$ 0.01         | 0.44 $\pm$ 0.04          | 0.45 $\pm$ 0.06                           | 0.50 $\pm$ 0.05     |
| XGBoost                         | 0.27 $\pm$ 0.01 | 0.28 $\pm$ 0.03             | 0.32 $\pm$ 0.03         | 0.40 $\pm$ 0.01          | 0.37 $\pm$ 0.03                           | 0.45 $\pm$ 0.04     |

  report_table.to_latex(latex_file)


## accuracy

| model_type                      | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:--------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| Binary Relevance kNN            | 0.05 $\pm$ 0.01 | 0.07 $\pm$ 0.03             | 0.05 $\pm$ 0.02         | 0.10 $\pm$ 0.03          | 0.05 $\pm$ 0.03                           | 0.06 $\pm$ 0.05     |
| ComplementNB                    | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.05 $\pm$ 0.02          | 0.06 $\pm$ 0.05                           | 0.06 $\pm$ 0.01     |
| KNN                             | 0.01 $\pm$ 0.02 | 0.00 $\pm$ 0.00             | 0.00 $\pm$ 0.00         | 0.02 $\pm$ 0.00          | 0.01 $\pm$ 0.01                           | 0.04 $\pm$ 0.02     |
| LinearSVM                       | 0.00 $\pm$ 0.00 | 0.01 $\pm$ 0.01             | 0.02 $\pm$ 0.01         | 0.02 $\pm$ 0.02          | 0.02 $\pm$ 0.01                           | 0.07 $\pm$ 0.03     |
| LogisticRegression              | 0.02 $\pm$ 0.01 | 0.06 $\pm$ 0.01             | 0.08 $\pm$ 0.02         | 0.04 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.04 $\pm$ 0.02     |
| LogisticRegressionElasticNet    | 0.01 $\pm$ 0.02 | 0.03 $\pm$ 0.01             | 0.04 $\pm$ 0.01         | 0.02 $\pm$ 0.02          | 0.03 $\pm$ 0.00                           | 0.03 $\pm$ 0.01     |
| LogisticRegressionLasso         | 0.02 $\pm$ 0.02 | 0.04 $\pm$ 0.04             | 0.03 $\pm$ 0.01         | 0.03 $\pm$ 0.01          | 0.05 $\pm$ 0.01                           | 0.02 $\pm$ 0.01     |
| LogisticRegressionRidge         | 0.00 $\pm$ 0.00 | 0.00 $\pm$ 0.00             | 0.01 $\pm$ 0.01         | 0.02 $\pm$ 0.01          | 0.08 $\pm$ 0.02                           | 0.11 $\pm$ 0.02     |
| Multi-label ARAM                | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.03 $\pm$ 0.04         | 0.02 $\pm$ 0.03          | 0.00 $\pm$ 0.00                           | 0.01 $\pm$ 0.01     |
| Multilabel k Nearest Neighbours | 0.02 $\pm$ 0.01 | 0.05 $\pm$ 0.01             | 0.05 $\pm$ 0.01         | 0.10 $\pm$ 0.01          | 0.09 $\pm$ 0.04                           | 0.10 $\pm$ 0.04     |
| NaiveBayes                      | 0.00 $\pm$ 0.00 | 0.02 $\pm$ 0.00             | 0.04 $\pm$ 0.01         | 0.04 $\pm$ 0.01          | 0.05 $\pm$ 0.00                           | 0.07 $\pm$ 0.02     |
| RandomForest                    | 0.00 $\pm$ 0.00 | 0.00 $\pm$ 0.00             | 0.01 $\pm$ 0.02         | 0.06 $\pm$ 0.03          | 0.04 $\pm$ 0.02                           | 0.09 $\pm$ 0.01     |
| RidgeClassifier                 | 0.01 $\pm$ 0.01 | 0.06 $\pm$ 0.01             | 0.09 $\pm$ 0.02         | 0.06 $\pm$ 0.01          | 0.07 $\pm$ 0.01                           | 0.09 $\pm$ 0.01     |
| SVM                             | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.03 $\pm$ 0.03          | 0.03 $\pm$ 0.03                           | 0.03 $\pm$ 0.02     |
| XGBoost                         | 0.00 $\pm$ 0.00 | 0.01 $\pm$ 0.01             | 0.03 $\pm$ 0.01         | 0.05 $\pm$ 0.02          | 0.05 $\pm$ 0.02                           | **0.12 $\pm$ 0.01** |

  report_table.to_latex(latex_file)


# Per model sub-type

In [None]:
# One row per (model type, model sub-type) pair.
display_metrics_and_write_to_file(
    df=mw_df,
    grouping_criterion=['model_type', 'model_subtype'],
    output_dir='per_model_subtype_tables',
)

# Russian

## f1_micro

|                                                              | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-------------------------------------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| ('Binary Relevance kNN', 'Natively Multilabel')              | 0.23 $\pm$ 0.02 | 0.24 $\pm$ 0.01             | 0.32 $\pm$ 0.01         | 0.33 $\pm$ 0.04          | 0.33 $\pm$ 0.01                           | 0.33 $\pm$ 0.04     |
| ('ComplementNB', 'RakelD Partitioning of labels')            | 0.36 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.43 $\pm$ 0.01         | 0.49 $\pm$ 0.03          | 0.48 $\pm$ 0.02                           | 0.51 $\pm$ 0.01     |
| ('KNN', 'BorderlineSMOTE')                                   | 0.35 $\pm$ 0.00 | 0.37 $\pm$ 0.01             | 0.40 $\pm$ 0.01         | 0.43 $\pm$ 0.01          | 0.43 $\pm$ 0.01                           | 0.45 $\pm$ 0.02     |
| ('KNN', 'No Upsampling')                                     | 0.30 $\pm$ 0.03 | 0.30 $\pm$ 0.03             | 0.37 $\pm$ 0.01         | 0.36 $\pm$ 0.02          | 0.38 $\pm$ 0.04                           | 0.42 $\pm$ 0.02     |
| ('KNN', 'Random Oversampling')                               | 0.36 $\pm$ 0.01 | 0.36 $\pm$ 0.01             | 0.40 $\pm$ 0.01         | 0.43 $\pm$ 0.01          | 0.44 $\pm$ 0.02                           | 0.45 $\pm$ 0.01     |
| ('KNN', 'SMOTE')                                             | 0.36 $\pm$ 0.02 | 0.35 $\pm$ 0.01             | 0.40 $\pm$ 0.02         | 0.43 $\pm$ 0.01          | 0.44 $\pm$ 0.01                           | 0.44 $\pm$ 0.01     |
| ('KNN', 'SVMSMOTE')                                          | 0.35 $\pm$ 0.01 | 0.28 $\pm$ 0.12             | 0.39 $\pm$ 0.02         | 0.43 $\pm$ 0.01          | 0.43 $\pm$ 0.02                           | 0.46 $\pm$ 0.01     |
| ('LinearSVM', 'BorderlineSMOTE')                             | 0.37 $\pm$ 0.00 | 0.39 $\pm$ 0.01             | 0.44 $\pm$ 0.01         | 0.46 $\pm$ 0.02          | 0.46 $\pm$ 0.01                           | 0.48 $\pm$ 0.01     |
| ('LinearSVM', 'No Upsampling')                               | 0.37 $\pm$ 0.01 | 0.40 $\pm$ 0.02             | 0.43 $\pm$ 0.01         | 0.46 $\pm$ 0.03          | 0.46 $\pm$ 0.01                           | 0.49 $\pm$ 0.03     |
| ('LinearSVM', 'RakelD Partitioning of labels')               | 0.36 $\pm$ 0.01 | 0.41 $\pm$ 0.01             | 0.43 $\pm$ 0.02         | 0.46 $\pm$ 0.02          | 0.45 $\pm$ 0.01                           | 0.50 $\pm$ 0.02     |
| ('LinearSVM', 'Random Oversampling')                         | 0.37 $\pm$ 0.01 | 0.41 $\pm$ 0.03             | 0.43 $\pm$ 0.01         | 0.45 $\pm$ 0.03          | 0.46 $\pm$ 0.02                           | 0.49 $\pm$ 0.01     |
| ('LinearSVM', 'SMOTE')                                       | 0.36 $\pm$ 0.01 | 0.39 $\pm$ 0.01             | 0.44 $\pm$ 0.01         | 0.46 $\pm$ 0.01          | 0.45 $\pm$ 0.01                           | 0.48 $\pm$ 0.02     |
| ('LogisticRegression', 'BorderlineSMOTE')                    | 0.30 $\pm$ 0.00 | 0.37 $\pm$ 0.01             | 0.36 $\pm$ 0.02         | 0.42 $\pm$ 0.01          | 0.41 $\pm$ 0.02                           | 0.45 $\pm$ 0.02     |
| ('LogisticRegression', 'No Upsampling')                      | 0.31 $\pm$ 0.02 | 0.33 $\pm$ 0.02             | 0.31 $\pm$ 0.04         | 0.40 $\pm$ 0.01          | 0.40 $\pm$ 0.02                           | 0.42 $\pm$ 0.03     |
| ('LogisticRegression', 'Random Oversampling')                | 0.31 $\pm$ 0.01 | 0.33 $\pm$ 0.02             | 0.32 $\pm$ 0.03         | 0.40 $\pm$ 0.02          | 0.39 $\pm$ 0.02                           | 0.42 $\pm$ 0.04     |
| ('LogisticRegression', 'SMOTE')                              | 0.31 $\pm$ 0.02 | 0.34 $\pm$ 0.02             | 0.33 $\pm$ 0.04         | 0.40 $\pm$ 0.00          | 0.40 $\pm$ 0.01                           | 0.43 $\pm$ 0.04     |
| ('LogisticRegression', 'SVMSMOTE')                           | 0.30 $\pm$ 0.03 | 0.33 $\pm$ 0.02             | 0.32 $\pm$ 0.03         | 0.38 $\pm$ 0.01          | 0.38 $\pm$ 0.01                           | 0.41 $\pm$ 0.03     |
| ('LogisticRegressionElasticNet', 'BorderlineSMOTE')          | 0.33 $\pm$ 0.02 | 0.38 $\pm$ 0.02             | 0.40 $\pm$ 0.05         | 0.43 $\pm$ 0.02          | 0.44 $\pm$ 0.02                           | 0.48 $\pm$ 0.02     |
| ('LogisticRegressionElasticNet', 'Random Oversampling')      | 0.32 $\pm$ 0.04 | 0.35 $\pm$ 0.03             | 0.40 $\pm$ 0.04         | 0.43 $\pm$ 0.01          | 0.43 $\pm$ 0.04                           | 0.49 $\pm$ 0.01     |
| ('LogisticRegressionElasticNet', 'SMOTE')                    | 0.32 $\pm$ 0.02 | 0.37 $\pm$ 0.02             | 0.38 $\pm$ 0.04         | 0.44 $\pm$ 0.01          | 0.42 $\pm$ 0.02                           | 0.48 $\pm$ 0.01     |
| ('LogisticRegressionElasticNet', 'SVMSMOTE')                 | 0.33 $\pm$ 0.02 | 0.35 $\pm$ 0.01             | 0.40 $\pm$ 0.00         | 0.43 $\pm$ 0.01          | 0.43 $\pm$ 0.02                           | 0.48 $\pm$ 0.02     |
| ('LogisticRegressionLasso', 'BorderlineSMOTE')               | 0.34 $\pm$ 0.03 | 0.32 $\pm$ 0.02             | 0.39 $\pm$ 0.03         | 0.44 $\pm$ 0.01          | 0.42 $\pm$ 0.01                           | 0.48 $\pm$ 0.02     |
| ('LogisticRegressionLasso', 'No Upsampling')                 | 0.33 $\pm$ 0.02 | 0.33 $\pm$ 0.02             | 0.40 $\pm$ 0.01         | 0.44 $\pm$ 0.00          | 0.44 $\pm$ 0.02                           | 0.49 $\pm$ 0.02     |
| ('LogisticRegressionLasso', 'Random Oversampling')           | 0.32 $\pm$ 0.03 | 0.33 $\pm$ 0.03             | 0.39 $\pm$ 0.02         | 0.43 $\pm$ 0.01          | 0.42 $\pm$ 0.02                           | 0.48 $\pm$ 0.03     |
| ('LogisticRegressionLasso', 'SMOTE')                         | 0.33 $\pm$ 0.02 | 0.34 $\pm$ 0.02             | 0.38 $\pm$ 0.03         | 0.43 $\pm$ 0.01          | 0.43 $\pm$ 0.03                           | 0.47 $\pm$ 0.03     |
| ('LogisticRegressionLasso', 'SVMSMOTE')                      | 0.34 $\pm$ 0.01 | 0.31 $\pm$ 0.05             | 0.36 $\pm$ 0.00         | 0.43 $\pm$ 0.02          | 0.43 $\pm$ 0.03                           | 0.49 $\pm$ 0.02     |
| ('LogisticRegressionRidge', 'BorderlineSMOTE')               | 0.37 $\pm$ 0.01 | 0.40 $\pm$ 0.02             | 0.42 $\pm$ 0.00         | 0.46 $\pm$ 0.02          | 0.46 $\pm$ 0.03                           | 0.50 $\pm$ 0.00     |
| ('LogisticRegressionRidge', 'No Upsampling')                 | 0.36 $\pm$ 0.01 | 0.40 $\pm$ 0.02             | 0.43 $\pm$ 0.01         | 0.46 $\pm$ 0.02          | 0.46 $\pm$ 0.02                           | 0.49 $\pm$ 0.01     |
| ('LogisticRegressionRidge', 'RakelD Partitioning of labels') | 0.37 $\pm$ 0.01 | 0.43 $\pm$ 0.02             | 0.45 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.50 $\pm$ 0.00                           | 0.51 $\pm$ 0.01     |
| ('LogisticRegressionRidge', 'Random Oversampling')           | 0.37 $\pm$ 0.02 | 0.39 $\pm$ 0.01             | 0.42 $\pm$ 0.01         | 0.46 $\pm$ 0.02          | 0.45 $\pm$ 0.01                           | 0.49 $\pm$ 0.01     |
| ('LogisticRegressionRidge', 'SMOTE')                         | 0.35 $\pm$ 0.00 | 0.39 $\pm$ 0.01             | 0.43 $\pm$ 0.01         | 0.46 $\pm$ 0.02          | 0.44 $\pm$ 0.02                           | 0.47 $\pm$ 0.02     |
| ('LogisticRegressionRidge', 'SVMSMOTE')                      | 0.36 $\pm$ 0.00 | 0.37 $\pm$ 0.02             | 0.42 $\pm$ 0.03         | 0.45 $\pm$ 0.01          | 0.44 $\pm$ 0.01                           | 0.46 $\pm$ 0.01     |
| ('Multi-label ARAM', 'Natively Multilabel')                  | 0.15 $\pm$ 0.01 | 0.18 $\pm$ 0.09             | 0.18 $\pm$ 0.08         | 0.24 $\pm$ 0.09          | 0.22 $\pm$ 0.03                           | 0.26 $\pm$ 0.04     |
| ('Multilabel k Nearest Neighbours', 'Natively Multilabel')   | 0.31 $\pm$ 0.02 | 0.34 $\pm$ 0.02             | 0.37 $\pm$ 0.03         | 0.38 $\pm$ 0.04          | 0.37 $\pm$ 0.03                           | 0.40 $\pm$ 0.03     |
| ('NaiveBayes', 'BorderlineSMOTE')                            | 0.37 $\pm$ 0.01 | 0.43 $\pm$ 0.01             | 0.45 $\pm$ 0.01         | 0.49 $\pm$ 0.01          | 0.49 $\pm$ 0.01                           | 0.51 $\pm$ 0.01     |
| ('NaiveBayes', 'No Upsampling')                              | 0.37 $\pm$ 0.02 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.01         | 0.49 $\pm$ 0.01          | 0.48 $\pm$ 0.02                           | 0.51 $\pm$ 0.02     |
| ('NaiveBayes', 'Random Oversampling')                        | 0.37 $\pm$ 0.02 | 0.42 $\pm$ 0.01             | 0.44 $\pm$ 0.02         | 0.48 $\pm$ 0.01          | 0.48 $\pm$ 0.03                           | 0.51 $\pm$ 0.01     |
| ('NaiveBayes', 'SMOTE')                                      | 0.36 $\pm$ 0.02 | 0.43 $\pm$ 0.03             | 0.45 $\pm$ 0.01         | 0.49 $\pm$ 0.02          | 0.50 $\pm$ 0.01                           | 0.52 $\pm$ 0.01     |
| ('NaiveBayes', 'SVMSMOTE')                                   | 0.37 $\pm$ 0.01 | 0.42 $\pm$ 0.01             | 0.45 $\pm$ 0.02         | 0.49 $\pm$ 0.01          | 0.48 $\pm$ 0.02                           | 0.51 $\pm$ 0.01     |
| ('RandomForest', 'BorderlineSMOTE')                          | 0.26 $\pm$ 0.02 | 0.31 $\pm$ 0.02             | 0.35 $\pm$ 0.03         | 0.39 $\pm$ 0.02          | 0.36 $\pm$ 0.04                           | 0.42 $\pm$ 0.04     |
| ('RandomForest', 'No Upsampling')                            | 0.27 $\pm$ 0.02 | 0.29 $\pm$ 0.02             | 0.35 $\pm$ 0.00         | 0.38 $\pm$ 0.03          | 0.37 $\pm$ 0.03                           | 0.44 $\pm$ 0.03     |
| ('RandomForest', 'Random Oversampling')                      | 0.26 $\pm$ 0.01 | 0.29 $\pm$ 0.01             | 0.34 $\pm$ 0.02         | 0.40 $\pm$ 0.01          | 0.37 $\pm$ 0.03                           | 0.42 $\pm$ 0.06     |
| ('RandomForest', 'SMOTE')                                    | 0.27 $\pm$ 0.00 | 0.30 $\pm$ 0.01             | 0.35 $\pm$ 0.01         | 0.38 $\pm$ 0.01          | 0.37 $\pm$ 0.04                           | 0.42 $\pm$ 0.02     |
| ('RandomForest', 'SVMSMOTE')                                 | 0.26 $\pm$ 0.01 | 0.28 $\pm$ 0.03             | 0.35 $\pm$ 0.02         | 0.37 $\pm$ 0.01          | 0.36 $\pm$ 0.03                           | 0.43 $\pm$ 0.04     |
| ('RidgeClassifier', 'BorderlineSMOTE')                       | 0.32 $\pm$ 0.01 | 0.29 $\pm$ 0.03             | 0.34 $\pm$ 0.02         | 0.44 $\pm$ 0.02          | 0.44 $\pm$ 0.01                           | 0.47 $\pm$ 0.00     |
| ('RidgeClassifier', 'No Upsampling')                         | 0.31 $\pm$ 0.00 | 0.29 $\pm$ 0.04             | 0.35 $\pm$ 0.01         | 0.43 $\pm$ 0.01          | 0.44 $\pm$ 0.01                           | 0.47 $\pm$ 0.02     |
| ('RidgeClassifier', 'Random Oversampling')                   | 0.32 $\pm$ 0.01 | 0.33 $\pm$ 0.03             | 0.34 $\pm$ 0.02         | 0.43 $\pm$ 0.01          | 0.41 $\pm$ 0.03                           | 0.47 $\pm$ 0.02     |
| ('RidgeClassifier', 'SMOTE')                                 | 0.31 $\pm$ 0.00 | 0.29 $\pm$ 0.03             | 0.34 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.41 $\pm$ 0.03                           | 0.47 $\pm$ 0.02     |
| ('RidgeClassifier', 'SVMSMOTE')                              | 0.31 $\pm$ 0.01 | 0.29 $\pm$ 0.02             | 0.32 $\pm$ 0.02         | 0.41 $\pm$ 0.01          | 0.42 $\pm$ 0.02                           | 0.45 $\pm$ 0.02     |
| ('SVM', 'BorderlineSMOTE')                                   | 0.34 $\pm$ 0.01 | 0.37 $\pm$ 0.03             | 0.42 $\pm$ 0.02         | 0.46 $\pm$ 0.01          | 0.47 $\pm$ 0.02                           | 0.50 $\pm$ 0.01     |
| ('SVM', 'No Upsampling')                                     | 0.35 $\pm$ 0.01 | 0.36 $\pm$ 0.02             | 0.43 $\pm$ 0.02         | 0.47 $\pm$ 0.01          | 0.48 $\pm$ 0.01                           | **0.53 $\pm$ 0.04** |
| ('SVM', 'RakelD Partitioning of labels')                     | 0.34 $\pm$ 0.02 | 0.40 $\pm$ 0.01             | 0.39 $\pm$ 0.03         | 0.45 $\pm$ 0.03          | 0.45 $\pm$ 0.00                           | 0.49 $\pm$ 0.00     |
| ('SVM', 'Random Oversampling')                               | 0.35 $\pm$ 0.01 | 0.39 $\pm$ 0.03             | 0.40 $\pm$ 0.03         | 0.48 $\pm$ 0.02          | 0.48 $\pm$ 0.03                           | 0.50 $\pm$ 0.02     |
| ('SVM', 'SMOTE')                                             | 0.34 $\pm$ 0.01 | 0.36 $\pm$ 0.04             | 0.41 $\pm$ 0.00         | 0.48 $\pm$ 0.02          | 0.47 $\pm$ 0.02                           | 0.50 $\pm$ 0.01     |
| ('SVM', 'SVMSMOTE')                                          | 0.34 $\pm$ 0.01 | 0.36 $\pm$ 0.00             | 0.44 $\pm$ 0.03         | 0.44 $\pm$ 0.03          | 0.47 $\pm$ 0.02                           | 0.49 $\pm$ 0.01     |
| ('XGBoost', 'BorderlineSMOTE')                               | 0.36 $\pm$ 0.04 | 0.35 $\pm$ 0.02             | 0.38 $\pm$ 0.02         | 0.41 $\pm$ 0.04          | 0.39 $\pm$ 0.04                           | 0.46 $\pm$ 0.01     |
| ('XGBoost', 'No Upsampling')                                 | 0.36 $\pm$ 0.03 | 0.35 $\pm$ 0.02             | 0.37 $\pm$ 0.02         | 0.41 $\pm$ 0.03          | 0.38 $\pm$ 0.01                           | 0.45 $\pm$ 0.00     |
| ('XGBoost', 'Random Oversampling')                           | 0.35 $\pm$ 0.04 | 0.36 $\pm$ 0.03             | 0.38 $\pm$ 0.02         | 0.43 $\pm$ 0.00          | 0.42 $\pm$ 0.03                           | 0.48 $\pm$ 0.01     |
| ('XGBoost', 'SMOTE')                                         | 0.35 $\pm$ 0.03 | 0.35 $\pm$ 0.01             | 0.38 $\pm$ 0.03         | 0.42 $\pm$ 0.04          | 0.41 $\pm$ 0.03                           | 0.48 $\pm$ 0.01     |
| ('XGBoost', 'SVMSMOTE')                                      | 0.36 $\pm$ 0.02 | 0.35 $\pm$ 0.01             | 0.38 $\pm$ 0.01         | 0.41 $\pm$ 0.01          | 0.40 $\pm$ 0.01                           | 0.48 $\pm$ 0.01     |

  report_table.to_latex(latex_file)


## recall_micro

|                                                              | title               | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-------------------------------------------------------------|:--------------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| ('Binary Relevance kNN', 'Natively Multilabel')              | 0.19 $\pm$ 0.02     | 0.23 $\pm$ 0.02             | 0.31 $\pm$ 0.01         | 0.29 $\pm$ 0.04          | 0.30 $\pm$ 0.01                           | 0.31 $\pm$ 0.02 |
| ('ComplementNB', 'RakelD Partitioning of labels')            | 0.65 $\pm$ 0.11     | 0.56 $\pm$ 0.03             | 0.60 $\pm$ 0.08         | 0.57 $\pm$ 0.04          | 0.56 $\pm$ 0.04                           | 0.61 $\pm$ 0.05 |
| ('KNN', 'BorderlineSMOTE')                                   | 0.74 $\pm$ 0.17     | 0.63 $\pm$ 0.06             | 0.57 $\pm$ 0.05         | 0.67 $\pm$ 0.03          | 0.69 $\pm$ 0.05                           | 0.72 $\pm$ 0.05 |
| ('KNN', 'No Upsampling')                                     | 0.28 $\pm$ 0.05     | 0.31 $\pm$ 0.08             | 0.31 $\pm$ 0.02         | 0.31 $\pm$ 0.02          | 0.41 $\pm$ 0.04                           | 0.41 $\pm$ 0.03 |
| ('KNN', 'Random Oversampling')                               | 0.58 $\pm$ 0.12     | 0.60 $\pm$ 0.02             | 0.61 $\pm$ 0.02         | 0.64 $\pm$ 0.03          | 0.64 $\pm$ 0.05                           | 0.73 $\pm$ 0.05 |
| ('KNN', 'SMOTE')                                             | 0.74 $\pm$ 0.20     | 0.70 $\pm$ 0.21             | 0.58 $\pm$ 0.04         | 0.67 $\pm$ 0.02          | 0.67 $\pm$ 0.01                           | 0.68 $\pm$ 0.03 |
| ('KNN', 'SVMSMOTE')                                          | **0.96 $\pm$ 0.01** | 0.31 $\pm$ 0.16             | 0.45 $\pm$ 0.03         | 0.54 $\pm$ 0.02          | 0.55 $\pm$ 0.05                           | 0.60 $\pm$ 0.05 |
| ('LinearSVM', 'BorderlineSMOTE')                             | 0.49 $\pm$ 0.01     | 0.63 $\pm$ 0.08             | 0.59 $\pm$ 0.07         | 0.58 $\pm$ 0.07          | 0.62 $\pm$ 0.04                           | 0.61 $\pm$ 0.08 |
| ('LinearSVM', 'No Upsampling')                               | 0.50 $\pm$ 0.02     | 0.68 $\pm$ 0.03             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.06          | 0.67 $\pm$ 0.03                           | 0.62 $\pm$ 0.01 |
| ('LinearSVM', 'RakelD Partitioning of labels')               | 0.48 $\pm$ 0.03     | 0.66 $\pm$ 0.06             | 0.58 $\pm$ 0.04         | 0.64 $\pm$ 0.07          | 0.59 $\pm$ 0.11                           | 0.58 $\pm$ 0.01 |
| ('LinearSVM', 'Random Oversampling')                         | 0.50 $\pm$ 0.02     | 0.67 $\pm$ 0.03             | 0.58 $\pm$ 0.02         | 0.63 $\pm$ 0.08          | 0.57 $\pm$ 0.03                           | 0.63 $\pm$ 0.03 |
| ('LinearSVM', 'SMOTE')                                       | 0.43 $\pm$ 0.02     | 0.62 $\pm$ 0.09             | 0.55 $\pm$ 0.07         | 0.52 $\pm$ 0.05          | 0.59 $\pm$ 0.05                           | 0.60 $\pm$ 0.03 |
| ('LogisticRegression', 'BorderlineSMOTE')                    | 0.29 $\pm$ 0.01     | 0.40 $\pm$ 0.04             | 0.34 $\pm$ 0.03         | 0.44 $\pm$ 0.03          | 0.46 $\pm$ 0.03                           | 0.49 $\pm$ 0.02 |
| ('LogisticRegression', 'No Upsampling')                      | 0.27 $\pm$ 0.02     | 0.32 $\pm$ 0.05             | 0.24 $\pm$ 0.03         | 0.35 $\pm$ 0.02          | 0.39 $\pm$ 0.02                           | 0.40 $\pm$ 0.03 |
| ('LogisticRegression', 'Random Oversampling')                | 0.29 $\pm$ 0.01     | 0.32 $\pm$ 0.05             | 0.25 $\pm$ 0.03         | 0.35 $\pm$ 0.02          | 0.37 $\pm$ 0.03                           | 0.41 $\pm$ 0.04 |
| ('LogisticRegression', 'SMOTE')                              | 0.34 $\pm$ 0.03     | 0.33 $\pm$ 0.04             | 0.26 $\pm$ 0.04         | 0.36 $\pm$ 0.01          | 0.38 $\pm$ 0.01                           | 0.41 $\pm$ 0.05 |
| ('LogisticRegression', 'SVMSMOTE')                           | 0.29 $\pm$ 0.03     | 0.31 $\pm$ 0.04             | 0.26 $\pm$ 0.03         | 0.35 $\pm$ 0.02          | 0.38 $\pm$ 0.01                           | 0.40 $\pm$ 0.05 |
| ('LogisticRegressionElasticNet', 'BorderlineSMOTE')          | 0.38 $\pm$ 0.02     | 0.43 $\pm$ 0.03             | 0.45 $\pm$ 0.08         | 0.49 $\pm$ 0.03          | 0.47 $\pm$ 0.03                           | 0.50 $\pm$ 0.04 |
| ('LogisticRegressionElasticNet', 'Random Oversampling')      | 0.33 $\pm$ 0.03     | 0.39 $\pm$ 0.03             | 0.42 $\pm$ 0.06         | 0.50 $\pm$ 0.02          | 0.50 $\pm$ 0.05                           | 0.60 $\pm$ 0.04 |
| ('LogisticRegressionElasticNet', 'SMOTE')                    | 0.36 $\pm$ 0.02     | 0.42 $\pm$ 0.04             | 0.40 $\pm$ 0.08         | 0.55 $\pm$ 0.06          | 0.45 $\pm$ 0.02                           | 0.59 $\pm$ 0.06 |
| ('LogisticRegressionElasticNet', 'SVMSMOTE')                 | 0.37 $\pm$ 0.03     | 0.35 $\pm$ 0.01             | 0.42 $\pm$ 0.02         | 0.52 $\pm$ 0.04          | 0.51 $\pm$ 0.03                           | 0.60 $\pm$ 0.08 |
| ('LogisticRegressionLasso', 'BorderlineSMOTE')               | 0.38 $\pm$ 0.03     | 0.29 $\pm$ 0.04             | 0.40 $\pm$ 0.05         | 0.56 $\pm$ 0.01          | 0.50 $\pm$ 0.04                           | 0.60 $\pm$ 0.03 |
| ('LogisticRegressionLasso', 'No Upsampling')                 | 0.35 $\pm$ 0.02     | 0.30 $\pm$ 0.03             | 0.42 $\pm$ 0.03         | 0.57 $\pm$ 0.04          | 0.52 $\pm$ 0.03                           | 0.63 $\pm$ 0.05 |
| ('LogisticRegressionLasso', 'Random Oversampling')           | 0.36 $\pm$ 0.03     | 0.31 $\pm$ 0.05             | 0.40 $\pm$ 0.03         | 0.52 $\pm$ 0.02          | 0.52 $\pm$ 0.08                           | 0.61 $\pm$ 0.05 |
| ('LogisticRegressionLasso', 'SMOTE')                         | 0.36 $\pm$ 0.01     | 0.30 $\pm$ 0.02             | 0.37 $\pm$ 0.04         | 0.50 $\pm$ 0.04          | 0.51 $\pm$ 0.05                           | 0.57 $\pm$ 0.09 |
| ('LogisticRegressionLasso', 'SVMSMOTE')                      | 0.38 $\pm$ 0.01     | 0.30 $\pm$ 0.07             | 0.34 $\pm$ 0.01         | 0.49 $\pm$ 0.05          | 0.50 $\pm$ 0.06                           | 0.62 $\pm$ 0.06 |
| ('LogisticRegressionRidge', 'BorderlineSMOTE')               | 0.49 $\pm$ 0.01     | 0.61 $\pm$ 0.03             | 0.56 $\pm$ 0.01         | 0.61 $\pm$ 0.06          | 0.68 $\pm$ 0.04                           | 0.57 $\pm$ 0.03 |
| ('LogisticRegressionRidge', 'No Upsampling')                 | 0.47 $\pm$ 0.02     | 0.68 $\pm$ 0.02             | 0.62 $\pm$ 0.02         | 0.53 $\pm$ 0.06          | 0.57 $\pm$ 0.02                           | 0.57 $\pm$ 0.03 |
| ('LogisticRegressionRidge', 'RakelD Partitioning of labels') | 0.54 $\pm$ 0.04     | 0.69 $\pm$ 0.04             | 0.68 $\pm$ 0.05         | 0.63 $\pm$ 0.04          | 0.58 $\pm$ 0.05                           | 0.55 $\pm$ 0.02 |
| ('LogisticRegressionRidge', 'Random Oversampling')           | 0.49 $\pm$ 0.03     | 0.65 $\pm$ 0.04             | 0.61 $\pm$ 0.05         | 0.62 $\pm$ 0.05          | 0.66 $\pm$ 0.07                           | 0.66 $\pm$ 0.08 |
| ('LogisticRegressionRidge', 'SMOTE')                         | 0.44 $\pm$ 0.01     | 0.55 $\pm$ 0.03             | 0.48 $\pm$ 0.06         | 0.58 $\pm$ 0.05          | 0.54 $\pm$ 0.02                           | 0.61 $\pm$ 0.04 |
| ('LogisticRegressionRidge', 'SVMSMOTE')                      | 0.42 $\pm$ 0.02     | 0.48 $\pm$ 0.06             | 0.40 $\pm$ 0.04         | 0.51 $\pm$ 0.01          | 0.64 $\pm$ 0.03                           | 0.54 $\pm$ 0.08 |
| ('Multi-label ARAM', 'Natively Multilabel')                  | 0.34 $\pm$ 0.06     | 0.43 $\pm$ 0.23             | 0.39 $\pm$ 0.20         | 0.54 $\pm$ 0.20          | 0.47 $\pm$ 0.10                           | 0.38 $\pm$ 0.06 |
| ('Multilabel k Nearest Neighbours', 'Natively Multilabel')   | 0.33 $\pm$ 0.03     | 0.34 $\pm$ 0.02             | 0.38 $\pm$ 0.04         | 0.34 $\pm$ 0.04          | 0.39 $\pm$ 0.03                           | 0.35 $\pm$ 0.03 |
| ('NaiveBayes', 'BorderlineSMOTE')                            | 0.67 $\pm$ 0.09     | 0.56 $\pm$ 0.02             | 0.53 $\pm$ 0.03         | 0.58 $\pm$ 0.05          | 0.65 $\pm$ 0.03                           | 0.60 $\pm$ 0.05 |
| ('NaiveBayes', 'No Upsampling')                              | 0.73 $\pm$ 0.02     | 0.62 $\pm$ 0.05             | 0.57 $\pm$ 0.08         | 0.56 $\pm$ 0.03          | 0.58 $\pm$ 0.07                           | 0.59 $\pm$ 0.01 |
| ('NaiveBayes', 'Random Oversampling')                        | 0.71 $\pm$ 0.02     | 0.74 $\pm$ 0.05             | 0.63 $\pm$ 0.04         | 0.55 $\pm$ 0.04          | 0.67 $\pm$ 0.05                           | 0.63 $\pm$ 0.06 |
| ('NaiveBayes', 'SMOTE')                                      | 0.50 $\pm$ 0.06     | 0.57 $\pm$ 0.04             | 0.52 $\pm$ 0.03         | 0.57 $\pm$ 0.05          | 0.59 $\pm$ 0.03                           | 0.58 $\pm$ 0.03 |
| ('NaiveBayes', 'SVMSMOTE')                                   | 0.65 $\pm$ 0.08     | 0.58 $\pm$ 0.06             | 0.63 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.61 $\pm$ 0.06                           | 0.64 $\pm$ 0.04 |
| ('RandomForest', 'BorderlineSMOTE')                          | 0.48 $\pm$ 0.07     | 0.43 $\pm$ 0.02             | 0.44 $\pm$ 0.04         | 0.39 $\pm$ 0.02          | 0.36 $\pm$ 0.05                           | 0.39 $\pm$ 0.04 |
| ('RandomForest', 'No Upsampling')                            | 0.49 $\pm$ 0.05     | 0.43 $\pm$ 0.05             | 0.44 $\pm$ 0.04         | 0.41 $\pm$ 0.02          | 0.37 $\pm$ 0.02                           | 0.43 $\pm$ 0.05 |
| ('RandomForest', 'Random Oversampling')                      | 0.42 $\pm$ 0.01     | 0.36 $\pm$ 0.09             | 0.43 $\pm$ 0.04         | 0.40 $\pm$ 0.06          | 0.32 $\pm$ 0.03                           | 0.38 $\pm$ 0.09 |
| ('RandomForest', 'SMOTE')                                    | 0.47 $\pm$ 0.04     | 0.45 $\pm$ 0.05             | 0.41 $\pm$ 0.01         | 0.41 $\pm$ 0.03          | 0.38 $\pm$ 0.05                           | 0.40 $\pm$ 0.02 |
| ('RandomForest', 'SVMSMOTE')                                 | 0.44 $\pm$ 0.01     | 0.40 $\pm$ 0.03             | 0.42 $\pm$ 0.03         | 0.39 $\pm$ 0.01          | 0.36 $\pm$ 0.01                           | 0.40 $\pm$ 0.07 |
| ('RidgeClassifier', 'BorderlineSMOTE')                       | 0.35 $\pm$ 0.04     | 0.24 $\pm$ 0.04             | 0.27 $\pm$ 0.02         | 0.41 $\pm$ 0.02          | 0.43 $\pm$ 0.01                           | 0.44 $\pm$ 0.01 |
| ('RidgeClassifier', 'No Upsampling')                         | 0.31 $\pm$ 0.01     | 0.26 $\pm$ 0.05             | 0.28 $\pm$ 0.01         | 0.42 $\pm$ 0.01          | 0.44 $\pm$ 0.01                           | 0.45 $\pm$ 0.03 |
| ('RidgeClassifier', 'Random Oversampling')                   | 0.54 $\pm$ 0.01     | 0.29 $\pm$ 0.04             | 0.27 $\pm$ 0.02         | 0.40 $\pm$ 0.01          | 0.39 $\pm$ 0.03                           | 0.43 $\pm$ 0.02 |
| ('RidgeClassifier', 'SMOTE')                                 | 0.31 $\pm$ 0.01     | 0.25 $\pm$ 0.04             | 0.26 $\pm$ 0.02         | 0.39 $\pm$ 0.02          | 0.38 $\pm$ 0.03                           | 0.44 $\pm$ 0.02 |
| ('RidgeClassifier', 'SVMSMOTE')                              | 0.32 $\pm$ 0.01     | 0.25 $\pm$ 0.03             | 0.24 $\pm$ 0.02         | 0.38 $\pm$ 0.02          | 0.42 $\pm$ 0.02                           | 0.42 $\pm$ 0.01 |
| ('SVM', 'BorderlineSMOTE')                                   | 0.43 $\pm$ 0.06     | 0.45 $\pm$ 0.06             | 0.50 $\pm$ 0.04         | 0.55 $\pm$ 0.06          | 0.70 $\pm$ 0.02                           | 0.57 $\pm$ 0.06 |
| ('SVM', 'No Upsampling')                                     | 0.45 $\pm$ 0.02     | 0.56 $\pm$ 0.04             | 0.48 $\pm$ 0.04         | 0.59 $\pm$ 0.05          | 0.59 $\pm$ 0.07                           | 0.62 $\pm$ 0.05 |
| ('SVM', 'RakelD Partitioning of labels')                     | 0.45 $\pm$ 0.03     | 0.46 $\pm$ 0.06             | 0.65 $\pm$ 0.06         | 0.53 $\pm$ 0.06          | 0.51 $\pm$ 0.04                           | 0.65 $\pm$ 0.05 |
| ('SVM', 'Random Oversampling')                               | 0.46 $\pm$ 0.06     | 0.56 $\pm$ 0.02             | 0.54 $\pm$ 0.04         | 0.59 $\pm$ 0.02          | 0.59 $\pm$ 0.04                           | 0.61 $\pm$ 0.04 |
| ('SVM', 'SMOTE')                                             | 0.51 $\pm$ 0.01     | 0.52 $\pm$ 0.07             | 0.54 $\pm$ 0.11         | 0.57 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | 0.55 $\pm$ 0.05 |
| ('SVM', 'SVMSMOTE')                                          | 0.49 $\pm$ 0.05     | 0.48 $\pm$ 0.09             | 0.47 $\pm$ 0.05         | 0.53 $\pm$ 0.06          | 0.59 $\pm$ 0.02                           | 0.54 $\pm$ 0.02 |
| ('XGBoost', 'BorderlineSMOTE')                               | 0.64 $\pm$ 0.06     | 0.68 $\pm$ 0.09             | 0.43 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.46 $\pm$ 0.03                           | 0.53 $\pm$ 0.00 |
| ('XGBoost', 'No Upsampling')                                 | 0.60 $\pm$ 0.05     | 0.56 $\pm$ 0.07             | 0.57 $\pm$ 0.06         | 0.44 $\pm$ 0.01          | 0.47 $\pm$ 0.02                           | 0.52 $\pm$ 0.04 |
| ('XGBoost', 'Random Oversampling')                           | 0.59 $\pm$ 0.09     | 0.57 $\pm$ 0.01             | 0.50 $\pm$ 0.01         | 0.51 $\pm$ 0.03          | 0.54 $\pm$ 0.01                           | 0.55 $\pm$ 0.02 |
| ('XGBoost', 'SMOTE')                                         | 0.65 $\pm$ 0.08     | 0.63 $\pm$ 0.08             | 0.49 $\pm$ 0.04         | 0.54 $\pm$ 0.02          | 0.50 $\pm$ 0.05                           | 0.54 $\pm$ 0.05 |
| ('XGBoost', 'SVMSMOTE')                                      | 0.57 $\pm$ 0.10     | 0.73 $\pm$ 0.02             | 0.50 $\pm$ 0.06         | 0.53 $\pm$ 0.06          | 0.48 $\pm$ 0.02                           | 0.56 $\pm$ 0.03 |

  report_table.to_latex(latex_file)


# Per model sub-type and experiment name

In [None]:
# Run the metrics report on mw_df with runs grouped by
# (model_type, model_subtype, model_name); 'per_model_name_tables'
# is passed as the output directory.
display_metrics_and_write_to_file(
    df=mw_df,
    grouping_criterion=[
        'model_type',
        'model_subtype',
        'model_name',
    ],
    output_dir='per_model_name_tables',
)