In [11]:
import os
import glob

from IPython.display import display, Markdown
import pandas as pd

# Group the models by experiment type and model type, pick the best performer (by F1-score) for each unit of analysis, and report the results in a table.


In [12]:
# Collect the per-model result CSVs for each prediction-aggregation strategy.
# glob returns matches in arbitrary (filesystem-dependent) order, so the lists
# are sorted to make the processing order reproducible across machines and runs.
results_filepaths_mean_pred = sorted(glob.glob('./logged_performance_per_model/*/*agg_mean*.csv'))
results_filepaths_majority_pred = sorted(glob.glob('./logged_performance_per_model/*/*agg_majority*.csv'))

In [13]:
def conantenate_results(filepath_list):
    """Load result CSV files and stack them into one indexed DataFrame.

    Every file is tagged with a ``model_name`` column whose value is the name
    of the directory that directly contains the file.

    Parameters
    ----------
    filepath_list : iterable of str
        Paths of the CSV files to load. Each file must provide the
        ``language`` and ``unit_of_analysis`` columns, which become index
        levels.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, indexed by
        ``(language, model_name, unit_of_analysis)`` and sorted by that index.
        The misspelled columns ``f1-mico_mean`` / ``f1-mico_std`` are renamed
        to ``f1-micro_mean`` / ``f1-micro_std`` when present.

    Raises
    ------
    ValueError
        If ``filepath_list`` is empty.
    """
    dfs_list = []
    for results_filepath in filepath_list:
        # Use os.path instead of splitting on '/', so the parent directory is
        # found according to the platform's path rules (e.g. backslashes on
        # Windows).
        model_name = os.path.basename(os.path.dirname(results_filepath))
        dfs_list.append(pd.read_csv(results_filepath).assign(model_name=model_name))

    if not dfs_list:
        # pd.concat would raise a less helpful "No objects to concatenate".
        raise ValueError('No result files to concatenate: filepath_list is empty')

    return (
        pd.concat(dfs_list)
        .set_index(['language', 'model_name', 'unit_of_analysis'])
        .sort_index()
        # Fix the column-name typo present in the logged CSV files.
        .rename(columns={'f1-mico_mean': 'f1-micro_mean', 'f1-mico_std': 'f1-micro_std'})
    )

In [14]:
# Build one combined results frame per prediction-aggregation strategy
# (mean prediction first, majority vote second).
results_mean_pred_df, results_majority_vote_pred_df = (
    conantenate_results(filepaths)
    for filepaths in (results_filepaths_mean_pred, results_filepaths_majority_pred)
)

### Generate the tables to report

In [15]:
def display_performance_table(df, metric, index_cols=['model_name'], display_=True):
    """Build (and optionally display) a ``mean $\\pm$ std`` table for one metric.

    Rows are the combinations of ``index_cols``; columns are the units of
    analysis, in a fixed reporting order. Each cell holds the metric's mean
    and standard deviation formatted with two decimals. Every cell whose
    (rounded) mean equals the maximum over the whole table is wrapped in
    ``**...**`` so that it renders bold in markdown.

    Parameters
    ----------
    df : pandas.DataFrame
        Results providing ``unit_of_analysis``, every name in ``index_cols``,
        and the ``f'{metric}_mean'`` / ``f'{metric}_std'`` columns, either as
        columns or as index levels (the index is reset first).
    metric : str
        Metric name used to select the ``*_mean`` and ``*_std`` columns.
    index_cols : sequence of str, default ``['model_name']``
        Columns that identify a row of the report. Not modified.
    display_ : bool, default True
        If True, render the table as markdown in the notebook.

    Returns
    -------
    pandas.DataFrame
        The formatted table with the best cells highlighted.
    """
    index_cols = list(index_cols)  # private copy; also accepts tuples

    report_table = df.reset_index().copy()
    mean_text = report_table[f'{metric}_mean'].map(lambda x: f'{x:.2f}')
    std_text = report_table[f'{metric}_std'].map(lambda x: f'{x:.2f}')
    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    report_table['result'] = mean_text + r' $\pm$ ' + std_text

    report_table['col_title'] = report_table.unit_of_analysis.str.split('_').str.join(' ')
    # An ordered categorical fixes the column order of the pivoted table.
    report_table['col_title'] = pd.Categorical(
        report_table.col_title,
        categories=['title', 'title and first paragraph', 'title and 5 sentences', 'title and 10 sentences',
                    'title and first sentence each paragraph', 'raw text'],
        ordered=True)
    # observed=False is the historical default; it is passed explicitly because
    # newer pandas versions deprecate relying on that default.
    report_table = report_table[index_cols + ['col_title', 'result']]\
        .pivot_table(index=index_cols, columns=['col_title'], values=['result'], aggfunc='first',
                     fill_value=0, observed=False)\
        .droplevel(0, axis=1)

    report_table.columns.names = [None]

    # Highlight best scoring models according to their average. The mean is
    # parsed back from the formatted text, so ties are decided on the rounded
    # value that is actually shown in the table.
    cell_text = report_table.astype(str)
    mean_perf_arr = cell_text.apply(lambda col: col.str.split(' ').str[0].astype(float)).to_numpy()
    highlight_mask = mean_perf_arr == mean_perf_arr.max()
    # Build a new frame instead of writing through ``to_numpy()``: that array
    # is not guaranteed to be a writable view of the frame's data.
    report_table = report_table.mask(highlight_mask, '**' + cell_text + '**')

    if display_:
        display(Markdown(report_table.to_markdown()))

    return report_table

### Generate tables for all languages

In [16]:
# Metrics for which a report table is produced, in reporting order.
metrics_to_report = [
    'f1-micro',
    'recall-micro',
    'precision-micro',
    'roc-auc',
    'accuracy',
]

In [17]:
# Language code -> human-readable language name used in headings and file names.
language_dict = {
    'en': 'English',
    'it': 'Italian',
    'fr': 'French',
    'po': 'Polish',
    'ru': 'Russian',
    'ge': 'German',
}

In [18]:
def _export_report_table(table, output_dir, metric, file_stem):
    """Write ``table`` as markdown, LaTeX and CSV below ``output_dir/metric``.

    The files are named ``f"{file_stem}_{metric}"`` with the extensions
    ``.md``, ``.tex`` and ``.csv`` and are placed in the ``markdown``,
    ``latex`` and ``csv`` sub-directories, which are created when missing.
    The markdown and LaTeX exports contain the index as regular columns; the
    CSV export keeps it as the index.
    """
    format_dirs = {}
    for format_name in ('markdown', 'latex', 'csv'):
        format_dirs[format_name] = os.path.join(output_dir, metric, format_name)
        os.makedirs(format_dirs[format_name], exist_ok=True)

    flat_table = table.reset_index()

    # ``with`` guarantees the handles are closed even if an export raises.
    with open(os.path.join(format_dirs['markdown'], f"{file_stem}_{metric}.md"), "w") as markdown_file:
        flat_table.to_markdown(markdown_file, index=False)

    with open(os.path.join(format_dirs['latex'], f"{file_stem}_{metric}.tex"), "w") as latex_file:
        flat_table.to_latex(latex_file, index=False)

    table.to_csv(os.path.join(format_dirs['csv'], f"{file_stem}_{metric}.csv"))


def display_metrics_and_write_to_file(df, grouping_criterion, output_dir):
    """Display and export the report tables of every language and metric.

    For each language (first index level of ``df``) and each entry of
    ``metrics_to_report``, the performance table is rendered in the notebook
    and written to ``output_dir/<metric>/{markdown,latex,csv}/``. Afterwards
    the per-language tables of each metric are stacked into one table, which
    is displayed and exported as ``all_6_languages_<metric>.*``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results whose first index level is the language code.
    grouping_criterion : list of str
        Columns identifying a row of each report table (passed to
        ``display_performance_table`` as ``index_cols``).
    output_dir : str
        Root directory of the exported files; created when missing.

    Returns
    -------
    None
    """
    os.makedirs(output_dir, exist_ok=True)
    grouping_criterion = list(grouping_criterion)
    report_tables_dfs_dict = {metric: [] for metric in metrics_to_report}

    for language, results_df in df.groupby(level=0):
        # Fall back to the raw code for languages missing from language_dict
        # instead of failing with a KeyError.
        language_name = language_dict.get(language, language)
        display(Markdown(f'# {language_name}'))

        for metric in metrics_to_report:
            display(Markdown(f'## {metric}'))

            report_table = display_performance_table(df=results_df, index_cols=grouping_criterion, metric=metric, display_=True)
            _export_report_table(report_table, output_dir, metric, language_name)

            # Stack all languages into single table
            report_tables_dfs_dict[metric].append(
                report_table
                .assign(language=language)
                .reset_index()
                .set_index(['language'] + grouping_criterion)
            )

    # Report or store unified table
    display(Markdown('# All 6 Languages'))
    for metric in metrics_to_report:
        display(Markdown(f'## {metric}'))
        multi_language_report_table_metric = pd.concat(report_tables_dfs_dict[metric])
        display(Markdown(multi_language_report_table_metric.reset_index().to_markdown(index=False)))

        _export_report_table(multi_language_report_table_metric, output_dir, metric, 'all_6_languages')

# Per model type

In [19]:
# Report the mean-prediction results with one table row per model.
display_metrics_and_write_to_file(
    df=results_mean_pred_df,
    grouping_criterion=['model_name'],
    output_dir='per_model_name_tables_mean_prediction',
)

# English

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.62 $\pm$ 0.01 | 0.67 $\pm$ 0.01             | 0.68 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.68 $\pm$ 0.02                           | 0.70 $\pm$ 0.00     |
| EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.03 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.01          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.69 $\pm$ 0.02          | 0.69 $\pm$ 0.02                           | 0.70 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.61 $\pm$ 0.03 | 0.66 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.68 $\pm$ 0.03          | 0.67 $\pm$ 0.01                           | 0.69 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | **0.71 $\pm$ 0.02**                       | 0.69 $\pm$ 0.02     |
| gpt2                               | 0.63 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.67 $\pm$ 0.02          | 0.68 $\pm$ 0.02                           | 0.69 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.66 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | **0.71 $\pm$ 0.02**      | **0.71 $\pm$ 0.01**                       | **0.71 $\pm$ 0.02** |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.03                           | 0.63 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.01             | 0.55 $\pm$ 0.03         | 0.55 $\pm$ 0.00          | 0.59 $\pm$ 0.03                           | 0.62 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.58 $\pm$ 0.01 | 0.63 $\pm$ 0.03             | 0.64 $\pm$ 0.02         | 0.65 $\pm$ 0.01          | 0.64 $\pm$ 0.03                           | 0.66 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.56 $\pm$ 0.03 | 0.59 $\pm$ 0.02             | 0.62 $\pm$ 0.02         | 0.63 $\pm$ 0.03          | 0.60 $\pm$ 0.01                           | 0.64 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.60 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.66 $\pm$ 0.02         | 0.66 $\pm$ 0.01          | 0.66 $\pm$ 0.03                           | 0.65 $\pm$ 0.02     |
| gpt2                               | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.04          | 0.65 $\pm$ 0.04                           | 0.66 $\pm$ 0.03     |
| xlm-roberta-large                  | 0.60 $\pm$ 0.01 | 0.66 $\pm$ 0.01             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | **0.68 $\pm$ 0.02**                       | **0.68 $\pm$ 0.01** |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.72 $\pm$ 0.04 | 0.76 $\pm$ 0.02             | 0.74 $\pm$ 0.02         | 0.76 $\pm$ 0.01          | **0.77 $\pm$ 0.03**                       | **0.77 $\pm$ 0.03** |
| EleutherAI-gpt-neo-125M            | 0.63 $\pm$ 0.04 | 0.67 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | 0.71 $\pm$ 0.03          | 0.73 $\pm$ 0.04                           | 0.75 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.70 $\pm$ 0.03 | 0.74 $\pm$ 0.03             | 0.76 $\pm$ 0.02         | 0.74 $\pm$ 0.04          | 0.75 $\pm$ 0.03                           | 0.75 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.68 $\pm$ 0.05 | 0.74 $\pm$ 0.03             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.03          | 0.75 $\pm$ 0.02                           | 0.75 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.73 $\pm$ 0.01 | 0.76 $\pm$ 0.01             | 0.74 $\pm$ 0.03         | 0.76 $\pm$ 0.01          | **0.77 $\pm$ 0.02**                       | 0.74 $\pm$ 0.02     |
| gpt2                               | 0.67 $\pm$ 0.06 | 0.72 $\pm$ 0.02             | 0.70 $\pm$ 0.03         | 0.67 $\pm$ 0.03          | 0.71 $\pm$ 0.05                           | 0.72 $\pm$ 0.03     |
| xlm-roberta-large                  | 0.73 $\pm$ 0.03 | 0.75 $\pm$ 0.03             | 0.75 $\pm$ 0.01         | 0.76 $\pm$ 0.03          | 0.74 $\pm$ 0.01                           | 0.74 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.73 $\pm$ 0.00 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.76 $\pm$ 0.00          | 0.77 $\pm$ 0.01                           | 0.78 $\pm$ 0.00     |
| EleutherAI-gpt-neo-125M            | 0.69 $\pm$ 0.01 | 0.72 $\pm$ 0.01             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.00          | 0.75 $\pm$ 0.01                           | 0.77 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.74 $\pm$ 0.01 | 0.77 $\pm$ 0.01             | 0.78 $\pm$ 0.01         | 0.78 $\pm$ 0.01          | 0.78 $\pm$ 0.01                           | 0.78 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.73 $\pm$ 0.02 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.77 $\pm$ 0.02          | 0.76 $\pm$ 0.00                           | 0.78 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.76 $\pm$ 0.01 | 0.78 $\pm$ 0.01             | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01     |
| gpt2                               | 0.74 $\pm$ 0.02 | 0.77 $\pm$ 0.01             | 0.77 $\pm$ 0.00         | 0.77 $\pm$ 0.02          | 0.77 $\pm$ 0.01                           | 0.78 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.76 $\pm$ 0.00 | **0.79 $\pm$ 0.00**         | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | **0.79 $\pm$ 0.01** |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.08 $\pm$ 0.02 | 0.12 $\pm$ 0.02             | 0.08 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.11 $\pm$ 0.02                           | 0.12 $\pm$ 0.02 |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.05 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.01          | 0.09 $\pm$ 0.01                           | 0.09 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.07 $\pm$ 0.01 | 0.10 $\pm$ 0.03             | **0.13 $\pm$ 0.03**     | 0.09 $\pm$ 0.00          | 0.10 $\pm$ 0.03                           | 0.10 $\pm$ 0.00 |
| distilbert-base-multilingual-cased | 0.06 $\pm$ 0.01 | 0.09 $\pm$ 0.02             | 0.10 $\pm$ 0.02         | 0.09 $\pm$ 0.01          | 0.09 $\pm$ 0.00                           | 0.11 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.07 $\pm$ 0.04 | 0.10 $\pm$ 0.03             | 0.11 $\pm$ 0.03         | 0.12 $\pm$ 0.03          | 0.12 $\pm$ 0.02                           | 0.10 $\pm$ 0.00 |
| gpt2                               | 0.06 $\pm$ 0.01 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.06 $\pm$ 0.03          | 0.08 $\pm$ 0.01                           | 0.09 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.10 $\pm$ 0.01 | 0.12 $\pm$ 0.01             | 0.10 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.11 $\pm$ 0.02                           | 0.11 $\pm$ 0.02 |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.04 | 0.47 $\pm$ 0.01             | 0.50 $\pm$ 0.04         | 0.50 $\pm$ 0.01          | 0.52 $\pm$ 0.01                           | 0.56 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.31 $\pm$ 0.01 | 0.38 $\pm$ 0.03             | 0.39 $\pm$ 0.03         | 0.39 $\pm$ 0.01          | 0.44 $\pm$ 0.03                           | 0.47 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.47 $\pm$ 0.04 | 0.52 $\pm$ 0.03             | 0.53 $\pm$ 0.02         | 0.55 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.58 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.44 $\pm$ 0.05 | 0.50 $\pm$ 0.02             | 0.53 $\pm$ 0.03         | 0.53 $\pm$ 0.02          | 0.52 $\pm$ 0.02                           | 0.54 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.53 $\pm$ 0.02             | 0.56 $\pm$ 0.01         | 0.57 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | **0.60 $\pm$ 0.01** |
| gpt2                               | 0.40 $\pm$ 0.07 | 0.43 $\pm$ 0.02             | 0.47 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.49 $\pm$ 0.03                           | 0.53 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.51 $\pm$ 0.02 | 0.56 $\pm$ 0.03             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.03          | 0.58 $\pm$ 0.01                           | 0.57 $\pm$ 0.04     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.32 $\pm$ 0.05 | 0.37 $\pm$ 0.01             | 0.45 $\pm$ 0.04         | 0.42 $\pm$ 0.03          | 0.44 $\pm$ 0.04                           | 0.50 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.24 $\pm$ 0.01 | 0.32 $\pm$ 0.04             | 0.32 $\pm$ 0.04         | 0.31 $\pm$ 0.03          | 0.38 $\pm$ 0.04                           | 0.38 $\pm$ 0.07     |
| bert-base-multilingual-cased       | 0.38 $\pm$ 0.04 | 0.46 $\pm$ 0.03             | 0.49 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.37 $\pm$ 0.06 | 0.44 $\pm$ 0.02             | 0.48 $\pm$ 0.04         | 0.48 $\pm$ 0.02          | 0.46 $\pm$ 0.03                           | 0.49 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.44 $\pm$ 0.03 | 0.47 $\pm$ 0.02             | 0.51 $\pm$ 0.02         | 0.52 $\pm$ 0.03          | 0.53 $\pm$ 0.03                           | **0.55 $\pm$ 0.04** |
| gpt2                               | 0.36 $\pm$ 0.06 | 0.36 $\pm$ 0.02             | 0.43 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.43 $\pm$ 0.06                           | 0.48 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.44 $\pm$ 0.02 | 0.51 $\pm$ 0.02             | 0.52 $\pm$ 0.04         | **0.55 $\pm$ 0.04**      | **0.55 $\pm$ 0.02**                       | 0.54 $\pm$ 0.07     |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.03 | 0.63 $\pm$ 0.04             | 0.57 $\pm$ 0.04         | 0.62 $\pm$ 0.06          | 0.65 $\pm$ 0.06                           | 0.64 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.42 $\pm$ 0.05 | 0.48 $\pm$ 0.03             | 0.51 $\pm$ 0.02         | 0.55 $\pm$ 0.06          | 0.53 $\pm$ 0.06                           | 0.64 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.59 $\pm$ 0.05 | 0.60 $\pm$ 0.02             | 0.59 $\pm$ 0.03         | 0.61 $\pm$ 0.05          | 0.62 $\pm$ 0.01                           | 0.64 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.53 $\pm$ 0.05 | 0.59 $\pm$ 0.05             | 0.59 $\pm$ 0.03         | 0.59 $\pm$ 0.03          | 0.62 $\pm$ 0.01                           | 0.61 $\pm$ 0.00     |
| facebook-mbart-large-50            | 0.60 $\pm$ 0.06 | 0.62 $\pm$ 0.01             | 0.62 $\pm$ 0.04         | 0.63 $\pm$ 0.02          | 0.64 $\pm$ 0.03                           | **0.67 $\pm$ 0.05** |
| gpt2                               | 0.45 $\pm$ 0.10 | 0.51 $\pm$ 0.04             | 0.53 $\pm$ 0.02         | 0.57 $\pm$ 0.04          | 0.59 $\pm$ 0.02                           | 0.58 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.62 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.61 $\pm$ 0.01     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.68 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.01         | 0.62 $\pm$ 0.01          | 0.64 $\pm$ 0.02                           | 0.66 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.70 $\pm$ 0.02         | 0.71 $\pm$ 0.01          | 0.71 $\pm$ 0.02                           | 0.73 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.64 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.70 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.00         | 0.72 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |
| gpt2                               | 0.62 $\pm$ 0.04 | 0.64 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.02                           | 0.69 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.01         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | 0.72 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.02 | 0.07 $\pm$ 0.01             | 0.06 $\pm$ 0.03         | 0.08 $\pm$ 0.02          | 0.09 $\pm$ 0.03                           | 0.11 $\pm$ 0.04 |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.01 $\pm$ 0.01             | 0.03 $\pm$ 0.01         | 0.04 $\pm$ 0.01          | 0.04 $\pm$ 0.03                           | 0.08 $\pm$ 0.03 |
| bert-base-multilingual-cased       | 0.07 $\pm$ 0.02 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.09 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | 0.11 $\pm$ 0.03 |
| distilbert-base-multilingual-cased | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.06 $\pm$ 0.04          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.08 $\pm$ 0.04 | 0.11 $\pm$ 0.02             | 0.09 $\pm$ 0.01         | 0.10 $\pm$ 0.03          | 0.11 $\pm$ 0.03                           | 0.11 $\pm$ 0.01 |
| gpt2                               | 0.03 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.06 $\pm$ 0.04         | 0.07 $\pm$ 0.02          | 0.06 $\pm$ 0.03                           | 0.08 $\pm$ 0.06 |
| xlm-roberta-large                  | 0.07 $\pm$ 0.03 | 0.07 $\pm$ 0.04             | 0.10 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | **0.12 $\pm$ 0.05**                       | 0.08 $\pm$ 0.05 |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.00 | 0.56 $\pm$ 0.02             | 0.56 $\pm$ 0.02         | 0.58 $\pm$ 0.00          | 0.57 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.01 | 0.46 $\pm$ 0.01             | 0.48 $\pm$ 0.00         | 0.50 $\pm$ 0.02          | 0.51 $\pm$ 0.04                           | 0.56 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.52 $\pm$ 0.03 | 0.58 $\pm$ 0.02             | 0.59 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.63 $\pm$ 0.04     |
| distilbert-base-multilingual-cased | 0.50 $\pm$ 0.02 | 0.56 $\pm$ 0.00             | 0.55 $\pm$ 0.03         | 0.59 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | 0.61 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.02             | 0.61 $\pm$ 0.02         | 0.63 $\pm$ 0.00          | 0.64 $\pm$ 0.02                           | **0.65 $\pm$ 0.02** |
| gpt2                               | 0.47 $\pm$ 0.03 | 0.52 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.55 $\pm$ 0.03          | 0.53 $\pm$ 0.01                           | 0.59 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.55 $\pm$ 0.02 | 0.61 $\pm$ 0.00             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.01          | **0.65 $\pm$ 0.01**                       | **0.65 $\pm$ 0.02** |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.03 | 0.49 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.59 $\pm$ 0.04     |
| EleutherAI-gpt-neo-125M            | 0.33 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.41 $\pm$ 0.01         | 0.43 $\pm$ 0.02          | 0.45 $\pm$ 0.03                           | 0.51 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.46 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.54 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.54 $\pm$ 0.02                           | 0.61 $\pm$ 0.06     |
| distilbert-base-multilingual-cased | 0.44 $\pm$ 0.01 | 0.51 $\pm$ 0.02             | 0.49 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.60 $\pm$ 0.05     |
| facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.55 $\pm$ 0.02             | 0.56 $\pm$ 0.03         | 0.59 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.60 $\pm$ 0.05     |
| gpt2                               | 0.46 $\pm$ 0.05 | 0.49 $\pm$ 0.05             | 0.50 $\pm$ 0.05         | 0.52 $\pm$ 0.05          | 0.52 $\pm$ 0.04                           | 0.56 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.50 $\pm$ 0.02 | 0.57 $\pm$ 0.02             | 0.57 $\pm$ 0.02         | 0.58 $\pm$ 0.01          | 0.63 $\pm$ 0.07                           | **0.66 $\pm$ 0.04** |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.57 $\pm$ 0.05 | 0.65 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.70 $\pm$ 0.06          | 0.66 $\pm$ 0.03                           | 0.68 $\pm$ 0.02 |
| EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.02 | 0.51 $\pm$ 0.04             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.02          | 0.60 $\pm$ 0.05                           | 0.62 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.60 $\pm$ 0.02 | 0.62 $\pm$ 0.02             | 0.66 $\pm$ 0.03         | 0.68 $\pm$ 0.06          | 0.66 $\pm$ 0.01                           | 0.65 $\pm$ 0.03 |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.63 $\pm$ 0.02             | 0.63 $\pm$ 0.04         | 0.64 $\pm$ 0.05          | 0.66 $\pm$ 0.03                           | 0.63 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.62 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.68 $\pm$ 0.00         | 0.68 $\pm$ 0.02          | 0.70 $\pm$ 0.04                           | 0.70 $\pm$ 0.02 |
| gpt2                               | 0.49 $\pm$ 0.01 | 0.56 $\pm$ 0.03             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.00          | 0.55 $\pm$ 0.02                           | 0.63 $\pm$ 0.02 |
| xlm-roberta-large                  | 0.62 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.67 $\pm$ 0.04         | **0.71 $\pm$ 0.01**      | 0.68 $\pm$ 0.04                           | 0.64 $\pm$ 0.06 |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.64 $\pm$ 0.00 | 0.69 $\pm$ 0.02             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.73 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.01             | 0.64 $\pm$ 0.00         | 0.65 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.69 $\pm$ 0.00     |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.01         | 0.73 $\pm$ 0.01          | 0.71 $\pm$ 0.00                           | 0.73 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.00                           | 0.72 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.01          | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.01** |
| gpt2                               | 0.62 $\pm$ 0.02 | 0.66 $\pm$ 0.03             | 0.66 $\pm$ 0.03         | 0.67 $\pm$ 0.02          | 0.66 $\pm$ 0.01                           | 0.70 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.72 $\pm$ 0.00             | 0.72 $\pm$ 0.02         | **0.74 $\pm$ 0.01**      | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.02** |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.02 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.05 $\pm$ 0.03         | 0.05 $\pm$ 0.03          | 0.03 $\pm$ 0.03                           | 0.06 $\pm$ 0.04 |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.03 $\pm$ 0.02         | 0.02 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.03 $\pm$ 0.03 |
| bert-base-multilingual-cased       | 0.05 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.03         | 0.09 $\pm$ 0.05          | 0.06 $\pm$ 0.01                           | 0.10 $\pm$ 0.07 |
| distilbert-base-multilingual-cased | 0.02 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.05 $\pm$ 0.04         | 0.05 $\pm$ 0.03          | 0.05 $\pm$ 0.04                           | 0.04 $\pm$ 0.04 |
| facebook-mbart-large-50            | 0.05 $\pm$ 0.03 | 0.06 $\pm$ 0.04             | 0.06 $\pm$ 0.03         | 0.05 $\pm$ 0.02          | **0.11 $\pm$ 0.03**                       | 0.07 $\pm$ 0.04 |
| gpt2                               | 0.01 $\pm$ 0.01 | 0.01 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.03          | 0.03 $\pm$ 0.01                           | 0.02 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.03 $\pm$ 0.02 | 0.07 $\pm$ 0.03             | 0.09 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | 0.07 $\pm$ 0.04                           | 0.05 $\pm$ 0.03 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.42 $\pm$ 0.02 | 0.46 $\pm$ 0.05             | 0.50 $\pm$ 0.03         | 0.52 $\pm$ 0.04          | 0.52 $\pm$ 0.06                           | 0.58 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.02         | 0.44 $\pm$ 0.01          | 0.48 $\pm$ 0.04                           | 0.51 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.47 $\pm$ 0.01 | 0.53 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.56 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.59 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.45 $\pm$ 0.01 | 0.50 $\pm$ 0.05             | 0.52 $\pm$ 0.03         | 0.56 $\pm$ 0.05          | 0.52 $\pm$ 0.03                           | 0.57 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.04             | 0.55 $\pm$ 0.02         | 0.57 $\pm$ 0.03          | 0.57 $\pm$ 0.04                           | **0.61 $\pm$ 0.03** |
| gpt2                               | 0.41 $\pm$ 0.02 | 0.45 $\pm$ 0.01             | 0.48 $\pm$ 0.03         | 0.52 $\pm$ 0.02          | 0.50 $\pm$ 0.03                           | 0.56 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.50 $\pm$ 0.04 | 0.54 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.59 $\pm$ 0.03          | 0.57 $\pm$ 0.01                           | 0.59 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.35 $\pm$ 0.04 | 0.38 $\pm$ 0.06             | 0.44 $\pm$ 0.02         | 0.45 $\pm$ 0.07          | 0.45 $\pm$ 0.09                           | 0.51 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.28 $\pm$ 0.04 | 0.36 $\pm$ 0.04             | 0.36 $\pm$ 0.02         | 0.35 $\pm$ 0.02          | 0.41 $\pm$ 0.06                           | 0.42 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.40 $\pm$ 0.00 | 0.48 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.51 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.54 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.39 $\pm$ 0.01 | 0.44 $\pm$ 0.04             | 0.46 $\pm$ 0.03         | 0.50 $\pm$ 0.06          | 0.46 $\pm$ 0.04                           | 0.53 $\pm$ 0.00     |
| facebook-mbart-large-50            | 0.42 $\pm$ 0.01 | 0.47 $\pm$ 0.06             | 0.49 $\pm$ 0.02         | 0.51 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.55 $\pm$ 0.04     |
| gpt2                               | 0.37 $\pm$ 0.03 | 0.40 $\pm$ 0.01             | 0.45 $\pm$ 0.03         | 0.49 $\pm$ 0.03          | 0.47 $\pm$ 0.00                           | 0.52 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.43 $\pm$ 0.05 | 0.49 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.54 $\pm$ 0.02          | 0.54 $\pm$ 0.02                           | **0.57 $\pm$ 0.04** |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.53 $\pm$ 0.03 | 0.58 $\pm$ 0.03             | 0.58 $\pm$ 0.05         | 0.63 $\pm$ 0.05          | 0.63 $\pm$ 0.03                           | 0.66 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.47 $\pm$ 0.05 | 0.49 $\pm$ 0.01             | 0.55 $\pm$ 0.01         | 0.57 $\pm$ 0.01          | 0.59 $\pm$ 0.04                           | 0.64 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.57 $\pm$ 0.02 | 0.59 $\pm$ 0.04             | 0.61 $\pm$ 0.04         | 0.62 $\pm$ 0.05          | 0.62 $\pm$ 0.07                           | 0.65 $\pm$ 0.06     |
| distilbert-base-multilingual-cased | 0.54 $\pm$ 0.02 | 0.57 $\pm$ 0.05             | 0.58 $\pm$ 0.03         | 0.62 $\pm$ 0.03          | 0.60 $\pm$ 0.04                           | 0.62 $\pm$ 0.05     |
| facebook-mbart-large-50            | 0.57 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.03          | 0.65 $\pm$ 0.08                           | **0.68 $\pm$ 0.05** |
| gpt2                               | 0.47 $\pm$ 0.01 | 0.52 $\pm$ 0.02             | 0.52 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.06                           | 0.61 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.58 $\pm$ 0.03 | 0.59 $\pm$ 0.03             | 0.61 $\pm$ 0.02         | 0.64 $\pm$ 0.05          | 0.61 $\pm$ 0.04                           | 0.61 $\pm$ 0.05     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.61 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.02          | 0.68 $\pm$ 0.03                           | 0.71 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.63 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.66 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.02                           | 0.71 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.69 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.70 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.65 $\pm$ 0.00 | 0.68 $\pm$ 0.03             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.73 $\pm$ 0.02** |
| gpt2                               | 0.61 $\pm$ 0.01 | 0.63 $\pm$ 0.01             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | 0.66 $\pm$ 0.02                           | 0.70 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.66 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.01         | 0.71 $\pm$ 0.02          | 0.70 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.04             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.03          | 0.06 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.04 $\pm$ 0.01             | 0.04 $\pm$ 0.04         | 0.01 $\pm$ 0.01          | 0.05 $\pm$ 0.01                           | 0.08 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.06 $\pm$ 0.01         | 0.09 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.04     |
| distilbert-base-multilingual-cased | 0.04 $\pm$ 0.02 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.02         | 0.06 $\pm$ 0.04          | 0.08 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.04             | 0.05 $\pm$ 0.02         | 0.09 $\pm$ 0.02          | 0.08 $\pm$ 0.03                           | **0.10 $\pm$ 0.02** |
| gpt2                               | 0.02 $\pm$ 0.02 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.03 $\pm$ 0.00                           | 0.04 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.06 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.07 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.60 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.63 $\pm$ 0.03                           | 0.64 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.04 | 0.47 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.51 $\pm$ 0.06          | 0.55 $\pm$ 0.03                           | 0.59 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.55 $\pm$ 0.02 | 0.59 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.03                           | 0.63 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.54 $\pm$ 0.03 | 0.58 $\pm$ 0.04             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.04 | 0.60 $\pm$ 0.01             | 0.63 $\pm$ 0.03         | 0.66 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | **0.68 $\pm$ 0.02** |
| gpt2                               | 0.49 $\pm$ 0.01 | 0.55 $\pm$ 0.05             | 0.57 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.59 $\pm$ 0.02                           | 0.62 $\pm$ 0.04     |
| xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.59 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.65 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.67 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.03 | 0.49 $\pm$ 0.02             | 0.58 $\pm$ 0.02         | 0.53 $\pm$ 0.04          | 0.58 $\pm$ 0.05                           | 0.62 $\pm$ 0.04     |
| EleutherAI-gpt-neo-125M            | 0.34 $\pm$ 0.05 | 0.42 $\pm$ 0.04             | 0.50 $\pm$ 0.02         | 0.44 $\pm$ 0.06          | 0.52 $\pm$ 0.01                           | 0.56 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.50 $\pm$ 0.04 | 0.56 $\pm$ 0.04             | 0.56 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.62 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.05         | 0.55 $\pm$ 0.03          | 0.56 $\pm$ 0.05                           | 0.62 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.59 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.61 $\pm$ 0.03                           | 0.64 $\pm$ 0.05     |
| gpt2                               | 0.48 $\pm$ 0.03 | 0.52 $\pm$ 0.01             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.03          | 0.59 $\pm$ 0.06                           | 0.63 $\pm$ 0.04     |
| xlm-roberta-large                  | 0.54 $\pm$ 0.02 | 0.56 $\pm$ 0.05             | 0.60 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.64 $\pm$ 0.03                           | **0.73 $\pm$ 0.05** |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.56 $\pm$ 0.10 | 0.63 $\pm$ 0.05             | 0.63 $\pm$ 0.06         | 0.64 $\pm$ 0.06          | 0.69 $\pm$ 0.03                           | 0.65 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.03             | 0.60 $\pm$ 0.06         | 0.61 $\pm$ 0.07          | 0.59 $\pm$ 0.05                           | 0.63 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.61 $\pm$ 0.07 | 0.63 $\pm$ 0.03             | 0.67 $\pm$ 0.05         | 0.69 $\pm$ 0.04          | 0.67 $\pm$ 0.06                           | 0.66 $\pm$ 0.08     |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.64 $\pm$ 0.06             | 0.68 $\pm$ 0.07         | 0.67 $\pm$ 0.08          | 0.67 $\pm$ 0.08                           | 0.65 $\pm$ 0.04     |
| facebook-mbart-large-50            | 0.61 $\pm$ 0.06 | 0.67 $\pm$ 0.04             | 0.67 $\pm$ 0.07         | 0.70 $\pm$ 0.02          | 0.69 $\pm$ 0.05                           | **0.71 $\pm$ 0.02** |
| gpt2                               | 0.51 $\pm$ 0.07 | 0.59 $\pm$ 0.10             | 0.59 $\pm$ 0.05         | 0.58 $\pm$ 0.06          | 0.61 $\pm$ 0.06                           | 0.61 $\pm$ 0.05     |
| xlm-roberta-large                  | 0.62 $\pm$ 0.11 | 0.64 $\pm$ 0.03             | 0.67 $\pm$ 0.04         | 0.68 $\pm$ 0.05          | 0.68 $\pm$ 0.06                           | 0.63 $\pm$ 0.01     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.69 $\pm$ 0.01         | 0.69 $\pm$ 0.02          | 0.71 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.57 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.64 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.69 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.01 | 0.69 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.72 $\pm$ 0.01          | 0.73 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.00             | 0.72 $\pm$ 0.01         | 0.74 $\pm$ 0.01          | 0.73 $\pm$ 0.00                           | **0.75 $\pm$ 0.01** |
| gpt2                               | 0.61 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.68 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.68 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | 0.74 $\pm$ 0.00     |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.01 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.05 $\pm$ 0.02                           | 0.05 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.02 $\pm$ 0.00         | 0.02 $\pm$ 0.01          | 0.03 $\pm$ 0.01                           | 0.04 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.04 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.06 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.05 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.03 $\pm$ 0.02 | 0.04 $\pm$ 0.01             | 0.05 $\pm$ 0.03         | 0.04 $\pm$ 0.01          | 0.05 $\pm$ 0.02                           | 0.04 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.07 $\pm$ 0.03          | 0.05 $\pm$ 0.03                           | **0.11 $\pm$ 0.03** |
| gpt2                               | 0.00 $\pm$ 0.00 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.03         | 0.04 $\pm$ 0.06          | 0.02 $\pm$ 0.02                           | 0.03 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.07 $\pm$ 0.04          | 0.06 $\pm$ 0.03                           | 0.06 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.21 $\pm$ 0.02 | 0.25 $\pm$ 0.03             | 0.38 $\pm$ 0.03         | 0.37 $\pm$ 0.04          | 0.41 $\pm$ 0.03                           | 0.41 $\pm$ 0.04     |
| EleutherAI-gpt-neo-125M            | 0.22 $\pm$ 0.01 | 0.22 $\pm$ 0.04             | 0.21 $\pm$ 0.02         | 0.22 $\pm$ 0.02          | 0.21 $\pm$ 0.03                           | 0.21 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.39 $\pm$ 0.01 | 0.45 $\pm$ 0.02             | 0.50 $\pm$ 0.05         | 0.51 $\pm$ 0.06          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.31 $\pm$ 0.02 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.03         | 0.46 $\pm$ 0.06          | 0.47 $\pm$ 0.05                           | 0.45 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.40 $\pm$ 0.04 | 0.50 $\pm$ 0.03             | 0.51 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.51 $\pm$ 0.01                           | **0.55 $\pm$ 0.02** |
| gpt2                               | 0.16 $\pm$ 0.08 | 0.14 $\pm$ 0.05             | 0.07 $\pm$ 0.07         | 0.14 $\pm$ 0.08          | 0.16 $\pm$ 0.06                           | 0.15 $\pm$ 0.11     |
| xlm-roberta-large                  | 0.47 $\pm$ 0.03 | 0.53 $\pm$ 0.04             | 0.53 $\pm$ 0.03         | **0.55 $\pm$ 0.01**      | 0.52 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.14 $\pm$ 0.01 | 0.16 $\pm$ 0.02             | 0.31 $\pm$ 0.05         | 0.29 $\pm$ 0.05          | 0.32 $\pm$ 0.02                           | 0.35 $\pm$ 0.06 |
| EleutherAI-gpt-neo-125M            | 0.16 $\pm$ 0.01 | 0.17 $\pm$ 0.03             | 0.15 $\pm$ 0.02         | 0.16 $\pm$ 0.01          | 0.15 $\pm$ 0.02                           | 0.15 $\pm$ 0.04 |
| bert-base-multilingual-cased       | 0.31 $\pm$ 0.03 | 0.40 $\pm$ 0.05             | 0.44 $\pm$ 0.05         | 0.45 $\pm$ 0.10          | 0.45 $\pm$ 0.04                           | 0.49 $\pm$ 0.02 |
| distilbert-base-multilingual-cased | 0.23 $\pm$ 0.01 | 0.33 $\pm$ 0.02             | 0.35 $\pm$ 0.04         | 0.39 $\pm$ 0.06          | 0.39 $\pm$ 0.06                           | 0.38 $\pm$ 0.03 |
| facebook-mbart-large-50            | 0.31 $\pm$ 0.04 | 0.42 $\pm$ 0.03             | 0.44 $\pm$ 0.05         | 0.47 $\pm$ 0.06          | 0.44 $\pm$ 0.03                           | 0.48 $\pm$ 0.01 |
| gpt2                               | 0.11 $\pm$ 0.06 | 0.09 $\pm$ 0.04             | 0.04 $\pm$ 0.05         | 0.09 $\pm$ 0.06          | 0.10 $\pm$ 0.04                           | 0.10 $\pm$ 0.08 |
| xlm-roberta-large                  | 0.40 $\pm$ 0.03 | 0.47 $\pm$ 0.05             | 0.48 $\pm$ 0.06         | **0.52 $\pm$ 0.03**      | 0.46 $\pm$ 0.05                           | 0.50 $\pm$ 0.07 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.43 $\pm$ 0.06 | 0.59 $\pm$ 0.14             | 0.50 $\pm$ 0.04         | 0.52 $\pm$ 0.09          | 0.58 $\pm$ 0.15                           | 0.52 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.03 | 0.33 $\pm$ 0.08             | 0.36 $\pm$ 0.05         | 0.37 $\pm$ 0.03          | 0.37 $\pm$ 0.10                           | 0.40 $\pm$ 0.11     |
| bert-base-multilingual-cased       | 0.53 $\pm$ 0.02 | 0.53 $\pm$ 0.05             | 0.59 $\pm$ 0.04         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.04                           | 0.59 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.49 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.01         | 0.57 $\pm$ 0.04          | 0.60 $\pm$ 0.02                           | 0.55 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.06 | 0.60 $\pm$ 0.03             | 0.61 $\pm$ 0.04         | 0.63 $\pm$ 0.04          | 0.62 $\pm$ 0.02                           | **0.66 $\pm$ 0.07** |
| gpt2                               | 0.30 $\pm$ 0.03 | 0.27 $\pm$ 0.09             | 0.25 $\pm$ 0.04         | 0.37 $\pm$ 0.02          | 0.41 $\pm$ 0.00                           | 0.46 $\pm$ 0.15     |
| xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.61 $\pm$ 0.01             | 0.61 $\pm$ 0.06         | 0.58 $\pm$ 0.03          | 0.58 $\pm$ 0.02                           | 0.59 $\pm$ 0.08     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.57 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.64 $\pm$ 0.02 |
| EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.01 | 0.55 $\pm$ 0.02             | 0.55 $\pm$ 0.01         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.02                           | 0.55 $\pm$ 0.00 |
| bert-base-multilingual-cased       | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.69 $\pm$ 0.03         | 0.69 $\pm$ 0.04          | 0.69 $\pm$ 0.02                           | 0.71 $\pm$ 0.01 |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.01             | 0.65 $\pm$ 0.01         | 0.66 $\pm$ 0.03          | 0.67 $\pm$ 0.03                           | 0.66 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.63 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.71 $\pm$ 0.03          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.01 |
| gpt2                               | 0.53 $\pm$ 0.02 | 0.52 $\pm$ 0.02             | 0.51 $\pm$ 0.01         | 0.53 $\pm$ 0.02          | 0.54 $\pm$ 0.01                           | 0.53 $\pm$ 0.03 |
| xlm-roberta-large                  | 0.67 $\pm$ 0.01 | 0.70 $\pm$ 0.03             | 0.71 $\pm$ 0.02         | **0.72 $\pm$ 0.01**      | 0.70 $\pm$ 0.03                           | 0.71 $\pm$ 0.02 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.03 | 0.06 $\pm$ 0.03             | 0.10 $\pm$ 0.01         | 0.08 $\pm$ 0.03          | 0.13 $\pm$ 0.05                           | 0.06 $\pm$ 0.03 |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.03 | 0.04 $\pm$ 0.05             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.04          | 0.03 $\pm$ 0.03                           | 0.03 $\pm$ 0.05 |
| bert-base-multilingual-cased       | 0.09 $\pm$ 0.03 | 0.12 $\pm$ 0.04             | **0.17 $\pm$ 0.03**     | 0.15 $\pm$ 0.08          | **0.17 $\pm$ 0.03**                       | 0.14 $\pm$ 0.05 |
| distilbert-base-multilingual-cased | 0.05 $\pm$ 0.01 | 0.11 $\pm$ 0.06             | 0.08 $\pm$ 0.03         | 0.13 $\pm$ 0.03          | 0.13 $\pm$ 0.07                           | 0.12 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.11 $\pm$ 0.03 | 0.16 $\pm$ 0.03             | 0.14 $\pm$ 0.03         | **0.17 $\pm$ 0.05**      | 0.15 $\pm$ 0.03                           | 0.13 $\pm$ 0.03 |
| gpt2                               | 0.02 $\pm$ 0.02 | 0.03 $\pm$ 0.02             | 0.02 $\pm$ 0.02         | 0.02 $\pm$ 0.00          | 0.02 $\pm$ 0.01                           | 0.02 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.12 $\pm$ 0.05 | **0.17 $\pm$ 0.04**         | 0.14 $\pm$ 0.06         | 0.14 $\pm$ 0.08          | 0.13 $\pm$ 0.03                           | 0.14 $\pm$ 0.06 |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.62 $\pm$ 0.01 | 0.67 $\pm$ 0.01             | 0.68 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.68 $\pm$ 0.02                           | 0.70 $\pm$ 0.00     |
| en         | EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.03 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.01          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.02     |
| en         | bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.69 $\pm$ 0.02          | 0.69 $\pm$ 0.02                           | 0.70 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.61 $\pm$ 0.03 | 0.66 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.68 $\pm$ 0.03          | 0.67 $\pm$ 0.01                           | 0.69 $\pm$ 0.02     |
| en         | facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | **0.71 $\pm$ 0.02**                       | 0.69 $\pm$ 0.02     |
| en         | gpt2                               | 0.63 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.67 $\pm$ 0.02          | 0.68 $\pm$ 0.02                           | 0.69 $\pm$ 0.02     |
| en         | xlm-roberta-large                  | 0.66 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | **0.71 $\pm$ 0.02**      | **0.71 $\pm$ 0.01**                       | **0.71 $\pm$ 0.02** |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.04 | 0.47 $\pm$ 0.01             | 0.50 $\pm$ 0.04         | 0.50 $\pm$ 0.01          | 0.52 $\pm$ 0.01                           | 0.56 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-125M            | 0.31 $\pm$ 0.01 | 0.38 $\pm$ 0.03             | 0.39 $\pm$ 0.03         | 0.39 $\pm$ 0.01          | 0.44 $\pm$ 0.03                           | 0.47 $\pm$ 0.04     |
| fr         | bert-base-multilingual-cased       | 0.47 $\pm$ 0.04 | 0.52 $\pm$ 0.03             | 0.53 $\pm$ 0.02         | 0.55 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.58 $\pm$ 0.01     |
| fr         | distilbert-base-multilingual-cased | 0.44 $\pm$ 0.05 | 0.50 $\pm$ 0.02             | 0.53 $\pm$ 0.03         | 0.53 $\pm$ 0.02          | 0.52 $\pm$ 0.02                           | 0.54 $\pm$ 0.02     |
| fr         | facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.53 $\pm$ 0.02             | 0.56 $\pm$ 0.01         | 0.57 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | **0.60 $\pm$ 0.01** |
| fr         | gpt2                               | 0.40 $\pm$ 0.07 | 0.43 $\pm$ 0.02             | 0.47 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.49 $\pm$ 0.03                           | 0.53 $\pm$ 0.01     |
| fr         | xlm-roberta-large                  | 0.51 $\pm$ 0.02 | 0.56 $\pm$ 0.03             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.03          | 0.58 $\pm$ 0.01                           | 0.57 $\pm$ 0.04     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.00 | 0.56 $\pm$ 0.02             | 0.56 $\pm$ 0.02         | 0.58 $\pm$ 0.00          | 0.57 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| ge         | EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.01 | 0.46 $\pm$ 0.01             | 0.48 $\pm$ 0.00         | 0.50 $\pm$ 0.02          | 0.51 $\pm$ 0.04                           | 0.56 $\pm$ 0.01     |
| ge         | bert-base-multilingual-cased       | 0.52 $\pm$ 0.03 | 0.58 $\pm$ 0.02             | 0.59 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.63 $\pm$ 0.04     |
| ge         | distilbert-base-multilingual-cased | 0.50 $\pm$ 0.02 | 0.56 $\pm$ 0.00             | 0.55 $\pm$ 0.03         | 0.59 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | 0.61 $\pm$ 0.02     |
| ge         | facebook-mbart-large-50            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.02             | 0.61 $\pm$ 0.02         | 0.63 $\pm$ 0.00          | 0.64 $\pm$ 0.02                           | **0.65 $\pm$ 0.02** |
| ge         | gpt2                               | 0.47 $\pm$ 0.03 | 0.52 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.55 $\pm$ 0.03          | 0.53 $\pm$ 0.01                           | 0.59 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.55 $\pm$ 0.02 | 0.61 $\pm$ 0.00             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.01          | **0.65 $\pm$ 0.01**                       | **0.65 $\pm$ 0.02** |
| it         | EleutherAI-gpt-neo-1.3B            | 0.42 $\pm$ 0.02 | 0.46 $\pm$ 0.05             | 0.50 $\pm$ 0.03         | 0.52 $\pm$ 0.04          | 0.52 $\pm$ 0.06                           | 0.58 $\pm$ 0.01     |
| it         | EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.02         | 0.44 $\pm$ 0.01          | 0.48 $\pm$ 0.04                           | 0.51 $\pm$ 0.02     |
| it         | bert-base-multilingual-cased       | 0.47 $\pm$ 0.01 | 0.53 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.56 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.59 $\pm$ 0.03     |
| it         | distilbert-base-multilingual-cased | 0.45 $\pm$ 0.01 | 0.50 $\pm$ 0.05             | 0.52 $\pm$ 0.03         | 0.56 $\pm$ 0.05          | 0.52 $\pm$ 0.03                           | 0.57 $\pm$ 0.02     |
| it         | facebook-mbart-large-50            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.04             | 0.55 $\pm$ 0.02         | 0.57 $\pm$ 0.03          | 0.57 $\pm$ 0.04                           | **0.61 $\pm$ 0.03** |
| it         | gpt2                               | 0.41 $\pm$ 0.02 | 0.45 $\pm$ 0.01             | 0.48 $\pm$ 0.03         | 0.52 $\pm$ 0.02          | 0.50 $\pm$ 0.03                           | 0.56 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.50 $\pm$ 0.04 | 0.54 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.59 $\pm$ 0.03          | 0.57 $\pm$ 0.01                           | 0.59 $\pm$ 0.02     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.60 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.63 $\pm$ 0.03                           | 0.64 $\pm$ 0.03     |
| po         | EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.04 | 0.47 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.51 $\pm$ 0.06          | 0.55 $\pm$ 0.03                           | 0.59 $\pm$ 0.03     |
| po         | bert-base-multilingual-cased       | 0.55 $\pm$ 0.02 | 0.59 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.03                           | 0.63 $\pm$ 0.02     |
| po         | distilbert-base-multilingual-cased | 0.54 $\pm$ 0.03 | 0.58 $\pm$ 0.04             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| po         | facebook-mbart-large-50            | 0.55 $\pm$ 0.04 | 0.60 $\pm$ 0.01             | 0.63 $\pm$ 0.03         | 0.66 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | **0.68 $\pm$ 0.02** |
| po         | gpt2                               | 0.49 $\pm$ 0.01 | 0.55 $\pm$ 0.05             | 0.57 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.59 $\pm$ 0.02                           | 0.62 $\pm$ 0.04     |
| po         | xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.59 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.65 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.67 $\pm$ 0.02     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.21 $\pm$ 0.02 | 0.25 $\pm$ 0.03             | 0.38 $\pm$ 0.03         | 0.37 $\pm$ 0.04          | 0.41 $\pm$ 0.03                           | 0.41 $\pm$ 0.04     |
| ru         | EleutherAI-gpt-neo-125M            | 0.22 $\pm$ 0.01 | 0.22 $\pm$ 0.04             | 0.21 $\pm$ 0.02         | 0.22 $\pm$ 0.02          | 0.21 $\pm$ 0.03                           | 0.21 $\pm$ 0.02     |
| ru         | bert-base-multilingual-cased       | 0.39 $\pm$ 0.01 | 0.45 $\pm$ 0.02             | 0.50 $\pm$ 0.05         | 0.51 $\pm$ 0.06          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |
| ru         | distilbert-base-multilingual-cased | 0.31 $\pm$ 0.02 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.03         | 0.46 $\pm$ 0.06          | 0.47 $\pm$ 0.05                           | 0.45 $\pm$ 0.02     |
| ru         | facebook-mbart-large-50            | 0.40 $\pm$ 0.04 | 0.50 $\pm$ 0.03             | 0.51 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.51 $\pm$ 0.01                           | **0.55 $\pm$ 0.02** |
| ru         | gpt2                               | 0.16 $\pm$ 0.08 | 0.14 $\pm$ 0.05             | 0.07 $\pm$ 0.07         | 0.14 $\pm$ 0.08          | 0.16 $\pm$ 0.06                           | 0.15 $\pm$ 0.11     |
| ru         | xlm-roberta-large                  | 0.47 $\pm$ 0.03 | 0.53 $\pm$ 0.04             | 0.53 $\pm$ 0.03         | **0.55 $\pm$ 0.01**      | 0.52 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.03                           | 0.63 $\pm$ 0.02     |
| en         | EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.01             | 0.55 $\pm$ 0.03         | 0.55 $\pm$ 0.00          | 0.59 $\pm$ 0.03                           | 0.62 $\pm$ 0.04     |
| en         | bert-base-multilingual-cased       | 0.58 $\pm$ 0.01 | 0.63 $\pm$ 0.03             | 0.64 $\pm$ 0.02         | 0.65 $\pm$ 0.01          | 0.64 $\pm$ 0.03                           | 0.66 $\pm$ 0.03     |
| en         | distilbert-base-multilingual-cased | 0.56 $\pm$ 0.03 | 0.59 $\pm$ 0.02             | 0.62 $\pm$ 0.02         | 0.63 $\pm$ 0.03          | 0.60 $\pm$ 0.01                           | 0.64 $\pm$ 0.01     |
| en         | facebook-mbart-large-50            | 0.60 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.66 $\pm$ 0.02         | 0.66 $\pm$ 0.01          | 0.66 $\pm$ 0.03                           | 0.65 $\pm$ 0.02     |
| en         | gpt2                               | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.04          | 0.65 $\pm$ 0.04                           | 0.66 $\pm$ 0.03     |
| en         | xlm-roberta-large                  | 0.60 $\pm$ 0.01 | 0.66 $\pm$ 0.01             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | **0.68 $\pm$ 0.02**                       | **0.68 $\pm$ 0.01** |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.32 $\pm$ 0.05 | 0.37 $\pm$ 0.01             | 0.45 $\pm$ 0.04         | 0.42 $\pm$ 0.03          | 0.44 $\pm$ 0.04                           | 0.50 $\pm$ 0.05     |
| fr         | EleutherAI-gpt-neo-125M            | 0.24 $\pm$ 0.01 | 0.32 $\pm$ 0.04             | 0.32 $\pm$ 0.04         | 0.31 $\pm$ 0.03          | 0.38 $\pm$ 0.04                           | 0.38 $\pm$ 0.07     |
| fr         | bert-base-multilingual-cased       | 0.38 $\pm$ 0.04 | 0.46 $\pm$ 0.03             | 0.49 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.03     |
| fr         | distilbert-base-multilingual-cased | 0.37 $\pm$ 0.06 | 0.44 $\pm$ 0.02             | 0.48 $\pm$ 0.04         | 0.48 $\pm$ 0.02          | 0.46 $\pm$ 0.03                           | 0.49 $\pm$ 0.02     |
| fr         | facebook-mbart-large-50            | 0.44 $\pm$ 0.03 | 0.47 $\pm$ 0.02             | 0.51 $\pm$ 0.02         | 0.52 $\pm$ 0.03          | 0.53 $\pm$ 0.03                           | **0.55 $\pm$ 0.04** |
| fr         | gpt2                               | 0.36 $\pm$ 0.06 | 0.36 $\pm$ 0.02             | 0.43 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.43 $\pm$ 0.06                           | 0.48 $\pm$ 0.01     |
| fr         | xlm-roberta-large                  | 0.44 $\pm$ 0.02 | 0.51 $\pm$ 0.02             | 0.52 $\pm$ 0.04         | **0.55 $\pm$ 0.04**      | **0.55 $\pm$ 0.02**                       | 0.54 $\pm$ 0.07     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.03 | 0.49 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.59 $\pm$ 0.04     |
| ge         | EleutherAI-gpt-neo-125M            | 0.33 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.41 $\pm$ 0.01         | 0.43 $\pm$ 0.02          | 0.45 $\pm$ 0.03                           | 0.51 $\pm$ 0.02     |
| ge         | bert-base-multilingual-cased       | 0.46 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.54 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.54 $\pm$ 0.02                           | 0.61 $\pm$ 0.06     |
| ge         | distilbert-base-multilingual-cased | 0.44 $\pm$ 0.01 | 0.51 $\pm$ 0.02             | 0.49 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.60 $\pm$ 0.05     |
| ge         | facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.55 $\pm$ 0.02             | 0.56 $\pm$ 0.03         | 0.59 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.60 $\pm$ 0.05     |
| ge         | gpt2                               | 0.46 $\pm$ 0.05 | 0.49 $\pm$ 0.05             | 0.50 $\pm$ 0.05         | 0.52 $\pm$ 0.05          | 0.52 $\pm$ 0.04                           | 0.56 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.50 $\pm$ 0.02 | 0.57 $\pm$ 0.02             | 0.57 $\pm$ 0.02         | 0.58 $\pm$ 0.01          | 0.63 $\pm$ 0.07                           | **0.66 $\pm$ 0.04** |
| it         | EleutherAI-gpt-neo-1.3B            | 0.35 $\pm$ 0.04 | 0.38 $\pm$ 0.06             | 0.44 $\pm$ 0.02         | 0.45 $\pm$ 0.07          | 0.45 $\pm$ 0.09                           | 0.51 $\pm$ 0.02     |
| it         | EleutherAI-gpt-neo-125M            | 0.28 $\pm$ 0.04 | 0.36 $\pm$ 0.04             | 0.36 $\pm$ 0.02         | 0.35 $\pm$ 0.02          | 0.41 $\pm$ 0.06                           | 0.42 $\pm$ 0.03     |
| it         | bert-base-multilingual-cased       | 0.40 $\pm$ 0.00 | 0.48 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.51 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.54 $\pm$ 0.02     |
| it         | distilbert-base-multilingual-cased | 0.39 $\pm$ 0.01 | 0.44 $\pm$ 0.04             | 0.46 $\pm$ 0.03         | 0.50 $\pm$ 0.06          | 0.46 $\pm$ 0.04                           | 0.53 $\pm$ 0.00     |
| it         | facebook-mbart-large-50            | 0.42 $\pm$ 0.01 | 0.47 $\pm$ 0.06             | 0.49 $\pm$ 0.02         | 0.51 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.55 $\pm$ 0.04     |
| it         | gpt2                               | 0.37 $\pm$ 0.03 | 0.40 $\pm$ 0.01             | 0.45 $\pm$ 0.03         | 0.49 $\pm$ 0.03          | 0.47 $\pm$ 0.00                           | 0.52 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.43 $\pm$ 0.05 | 0.49 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.54 $\pm$ 0.02          | 0.54 $\pm$ 0.02                           | **0.57 $\pm$ 0.04** |
| po         | EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.03 | 0.49 $\pm$ 0.02             | 0.58 $\pm$ 0.02         | 0.53 $\pm$ 0.04          | 0.58 $\pm$ 0.05                           | 0.62 $\pm$ 0.04     |
| po         | EleutherAI-gpt-neo-125M            | 0.34 $\pm$ 0.05 | 0.42 $\pm$ 0.04             | 0.50 $\pm$ 0.02         | 0.44 $\pm$ 0.06          | 0.52 $\pm$ 0.01                           | 0.56 $\pm$ 0.03     |
| po         | bert-base-multilingual-cased       | 0.50 $\pm$ 0.04 | 0.56 $\pm$ 0.04             | 0.56 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.62 $\pm$ 0.03     |
| po         | distilbert-base-multilingual-cased | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.05         | 0.55 $\pm$ 0.03          | 0.56 $\pm$ 0.05                           | 0.62 $\pm$ 0.02     |
| po         | facebook-mbart-large-50            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.59 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.61 $\pm$ 0.03                           | 0.64 $\pm$ 0.05     |
| po         | gpt2                               | 0.48 $\pm$ 0.03 | 0.52 $\pm$ 0.01             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.03          | 0.59 $\pm$ 0.06                           | 0.63 $\pm$ 0.04     |
| po         | xlm-roberta-large                  | 0.54 $\pm$ 0.02 | 0.56 $\pm$ 0.05             | 0.60 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.64 $\pm$ 0.03                           | **0.73 $\pm$ 0.05** |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.14 $\pm$ 0.01 | 0.16 $\pm$ 0.02             | 0.31 $\pm$ 0.05         | 0.29 $\pm$ 0.05          | 0.32 $\pm$ 0.02                           | 0.35 $\pm$ 0.06     |
| ru         | EleutherAI-gpt-neo-125M            | 0.16 $\pm$ 0.01 | 0.17 $\pm$ 0.03             | 0.15 $\pm$ 0.02         | 0.16 $\pm$ 0.01          | 0.15 $\pm$ 0.02                           | 0.15 $\pm$ 0.04     |
| ru         | bert-base-multilingual-cased       | 0.31 $\pm$ 0.03 | 0.40 $\pm$ 0.05             | 0.44 $\pm$ 0.05         | 0.45 $\pm$ 0.10          | 0.45 $\pm$ 0.04                           | 0.49 $\pm$ 0.02     |
| ru         | distilbert-base-multilingual-cased | 0.23 $\pm$ 0.01 | 0.33 $\pm$ 0.02             | 0.35 $\pm$ 0.04         | 0.39 $\pm$ 0.06          | 0.39 $\pm$ 0.06                           | 0.38 $\pm$ 0.03     |
| ru         | facebook-mbart-large-50            | 0.31 $\pm$ 0.04 | 0.42 $\pm$ 0.03             | 0.44 $\pm$ 0.05         | 0.47 $\pm$ 0.06          | 0.44 $\pm$ 0.03                           | 0.48 $\pm$ 0.01     |
| ru         | gpt2                               | 0.11 $\pm$ 0.06 | 0.09 $\pm$ 0.04             | 0.04 $\pm$ 0.05         | 0.09 $\pm$ 0.06          | 0.10 $\pm$ 0.04                           | 0.10 $\pm$ 0.08     |
| ru         | xlm-roberta-large                  | 0.40 $\pm$ 0.03 | 0.47 $\pm$ 0.05             | 0.48 $\pm$ 0.06         | **0.52 $\pm$ 0.03**      | 0.46 $\pm$ 0.05                           | 0.50 $\pm$ 0.07     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.72 $\pm$ 0.04 | 0.76 $\pm$ 0.02             | 0.74 $\pm$ 0.02         | 0.76 $\pm$ 0.01          | **0.77 $\pm$ 0.03**                       | **0.77 $\pm$ 0.03** |
| en         | EleutherAI-gpt-neo-125M            | 0.63 $\pm$ 0.04 | 0.67 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | 0.71 $\pm$ 0.03          | 0.73 $\pm$ 0.04                           | 0.75 $\pm$ 0.03     |
| en         | bert-base-multilingual-cased       | 0.70 $\pm$ 0.03 | 0.74 $\pm$ 0.03             | 0.76 $\pm$ 0.02         | 0.74 $\pm$ 0.04          | 0.75 $\pm$ 0.03                           | 0.75 $\pm$ 0.02     |
| en         | distilbert-base-multilingual-cased | 0.68 $\pm$ 0.05 | 0.74 $\pm$ 0.03             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.03          | 0.75 $\pm$ 0.02                           | 0.75 $\pm$ 0.03     |
| en         | facebook-mbart-large-50            | 0.73 $\pm$ 0.01 | 0.76 $\pm$ 0.01             | 0.74 $\pm$ 0.03         | 0.76 $\pm$ 0.01          | **0.77 $\pm$ 0.02**                       | 0.74 $\pm$ 0.02     |
| en         | gpt2                               | 0.67 $\pm$ 0.06 | 0.72 $\pm$ 0.02             | 0.70 $\pm$ 0.03         | 0.67 $\pm$ 0.03          | 0.71 $\pm$ 0.05                           | 0.72 $\pm$ 0.03     |
| en         | xlm-roberta-large                  | 0.73 $\pm$ 0.03 | 0.75 $\pm$ 0.03             | 0.75 $\pm$ 0.01         | 0.76 $\pm$ 0.03          | 0.74 $\pm$ 0.01                           | 0.74 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.03 | 0.63 $\pm$ 0.04             | 0.57 $\pm$ 0.04         | 0.62 $\pm$ 0.06          | 0.65 $\pm$ 0.06                           | 0.64 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-125M            | 0.42 $\pm$ 0.05 | 0.48 $\pm$ 0.03             | 0.51 $\pm$ 0.02         | 0.55 $\pm$ 0.06          | 0.53 $\pm$ 0.06                           | 0.64 $\pm$ 0.04     |
| fr         | bert-base-multilingual-cased       | 0.59 $\pm$ 0.05 | 0.60 $\pm$ 0.02             | 0.59 $\pm$ 0.03         | 0.61 $\pm$ 0.05          | 0.62 $\pm$ 0.01                           | 0.64 $\pm$ 0.03     |
| fr         | distilbert-base-multilingual-cased | 0.53 $\pm$ 0.05 | 0.59 $\pm$ 0.05             | 0.59 $\pm$ 0.03         | 0.59 $\pm$ 0.03          | 0.62 $\pm$ 0.01                           | 0.61 $\pm$ 0.00     |
| fr         | facebook-mbart-large-50            | 0.60 $\pm$ 0.06 | 0.62 $\pm$ 0.01             | 0.62 $\pm$ 0.04         | 0.63 $\pm$ 0.02          | 0.64 $\pm$ 0.03                           | **0.67 $\pm$ 0.05** |
| fr         | gpt2                               | 0.45 $\pm$ 0.10 | 0.51 $\pm$ 0.04             | 0.53 $\pm$ 0.02         | 0.57 $\pm$ 0.04          | 0.59 $\pm$ 0.02                           | 0.58 $\pm$ 0.02     |
| fr         | xlm-roberta-large                  | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.62 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.61 $\pm$ 0.01     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.57 $\pm$ 0.05 | 0.65 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.70 $\pm$ 0.06          | 0.66 $\pm$ 0.03                           | 0.68 $\pm$ 0.02     |
| ge         | EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.02 | 0.51 $\pm$ 0.04             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.02          | 0.60 $\pm$ 0.05                           | 0.62 $\pm$ 0.01     |
| ge         | bert-base-multilingual-cased       | 0.60 $\pm$ 0.02 | 0.62 $\pm$ 0.02             | 0.66 $\pm$ 0.03         | 0.68 $\pm$ 0.06          | 0.66 $\pm$ 0.01                           | 0.65 $\pm$ 0.03     |
| ge         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.63 $\pm$ 0.02             | 0.63 $\pm$ 0.04         | 0.64 $\pm$ 0.05          | 0.66 $\pm$ 0.03                           | 0.63 $\pm$ 0.01     |
| ge         | facebook-mbart-large-50            | 0.62 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.68 $\pm$ 0.00         | 0.68 $\pm$ 0.02          | 0.70 $\pm$ 0.04                           | 0.70 $\pm$ 0.02     |
| ge         | gpt2                               | 0.49 $\pm$ 0.01 | 0.56 $\pm$ 0.03             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.00          | 0.55 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| ge         | xlm-roberta-large                  | 0.62 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.67 $\pm$ 0.04         | **0.71 $\pm$ 0.01**      | 0.68 $\pm$ 0.04                           | 0.64 $\pm$ 0.06     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.53 $\pm$ 0.03 | 0.58 $\pm$ 0.03             | 0.58 $\pm$ 0.05         | 0.63 $\pm$ 0.05          | 0.63 $\pm$ 0.03                           | 0.66 $\pm$ 0.05     |
| it         | EleutherAI-gpt-neo-125M            | 0.47 $\pm$ 0.05 | 0.49 $\pm$ 0.01             | 0.55 $\pm$ 0.01         | 0.57 $\pm$ 0.01          | 0.59 $\pm$ 0.04                           | 0.64 $\pm$ 0.03     |
| it         | bert-base-multilingual-cased       | 0.57 $\pm$ 0.02 | 0.59 $\pm$ 0.04             | 0.61 $\pm$ 0.04         | 0.62 $\pm$ 0.05          | 0.62 $\pm$ 0.07                           | 0.65 $\pm$ 0.06     |
| it         | distilbert-base-multilingual-cased | 0.54 $\pm$ 0.02 | 0.57 $\pm$ 0.05             | 0.58 $\pm$ 0.03         | 0.62 $\pm$ 0.03          | 0.60 $\pm$ 0.04                           | 0.62 $\pm$ 0.05     |
| it         | facebook-mbart-large-50            | 0.57 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.03          | 0.65 $\pm$ 0.08                           | **0.68 $\pm$ 0.05** |
| it         | gpt2                               | 0.47 $\pm$ 0.01 | 0.52 $\pm$ 0.02             | 0.52 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.06                           | 0.61 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.58 $\pm$ 0.03 | 0.59 $\pm$ 0.03             | 0.61 $\pm$ 0.02         | 0.64 $\pm$ 0.05          | 0.61 $\pm$ 0.04                           | 0.61 $\pm$ 0.05     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.56 $\pm$ 0.10 | 0.63 $\pm$ 0.05             | 0.63 $\pm$ 0.06         | 0.64 $\pm$ 0.06          | 0.69 $\pm$ 0.03                           | 0.65 $\pm$ 0.03     |
| po         | EleutherAI-gpt-neo-125M            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.03             | 0.60 $\pm$ 0.06         | 0.61 $\pm$ 0.07          | 0.59 $\pm$ 0.05                           | 0.63 $\pm$ 0.03     |
| po         | bert-base-multilingual-cased       | 0.61 $\pm$ 0.07 | 0.63 $\pm$ 0.03             | 0.67 $\pm$ 0.05         | 0.69 $\pm$ 0.04          | 0.67 $\pm$ 0.06                           | 0.66 $\pm$ 0.08     |
| po         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.64 $\pm$ 0.06             | 0.68 $\pm$ 0.07         | 0.67 $\pm$ 0.08          | 0.67 $\pm$ 0.08                           | 0.65 $\pm$ 0.04     |
| po         | facebook-mbart-large-50            | 0.61 $\pm$ 0.06 | 0.67 $\pm$ 0.04             | 0.67 $\pm$ 0.07         | 0.70 $\pm$ 0.02          | 0.69 $\pm$ 0.05                           | **0.71 $\pm$ 0.02** |
| po         | gpt2                               | 0.51 $\pm$ 0.07 | 0.59 $\pm$ 0.10             | 0.59 $\pm$ 0.05         | 0.58 $\pm$ 0.06          | 0.61 $\pm$ 0.06                           | 0.61 $\pm$ 0.05     |
| po         | xlm-roberta-large                  | 0.62 $\pm$ 0.11 | 0.64 $\pm$ 0.03             | 0.67 $\pm$ 0.04         | 0.68 $\pm$ 0.05          | 0.68 $\pm$ 0.06                           | 0.63 $\pm$ 0.01     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.43 $\pm$ 0.06 | 0.59 $\pm$ 0.14             | 0.50 $\pm$ 0.04         | 0.52 $\pm$ 0.09          | 0.58 $\pm$ 0.15                           | 0.52 $\pm$ 0.05     |
| ru         | EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.03 | 0.33 $\pm$ 0.08             | 0.36 $\pm$ 0.05         | 0.37 $\pm$ 0.03          | 0.37 $\pm$ 0.10                           | 0.40 $\pm$ 0.11     |
| ru         | bert-base-multilingual-cased       | 0.53 $\pm$ 0.02 | 0.53 $\pm$ 0.05             | 0.59 $\pm$ 0.04         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.04                           | 0.59 $\pm$ 0.02     |
| ru         | distilbert-base-multilingual-cased | 0.49 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.01         | 0.57 $\pm$ 0.04          | 0.60 $\pm$ 0.02                           | 0.55 $\pm$ 0.01     |
| ru         | facebook-mbart-large-50            | 0.55 $\pm$ 0.06 | 0.60 $\pm$ 0.03             | 0.61 $\pm$ 0.04         | 0.63 $\pm$ 0.04          | 0.62 $\pm$ 0.02                           | **0.66 $\pm$ 0.07** |
| ru         | gpt2                               | 0.30 $\pm$ 0.03 | 0.27 $\pm$ 0.09             | 0.25 $\pm$ 0.04         | 0.37 $\pm$ 0.02          | 0.41 $\pm$ 0.00                           | 0.46 $\pm$ 0.15     |
| ru         | xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.61 $\pm$ 0.01             | 0.61 $\pm$ 0.06         | 0.58 $\pm$ 0.03          | 0.58 $\pm$ 0.02                           | 0.59 $\pm$ 0.08     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## roc-auc

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.73 $\pm$ 0.00 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.76 $\pm$ 0.00          | 0.77 $\pm$ 0.01                           | 0.78 $\pm$ 0.00     |
| en         | EleutherAI-gpt-neo-125M            | 0.69 $\pm$ 0.01 | 0.72 $\pm$ 0.01             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.00          | 0.75 $\pm$ 0.01                           | 0.77 $\pm$ 0.01     |
| en         | bert-base-multilingual-cased       | 0.74 $\pm$ 0.01 | 0.77 $\pm$ 0.01             | 0.78 $\pm$ 0.01         | 0.78 $\pm$ 0.01          | 0.78 $\pm$ 0.01                           | 0.78 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.73 $\pm$ 0.02 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.77 $\pm$ 0.02          | 0.76 $\pm$ 0.00                           | 0.78 $\pm$ 0.01     |
| en         | facebook-mbart-large-50            | 0.76 $\pm$ 0.01 | 0.78 $\pm$ 0.01             | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01     |
| en         | gpt2                               | 0.74 $\pm$ 0.02 | 0.77 $\pm$ 0.01             | 0.77 $\pm$ 0.00         | 0.77 $\pm$ 0.02          | 0.77 $\pm$ 0.01                           | 0.78 $\pm$ 0.01     |
| en         | xlm-roberta-large                  | 0.76 $\pm$ 0.00 | **0.79 $\pm$ 0.00**         | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | **0.79 $\pm$ 0.01** |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.68 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.01         | 0.62 $\pm$ 0.01          | 0.64 $\pm$ 0.02                           | 0.66 $\pm$ 0.02     |
| fr         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.70 $\pm$ 0.02         | 0.71 $\pm$ 0.01          | 0.71 $\pm$ 0.02                           | 0.73 $\pm$ 0.01     |
| fr         | distilbert-base-multilingual-cased | 0.64 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.70 $\pm$ 0.01     |
| fr         | facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.00         | 0.72 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |
| fr         | gpt2                               | 0.62 $\pm$ 0.04 | 0.64 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.02                           | 0.69 $\pm$ 0.01     |
| fr         | xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.01         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | 0.72 $\pm$ 0.03     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.64 $\pm$ 0.00 | 0.69 $\pm$ 0.02             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.73 $\pm$ 0.01     |
| ge         | EleutherAI-gpt-neo-125M            | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.01             | 0.64 $\pm$ 0.00         | 0.65 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.69 $\pm$ 0.00     |
| ge         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.01         | 0.73 $\pm$ 0.01          | 0.71 $\pm$ 0.00                           | 0.73 $\pm$ 0.03     |
| ge         | distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.00                           | 0.72 $\pm$ 0.02     |
| ge         | facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.01          | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.01** |
| ge         | gpt2                               | 0.62 $\pm$ 0.02 | 0.66 $\pm$ 0.03             | 0.66 $\pm$ 0.03         | 0.67 $\pm$ 0.02          | 0.66 $\pm$ 0.01                           | 0.70 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.72 $\pm$ 0.00             | 0.72 $\pm$ 0.02         | **0.74 $\pm$ 0.01**      | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.02** |
| it         | EleutherAI-gpt-neo-1.3B            | 0.61 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.02          | 0.68 $\pm$ 0.03                           | 0.71 $\pm$ 0.01     |
| it         | EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.63 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.66 $\pm$ 0.01     |
| it         | bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.02                           | 0.71 $\pm$ 0.02     |
| it         | distilbert-base-multilingual-cased | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.69 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.70 $\pm$ 0.02     |
| it         | facebook-mbart-large-50            | 0.65 $\pm$ 0.00 | 0.68 $\pm$ 0.03             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.73 $\pm$ 0.02** |
| it         | gpt2                               | 0.61 $\pm$ 0.01 | 0.63 $\pm$ 0.01             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | 0.66 $\pm$ 0.02                           | 0.70 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.66 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.01         | 0.71 $\pm$ 0.02          | 0.70 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.69 $\pm$ 0.01         | 0.69 $\pm$ 0.02          | 0.71 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| po         | EleutherAI-gpt-neo-125M            | 0.57 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.64 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.69 $\pm$ 0.01     |
| po         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.01 | 0.69 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.72 $\pm$ 0.01          | 0.73 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| po         | distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| po         | facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.00             | 0.72 $\pm$ 0.01         | 0.74 $\pm$ 0.01          | 0.73 $\pm$ 0.00                           | **0.75 $\pm$ 0.01** |
| po         | gpt2                               | 0.61 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.68 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| po         | xlm-roberta-large                  | 0.68 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | 0.74 $\pm$ 0.00     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.57 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.64 $\pm$ 0.02     |
| ru         | EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.01 | 0.55 $\pm$ 0.02             | 0.55 $\pm$ 0.01         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.02                           | 0.55 $\pm$ 0.00     |
| ru         | bert-base-multilingual-cased       | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.69 $\pm$ 0.03         | 0.69 $\pm$ 0.04          | 0.69 $\pm$ 0.02                           | 0.71 $\pm$ 0.01     |
| ru         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.01             | 0.65 $\pm$ 0.01         | 0.66 $\pm$ 0.03          | 0.67 $\pm$ 0.03                           | 0.66 $\pm$ 0.01     |
| ru         | facebook-mbart-large-50            | 0.63 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.71 $\pm$ 0.03          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.01     |
| ru         | gpt2                               | 0.53 $\pm$ 0.02 | 0.52 $\pm$ 0.02             | 0.51 $\pm$ 0.01         | 0.53 $\pm$ 0.02          | 0.54 $\pm$ 0.01                           | 0.53 $\pm$ 0.03     |
| ru         | xlm-roberta-large                  | 0.67 $\pm$ 0.01 | 0.70 $\pm$ 0.03             | 0.71 $\pm$ 0.02         | **0.72 $\pm$ 0.01**      | 0.70 $\pm$ 0.03                           | 0.71 $\pm$ 0.02     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.08 $\pm$ 0.02 | 0.12 $\pm$ 0.02             | 0.08 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.11 $\pm$ 0.02                           | 0.12 $\pm$ 0.02     |
| en         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.05 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.01          | 0.09 $\pm$ 0.01                           | 0.09 $\pm$ 0.01     |
| en         | bert-base-multilingual-cased       | 0.07 $\pm$ 0.01 | 0.10 $\pm$ 0.03             | **0.13 $\pm$ 0.03**     | 0.09 $\pm$ 0.00          | 0.10 $\pm$ 0.03                           | 0.10 $\pm$ 0.00     |
| en         | distilbert-base-multilingual-cased | 0.06 $\pm$ 0.01 | 0.09 $\pm$ 0.02             | 0.10 $\pm$ 0.02         | 0.09 $\pm$ 0.01          | 0.09 $\pm$ 0.00                           | 0.11 $\pm$ 0.02     |
| en         | facebook-mbart-large-50            | 0.07 $\pm$ 0.04 | 0.10 $\pm$ 0.03             | 0.11 $\pm$ 0.03         | 0.12 $\pm$ 0.03          | 0.12 $\pm$ 0.02                           | 0.10 $\pm$ 0.00     |
| en         | gpt2                               | 0.06 $\pm$ 0.01 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.06 $\pm$ 0.03          | 0.08 $\pm$ 0.01                           | 0.09 $\pm$ 0.01     |
| en         | xlm-roberta-large                  | 0.10 $\pm$ 0.01 | 0.12 $\pm$ 0.01             | 0.10 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.11 $\pm$ 0.02                           | 0.11 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.02 | 0.07 $\pm$ 0.01             | 0.06 $\pm$ 0.03         | 0.08 $\pm$ 0.02          | 0.09 $\pm$ 0.03                           | 0.11 $\pm$ 0.04     |
| fr         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.01 $\pm$ 0.01             | 0.03 $\pm$ 0.01         | 0.04 $\pm$ 0.01          | 0.04 $\pm$ 0.03                           | 0.08 $\pm$ 0.03     |
| fr         | bert-base-multilingual-cased       | 0.07 $\pm$ 0.02 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.09 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | 0.11 $\pm$ 0.03     |
| fr         | distilbert-base-multilingual-cased | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.06 $\pm$ 0.04          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.02     |
| fr         | facebook-mbart-large-50            | 0.08 $\pm$ 0.04 | 0.11 $\pm$ 0.02             | 0.09 $\pm$ 0.01         | 0.10 $\pm$ 0.03          | 0.11 $\pm$ 0.03                           | 0.11 $\pm$ 0.01     |
| fr         | gpt2                               | 0.03 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.06 $\pm$ 0.04         | 0.07 $\pm$ 0.02          | 0.06 $\pm$ 0.03                           | 0.08 $\pm$ 0.06     |
| fr         | xlm-roberta-large                  | 0.07 $\pm$ 0.03 | 0.07 $\pm$ 0.04             | 0.10 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | **0.12 $\pm$ 0.05**                       | 0.08 $\pm$ 0.05     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.02 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.05 $\pm$ 0.03         | 0.05 $\pm$ 0.03          | 0.03 $\pm$ 0.03                           | 0.06 $\pm$ 0.04     |
| ge         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.03 $\pm$ 0.02         | 0.02 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.03 $\pm$ 0.03     |
| ge         | bert-base-multilingual-cased       | 0.05 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.03         | 0.09 $\pm$ 0.05          | 0.06 $\pm$ 0.01                           | 0.10 $\pm$ 0.07     |
| ge         | distilbert-base-multilingual-cased | 0.02 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.05 $\pm$ 0.04         | 0.05 $\pm$ 0.03          | 0.05 $\pm$ 0.04                           | 0.04 $\pm$ 0.04     |
| ge         | facebook-mbart-large-50            | 0.05 $\pm$ 0.03 | 0.06 $\pm$ 0.04             | 0.06 $\pm$ 0.03         | 0.05 $\pm$ 0.02          | **0.11 $\pm$ 0.03**                       | 0.07 $\pm$ 0.04     |
| ge         | gpt2                               | 0.01 $\pm$ 0.01 | 0.01 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.03          | 0.03 $\pm$ 0.01                           | 0.02 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.03 $\pm$ 0.02 | 0.07 $\pm$ 0.03             | 0.09 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | 0.07 $\pm$ 0.04                           | 0.05 $\pm$ 0.03     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.04             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.03          | 0.06 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| it         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.04 $\pm$ 0.01             | 0.04 $\pm$ 0.04         | 0.01 $\pm$ 0.01          | 0.05 $\pm$ 0.01                           | 0.08 $\pm$ 0.02     |
| it         | bert-base-multilingual-cased       | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.06 $\pm$ 0.01         | 0.09 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.04     |
| it         | distilbert-base-multilingual-cased | 0.04 $\pm$ 0.02 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.02         | 0.06 $\pm$ 0.04          | 0.08 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| it         | facebook-mbart-large-50            | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.04             | 0.05 $\pm$ 0.02         | 0.09 $\pm$ 0.02          | 0.08 $\pm$ 0.03                           | **0.10 $\pm$ 0.02** |
| it         | gpt2                               | 0.02 $\pm$ 0.02 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.03 $\pm$ 0.00                           | 0.04 $\pm$ 0.02     |
| it         | xlm-roberta-large                  | 0.06 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.07 $\pm$ 0.03     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.01 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.05 $\pm$ 0.02                           | 0.05 $\pm$ 0.02     |
| po         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.02 $\pm$ 0.00         | 0.02 $\pm$ 0.01          | 0.03 $\pm$ 0.01                           | 0.04 $\pm$ 0.04     |
| po         | bert-base-multilingual-cased       | 0.04 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.06 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.05 $\pm$ 0.02     |
| po         | distilbert-base-multilingual-cased | 0.03 $\pm$ 0.02 | 0.04 $\pm$ 0.01             | 0.05 $\pm$ 0.03         | 0.04 $\pm$ 0.01          | 0.05 $\pm$ 0.02                           | 0.04 $\pm$ 0.01     |
| po         | facebook-mbart-large-50            | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.07 $\pm$ 0.03          | 0.05 $\pm$ 0.03                           | **0.11 $\pm$ 0.03** |
| po         | gpt2                               | 0.00 $\pm$ 0.00 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.03         | 0.04 $\pm$ 0.06          | 0.02 $\pm$ 0.02                           | 0.03 $\pm$ 0.02     |
| po         | xlm-roberta-large                  | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.07 $\pm$ 0.04          | 0.06 $\pm$ 0.03                           | 0.06 $\pm$ 0.02     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.03 | 0.06 $\pm$ 0.03             | 0.10 $\pm$ 0.01         | 0.08 $\pm$ 0.03          | 0.13 $\pm$ 0.05                           | 0.06 $\pm$ 0.03     |
| ru         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.03 | 0.04 $\pm$ 0.05             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.04          | 0.03 $\pm$ 0.03                           | 0.03 $\pm$ 0.05     |
| ru         | bert-base-multilingual-cased       | 0.09 $\pm$ 0.03 | 0.12 $\pm$ 0.04             | **0.17 $\pm$ 0.03**     | 0.15 $\pm$ 0.08          | **0.17 $\pm$ 0.03**                       | 0.14 $\pm$ 0.05     |
| ru         | distilbert-base-multilingual-cased | 0.05 $\pm$ 0.01 | 0.11 $\pm$ 0.06             | 0.08 $\pm$ 0.03         | 0.13 $\pm$ 0.03          | 0.13 $\pm$ 0.07                           | 0.12 $\pm$ 0.02     |
| ru         | facebook-mbart-large-50            | 0.11 $\pm$ 0.03 | 0.16 $\pm$ 0.03             | 0.14 $\pm$ 0.03         | **0.17 $\pm$ 0.05**      | 0.15 $\pm$ 0.03                           | 0.13 $\pm$ 0.03     |
| ru         | gpt2                               | 0.02 $\pm$ 0.02 | 0.03 $\pm$ 0.02             | 0.02 $\pm$ 0.02         | 0.02 $\pm$ 0.00          | 0.02 $\pm$ 0.01                           | 0.02 $\pm$ 0.01     |
| ru         | xlm-roberta-large                  | 0.12 $\pm$ 0.05 | **0.17 $\pm$ 0.04**         | 0.14 $\pm$ 0.06         | 0.14 $\pm$ 0.08          | 0.13 $\pm$ 0.03                           | 0.14 $\pm$ 0.06     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


In [20]:
# Per-model-name report tables for the majority-vote aggregated results.
# NOTE(review): the helper is defined earlier in the notebook; it presumably
# also writes the LaTeX versions of these tables under `output_dir` — confirm.
display_metrics_and_write_to_file(
    df=results_majority_vote_pred_df,
    grouping_criterion=['model_name'],
    output_dir='per_model_name_tables_majority_voting',
)

# English

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.62 $\pm$ 0.01 | 0.67 $\pm$ 0.01             | 0.68 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.03                           | 0.69 $\pm$ 0.00 |
| EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.03 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.01          | 0.64 $\pm$ 0.01                           | 0.66 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.69 $\pm$ 0.02          | 0.69 $\pm$ 0.03                           | 0.69 $\pm$ 0.01 |
| distilbert-base-multilingual-cased | 0.61 $\pm$ 0.03 | 0.66 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.68 $\pm$ 0.03          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | **0.71 $\pm$ 0.02**                       | 0.69 $\pm$ 0.02 |
| gpt2                               | 0.63 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.67 $\pm$ 0.02          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.02 |
| xlm-roberta-large                  | 0.66 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | **0.71 $\pm$ 0.02**      | 0.70 $\pm$ 0.01                           | 0.70 $\pm$ 0.01 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.02          | 0.59 $\pm$ 0.03                           | 0.61 $\pm$ 0.01 |
| EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.01             | 0.55 $\pm$ 0.03         | 0.55 $\pm$ 0.00          | 0.56 $\pm$ 0.02                           | 0.58 $\pm$ 0.03 |
| bert-base-multilingual-cased       | 0.58 $\pm$ 0.01 | 0.63 $\pm$ 0.03             | 0.64 $\pm$ 0.02         | 0.65 $\pm$ 0.01          | 0.63 $\pm$ 0.03                           | 0.63 $\pm$ 0.03 |
| distilbert-base-multilingual-cased | 0.56 $\pm$ 0.03 | 0.59 $\pm$ 0.02             | 0.62 $\pm$ 0.02         | 0.63 $\pm$ 0.03          | 0.59 $\pm$ 0.00                           | 0.61 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.60 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.66 $\pm$ 0.02         | 0.66 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.63 $\pm$ 0.02 |
| gpt2                               | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.65 $\pm$ 0.02         | **0.67 $\pm$ 0.04**      | 0.63 $\pm$ 0.04                           | 0.63 $\pm$ 0.03 |
| xlm-roberta-large                  | 0.60 $\pm$ 0.01 | 0.66 $\pm$ 0.01             | 0.66 $\pm$ 0.02         | **0.67 $\pm$ 0.01**      | 0.66 $\pm$ 0.02                           | 0.65 $\pm$ 0.01 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.72 $\pm$ 0.04 | 0.76 $\pm$ 0.02             | 0.74 $\pm$ 0.02         | 0.76 $\pm$ 0.01          | 0.78 $\pm$ 0.04                           | **0.79 $\pm$ 0.02** |
| EleutherAI-gpt-neo-125M            | 0.63 $\pm$ 0.04 | 0.67 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | 0.71 $\pm$ 0.03          | 0.74 $\pm$ 0.04                           | 0.77 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.70 $\pm$ 0.03 | 0.74 $\pm$ 0.03             | 0.76 $\pm$ 0.02         | 0.74 $\pm$ 0.04          | 0.76 $\pm$ 0.04                           | 0.76 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.68 $\pm$ 0.05 | 0.74 $\pm$ 0.03             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.03          | 0.76 $\pm$ 0.02                           | 0.76 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.73 $\pm$ 0.01 | 0.76 $\pm$ 0.01             | 0.74 $\pm$ 0.03         | 0.76 $\pm$ 0.01          | 0.78 $\pm$ 0.02                           | 0.76 $\pm$ 0.02     |
| gpt2                               | 0.67 $\pm$ 0.06 | 0.72 $\pm$ 0.02             | 0.70 $\pm$ 0.03         | 0.67 $\pm$ 0.03          | 0.72 $\pm$ 0.06                           | 0.74 $\pm$ 0.03     |
| xlm-roberta-large                  | 0.73 $\pm$ 0.03 | 0.75 $\pm$ 0.03             | 0.75 $\pm$ 0.01         | 0.76 $\pm$ 0.03          | 0.74 $\pm$ 0.01                           | 0.76 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.73 $\pm$ 0.00 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.76 $\pm$ 0.00          | 0.76 $\pm$ 0.02                           | 0.77 $\pm$ 0.00 |
| EleutherAI-gpt-neo-125M            | 0.69 $\pm$ 0.01 | 0.72 $\pm$ 0.01             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.00          | 0.74 $\pm$ 0.01                           | 0.76 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.74 $\pm$ 0.01 | 0.77 $\pm$ 0.01             | 0.78 $\pm$ 0.01         | 0.78 $\pm$ 0.01          | 0.78 $\pm$ 0.02                           | 0.78 $\pm$ 0.01 |
| distilbert-base-multilingual-cased | 0.73 $\pm$ 0.02 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.77 $\pm$ 0.02          | 0.76 $\pm$ 0.00                           | 0.77 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.76 $\pm$ 0.01 | 0.78 $\pm$ 0.01             | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01 |
| gpt2                               | 0.74 $\pm$ 0.02 | 0.77 $\pm$ 0.01             | 0.77 $\pm$ 0.00         | 0.77 $\pm$ 0.02          | 0.77 $\pm$ 0.01                           | 0.77 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.76 $\pm$ 0.00 | **0.79 $\pm$ 0.00**         | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.08 $\pm$ 0.02 | 0.12 $\pm$ 0.02             | 0.08 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.10 $\pm$ 0.02                           | **0.13 $\pm$ 0.01** |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.05 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.01          | 0.08 $\pm$ 0.01                           | 0.09 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.07 $\pm$ 0.01 | 0.10 $\pm$ 0.03             | **0.13 $\pm$ 0.03**     | 0.09 $\pm$ 0.00          | 0.10 $\pm$ 0.04                           | 0.11 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.06 $\pm$ 0.01 | 0.09 $\pm$ 0.02             | 0.10 $\pm$ 0.02         | 0.09 $\pm$ 0.01          | 0.08 $\pm$ 0.01                           | 0.10 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.07 $\pm$ 0.04 | 0.10 $\pm$ 0.03             | 0.11 $\pm$ 0.03         | 0.12 $\pm$ 0.03          | 0.12 $\pm$ 0.02                           | 0.11 $\pm$ 0.01     |
| gpt2                               | 0.06 $\pm$ 0.01 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.06 $\pm$ 0.03          | 0.08 $\pm$ 0.00                           | 0.10 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.10 $\pm$ 0.01 | 0.12 $\pm$ 0.01             | 0.10 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | 0.11 $\pm$ 0.01     |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.04 | 0.47 $\pm$ 0.01             | 0.50 $\pm$ 0.04         | 0.50 $\pm$ 0.01          | 0.52 $\pm$ 0.02                           | 0.54 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.31 $\pm$ 0.01 | 0.38 $\pm$ 0.03             | 0.39 $\pm$ 0.03         | 0.39 $\pm$ 0.01          | 0.42 $\pm$ 0.04                           | 0.44 $\pm$ 0.05     |
| bert-base-multilingual-cased       | 0.47 $\pm$ 0.04 | 0.52 $\pm$ 0.03             | 0.53 $\pm$ 0.02         | 0.55 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.57 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.44 $\pm$ 0.05 | 0.50 $\pm$ 0.02             | 0.53 $\pm$ 0.03         | 0.53 $\pm$ 0.02          | 0.52 $\pm$ 0.02                           | 0.52 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.53 $\pm$ 0.02             | 0.56 $\pm$ 0.01         | 0.57 $\pm$ 0.02          | **0.58 $\pm$ 0.02**                       | **0.58 $\pm$ 0.03** |
| gpt2                               | 0.40 $\pm$ 0.07 | 0.43 $\pm$ 0.02             | 0.47 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.48 $\pm$ 0.04                           | 0.50 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.51 $\pm$ 0.02 | 0.56 $\pm$ 0.03             | 0.57 $\pm$ 0.01         | **0.58 $\pm$ 0.03**      | 0.57 $\pm$ 0.01                           | 0.55 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.32 $\pm$ 0.05 | 0.37 $\pm$ 0.01             | 0.45 $\pm$ 0.04         | 0.42 $\pm$ 0.03          | 0.43 $\pm$ 0.05                           | 0.46 $\pm$ 0.06 |
| EleutherAI-gpt-neo-125M            | 0.24 $\pm$ 0.01 | 0.32 $\pm$ 0.04             | 0.32 $\pm$ 0.04         | 0.31 $\pm$ 0.03          | 0.35 $\pm$ 0.04                           | 0.33 $\pm$ 0.07 |
| bert-base-multilingual-cased       | 0.38 $\pm$ 0.04 | 0.46 $\pm$ 0.03             | 0.49 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.50 $\pm$ 0.05                           | 0.50 $\pm$ 0.03 |
| distilbert-base-multilingual-cased | 0.37 $\pm$ 0.06 | 0.44 $\pm$ 0.02             | 0.48 $\pm$ 0.04         | 0.48 $\pm$ 0.02          | 0.44 $\pm$ 0.04                           | 0.45 $\pm$ 0.04 |
| facebook-mbart-large-50            | 0.44 $\pm$ 0.03 | 0.47 $\pm$ 0.02             | 0.51 $\pm$ 0.02         | 0.52 $\pm$ 0.03          | 0.52 $\pm$ 0.03                           | 0.51 $\pm$ 0.06 |
| gpt2                               | 0.36 $\pm$ 0.06 | 0.36 $\pm$ 0.02             | 0.43 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.42 $\pm$ 0.06                           | 0.44 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.44 $\pm$ 0.02 | 0.51 $\pm$ 0.02             | 0.52 $\pm$ 0.04         | **0.55 $\pm$ 0.04**      | 0.54 $\pm$ 0.02                           | 0.49 $\pm$ 0.07 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.03 | 0.63 $\pm$ 0.04             | 0.57 $\pm$ 0.04         | 0.62 $\pm$ 0.06          | 0.65 $\pm$ 0.05                           | 0.66 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.42 $\pm$ 0.05 | 0.48 $\pm$ 0.03             | 0.51 $\pm$ 0.02         | 0.55 $\pm$ 0.06          | 0.52 $\pm$ 0.06                           | 0.64 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.59 $\pm$ 0.05 | 0.60 $\pm$ 0.02             | 0.59 $\pm$ 0.03         | 0.61 $\pm$ 0.05          | 0.63 $\pm$ 0.01                           | 0.67 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.53 $\pm$ 0.05 | 0.59 $\pm$ 0.05             | 0.59 $\pm$ 0.03         | 0.59 $\pm$ 0.03          | 0.63 $\pm$ 0.02                           | 0.61 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.60 $\pm$ 0.06 | 0.62 $\pm$ 0.01             | 0.62 $\pm$ 0.04         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | **0.68 $\pm$ 0.04** |
| gpt2                               | 0.45 $\pm$ 0.10 | 0.51 $\pm$ 0.04             | 0.53 $\pm$ 0.02         | 0.57 $\pm$ 0.04          | 0.58 $\pm$ 0.03                           | 0.60 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.62 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.68 $\pm$ 0.00          | 0.68 $\pm$ 0.02                           | 0.70 $\pm$ 0.03 |
| EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.01         | 0.62 $\pm$ 0.01          | 0.63 $\pm$ 0.02                           | 0.64 $\pm$ 0.03 |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.70 $\pm$ 0.02         | 0.71 $\pm$ 0.01          | 0.71 $\pm$ 0.02                           | 0.71 $\pm$ 0.01 |
| distilbert-base-multilingual-cased | 0.64 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.69 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.00         | 0.72 $\pm$ 0.02          | 0.72 $\pm$ 0.01                           | 0.72 $\pm$ 0.02 |
| gpt2                               | 0.62 $\pm$ 0.04 | 0.64 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.00 |
| xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.01         | **0.73 $\pm$ 0.02**      | 0.72 $\pm$ 0.01                           | 0.70 $\pm$ 0.03 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.02 | 0.07 $\pm$ 0.01             | 0.06 $\pm$ 0.03         | 0.08 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.09 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.01 $\pm$ 0.01             | 0.03 $\pm$ 0.01         | 0.04 $\pm$ 0.01          | 0.04 $\pm$ 0.03                           | 0.08 $\pm$ 0.05     |
| bert-base-multilingual-cased       | 0.07 $\pm$ 0.02 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.09 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | **0.11 $\pm$ 0.02** |
| distilbert-base-multilingual-cased | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.06 $\pm$ 0.04          | 0.09 $\pm$ 0.04                           | 0.09 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.08 $\pm$ 0.04 | **0.11 $\pm$ 0.02**         | 0.09 $\pm$ 0.01         | 0.10 $\pm$ 0.03          | **0.11 $\pm$ 0.03**                       | **0.11 $\pm$ 0.02** |
| gpt2                               | 0.03 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.06 $\pm$ 0.04         | 0.07 $\pm$ 0.02          | 0.06 $\pm$ 0.03                           | 0.07 $\pm$ 0.05     |
| xlm-roberta-large                  | 0.07 $\pm$ 0.03 | 0.07 $\pm$ 0.04             | 0.10 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | **0.11 $\pm$ 0.05**                       | 0.09 $\pm$ 0.05     |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.00 | 0.56 $\pm$ 0.02             | 0.56 $\pm$ 0.02         | 0.58 $\pm$ 0.00          | 0.57 $\pm$ 0.02                           | 0.61 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.01 | 0.46 $\pm$ 0.01             | 0.48 $\pm$ 0.00         | 0.50 $\pm$ 0.02          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.52 $\pm$ 0.03 | 0.58 $\pm$ 0.02             | 0.59 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.62 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.50 $\pm$ 0.02 | 0.56 $\pm$ 0.00             | 0.55 $\pm$ 0.03         | 0.59 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | 0.60 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.02             | 0.61 $\pm$ 0.02         | 0.63 $\pm$ 0.00          | **0.64 $\pm$ 0.01**                       | **0.64 $\pm$ 0.03** |
| gpt2                               | 0.47 $\pm$ 0.03 | 0.52 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.55 $\pm$ 0.03          | 0.53 $\pm$ 0.01                           | 0.56 $\pm$ 0.00     |
| xlm-roberta-large                  | 0.55 $\pm$ 0.02 | 0.61 $\pm$ 0.00             | 0.62 $\pm$ 0.03         | **0.64 $\pm$ 0.01**      | **0.64 $\pm$ 0.02**                       | **0.64 $\pm$ 0.02** |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.03 | 0.49 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.49 $\pm$ 0.02                           | 0.54 $\pm$ 0.03 |
| EleutherAI-gpt-neo-125M            | 0.33 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.41 $\pm$ 0.01         | 0.43 $\pm$ 0.02          | 0.44 $\pm$ 0.03                           | 0.45 $\pm$ 0.02 |
| bert-base-multilingual-cased       | 0.46 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.54 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.54 $\pm$ 0.02                           | 0.58 $\pm$ 0.05 |
| distilbert-base-multilingual-cased | 0.44 $\pm$ 0.01 | 0.51 $\pm$ 0.02             | 0.49 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.55 $\pm$ 0.04 |
| facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.55 $\pm$ 0.02             | 0.56 $\pm$ 0.03         | 0.59 $\pm$ 0.01          | 0.59 $\pm$ 0.00                           | 0.58 $\pm$ 0.05 |
| gpt2                               | 0.46 $\pm$ 0.05 | 0.49 $\pm$ 0.05             | 0.50 $\pm$ 0.05         | 0.52 $\pm$ 0.05          | 0.51 $\pm$ 0.03                           | 0.50 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.50 $\pm$ 0.02 | 0.57 $\pm$ 0.02             | 0.57 $\pm$ 0.02         | 0.58 $\pm$ 0.01          | **0.62 $\pm$ 0.07**                       | 0.61 $\pm$ 0.03 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.57 $\pm$ 0.05 | 0.65 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.70 $\pm$ 0.06          | 0.66 $\pm$ 0.03                           | **0.71 $\pm$ 0.02** |
| EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.02 | 0.51 $\pm$ 0.04             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.02          | 0.60 $\pm$ 0.05                           | 0.65 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.60 $\pm$ 0.02 | 0.62 $\pm$ 0.02             | 0.66 $\pm$ 0.03         | 0.68 $\pm$ 0.06          | 0.66 $\pm$ 0.01                           | 0.67 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.63 $\pm$ 0.02             | 0.63 $\pm$ 0.04         | 0.64 $\pm$ 0.05          | 0.66 $\pm$ 0.03                           | 0.65 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.62 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.68 $\pm$ 0.00         | 0.68 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.71 $\pm$ 0.01** |
| gpt2                               | 0.49 $\pm$ 0.01 | 0.56 $\pm$ 0.03             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.00          | 0.56 $\pm$ 0.01                           | 0.63 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.62 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.67 $\pm$ 0.04         | **0.71 $\pm$ 0.01**      | 0.68 $\pm$ 0.04                           | 0.67 $\pm$ 0.07     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.64 $\pm$ 0.00 | 0.69 $\pm$ 0.02             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.01             | 0.64 $\pm$ 0.00         | 0.65 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.67 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.01         | 0.73 $\pm$ 0.01          | 0.71 $\pm$ 0.00                           | 0.72 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.00                           | 0.71 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.01          | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.01** |
| gpt2                               | 0.62 $\pm$ 0.02 | 0.66 $\pm$ 0.03             | 0.66 $\pm$ 0.03         | 0.67 $\pm$ 0.02          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.72 $\pm$ 0.00             | 0.72 $\pm$ 0.02         | **0.74 $\pm$ 0.01**      | **0.74 $\pm$ 0.02**                       | 0.73 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.02 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.05 $\pm$ 0.03         | 0.05 $\pm$ 0.03          | 0.04 $\pm$ 0.03                           | 0.07 $\pm$ 0.03 |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.03 $\pm$ 0.02         | 0.02 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.03 $\pm$ 0.02 |
| bert-base-multilingual-cased       | 0.05 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.03         | 0.09 $\pm$ 0.05          | 0.06 $\pm$ 0.01                           | 0.10 $\pm$ 0.05 |
| distilbert-base-multilingual-cased | 0.02 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.05 $\pm$ 0.04         | 0.05 $\pm$ 0.03          | 0.05 $\pm$ 0.04                           | 0.04 $\pm$ 0.04 |
| facebook-mbart-large-50            | 0.05 $\pm$ 0.03 | 0.06 $\pm$ 0.04             | 0.06 $\pm$ 0.03         | 0.05 $\pm$ 0.02          | **0.11 $\pm$ 0.03**                       | 0.08 $\pm$ 0.04 |
| gpt2                               | 0.01 $\pm$ 0.01 | 0.01 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.03          | 0.03 $\pm$ 0.01                           | 0.02 $\pm$ 0.01 |
| xlm-roberta-large                  | 0.03 $\pm$ 0.02 | 0.07 $\pm$ 0.03             | 0.09 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | 0.07 $\pm$ 0.04                           | 0.07 $\pm$ 0.04 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.42 $\pm$ 0.02 | 0.46 $\pm$ 0.05             | 0.50 $\pm$ 0.03         | 0.52 $\pm$ 0.04          | 0.52 $\pm$ 0.05                           | 0.56 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.02         | 0.44 $\pm$ 0.01          | 0.47 $\pm$ 0.05                           | 0.48 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.47 $\pm$ 0.01 | 0.53 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.56 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.56 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.45 $\pm$ 0.01 | 0.50 $\pm$ 0.05             | 0.52 $\pm$ 0.03         | 0.56 $\pm$ 0.05          | 0.52 $\pm$ 0.03                           | 0.54 $\pm$ 0.02     |
| facebook-mbart-large-50            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.04             | 0.55 $\pm$ 0.02         | 0.57 $\pm$ 0.03          | 0.56 $\pm$ 0.04                           | **0.60 $\pm$ 0.04** |
| gpt2                               | 0.41 $\pm$ 0.02 | 0.45 $\pm$ 0.01             | 0.48 $\pm$ 0.03         | 0.52 $\pm$ 0.02          | 0.50 $\pm$ 0.03                           | 0.53 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.50 $\pm$ 0.04 | 0.54 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.59 $\pm$ 0.03          | 0.56 $\pm$ 0.01                           | 0.58 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.35 $\pm$ 0.04 | 0.38 $\pm$ 0.06             | 0.44 $\pm$ 0.02         | 0.45 $\pm$ 0.07          | 0.44 $\pm$ 0.09                           | 0.47 $\pm$ 0.01 |
| EleutherAI-gpt-neo-125M            | 0.28 $\pm$ 0.04 | 0.36 $\pm$ 0.04             | 0.36 $\pm$ 0.02         | 0.35 $\pm$ 0.02          | 0.40 $\pm$ 0.06                           | 0.37 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.40 $\pm$ 0.00 | 0.48 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.51 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.49 $\pm$ 0.02 |
| distilbert-base-multilingual-cased | 0.39 $\pm$ 0.01 | 0.44 $\pm$ 0.04             | 0.46 $\pm$ 0.03         | 0.50 $\pm$ 0.06          | 0.45 $\pm$ 0.04                           | 0.47 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.42 $\pm$ 0.01 | 0.47 $\pm$ 0.06             | 0.49 $\pm$ 0.02         | 0.51 $\pm$ 0.03          | 0.50 $\pm$ 0.03                           | 0.52 $\pm$ 0.05 |
| gpt2                               | 0.37 $\pm$ 0.03 | 0.40 $\pm$ 0.01             | 0.45 $\pm$ 0.03         | 0.49 $\pm$ 0.03          | 0.46 $\pm$ 0.01                           | 0.47 $\pm$ 0.02 |
| xlm-roberta-large                  | 0.43 $\pm$ 0.05 | 0.49 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | **0.54 $\pm$ 0.02**      | 0.52 $\pm$ 0.02                           | 0.53 $\pm$ 0.04 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.53 $\pm$ 0.03 | 0.58 $\pm$ 0.03             | 0.58 $\pm$ 0.05         | 0.63 $\pm$ 0.05          | 0.63 $\pm$ 0.03                           | 0.68 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.47 $\pm$ 0.05 | 0.49 $\pm$ 0.01             | 0.55 $\pm$ 0.01         | 0.57 $\pm$ 0.01          | 0.59 $\pm$ 0.04                           | 0.67 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.57 $\pm$ 0.02 | 0.59 $\pm$ 0.04             | 0.61 $\pm$ 0.04         | 0.62 $\pm$ 0.05          | 0.62 $\pm$ 0.07                           | 0.65 $\pm$ 0.07     |
| distilbert-base-multilingual-cased | 0.54 $\pm$ 0.02 | 0.57 $\pm$ 0.06             | 0.58 $\pm$ 0.03         | 0.62 $\pm$ 0.03          | 0.60 $\pm$ 0.04                           | 0.64 $\pm$ 0.04     |
| facebook-mbart-large-50            | 0.57 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.03          | 0.65 $\pm$ 0.08                           | **0.70 $\pm$ 0.05** |
| gpt2                               | 0.47 $\pm$ 0.01 | 0.52 $\pm$ 0.02             | 0.52 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.06                           | 0.62 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.58 $\pm$ 0.03 | 0.59 $\pm$ 0.03             | 0.61 $\pm$ 0.02         | 0.64 $\pm$ 0.05          | 0.61 $\pm$ 0.05                           | 0.63 $\pm$ 0.05     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.61 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.02          | 0.67 $\pm$ 0.03                           | 0.69 $\pm$ 0.01     |
| EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.63 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.65 $\pm$ 0.01     |
| bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.02                           | 0.69 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.69 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.65 $\pm$ 0.00 | 0.68 $\pm$ 0.03             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.72 $\pm$ 0.03** |
| gpt2                               | 0.61 $\pm$ 0.01 | 0.63 $\pm$ 0.01             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | 0.66 $\pm$ 0.02                           | 0.68 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.66 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.01         | 0.71 $\pm$ 0.02          | 0.70 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.04             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.03          | 0.07 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.04 $\pm$ 0.01             | 0.04 $\pm$ 0.04         | 0.01 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.08 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.06 $\pm$ 0.01         | 0.09 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.04 $\pm$ 0.02 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.02         | 0.06 $\pm$ 0.04          | 0.07 $\pm$ 0.03                           | 0.07 $\pm$ 0.05     |
| facebook-mbart-large-50            | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.04             | 0.05 $\pm$ 0.02         | 0.09 $\pm$ 0.02          | 0.08 $\pm$ 0.03                           | **0.11 $\pm$ 0.02** |
| gpt2                               | 0.02 $\pm$ 0.02 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.03 $\pm$ 0.00                           | 0.07 $\pm$ 0.03     |
| xlm-roberta-large                  | 0.06 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.02          | 0.08 $\pm$ 0.04                           | 0.08 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.60 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.61 $\pm$ 0.02                           | 0.62 $\pm$ 0.04     |
| EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.04 | 0.47 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.51 $\pm$ 0.06          | 0.54 $\pm$ 0.03                           | 0.57 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.55 $\pm$ 0.02 | 0.59 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | 0.62 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.54 $\pm$ 0.03 | 0.58 $\pm$ 0.04             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.01          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.04 | 0.60 $\pm$ 0.01             | 0.63 $\pm$ 0.03         | **0.66 $\pm$ 0.02**      | 0.64 $\pm$ 0.02                           | **0.66 $\pm$ 0.03** |
| gpt2                               | 0.49 $\pm$ 0.01 | 0.55 $\pm$ 0.05             | 0.57 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.58 $\pm$ 0.02                           | 0.59 $\pm$ 0.04     |
| xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.59 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.65 $\pm$ 0.03          | 0.65 $\pm$ 0.02                           | **0.66 $\pm$ 0.03** |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.03 | 0.49 $\pm$ 0.02             | 0.58 $\pm$ 0.02         | 0.53 $\pm$ 0.04          | 0.56 $\pm$ 0.05                           | 0.58 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.34 $\pm$ 0.05 | 0.42 $\pm$ 0.04             | 0.50 $\pm$ 0.02         | 0.44 $\pm$ 0.06          | 0.49 $\pm$ 0.02                           | 0.50 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.50 $\pm$ 0.04 | 0.56 $\pm$ 0.04             | 0.56 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.62 $\pm$ 0.01                           | 0.57 $\pm$ 0.04     |
| distilbert-base-multilingual-cased | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.05         | 0.55 $\pm$ 0.03          | 0.54 $\pm$ 0.03                           | 0.57 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.59 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.05     |
| gpt2                               | 0.48 $\pm$ 0.03 | 0.52 $\pm$ 0.01             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.03          | 0.57 $\pm$ 0.06                           | 0.57 $\pm$ 0.04     |
| xlm-roberta-large                  | 0.54 $\pm$ 0.02 | 0.55 $\pm$ 0.05             | 0.60 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.63 $\pm$ 0.03                           | **0.69 $\pm$ 0.06** |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.56 $\pm$ 0.10 | 0.63 $\pm$ 0.05             | 0.63 $\pm$ 0.06         | 0.64 $\pm$ 0.06          | 0.69 $\pm$ 0.04                           | 0.68 $\pm$ 0.04     |
| EleutherAI-gpt-neo-125M            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.03             | 0.60 $\pm$ 0.06         | 0.61 $\pm$ 0.07          | 0.60 $\pm$ 0.05                           | 0.65 $\pm$ 0.05     |
| bert-base-multilingual-cased       | 0.61 $\pm$ 0.07 | 0.63 $\pm$ 0.02             | 0.67 $\pm$ 0.05         | 0.69 $\pm$ 0.04          | 0.68 $\pm$ 0.05                           | 0.67 $\pm$ 0.08     |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.64 $\pm$ 0.06             | 0.68 $\pm$ 0.07         | 0.67 $\pm$ 0.08          | 0.67 $\pm$ 0.09                           | 0.67 $\pm$ 0.05     |
| facebook-mbart-large-50            | 0.61 $\pm$ 0.06 | 0.67 $\pm$ 0.04             | 0.67 $\pm$ 0.07         | 0.70 $\pm$ 0.02          | 0.69 $\pm$ 0.05                           | **0.73 $\pm$ 0.01** |
| gpt2                               | 0.51 $\pm$ 0.07 | 0.59 $\pm$ 0.10             | 0.59 $\pm$ 0.05         | 0.58 $\pm$ 0.06          | 0.61 $\pm$ 0.06                           | 0.62 $\pm$ 0.06     |
| xlm-roberta-large                  | 0.62 $\pm$ 0.11 | 0.64 $\pm$ 0.03             | 0.67 $\pm$ 0.04         | 0.68 $\pm$ 0.05          | 0.68 $\pm$ 0.06                           | 0.64 $\pm$ 0.03     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.69 $\pm$ 0.01         | 0.69 $\pm$ 0.02          | 0.71 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.57 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.64 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.68 $\pm$ 0.02     |
| bert-base-multilingual-cased       | 0.66 $\pm$ 0.01 | 0.69 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.72 $\pm$ 0.01          | 0.73 $\pm$ 0.01                           | 0.71 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.72 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.01                           | 0.71 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.00             | 0.72 $\pm$ 0.01         | **0.74 $\pm$ 0.01**      | 0.72 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |
| gpt2                               | 0.61 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.68 $\pm$ 0.01          | 0.68 $\pm$ 0.01                           | 0.69 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.68 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.01 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.05 $\pm$ 0.02                           | 0.06 $\pm$ 0.02     |
| EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.02 $\pm$ 0.00         | 0.02 $\pm$ 0.01          | 0.03 $\pm$ 0.01                           | 0.04 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.04 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.06 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.06 $\pm$ 0.02     |
| distilbert-base-multilingual-cased | 0.03 $\pm$ 0.02 | 0.04 $\pm$ 0.01             | 0.05 $\pm$ 0.03         | 0.04 $\pm$ 0.01          | 0.05 $\pm$ 0.02                           | 0.04 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.07 $\pm$ 0.03          | 0.05 $\pm$ 0.03                           | **0.11 $\pm$ 0.03** |
| gpt2                               | 0.00 $\pm$ 0.00 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.03         | 0.04 $\pm$ 0.06          | 0.02 $\pm$ 0.02                           | 0.03 $\pm$ 0.02     |
| xlm-roberta-large                  | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.07 $\pm$ 0.04          | 0.06 $\pm$ 0.03                           | 0.07 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.21 $\pm$ 0.02 | 0.25 $\pm$ 0.03             | 0.38 $\pm$ 0.03         | 0.37 $\pm$ 0.04          | 0.40 $\pm$ 0.03                           | 0.39 $\pm$ 0.05     |
| EleutherAI-gpt-neo-125M            | 0.22 $\pm$ 0.01 | 0.22 $\pm$ 0.04             | 0.21 $\pm$ 0.02         | 0.22 $\pm$ 0.02          | 0.20 $\pm$ 0.04                           | 0.18 $\pm$ 0.03     |
| bert-base-multilingual-cased       | 0.39 $\pm$ 0.01 | 0.45 $\pm$ 0.02             | 0.50 $\pm$ 0.05         | 0.51 $\pm$ 0.06          | 0.51 $\pm$ 0.03                           | 0.51 $\pm$ 0.03     |
| distilbert-base-multilingual-cased | 0.31 $\pm$ 0.02 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.03         | 0.46 $\pm$ 0.06          | 0.47 $\pm$ 0.04                           | 0.44 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.40 $\pm$ 0.04 | 0.49 $\pm$ 0.03             | 0.51 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.51 $\pm$ 0.01                           | **0.55 $\pm$ 0.02** |
| gpt2                               | 0.16 $\pm$ 0.08 | 0.13 $\pm$ 0.06             | 0.07 $\pm$ 0.07         | 0.14 $\pm$ 0.08          | 0.16 $\pm$ 0.05                           | 0.14 $\pm$ 0.10     |
| xlm-roberta-large                  | 0.47 $\pm$ 0.03 | 0.53 $\pm$ 0.04             | 0.53 $\pm$ 0.03         | **0.55 $\pm$ 0.01**      | 0.52 $\pm$ 0.03                           | 0.53 $\pm$ 0.02     |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.14 $\pm$ 0.01 | 0.16 $\pm$ 0.02             | 0.31 $\pm$ 0.05         | 0.29 $\pm$ 0.05          | 0.31 $\pm$ 0.02                           | 0.32 $\pm$ 0.07 |
| EleutherAI-gpt-neo-125M            | 0.16 $\pm$ 0.01 | 0.17 $\pm$ 0.03             | 0.15 $\pm$ 0.02         | 0.16 $\pm$ 0.01          | 0.14 $\pm$ 0.02                           | 0.12 $\pm$ 0.04 |
| bert-base-multilingual-cased       | 0.31 $\pm$ 0.03 | 0.40 $\pm$ 0.05             | 0.44 $\pm$ 0.05         | 0.45 $\pm$ 0.10          | 0.44 $\pm$ 0.04                           | 0.45 $\pm$ 0.04 |
| distilbert-base-multilingual-cased | 0.23 $\pm$ 0.01 | 0.33 $\pm$ 0.02             | 0.35 $\pm$ 0.04         | 0.39 $\pm$ 0.06          | 0.39 $\pm$ 0.06                           | 0.36 $\pm$ 0.02 |
| facebook-mbart-large-50            | 0.31 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.05         | 0.47 $\pm$ 0.06          | 0.44 $\pm$ 0.03                           | 0.47 $\pm$ 0.01 |
| gpt2                               | 0.11 $\pm$ 0.06 | 0.09 $\pm$ 0.04             | 0.04 $\pm$ 0.05         | 0.09 $\pm$ 0.06          | 0.10 $\pm$ 0.04                           | 0.09 $\pm$ 0.07 |
| xlm-roberta-large                  | 0.40 $\pm$ 0.03 | 0.47 $\pm$ 0.05             | 0.48 $\pm$ 0.06         | **0.52 $\pm$ 0.03**      | 0.46 $\pm$ 0.05                           | 0.47 $\pm$ 0.07 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision-micro

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.43 $\pm$ 0.06 | 0.59 $\pm$ 0.14             | 0.50 $\pm$ 0.04         | 0.52 $\pm$ 0.09          | 0.59 $\pm$ 0.15                           | 0.52 $\pm$ 0.07     |
| EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.03 | 0.32 $\pm$ 0.08             | 0.36 $\pm$ 0.05         | 0.37 $\pm$ 0.03          | 0.36 $\pm$ 0.11                           | 0.39 $\pm$ 0.12     |
| bert-base-multilingual-cased       | 0.53 $\pm$ 0.02 | 0.53 $\pm$ 0.05             | 0.59 $\pm$ 0.04         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.04                           | 0.59 $\pm$ 0.01     |
| distilbert-base-multilingual-cased | 0.49 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.01         | 0.57 $\pm$ 0.04          | 0.61 $\pm$ 0.02                           | 0.57 $\pm$ 0.01     |
| facebook-mbart-large-50            | 0.55 $\pm$ 0.06 | 0.60 $\pm$ 0.03             | 0.61 $\pm$ 0.04         | 0.63 $\pm$ 0.04          | 0.62 $\pm$ 0.02                           | **0.67 $\pm$ 0.07** |
| gpt2                               | 0.30 $\pm$ 0.03 | 0.26 $\pm$ 0.10             | 0.25 $\pm$ 0.04         | 0.37 $\pm$ 0.02          | 0.41 $\pm$ 0.02                           | 0.47 $\pm$ 0.12     |
| xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.61 $\pm$ 0.01             | 0.61 $\pm$ 0.06         | 0.58 $\pm$ 0.03          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.07     |

  report_table.reset_index().to_latex(latex_file, index=False)


## roc-auc

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text        |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:----------------|
| EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.57 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.63 $\pm$ 0.03 |
| EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.01 | 0.55 $\pm$ 0.02             | 0.55 $\pm$ 0.01         | 0.55 $\pm$ 0.01          | 0.54 $\pm$ 0.02                           | 0.54 $\pm$ 0.01 |
| bert-base-multilingual-cased       | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.69 $\pm$ 0.03         | 0.69 $\pm$ 0.04          | 0.69 $\pm$ 0.02                           | 0.69 $\pm$ 0.01 |
| distilbert-base-multilingual-cased | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.01             | 0.65 $\pm$ 0.01         | 0.66 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.65 $\pm$ 0.01 |
| facebook-mbart-large-50            | 0.63 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.71 $\pm$ 0.03          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.00 |
| gpt2                               | 0.53 $\pm$ 0.02 | 0.52 $\pm$ 0.02             | 0.51 $\pm$ 0.01         | 0.53 $\pm$ 0.02          | 0.53 $\pm$ 0.01                           | 0.53 $\pm$ 0.02 |
| xlm-roberta-large                  | 0.67 $\pm$ 0.01 | 0.70 $\pm$ 0.03             | 0.71 $\pm$ 0.02         | **0.72 $\pm$ 0.01**      | 0.70 $\pm$ 0.02                           | 0.70 $\pm$ 0.02 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.03 | 0.06 $\pm$ 0.03             | 0.10 $\pm$ 0.01         | 0.08 $\pm$ 0.03          | 0.13 $\pm$ 0.05                           | 0.08 $\pm$ 0.03     |
| EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.03 | 0.04 $\pm$ 0.05             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.04          | 0.03 $\pm$ 0.03                           | 0.04 $\pm$ 0.04     |
| bert-base-multilingual-cased       | 0.09 $\pm$ 0.03 | 0.12 $\pm$ 0.04             | **0.17 $\pm$ 0.03**     | 0.15 $\pm$ 0.08          | **0.17 $\pm$ 0.03**                       | 0.13 $\pm$ 0.06     |
| distilbert-base-multilingual-cased | 0.05 $\pm$ 0.01 | 0.11 $\pm$ 0.06             | 0.08 $\pm$ 0.03         | 0.13 $\pm$ 0.03          | 0.13 $\pm$ 0.07                           | 0.12 $\pm$ 0.03     |
| facebook-mbart-large-50            | 0.11 $\pm$ 0.03 | 0.16 $\pm$ 0.03             | 0.14 $\pm$ 0.03         | **0.17 $\pm$ 0.05**      | 0.15 $\pm$ 0.03                           | 0.14 $\pm$ 0.02     |
| gpt2                               | 0.02 $\pm$ 0.02 | 0.03 $\pm$ 0.02             | 0.02 $\pm$ 0.02         | 0.02 $\pm$ 0.00          | 0.02 $\pm$ 0.01                           | 0.03 $\pm$ 0.01     |
| xlm-roberta-large                  | 0.12 $\pm$ 0.05 | **0.17 $\pm$ 0.04**         | 0.14 $\pm$ 0.06         | 0.14 $\pm$ 0.08          | 0.13 $\pm$ 0.02                           | **0.17 $\pm$ 0.06** |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.62 $\pm$ 0.01 | 0.67 $\pm$ 0.01             | 0.68 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.03                           | 0.69 $\pm$ 0.00     |
| en         | EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.03 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.01          | 0.64 $\pm$ 0.01                           | 0.66 $\pm$ 0.01     |
| en         | bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.69 $\pm$ 0.02          | 0.69 $\pm$ 0.03                           | 0.69 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.61 $\pm$ 0.03 | 0.66 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.68 $\pm$ 0.03          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.02     |
| en         | facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | **0.71 $\pm$ 0.02**                       | 0.69 $\pm$ 0.02     |
| en         | gpt2                               | 0.63 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.67 $\pm$ 0.01         | 0.67 $\pm$ 0.02          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.02     |
| en         | xlm-roberta-large                  | 0.66 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | **0.71 $\pm$ 0.02**      | 0.70 $\pm$ 0.01                           | 0.70 $\pm$ 0.01     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.04 | 0.47 $\pm$ 0.01             | 0.50 $\pm$ 0.04         | 0.50 $\pm$ 0.01          | 0.52 $\pm$ 0.02                           | 0.54 $\pm$ 0.05     |
| fr         | EleutherAI-gpt-neo-125M            | 0.31 $\pm$ 0.01 | 0.38 $\pm$ 0.03             | 0.39 $\pm$ 0.03         | 0.39 $\pm$ 0.01          | 0.42 $\pm$ 0.04                           | 0.44 $\pm$ 0.05     |
| fr         | bert-base-multilingual-cased       | 0.47 $\pm$ 0.04 | 0.52 $\pm$ 0.03             | 0.53 $\pm$ 0.02         | 0.55 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.57 $\pm$ 0.02     |
| fr         | distilbert-base-multilingual-cased | 0.44 $\pm$ 0.05 | 0.50 $\pm$ 0.02             | 0.53 $\pm$ 0.03         | 0.53 $\pm$ 0.02          | 0.52 $\pm$ 0.02                           | 0.52 $\pm$ 0.03     |
| fr         | facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.53 $\pm$ 0.02             | 0.56 $\pm$ 0.01         | 0.57 $\pm$ 0.02          | **0.58 $\pm$ 0.02**                       | **0.58 $\pm$ 0.03** |
| fr         | gpt2                               | 0.40 $\pm$ 0.07 | 0.43 $\pm$ 0.02             | 0.47 $\pm$ 0.02         | 0.49 $\pm$ 0.02          | 0.48 $\pm$ 0.04                           | 0.50 $\pm$ 0.01     |
| fr         | xlm-roberta-large                  | 0.51 $\pm$ 0.02 | 0.56 $\pm$ 0.03             | 0.57 $\pm$ 0.01         | **0.58 $\pm$ 0.03**      | 0.57 $\pm$ 0.01                           | 0.55 $\pm$ 0.03     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.00 | 0.56 $\pm$ 0.02             | 0.56 $\pm$ 0.02         | 0.58 $\pm$ 0.00          | 0.57 $\pm$ 0.02                           | 0.61 $\pm$ 0.02     |
| ge         | EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.01 | 0.46 $\pm$ 0.01             | 0.48 $\pm$ 0.00         | 0.50 $\pm$ 0.02          | 0.51 $\pm$ 0.04                           | 0.53 $\pm$ 0.02     |
| ge         | bert-base-multilingual-cased       | 0.52 $\pm$ 0.03 | 0.58 $\pm$ 0.02             | 0.59 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.59 $\pm$ 0.01                           | 0.62 $\pm$ 0.03     |
| ge         | distilbert-base-multilingual-cased | 0.50 $\pm$ 0.02 | 0.56 $\pm$ 0.00             | 0.55 $\pm$ 0.03         | 0.59 $\pm$ 0.02          | 0.58 $\pm$ 0.01                           | 0.60 $\pm$ 0.02     |
| ge         | facebook-mbart-large-50            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.02             | 0.61 $\pm$ 0.02         | 0.63 $\pm$ 0.00          | **0.64 $\pm$ 0.01**                       | **0.64 $\pm$ 0.03** |
| ge         | gpt2                               | 0.47 $\pm$ 0.03 | 0.52 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.55 $\pm$ 0.03          | 0.53 $\pm$ 0.01                           | 0.56 $\pm$ 0.00     |
| ge         | xlm-roberta-large                  | 0.55 $\pm$ 0.02 | 0.61 $\pm$ 0.00             | 0.62 $\pm$ 0.03         | **0.64 $\pm$ 0.01**      | **0.64 $\pm$ 0.02**                       | **0.64 $\pm$ 0.02** |
| it         | EleutherAI-gpt-neo-1.3B            | 0.42 $\pm$ 0.02 | 0.46 $\pm$ 0.05             | 0.50 $\pm$ 0.03         | 0.52 $\pm$ 0.04          | 0.52 $\pm$ 0.05                           | 0.56 $\pm$ 0.03     |
| it         | EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.02         | 0.44 $\pm$ 0.01          | 0.47 $\pm$ 0.05                           | 0.48 $\pm$ 0.02     |
| it         | bert-base-multilingual-cased       | 0.47 $\pm$ 0.01 | 0.53 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.56 $\pm$ 0.02          | 0.56 $\pm$ 0.03                           | 0.56 $\pm$ 0.03     |
| it         | distilbert-base-multilingual-cased | 0.45 $\pm$ 0.01 | 0.50 $\pm$ 0.05             | 0.52 $\pm$ 0.03         | 0.56 $\pm$ 0.05          | 0.52 $\pm$ 0.03                           | 0.54 $\pm$ 0.02     |
| it         | facebook-mbart-large-50            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.04             | 0.55 $\pm$ 0.02         | 0.57 $\pm$ 0.03          | 0.56 $\pm$ 0.04                           | **0.60 $\pm$ 0.04** |
| it         | gpt2                               | 0.41 $\pm$ 0.02 | 0.45 $\pm$ 0.01             | 0.48 $\pm$ 0.03         | 0.52 $\pm$ 0.02          | 0.50 $\pm$ 0.03                           | 0.53 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.50 $\pm$ 0.04 | 0.54 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.59 $\pm$ 0.03          | 0.56 $\pm$ 0.01                           | 0.58 $\pm$ 0.03     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.60 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.61 $\pm$ 0.02                           | 0.62 $\pm$ 0.04     |
| po         | EleutherAI-gpt-neo-125M            | 0.39 $\pm$ 0.04 | 0.47 $\pm$ 0.02             | 0.55 $\pm$ 0.02         | 0.51 $\pm$ 0.06          | 0.54 $\pm$ 0.03                           | 0.57 $\pm$ 0.03     |
| po         | bert-base-multilingual-cased       | 0.55 $\pm$ 0.02 | 0.59 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | 0.62 $\pm$ 0.01     |
| po         | distilbert-base-multilingual-cased | 0.54 $\pm$ 0.03 | 0.58 $\pm$ 0.04             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.01          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.01     |
| po         | facebook-mbart-large-50            | 0.55 $\pm$ 0.04 | 0.60 $\pm$ 0.01             | 0.63 $\pm$ 0.03         | **0.66 $\pm$ 0.02**      | 0.64 $\pm$ 0.02                           | **0.66 $\pm$ 0.03** |
| po         | gpt2                               | 0.49 $\pm$ 0.01 | 0.55 $\pm$ 0.05             | 0.57 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.58 $\pm$ 0.02                           | 0.59 $\pm$ 0.04     |
| po         | xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.59 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.65 $\pm$ 0.03          | 0.65 $\pm$ 0.02                           | **0.66 $\pm$ 0.03** |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.21 $\pm$ 0.02 | 0.25 $\pm$ 0.03             | 0.38 $\pm$ 0.03         | 0.37 $\pm$ 0.04          | 0.40 $\pm$ 0.03                           | 0.39 $\pm$ 0.05     |
| ru         | EleutherAI-gpt-neo-125M            | 0.22 $\pm$ 0.01 | 0.22 $\pm$ 0.04             | 0.21 $\pm$ 0.02         | 0.22 $\pm$ 0.02          | 0.20 $\pm$ 0.04                           | 0.18 $\pm$ 0.03     |
| ru         | bert-base-multilingual-cased       | 0.39 $\pm$ 0.01 | 0.45 $\pm$ 0.02             | 0.50 $\pm$ 0.05         | 0.51 $\pm$ 0.06          | 0.51 $\pm$ 0.03                           | 0.51 $\pm$ 0.03     |
| ru         | distilbert-base-multilingual-cased | 0.31 $\pm$ 0.02 | 0.41 $\pm$ 0.01             | 0.44 $\pm$ 0.03         | 0.46 $\pm$ 0.06          | 0.47 $\pm$ 0.04                           | 0.44 $\pm$ 0.01     |
| ru         | facebook-mbart-large-50            | 0.40 $\pm$ 0.04 | 0.49 $\pm$ 0.03             | 0.51 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.51 $\pm$ 0.01                           | **0.55 $\pm$ 0.02** |
| ru         | gpt2                               | 0.16 $\pm$ 0.08 | 0.13 $\pm$ 0.06             | 0.07 $\pm$ 0.07         | 0.14 $\pm$ 0.08          | 0.16 $\pm$ 0.05                           | 0.14 $\pm$ 0.10     |
| ru         | xlm-roberta-large                  | 0.47 $\pm$ 0.03 | 0.53 $\pm$ 0.04             | 0.53 $\pm$ 0.03         | **0.55 $\pm$ 0.01**      | 0.52 $\pm$ 0.03                           | 0.53 $\pm$ 0.02     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.60 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.60 $\pm$ 0.02          | 0.59 $\pm$ 0.03                           | 0.61 $\pm$ 0.01     |
| en         | EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.01             | 0.55 $\pm$ 0.03         | 0.55 $\pm$ 0.00          | 0.56 $\pm$ 0.02                           | 0.58 $\pm$ 0.03     |
| en         | bert-base-multilingual-cased       | 0.58 $\pm$ 0.01 | 0.63 $\pm$ 0.03             | 0.64 $\pm$ 0.02         | 0.65 $\pm$ 0.01          | 0.63 $\pm$ 0.03                           | 0.63 $\pm$ 0.03     |
| en         | distilbert-base-multilingual-cased | 0.56 $\pm$ 0.03 | 0.59 $\pm$ 0.02             | 0.62 $\pm$ 0.02         | 0.63 $\pm$ 0.03          | 0.59 $\pm$ 0.00                           | 0.61 $\pm$ 0.01     |
| en         | facebook-mbart-large-50            | 0.60 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.66 $\pm$ 0.02         | 0.66 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.63 $\pm$ 0.02     |
| en         | gpt2                               | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.65 $\pm$ 0.02         | **0.67 $\pm$ 0.04**      | 0.63 $\pm$ 0.04                           | 0.63 $\pm$ 0.03     |
| en         | xlm-roberta-large                  | 0.60 $\pm$ 0.01 | 0.66 $\pm$ 0.01             | 0.66 $\pm$ 0.02         | **0.67 $\pm$ 0.01**      | 0.66 $\pm$ 0.02                           | 0.65 $\pm$ 0.01     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.32 $\pm$ 0.05 | 0.37 $\pm$ 0.01             | 0.45 $\pm$ 0.04         | 0.42 $\pm$ 0.03          | 0.43 $\pm$ 0.05                           | 0.46 $\pm$ 0.06     |
| fr         | EleutherAI-gpt-neo-125M            | 0.24 $\pm$ 0.01 | 0.32 $\pm$ 0.04             | 0.32 $\pm$ 0.04         | 0.31 $\pm$ 0.03          | 0.35 $\pm$ 0.04                           | 0.33 $\pm$ 0.07     |
| fr         | bert-base-multilingual-cased       | 0.38 $\pm$ 0.04 | 0.46 $\pm$ 0.03             | 0.49 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.50 $\pm$ 0.05                           | 0.50 $\pm$ 0.03     |
| fr         | distilbert-base-multilingual-cased | 0.37 $\pm$ 0.06 | 0.44 $\pm$ 0.02             | 0.48 $\pm$ 0.04         | 0.48 $\pm$ 0.02          | 0.44 $\pm$ 0.04                           | 0.45 $\pm$ 0.04     |
| fr         | facebook-mbart-large-50            | 0.44 $\pm$ 0.03 | 0.47 $\pm$ 0.02             | 0.51 $\pm$ 0.02         | 0.52 $\pm$ 0.03          | 0.52 $\pm$ 0.03                           | 0.51 $\pm$ 0.06     |
| fr         | gpt2                               | 0.36 $\pm$ 0.06 | 0.36 $\pm$ 0.02             | 0.43 $\pm$ 0.02         | 0.43 $\pm$ 0.02          | 0.42 $\pm$ 0.06                           | 0.44 $\pm$ 0.01     |
| fr         | xlm-roberta-large                  | 0.44 $\pm$ 0.02 | 0.51 $\pm$ 0.02             | 0.52 $\pm$ 0.04         | **0.55 $\pm$ 0.04**      | 0.54 $\pm$ 0.02                           | 0.49 $\pm$ 0.07     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.41 $\pm$ 0.03 | 0.49 $\pm$ 0.04             | 0.52 $\pm$ 0.04         | 0.50 $\pm$ 0.03          | 0.49 $\pm$ 0.02                           | 0.54 $\pm$ 0.03     |
| ge         | EleutherAI-gpt-neo-125M            | 0.33 $\pm$ 0.03 | 0.41 $\pm$ 0.02             | 0.41 $\pm$ 0.01         | 0.43 $\pm$ 0.02          | 0.44 $\pm$ 0.03                           | 0.45 $\pm$ 0.02     |
| ge         | bert-base-multilingual-cased       | 0.46 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.54 $\pm$ 0.02         | 0.58 $\pm$ 0.04          | 0.54 $\pm$ 0.02                           | 0.58 $\pm$ 0.05     |
| ge         | distilbert-base-multilingual-cased | 0.44 $\pm$ 0.01 | 0.51 $\pm$ 0.02             | 0.49 $\pm$ 0.03         | 0.54 $\pm$ 0.03          | 0.52 $\pm$ 0.01                           | 0.55 $\pm$ 0.04     |
| ge         | facebook-mbart-large-50            | 0.50 $\pm$ 0.02 | 0.55 $\pm$ 0.02             | 0.56 $\pm$ 0.03         | 0.59 $\pm$ 0.01          | 0.59 $\pm$ 0.00                           | 0.58 $\pm$ 0.05     |
| ge         | gpt2                               | 0.46 $\pm$ 0.05 | 0.49 $\pm$ 0.05             | 0.50 $\pm$ 0.05         | 0.52 $\pm$ 0.05          | 0.51 $\pm$ 0.03                           | 0.50 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.50 $\pm$ 0.02 | 0.57 $\pm$ 0.02             | 0.57 $\pm$ 0.02         | 0.58 $\pm$ 0.01          | **0.62 $\pm$ 0.07**                       | 0.61 $\pm$ 0.03     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.35 $\pm$ 0.04 | 0.38 $\pm$ 0.06             | 0.44 $\pm$ 0.02         | 0.45 $\pm$ 0.07          | 0.44 $\pm$ 0.09                           | 0.47 $\pm$ 0.01     |
| it         | EleutherAI-gpt-neo-125M            | 0.28 $\pm$ 0.04 | 0.36 $\pm$ 0.04             | 0.36 $\pm$ 0.02         | 0.35 $\pm$ 0.02          | 0.40 $\pm$ 0.06                           | 0.37 $\pm$ 0.01     |
| it         | bert-base-multilingual-cased       | 0.40 $\pm$ 0.00 | 0.48 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | 0.51 $\pm$ 0.03          | 0.51 $\pm$ 0.02                           | 0.49 $\pm$ 0.02     |
| it         | distilbert-base-multilingual-cased | 0.39 $\pm$ 0.01 | 0.44 $\pm$ 0.04             | 0.46 $\pm$ 0.03         | 0.50 $\pm$ 0.06          | 0.45 $\pm$ 0.04                           | 0.47 $\pm$ 0.01     |
| it         | facebook-mbart-large-50            | 0.42 $\pm$ 0.01 | 0.47 $\pm$ 0.06             | 0.49 $\pm$ 0.02         | 0.51 $\pm$ 0.03          | 0.50 $\pm$ 0.03                           | 0.52 $\pm$ 0.05     |
| it         | gpt2                               | 0.37 $\pm$ 0.03 | 0.40 $\pm$ 0.01             | 0.45 $\pm$ 0.03         | 0.49 $\pm$ 0.03          | 0.46 $\pm$ 0.01                           | 0.47 $\pm$ 0.02     |
| it         | xlm-roberta-large                  | 0.43 $\pm$ 0.05 | 0.49 $\pm$ 0.02             | 0.50 $\pm$ 0.03         | **0.54 $\pm$ 0.02**      | 0.52 $\pm$ 0.02                           | 0.53 $\pm$ 0.04     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.48 $\pm$ 0.03 | 0.49 $\pm$ 0.02             | 0.58 $\pm$ 0.02         | 0.53 $\pm$ 0.04          | 0.56 $\pm$ 0.05                           | 0.58 $\pm$ 0.05     |
| po         | EleutherAI-gpt-neo-125M            | 0.34 $\pm$ 0.05 | 0.42 $\pm$ 0.04             | 0.50 $\pm$ 0.02         | 0.44 $\pm$ 0.06          | 0.49 $\pm$ 0.02                           | 0.50 $\pm$ 0.02     |
| po         | bert-base-multilingual-cased       | 0.50 $\pm$ 0.04 | 0.56 $\pm$ 0.04             | 0.56 $\pm$ 0.04         | 0.58 $\pm$ 0.02          | 0.62 $\pm$ 0.01                           | 0.57 $\pm$ 0.04     |
| po         | distilbert-base-multilingual-cased | 0.49 $\pm$ 0.03 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.05         | 0.55 $\pm$ 0.03          | 0.54 $\pm$ 0.03                           | 0.57 $\pm$ 0.03     |
| po         | facebook-mbart-large-50            | 0.51 $\pm$ 0.02 | 0.55 $\pm$ 0.01             | 0.59 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.05     |
| po         | gpt2                               | 0.48 $\pm$ 0.03 | 0.52 $\pm$ 0.01             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.03          | 0.57 $\pm$ 0.06                           | 0.57 $\pm$ 0.04     |
| po         | xlm-roberta-large                  | 0.54 $\pm$ 0.02 | 0.55 $\pm$ 0.05             | 0.60 $\pm$ 0.03         | 0.63 $\pm$ 0.02          | 0.63 $\pm$ 0.03                           | **0.69 $\pm$ 0.06** |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.14 $\pm$ 0.01 | 0.16 $\pm$ 0.02             | 0.31 $\pm$ 0.05         | 0.29 $\pm$ 0.05          | 0.31 $\pm$ 0.02                           | 0.32 $\pm$ 0.07     |
| ru         | EleutherAI-gpt-neo-125M            | 0.16 $\pm$ 0.01 | 0.17 $\pm$ 0.03             | 0.15 $\pm$ 0.02         | 0.16 $\pm$ 0.01          | 0.14 $\pm$ 0.02                           | 0.12 $\pm$ 0.04     |
| ru         | bert-base-multilingual-cased       | 0.31 $\pm$ 0.03 | 0.40 $\pm$ 0.05             | 0.44 $\pm$ 0.05         | 0.45 $\pm$ 0.10          | 0.44 $\pm$ 0.04                           | 0.45 $\pm$ 0.04     |
| ru         | distilbert-base-multilingual-cased | 0.23 $\pm$ 0.01 | 0.33 $\pm$ 0.02             | 0.35 $\pm$ 0.04         | 0.39 $\pm$ 0.06          | 0.39 $\pm$ 0.06                           | 0.36 $\pm$ 0.02     |
| ru         | facebook-mbart-large-50            | 0.31 $\pm$ 0.04 | 0.42 $\pm$ 0.02             | 0.44 $\pm$ 0.05         | 0.47 $\pm$ 0.06          | 0.44 $\pm$ 0.03                           | 0.47 $\pm$ 0.01     |
| ru         | gpt2                               | 0.11 $\pm$ 0.06 | 0.09 $\pm$ 0.04             | 0.04 $\pm$ 0.05         | 0.09 $\pm$ 0.06          | 0.10 $\pm$ 0.04                           | 0.09 $\pm$ 0.07     |
| ru         | xlm-roberta-large                  | 0.40 $\pm$ 0.03 | 0.47 $\pm$ 0.05             | 0.48 $\pm$ 0.06         | **0.52 $\pm$ 0.03**      | 0.46 $\pm$ 0.05                           | 0.47 $\pm$ 0.07     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision-micro

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.72 $\pm$ 0.04 | 0.76 $\pm$ 0.02             | 0.74 $\pm$ 0.02         | 0.76 $\pm$ 0.01          | 0.78 $\pm$ 0.04                           | **0.79 $\pm$ 0.02** |
| en         | EleutherAI-gpt-neo-125M            | 0.63 $\pm$ 0.04 | 0.67 $\pm$ 0.01             | 0.70 $\pm$ 0.01         | 0.71 $\pm$ 0.03          | 0.74 $\pm$ 0.04                           | 0.77 $\pm$ 0.03     |
| en         | bert-base-multilingual-cased       | 0.70 $\pm$ 0.03 | 0.74 $\pm$ 0.03             | 0.76 $\pm$ 0.02         | 0.74 $\pm$ 0.04          | 0.76 $\pm$ 0.04                           | 0.76 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.68 $\pm$ 0.05 | 0.74 $\pm$ 0.03             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.03          | 0.76 $\pm$ 0.02                           | 0.76 $\pm$ 0.03     |
| en         | facebook-mbart-large-50            | 0.73 $\pm$ 0.01 | 0.76 $\pm$ 0.01             | 0.74 $\pm$ 0.03         | 0.76 $\pm$ 0.01          | 0.78 $\pm$ 0.02                           | 0.76 $\pm$ 0.02     |
| en         | gpt2                               | 0.67 $\pm$ 0.06 | 0.72 $\pm$ 0.02             | 0.70 $\pm$ 0.03         | 0.67 $\pm$ 0.03          | 0.72 $\pm$ 0.06                           | 0.74 $\pm$ 0.03     |
| en         | xlm-roberta-large                  | 0.73 $\pm$ 0.03 | 0.75 $\pm$ 0.03             | 0.75 $\pm$ 0.01         | 0.76 $\pm$ 0.03          | 0.74 $\pm$ 0.01                           | 0.76 $\pm$ 0.02     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.03 | 0.63 $\pm$ 0.04             | 0.57 $\pm$ 0.04         | 0.62 $\pm$ 0.06          | 0.65 $\pm$ 0.05                           | 0.66 $\pm$ 0.01     |
| fr         | EleutherAI-gpt-neo-125M            | 0.42 $\pm$ 0.05 | 0.48 $\pm$ 0.03             | 0.51 $\pm$ 0.02         | 0.55 $\pm$ 0.06          | 0.52 $\pm$ 0.06                           | 0.64 $\pm$ 0.04     |
| fr         | bert-base-multilingual-cased       | 0.59 $\pm$ 0.05 | 0.60 $\pm$ 0.02             | 0.59 $\pm$ 0.03         | 0.61 $\pm$ 0.05          | 0.63 $\pm$ 0.01                           | 0.67 $\pm$ 0.03     |
| fr         | distilbert-base-multilingual-cased | 0.53 $\pm$ 0.05 | 0.59 $\pm$ 0.05             | 0.59 $\pm$ 0.03         | 0.59 $\pm$ 0.03          | 0.63 $\pm$ 0.02                           | 0.61 $\pm$ 0.02     |
| fr         | facebook-mbart-large-50            | 0.60 $\pm$ 0.06 | 0.62 $\pm$ 0.01             | 0.62 $\pm$ 0.04         | 0.63 $\pm$ 0.02          | 0.65 $\pm$ 0.02                           | **0.68 $\pm$ 0.04** |
| fr         | gpt2                               | 0.45 $\pm$ 0.10 | 0.51 $\pm$ 0.04             | 0.53 $\pm$ 0.02         | 0.57 $\pm$ 0.04          | 0.58 $\pm$ 0.03                           | 0.60 $\pm$ 0.02     |
| fr         | xlm-roberta-large                  | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.04             | 0.63 $\pm$ 0.03         | 0.62 $\pm$ 0.01          | 0.61 $\pm$ 0.02                           | 0.63 $\pm$ 0.02     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.57 $\pm$ 0.05 | 0.65 $\pm$ 0.02             | 0.61 $\pm$ 0.03         | 0.70 $\pm$ 0.06          | 0.66 $\pm$ 0.03                           | **0.71 $\pm$ 0.02** |
| ge         | EleutherAI-gpt-neo-125M            | 0.49 $\pm$ 0.02 | 0.51 $\pm$ 0.04             | 0.57 $\pm$ 0.01         | 0.58 $\pm$ 0.02          | 0.60 $\pm$ 0.05                           | 0.65 $\pm$ 0.02     |
| ge         | bert-base-multilingual-cased       | 0.60 $\pm$ 0.02 | 0.62 $\pm$ 0.02             | 0.66 $\pm$ 0.03         | 0.68 $\pm$ 0.06          | 0.66 $\pm$ 0.01                           | 0.67 $\pm$ 0.03     |
| ge         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.63 $\pm$ 0.02             | 0.63 $\pm$ 0.04         | 0.64 $\pm$ 0.05          | 0.66 $\pm$ 0.03                           | 0.65 $\pm$ 0.01     |
| ge         | facebook-mbart-large-50            | 0.62 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.68 $\pm$ 0.00         | 0.68 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.71 $\pm$ 0.01** |
| ge         | gpt2                               | 0.49 $\pm$ 0.01 | 0.56 $\pm$ 0.03             | 0.55 $\pm$ 0.04         | 0.58 $\pm$ 0.00          | 0.56 $\pm$ 0.01                           | 0.63 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.62 $\pm$ 0.03 | 0.65 $\pm$ 0.02             | 0.67 $\pm$ 0.04         | **0.71 $\pm$ 0.01**      | 0.68 $\pm$ 0.04                           | 0.67 $\pm$ 0.07     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.53 $\pm$ 0.03 | 0.58 $\pm$ 0.03             | 0.58 $\pm$ 0.05         | 0.63 $\pm$ 0.05          | 0.63 $\pm$ 0.03                           | 0.68 $\pm$ 0.05     |
| it         | EleutherAI-gpt-neo-125M            | 0.47 $\pm$ 0.05 | 0.49 $\pm$ 0.01             | 0.55 $\pm$ 0.01         | 0.57 $\pm$ 0.01          | 0.59 $\pm$ 0.04                           | 0.67 $\pm$ 0.03     |
| it         | bert-base-multilingual-cased       | 0.57 $\pm$ 0.02 | 0.59 $\pm$ 0.04             | 0.61 $\pm$ 0.04         | 0.62 $\pm$ 0.05          | 0.62 $\pm$ 0.07                           | 0.65 $\pm$ 0.07     |
| it         | distilbert-base-multilingual-cased | 0.54 $\pm$ 0.02 | 0.57 $\pm$ 0.06             | 0.58 $\pm$ 0.03         | 0.62 $\pm$ 0.03          | 0.60 $\pm$ 0.04                           | 0.64 $\pm$ 0.04     |
| it         | facebook-mbart-large-50            | 0.57 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.03         | 0.64 $\pm$ 0.03          | 0.65 $\pm$ 0.08                           | **0.70 $\pm$ 0.05** |
| it         | gpt2                               | 0.47 $\pm$ 0.01 | 0.52 $\pm$ 0.02             | 0.52 $\pm$ 0.03         | 0.55 $\pm$ 0.01          | 0.55 $\pm$ 0.06                           | 0.62 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.58 $\pm$ 0.03 | 0.59 $\pm$ 0.03             | 0.61 $\pm$ 0.02         | 0.64 $\pm$ 0.05          | 0.61 $\pm$ 0.05                           | 0.63 $\pm$ 0.05     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.56 $\pm$ 0.10 | 0.63 $\pm$ 0.05             | 0.63 $\pm$ 0.06         | 0.64 $\pm$ 0.06          | 0.69 $\pm$ 0.04                           | 0.68 $\pm$ 0.04     |
| po         | EleutherAI-gpt-neo-125M            | 0.48 $\pm$ 0.01 | 0.53 $\pm$ 0.03             | 0.60 $\pm$ 0.06         | 0.61 $\pm$ 0.07          | 0.60 $\pm$ 0.05                           | 0.65 $\pm$ 0.05     |
| po         | bert-base-multilingual-cased       | 0.61 $\pm$ 0.07 | 0.63 $\pm$ 0.02             | 0.67 $\pm$ 0.05         | 0.69 $\pm$ 0.04          | 0.68 $\pm$ 0.05                           | 0.67 $\pm$ 0.08     |
| po         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.04 | 0.64 $\pm$ 0.06             | 0.68 $\pm$ 0.07         | 0.67 $\pm$ 0.08          | 0.67 $\pm$ 0.09                           | 0.67 $\pm$ 0.05     |
| po         | facebook-mbart-large-50            | 0.61 $\pm$ 0.06 | 0.67 $\pm$ 0.04             | 0.67 $\pm$ 0.07         | 0.70 $\pm$ 0.02          | 0.69 $\pm$ 0.05                           | **0.73 $\pm$ 0.01** |
| po         | gpt2                               | 0.51 $\pm$ 0.07 | 0.59 $\pm$ 0.10             | 0.59 $\pm$ 0.05         | 0.58 $\pm$ 0.06          | 0.61 $\pm$ 0.06                           | 0.62 $\pm$ 0.06     |
| po         | xlm-roberta-large                  | 0.62 $\pm$ 0.11 | 0.64 $\pm$ 0.03             | 0.67 $\pm$ 0.04         | 0.68 $\pm$ 0.05          | 0.68 $\pm$ 0.06                           | 0.64 $\pm$ 0.03     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.43 $\pm$ 0.06 | 0.59 $\pm$ 0.14             | 0.50 $\pm$ 0.04         | 0.52 $\pm$ 0.09          | 0.59 $\pm$ 0.15                           | 0.52 $\pm$ 0.07     |
| ru         | EleutherAI-gpt-neo-125M            | 0.35 $\pm$ 0.03 | 0.32 $\pm$ 0.08             | 0.36 $\pm$ 0.05         | 0.37 $\pm$ 0.03          | 0.36 $\pm$ 0.11                           | 0.39 $\pm$ 0.12     |
| ru         | bert-base-multilingual-cased       | 0.53 $\pm$ 0.02 | 0.53 $\pm$ 0.05             | 0.59 $\pm$ 0.04         | 0.60 $\pm$ 0.02          | 0.60 $\pm$ 0.04                           | 0.59 $\pm$ 0.01     |
| ru         | distilbert-base-multilingual-cased | 0.49 $\pm$ 0.04 | 0.54 $\pm$ 0.04             | 0.58 $\pm$ 0.01         | 0.57 $\pm$ 0.04          | 0.61 $\pm$ 0.02                           | 0.57 $\pm$ 0.01     |
| ru         | facebook-mbart-large-50            | 0.55 $\pm$ 0.06 | 0.60 $\pm$ 0.03             | 0.61 $\pm$ 0.04         | 0.63 $\pm$ 0.04          | 0.62 $\pm$ 0.02                           | **0.67 $\pm$ 0.07** |
| ru         | gpt2                               | 0.30 $\pm$ 0.03 | 0.26 $\pm$ 0.10             | 0.25 $\pm$ 0.04         | 0.37 $\pm$ 0.02          | 0.41 $\pm$ 0.02                           | 0.47 $\pm$ 0.12     |
| ru         | xlm-roberta-large                  | 0.57 $\pm$ 0.03 | 0.61 $\pm$ 0.01             | 0.61 $\pm$ 0.06         | 0.58 $\pm$ 0.03          | 0.59 $\pm$ 0.02                           | 0.61 $\pm$ 0.07     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## roc-auc

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.73 $\pm$ 0.00 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.76 $\pm$ 0.00          | 0.76 $\pm$ 0.02                           | 0.77 $\pm$ 0.00     |
| en         | EleutherAI-gpt-neo-125M            | 0.69 $\pm$ 0.01 | 0.72 $\pm$ 0.01             | 0.73 $\pm$ 0.01         | 0.73 $\pm$ 0.00          | 0.74 $\pm$ 0.01                           | 0.76 $\pm$ 0.01     |
| en         | bert-base-multilingual-cased       | 0.74 $\pm$ 0.01 | 0.77 $\pm$ 0.01             | 0.78 $\pm$ 0.01         | 0.78 $\pm$ 0.01          | 0.78 $\pm$ 0.02                           | 0.78 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.73 $\pm$ 0.02 | 0.76 $\pm$ 0.01             | 0.77 $\pm$ 0.01         | 0.77 $\pm$ 0.02          | 0.76 $\pm$ 0.00                           | 0.77 $\pm$ 0.01     |
| en         | facebook-mbart-large-50            | 0.76 $\pm$ 0.01 | 0.78 $\pm$ 0.01             | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01     |
| en         | gpt2                               | 0.74 $\pm$ 0.02 | 0.77 $\pm$ 0.01             | 0.77 $\pm$ 0.00         | 0.77 $\pm$ 0.02          | 0.77 $\pm$ 0.01                           | 0.77 $\pm$ 0.01     |
| en         | xlm-roberta-large                  | 0.76 $\pm$ 0.00 | **0.79 $\pm$ 0.00**         | **0.79 $\pm$ 0.01**     | **0.79 $\pm$ 0.01**      | **0.79 $\pm$ 0.01**                       | 0.78 $\pm$ 0.01     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.68 $\pm$ 0.00          | 0.68 $\pm$ 0.02                           | 0.70 $\pm$ 0.03     |
| fr         | EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.01 | 0.61 $\pm$ 0.01             | 0.62 $\pm$ 0.01         | 0.62 $\pm$ 0.01          | 0.63 $\pm$ 0.02                           | 0.64 $\pm$ 0.03     |
| fr         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.70 $\pm$ 0.02         | 0.71 $\pm$ 0.01          | 0.71 $\pm$ 0.02                           | 0.71 $\pm$ 0.01     |
| fr         | distilbert-base-multilingual-cased | 0.64 $\pm$ 0.03 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.69 $\pm$ 0.01                           | 0.69 $\pm$ 0.02     |
| fr         | facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.00         | 0.72 $\pm$ 0.02          | 0.72 $\pm$ 0.01                           | 0.72 $\pm$ 0.02     |
| fr         | gpt2                               | 0.62 $\pm$ 0.04 | 0.64 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.67 $\pm$ 0.01          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.00     |
| fr         | xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.01         | **0.73 $\pm$ 0.02**      | 0.72 $\pm$ 0.01                           | 0.70 $\pm$ 0.03     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.64 $\pm$ 0.00 | 0.69 $\pm$ 0.02             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.00          | 0.69 $\pm$ 0.01                           | 0.72 $\pm$ 0.01     |
| ge         | EleutherAI-gpt-neo-125M            | 0.59 $\pm$ 0.01 | 0.62 $\pm$ 0.01             | 0.64 $\pm$ 0.00         | 0.65 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.67 $\pm$ 0.01     |
| ge         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.01             | 0.71 $\pm$ 0.01         | 0.73 $\pm$ 0.01          | 0.71 $\pm$ 0.00                           | 0.72 $\pm$ 0.02     |
| ge         | distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.69 $\pm$ 0.00             | 0.68 $\pm$ 0.02         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.00                           | 0.71 $\pm$ 0.02     |
| ge         | facebook-mbart-large-50            | 0.68 $\pm$ 0.01 | 0.71 $\pm$ 0.01             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.01          | **0.74 $\pm$ 0.01**                       | **0.74 $\pm$ 0.01** |
| ge         | gpt2                               | 0.62 $\pm$ 0.02 | 0.66 $\pm$ 0.03             | 0.66 $\pm$ 0.03         | 0.67 $\pm$ 0.02          | 0.66 $\pm$ 0.01                           | 0.68 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.68 $\pm$ 0.01 | 0.72 $\pm$ 0.00             | 0.72 $\pm$ 0.02         | **0.74 $\pm$ 0.01**      | **0.74 $\pm$ 0.02**                       | 0.73 $\pm$ 0.02     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.61 $\pm$ 0.01 | 0.64 $\pm$ 0.03             | 0.66 $\pm$ 0.02         | 0.67 $\pm$ 0.02          | 0.67 $\pm$ 0.03                           | 0.69 $\pm$ 0.01     |
| it         | EleutherAI-gpt-neo-125M            | 0.58 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.63 $\pm$ 0.01         | 0.63 $\pm$ 0.01          | 0.65 $\pm$ 0.03                           | 0.65 $\pm$ 0.01     |
| it         | bert-base-multilingual-cased       | 0.64 $\pm$ 0.01 | 0.68 $\pm$ 0.01             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.02                           | 0.69 $\pm$ 0.02     |
| it         | distilbert-base-multilingual-cased | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.69 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.68 $\pm$ 0.01     |
| it         | facebook-mbart-large-50            | 0.65 $\pm$ 0.00 | 0.68 $\pm$ 0.03             | 0.69 $\pm$ 0.01         | 0.70 $\pm$ 0.02          | 0.70 $\pm$ 0.03                           | **0.72 $\pm$ 0.03** |
| it         | gpt2                               | 0.61 $\pm$ 0.01 | 0.63 $\pm$ 0.01             | 0.65 $\pm$ 0.02         | 0.67 $\pm$ 0.01          | 0.66 $\pm$ 0.02                           | 0.68 $\pm$ 0.01     |
| it         | xlm-roberta-large                  | 0.66 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.01         | 0.71 $\pm$ 0.02          | 0.70 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.63 $\pm$ 0.02 | 0.66 $\pm$ 0.00             | 0.69 $\pm$ 0.01         | 0.69 $\pm$ 0.02          | 0.71 $\pm$ 0.01                           | 0.71 $\pm$ 0.02     |
| po         | EleutherAI-gpt-neo-125M            | 0.57 $\pm$ 0.02 | 0.61 $\pm$ 0.01             | 0.66 $\pm$ 0.01         | 0.64 $\pm$ 0.03          | 0.66 $\pm$ 0.02                           | 0.68 $\pm$ 0.02     |
| po         | bert-base-multilingual-cased       | 0.66 $\pm$ 0.01 | 0.69 $\pm$ 0.01             | 0.70 $\pm$ 0.02         | 0.72 $\pm$ 0.01          | 0.73 $\pm$ 0.01                           | 0.71 $\pm$ 0.01     |
| po         | distilbert-base-multilingual-cased | 0.65 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.72 $\pm$ 0.01         | 0.70 $\pm$ 0.01          | 0.70 $\pm$ 0.01                           | 0.71 $\pm$ 0.01     |
| po         | facebook-mbart-large-50            | 0.66 $\pm$ 0.02 | 0.70 $\pm$ 0.00             | 0.72 $\pm$ 0.01         | **0.74 $\pm$ 0.01**      | 0.72 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |
| po         | gpt2                               | 0.61 $\pm$ 0.01 | 0.66 $\pm$ 0.03             | 0.67 $\pm$ 0.02         | 0.68 $\pm$ 0.01          | 0.68 $\pm$ 0.01                           | 0.69 $\pm$ 0.02     |
| po         | xlm-roberta-large                  | 0.68 $\pm$ 0.02 | 0.69 $\pm$ 0.02             | 0.72 $\pm$ 0.02         | 0.73 $\pm$ 0.02          | 0.73 $\pm$ 0.01                           | **0.74 $\pm$ 0.01** |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.55 $\pm$ 0.01 | 0.57 $\pm$ 0.01             | 0.62 $\pm$ 0.02         | 0.62 $\pm$ 0.02          | 0.63 $\pm$ 0.01                           | 0.63 $\pm$ 0.03     |
| ru         | EleutherAI-gpt-neo-125M            | 0.55 $\pm$ 0.01 | 0.55 $\pm$ 0.02             | 0.55 $\pm$ 0.01         | 0.55 $\pm$ 0.01          | 0.54 $\pm$ 0.02                           | 0.54 $\pm$ 0.01     |
| ru         | bert-base-multilingual-cased       | 0.63 $\pm$ 0.01 | 0.66 $\pm$ 0.02             | 0.69 $\pm$ 0.03         | 0.69 $\pm$ 0.04          | 0.69 $\pm$ 0.02                           | 0.69 $\pm$ 0.01     |
| ru         | distilbert-base-multilingual-cased | 0.59 $\pm$ 0.01 | 0.64 $\pm$ 0.01             | 0.65 $\pm$ 0.01         | 0.66 $\pm$ 0.03          | 0.67 $\pm$ 0.02                           | 0.65 $\pm$ 0.01     |
| ru         | facebook-mbart-large-50            | 0.63 $\pm$ 0.02 | 0.68 $\pm$ 0.02             | 0.69 $\pm$ 0.02         | 0.71 $\pm$ 0.03          | 0.69 $\pm$ 0.01                           | 0.71 $\pm$ 0.00     |
| ru         | gpt2                               | 0.53 $\pm$ 0.02 | 0.52 $\pm$ 0.02             | 0.51 $\pm$ 0.01         | 0.53 $\pm$ 0.02          | 0.53 $\pm$ 0.01                           | 0.53 $\pm$ 0.02     |
| ru         | xlm-roberta-large                  | 0.67 $\pm$ 0.01 | 0.70 $\pm$ 0.03             | 0.71 $\pm$ 0.02         | **0.72 $\pm$ 0.01**      | 0.70 $\pm$ 0.02                           | 0.70 $\pm$ 0.02     |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                         | title           | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text            |
|:-----------|:-----------------------------------|:----------------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:--------------------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.08 $\pm$ 0.02 | 0.12 $\pm$ 0.02             | 0.08 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.10 $\pm$ 0.02                           | **0.13 $\pm$ 0.01** |
| en         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.05 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.01          | 0.08 $\pm$ 0.01                           | 0.09 $\pm$ 0.03     |
| en         | bert-base-multilingual-cased       | 0.07 $\pm$ 0.01 | 0.10 $\pm$ 0.03             | **0.13 $\pm$ 0.03**     | 0.09 $\pm$ 0.00          | 0.10 $\pm$ 0.04                           | 0.11 $\pm$ 0.01     |
| en         | distilbert-base-multilingual-cased | 0.06 $\pm$ 0.01 | 0.09 $\pm$ 0.02             | 0.10 $\pm$ 0.02         | 0.09 $\pm$ 0.01          | 0.08 $\pm$ 0.01                           | 0.10 $\pm$ 0.03     |
| en         | facebook-mbart-large-50            | 0.07 $\pm$ 0.04 | 0.10 $\pm$ 0.03             | 0.11 $\pm$ 0.03         | 0.12 $\pm$ 0.03          | 0.12 $\pm$ 0.02                           | 0.11 $\pm$ 0.01     |
| en         | gpt2                               | 0.06 $\pm$ 0.01 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.06 $\pm$ 0.03          | 0.08 $\pm$ 0.00                           | 0.10 $\pm$ 0.01     |
| en         | xlm-roberta-large                  | 0.10 $\pm$ 0.01 | 0.12 $\pm$ 0.01             | 0.10 $\pm$ 0.01         | 0.11 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | 0.11 $\pm$ 0.01     |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.02 | 0.07 $\pm$ 0.01             | 0.06 $\pm$ 0.03         | 0.08 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.09 $\pm$ 0.03     |
| fr         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.01 | 0.01 $\pm$ 0.01             | 0.03 $\pm$ 0.01         | 0.04 $\pm$ 0.01          | 0.04 $\pm$ 0.03                           | 0.08 $\pm$ 0.05     |
| fr         | bert-base-multilingual-cased       | 0.07 $\pm$ 0.02 | 0.08 $\pm$ 0.02             | 0.07 $\pm$ 0.01         | 0.09 $\pm$ 0.01          | 0.10 $\pm$ 0.01                           | **0.11 $\pm$ 0.02** |
| fr         | distilbert-base-multilingual-cased | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.06 $\pm$ 0.04          | 0.09 $\pm$ 0.04                           | 0.09 $\pm$ 0.01     |
| fr         | facebook-mbart-large-50            | 0.08 $\pm$ 0.04 | **0.11 $\pm$ 0.02**         | 0.09 $\pm$ 0.01         | 0.10 $\pm$ 0.03          | **0.11 $\pm$ 0.03**                       | **0.11 $\pm$ 0.02** |
| fr         | gpt2                               | 0.03 $\pm$ 0.02 | 0.05 $\pm$ 0.02             | 0.06 $\pm$ 0.04         | 0.07 $\pm$ 0.02          | 0.06 $\pm$ 0.03                           | 0.07 $\pm$ 0.05     |
| fr         | xlm-roberta-large                  | 0.07 $\pm$ 0.03 | 0.07 $\pm$ 0.04             | 0.10 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | **0.11 $\pm$ 0.05**                       | 0.09 $\pm$ 0.05     |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.02 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.05 $\pm$ 0.03         | 0.05 $\pm$ 0.03          | 0.04 $\pm$ 0.03                           | 0.07 $\pm$ 0.03     |
| ge         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.03 $\pm$ 0.02         | 0.02 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.03 $\pm$ 0.02     |
| ge         | bert-base-multilingual-cased       | 0.05 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.03         | 0.09 $\pm$ 0.05          | 0.06 $\pm$ 0.01                           | 0.10 $\pm$ 0.05     |
| ge         | distilbert-base-multilingual-cased | 0.02 $\pm$ 0.01 | 0.02 $\pm$ 0.02             | 0.05 $\pm$ 0.04         | 0.05 $\pm$ 0.03          | 0.05 $\pm$ 0.04                           | 0.04 $\pm$ 0.04     |
| ge         | facebook-mbart-large-50            | 0.05 $\pm$ 0.03 | 0.06 $\pm$ 0.04             | 0.06 $\pm$ 0.03         | 0.05 $\pm$ 0.02          | **0.11 $\pm$ 0.03**                       | 0.08 $\pm$ 0.04     |
| ge         | gpt2                               | 0.01 $\pm$ 0.01 | 0.01 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.03          | 0.03 $\pm$ 0.01                           | 0.02 $\pm$ 0.01     |
| ge         | xlm-roberta-large                  | 0.03 $\pm$ 0.02 | 0.07 $\pm$ 0.03             | 0.09 $\pm$ 0.03         | 0.10 $\pm$ 0.04          | 0.07 $\pm$ 0.04                           | 0.07 $\pm$ 0.04     |
| it         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.04             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.03          | 0.07 $\pm$ 0.02                           | 0.06 $\pm$ 0.03     |
| it         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.04 $\pm$ 0.01             | 0.04 $\pm$ 0.04         | 0.01 $\pm$ 0.01          | 0.04 $\pm$ 0.01                           | 0.08 $\pm$ 0.03     |
| it         | bert-base-multilingual-cased       | 0.04 $\pm$ 0.01 | 0.05 $\pm$ 0.03             | 0.06 $\pm$ 0.01         | 0.09 $\pm$ 0.02          | 0.09 $\pm$ 0.04                           | 0.08 $\pm$ 0.03     |
| it         | distilbert-base-multilingual-cased | 0.04 $\pm$ 0.02 | 0.05 $\pm$ 0.03             | 0.07 $\pm$ 0.02         | 0.06 $\pm$ 0.04          | 0.07 $\pm$ 0.03                           | 0.07 $\pm$ 0.05     |
| it         | facebook-mbart-large-50            | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.04             | 0.05 $\pm$ 0.02         | 0.09 $\pm$ 0.02          | 0.08 $\pm$ 0.03                           | **0.11 $\pm$ 0.02** |
| it         | gpt2                               | 0.02 $\pm$ 0.02 | 0.02 $\pm$ 0.02             | 0.04 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.03 $\pm$ 0.00                           | 0.07 $\pm$ 0.03     |
| it         | xlm-roberta-large                  | 0.06 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.08 $\pm$ 0.01         | 0.07 $\pm$ 0.02          | 0.08 $\pm$ 0.04                           | 0.08 $\pm$ 0.03     |
| po         | EleutherAI-gpt-neo-1.3B            | 0.01 $\pm$ 0.01 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.02         | 0.03 $\pm$ 0.02          | 0.05 $\pm$ 0.02                           | 0.06 $\pm$ 0.02     |
| po         | EleutherAI-gpt-neo-125M            | 0.01 $\pm$ 0.01 | 0.02 $\pm$ 0.01             | 0.02 $\pm$ 0.00         | 0.02 $\pm$ 0.01          | 0.03 $\pm$ 0.01                           | 0.04 $\pm$ 0.04     |
| po         | bert-base-multilingual-cased       | 0.04 $\pm$ 0.02 | 0.06 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.06 $\pm$ 0.02          | 0.07 $\pm$ 0.04                           | 0.06 $\pm$ 0.02     |
| po         | distilbert-base-multilingual-cased | 0.03 $\pm$ 0.02 | 0.04 $\pm$ 0.01             | 0.05 $\pm$ 0.03         | 0.04 $\pm$ 0.01          | 0.05 $\pm$ 0.02                           | 0.04 $\pm$ 0.01     |
| po         | facebook-mbart-large-50            | 0.05 $\pm$ 0.02 | 0.05 $\pm$ 0.01             | 0.06 $\pm$ 0.02         | 0.07 $\pm$ 0.03          | 0.05 $\pm$ 0.03                           | **0.11 $\pm$ 0.03** |
| po         | gpt2                               | 0.00 $\pm$ 0.00 | 0.03 $\pm$ 0.02             | 0.03 $\pm$ 0.03         | 0.04 $\pm$ 0.06          | 0.02 $\pm$ 0.02                           | 0.03 $\pm$ 0.02     |
| po         | xlm-roberta-large                  | 0.04 $\pm$ 0.01 | 0.07 $\pm$ 0.02             | 0.07 $\pm$ 0.03         | 0.07 $\pm$ 0.04          | 0.06 $\pm$ 0.03                           | 0.07 $\pm$ 0.02     |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.04 $\pm$ 0.03 | 0.06 $\pm$ 0.03             | 0.10 $\pm$ 0.01         | 0.08 $\pm$ 0.03          | 0.13 $\pm$ 0.05                           | 0.08 $\pm$ 0.03     |
| ru         | EleutherAI-gpt-neo-125M            | 0.03 $\pm$ 0.03 | 0.04 $\pm$ 0.05             | 0.06 $\pm$ 0.02         | 0.05 $\pm$ 0.04          | 0.03 $\pm$ 0.03                           | 0.04 $\pm$ 0.04     |
| ru         | bert-base-multilingual-cased       | 0.09 $\pm$ 0.03 | 0.12 $\pm$ 0.04             | **0.17 $\pm$ 0.03**     | 0.15 $\pm$ 0.08          | **0.17 $\pm$ 0.03**                       | 0.13 $\pm$ 0.06     |
| ru         | distilbert-base-multilingual-cased | 0.05 $\pm$ 0.01 | 0.11 $\pm$ 0.06             | 0.08 $\pm$ 0.03         | 0.13 $\pm$ 0.03          | 0.13 $\pm$ 0.07                           | 0.12 $\pm$ 0.03     |
| ru         | facebook-mbart-large-50            | 0.11 $\pm$ 0.03 | 0.16 $\pm$ 0.03             | 0.14 $\pm$ 0.03         | **0.17 $\pm$ 0.05**      | 0.15 $\pm$ 0.03                           | 0.14 $\pm$ 0.02     |
| ru         | gpt2                               | 0.02 $\pm$ 0.02 | 0.03 $\pm$ 0.02             | 0.02 $\pm$ 0.02         | 0.02 $\pm$ 0.00          | 0.02 $\pm$ 0.01                           | 0.03 $\pm$ 0.01     |
| ru         | xlm-roberta-large                  | 0.12 $\pm$ 0.05 | **0.17 $\pm$ 0.04**         | 0.14 $\pm$ 0.06         | 0.14 $\pm$ 0.08          | 0.13 $\pm$ 0.02                           | **0.17 $\pm$ 0.06** |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)
