In [32]:
import os
import glob

from IPython.display import display, Markdown
import pandas as pd

# Group the models by experiment type and model type, pick the best-performing one (by F1-score) for each unit of analysis, and report the results in a table.


In [33]:
# Collect the result CSVs that sit one directory below ./logged_performance_per_model.
# glob.glob returns matches in a file-system-dependent order, so the lists are sorted
# to make every run process the files in the same order.
results_filepaths_mean_pred = sorted(glob.glob('./logged_performance_per_model/*/*raw_mean*.csv'))
results_filepaths_majority_pred = sorted(glob.glob('./logged_performance_per_model/*/*raw_majority*.csv'))

In [34]:
def conantenate_results(filepath_list):
    """Load result CSVs and stack them into a single, indexed DataFrame.

    Each file is read with ``pd.read_csv`` and tagged with a ``model_name``
    column holding the name of the directory that directly contains the file.

    Parameters
    ----------
    filepath_list : iterable of str
        Paths of the CSV files to load. Every file must provide ``language``
        and ``unit_of_analysis`` columns, because they become index levels.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, indexed by
        ``(language, model_name, unit_of_analysis)`` and sorted by that index.
        The misspelled columns ``f1-mico_mean`` / ``f1-mico_std`` are renamed
        to ``f1-micro_mean`` / ``f1-micro_std`` when present.

    Raises
    ------
    ValueError
        If ``filepath_list`` is empty.
    """
    filepath_list = list(filepath_list)
    if not filepath_list:
        raise ValueError('No result files to concatenate; check the glob pattern and the working directory.')

    dfs_list = []
    for results_filepath in filepath_list:
        # Name of the parent directory, resolved with os.path so it is also correct
        # when the path uses the platform separator instead of '/'.
        model_name = os.path.basename(os.path.dirname(results_filepath))
        dfs_list.append(pd.read_csv(results_filepath).assign(model_name=model_name))

    return (
        pd.concat(dfs_list)
        .set_index(['language', 'model_name', 'unit_of_analysis'])
        .sort_index()
        .rename(columns={'f1-mico_mean': 'f1-micro_mean', 'f1-mico_std': 'f1-micro_std'})
    )

In [35]:
# Build one combined results frame per prediction-aggregation strategy
# (mean prediction first, majority vote second).
results_mean_pred_df, results_majority_vote_pred_df = (
    conantenate_results(filepaths)
    for filepaths in (results_filepaths_mean_pred, results_filepaths_majority_pred)
)

### Generate the tables to report

In [36]:
def display_performance_table(df, metric, index_cols=None, display_=True):
    """Pivot one metric into a (row group) x (unit of analysis) table of strings.

    Every value of ``metric`` is formatted with three decimals, the units of
    analysis become the columns (underscores replaced by spaces, in a fixed
    order), and every cell that equals the overall maximum of the table is
    wrapped in ``**`` so that it renders bold in markdown.

    Parameters
    ----------
    df : pandas.DataFrame
        Results frame. After ``reset_index()`` it must expose the columns named
        in ``index_cols``, a ``unit_of_analysis`` column and a ``metric`` column.
        The frame itself is not modified.
    metric : str
        Name of the column to report.
    index_cols : list of str, optional
        Columns used as rows of the table. Defaults to ``['model_name']``.
    display_ : bool, default True
        When True, render the table as markdown in the notebook.

    Returns
    -------
    pandas.DataFrame
        The pivoted table. Cells hold the formatted strings; combinations that
        have no result are filled with the integer ``0``.
    """
    index_cols = ['model_name'] if index_cols is None else list(index_cols)

    report_table = df.reset_index()
    report_table['result'] = report_table[metric].map(lambda x: f'{x:.3f}')
    # Units that are not listed in `categories` become NaN in the categorical
    # and therefore get no column of their own.
    report_table['col_title'] = pd.Categorical(
        report_table['unit_of_analysis'].str.replace('_', ' ', regex=False),
        categories=['title', 'title and first paragraph', 'title and 5 sentences', 'title and 10 sentences',
                    'title and first sentence each paragraph', 'raw text'],
        ordered=True)

    # observed=False spells out the historical default for categorical groupers,
    # whose implicit use is deprecated in recent pandas releases.
    report_table = (
        report_table[index_cols + ['col_title', 'result']]
        .pivot_table(index=index_cols, columns=['col_title'], values=['result'],
                     aggfunc='first', fill_value=0, observed=False)
        .droplevel(0, axis=1)
    )
    report_table.columns.names = [None]

    # Highlight the best-scoring cells. Only the part before the first space is
    # parsed, so cells of the form '<mean> <anything else>' are supported too.
    numeric_scores = report_table.apply(
        lambda column: column.map(lambda cell: float(str(cell).split(' ')[0])))
    if numeric_scores.size:
        highlight_mask = numeric_scores.eq(numeric_scores.to_numpy().max())
        emphasized = report_table.apply(lambda column: column.map('**{}**'.format))
        # Build a new frame instead of writing into `to_numpy()`: that array is
        # not guaranteed to be a writable view of the frame (it is a copy for
        # mixed dtypes and read-only under Copy-on-Write).
        report_table = report_table.astype(object).mask(highlight_mask, emphasized)

    if display_:
        # disable_numparse stops tabulate from re-parsing the formatted strings
        # as numbers, which would turn e.g. '0.640' into 0.64.
        display(Markdown(report_table.to_markdown(disable_numparse=True)))

    return report_table

### Generate tables for all languages

In [37]:
# Metric columns of the results frames for which report tables are generated.
metrics_to_report = [
    'f1_micro',
    'recall_micro',
    'precision_micro',
    'accuracy',
]

In [38]:
# Language code -> display name used in headings and output file names.
# NOTE(review): 'po' and 'ge' are not the ISO 639-1 codes for Polish/German ('pl'/'de');
# presumably they mirror the codes stored in the results CSVs - confirm before changing.
language_dict = dict(
    en='English',
    it='Italian',
    fr='French',
    po='Polish',
    ru='Russian',
    ge='German',
)

In [39]:
def _export_report_table(table, output_dir, metric, file_stem):
    """Write ``table`` below ``output_dir/metric`` as markdown, LaTeX and CSV files."""
    export_dirs = {
        file_format: os.path.join(output_dir, metric, file_format)
        for file_format in ('markdown', 'latex', 'csv')
    }
    for export_dir in export_dirs.values():
        os.makedirs(export_dir, exist_ok=True)

    flat_table = table.reset_index()

    # Context managers close the files even when an export raises.
    with open(os.path.join(export_dirs['markdown'], f"{file_stem}.md"), "w") as markdown_file:
        # disable_numparse keeps the pre-formatted strings untouched (tabulate
        # would otherwise print '0.640' as 0.64), matching the LaTeX/CSV exports.
        flat_table.to_markdown(markdown_file, index=False, disable_numparse=True)

    with open(os.path.join(export_dirs['latex'], f"{file_stem}.tex"), "w") as latex_file:
        flat_table.to_latex(latex_file, index=False)

    # The CSV keeps the index, hence the un-flattened table.
    table.to_csv(os.path.join(export_dirs['csv'], f"{file_stem}.csv"))


def display_metrics_and_write_to_file(df, grouping_criterion, output_dir):
    """Display and export one report table per language and metric, plus unified tables.

    For every group of the first index level of ``df`` (the language code) and
    every metric in the module-level ``metrics_to_report``, a table is built
    with ``display_performance_table``, shown in the notebook and written to
    ``output_dir/<metric>/{markdown,latex,csv}/<Language>_<metric>.*``.
    Afterwards the per-language tables of each metric are stacked and written
    to ``all_6_languages_<metric>.*`` in the same directories.

    Parameters
    ----------
    df : pandas.DataFrame
        Results frame whose first index level holds language codes that are
        keys of the module-level ``language_dict``.
    grouping_criterion : list of str
        Columns used as rows of each table (passed on as ``index_cols``).
    output_dir : str
        Root directory of the exported files; created if it does not exist.

    Raises
    ------
    KeyError
        If a language code of ``df`` is missing from ``language_dict``.
    """
    grouping_criterion = list(grouping_criterion)
    os.makedirs(output_dir, exist_ok=True)
    report_tables_dfs_dict = {metric: [] for metric in metrics_to_report}

    for language, results_df in df.groupby(level=0):
        language_name = language_dict[language]
        display(Markdown(f'# {language_name}'))

        for metric in metrics_to_report:
            display(Markdown(f'## {metric}'))

            report_table = display_performance_table(df=results_df, index_cols=grouping_criterion, metric=metric, display_=True)
            _export_report_table(report_table, output_dir, metric, f"{language_name}_{metric}")

            # Keep a copy tagged with the language code for the unified table
            report_tables_dfs_dict[metric].append(
                report_table
                .assign(language=language)
                .reset_index()
                .set_index(['language'] + grouping_criterion)
            )

    # Report and store the unified tables
    display(Markdown('# All 6 Languages'))
    for metric in metrics_to_report:
        display(Markdown(f'## {metric}'))
        multi_language_report_table_metric = pd.concat(report_tables_dfs_dict[metric])
        display(Markdown(
            multi_language_report_table_metric.reset_index().to_markdown(index=False, disable_numparse=True)))

        _export_report_table(multi_language_report_table_metric, output_dir, metric, f"all_6_languages_{metric}")

# Per model type

In [40]:
# Mean-prediction results, one table row per model
display_metrics_and_write_to_file(
    df=results_mean_pred_df,
    grouping_criterion=['model_name'],
    output_dir='per_model_name_tables_mean_prediction',
)

# English

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.64  |                       0.7   |                   0.684 | 0.699                    | 0.704                                     |      0.691 |
| EleutherAI-gpt-neo-1.3B                    |   0.63  |                       0.703 |                   0.707 | 0.693                    | 0.695                                     |      0.698 |
| EleutherAI-gpt-neo-125M                    |   0.53  |                       0.623 |                   0.637 | 0.634                    | 0.663                                     |      0.671 |
| bert-base-multilingual-cased               |   0.61  |                       0.677 |                   0.7   | 0.707                    | 0.684                                     |      0.693 |
| distilbert-base-multilingual-cased         |   0.596 |                       0.665 |                   0.661 | 0.672                    | 0.672                                     |      0.69  |
| facebook-mbart-large-50                    |   0.677 |                       0.717 |                   0.719 | **0.721**                | **0.721**                                 |      0.707 |
| gpt2                                       |   0.618 |                       0.693 |                   0.69  | 0.673                    | 0.692                                     |      0.696 |
| xlm-roberta-large                          |   0.66  |                       0.694 |                   0.718 | 0.715                    | 0.717                                     |      0.704 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.577 |                       0.638 |                   0.611 |                    0.643 | **0.667**                                 | 0.653      |
| EleutherAI-gpt-neo-1.3B                    |   0.538 |                       0.631 |                   0.619 |                    0.604 | 0.619                                     | 0.633      |
| EleutherAI-gpt-neo-125M                    |   0.45  |                       0.543 |                   0.562 |                    0.548 | 0.582                                     | 0.599      |
| bert-base-multilingual-cased               |   0.543 |                       0.606 |                   0.631 |                    0.65  | 0.628                                     | 0.655      |
| distilbert-base-multilingual-cased         |   0.528 |                       0.592 |                   0.579 |                    0.619 | 0.609                                     | 0.655      |
| facebook-mbart-large-50                    |   0.599 |                       0.653 |                   0.645 |                    0.653 | **0.667**                                 | **0.667**  |
| gpt2                                       |   0.548 |                       0.643 |                   0.658 |                    0.638 | 0.653                                     | 0.653      |
| xlm-roberta-large                          |   0.584 |                       0.636 |                   0.636 |                    0.643 | 0.663                                     | 0.653      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.717 |                       0.774 | 0.776                   |                    0.765 |                                     0.744 |      0.734 |
| EleutherAI-gpt-neo-1.3B                    |   0.761 |                       0.794 | 0.824                   |                    0.812 |                                     0.793 |      0.778 |
| EleutherAI-gpt-neo-125M                    |   0.646 |                       0.73  | 0.735                   |                    0.752 |                                     0.77  |      0.763 |
| bert-base-multilingual-cased               |   0.696 |                       0.765 | 0.787                   |                    0.776 |                                     0.751 |      0.736 |
| distilbert-base-multilingual-cased         |   0.684 |                       0.759 | 0.769                   |                    0.735 |                                     0.75  |      0.728 |
| facebook-mbart-large-50                    |   0.778 |                       0.795 | 0.812                   |                    0.804 |                                     0.784 |      0.752 |
| gpt2                                       |   0.709 |                       0.751 | 0.725                   |                    0.711 |                                     0.736 |      0.746 |
| xlm-roberta-large                          |   0.759 |                       0.765 | **0.825**               |                    0.804 |                                     0.781 |      0.765 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.087 |                       0.136 | 0.126                   |                    0.068 |                                     0.078 |      0.058 |
| EleutherAI-gpt-neo-1.3B                    |   0.087 |                       0.087 | **0.155**               |                    0.097 |                                     0.117 |      0.087 |
| EleutherAI-gpt-neo-125M                    |   0.019 |                       0.087 | 0.058                   |                    0.039 |                                     0.087 |      0.078 |
| bert-base-multilingual-cased               |   0.078 |                       0.087 | 0.117                   |                    0.078 |                                     0.068 |      0.087 |
| distilbert-base-multilingual-cased         |   0.097 |                       0.078 | 0.126                   |                    0.078 |                                     0.117 |      0.097 |
| facebook-mbart-large-50                    |   0.117 |                       0.126 | 0.146                   |                    0.107 |                                     0.146 |      0.097 |
| gpt2                                       |   0.097 |                       0.097 | 0.117                   |                    0.087 |                                     0.078 |      0.117 |
| xlm-roberta-large                          |   0.058 |                       0.087 | 0.146                   |                    0.126 |                                     0.117 |      0.117 |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.4   |                       0.46  |                   0.486 |                    0.449 |                                     0.52  | 0.502      |
| EleutherAI-gpt-neo-1.3B                    |   0.379 |                       0.448 |                   0.441 |                    0.394 |                                     0.459 | 0.493      |
| EleutherAI-gpt-neo-125M                    |   0.227 |                       0.359 |                   0.358 |                    0.352 |                                     0.413 | 0.462      |
| bert-base-multilingual-cased               |   0.419 |                       0.412 |                   0.441 |                    0.494 |                                     0.513 | 0.555      |
| distilbert-base-multilingual-cased         |   0.371 |                       0.461 |                   0.46  |                    0.508 |                                     0.54  | 0.546      |
| facebook-mbart-large-50                    |   0.453 |                       0.475 |                   0.53  |                    0.504 |                                     0.541 | **0.568**  |
| gpt2                                       |   0.323 |                       0.386 |                   0.4   |                    0.449 |                                     0.415 | 0.491      |
| xlm-roberta-large                          |   0.434 |                       0.48  |                   0.502 |                    0.5   |                                     0.529 | 0.545      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.413 |                   0.429 |                    0.405 |                                     0.468 | 0.460      |
| EleutherAI-gpt-neo-1.3B                    |   0.294 |                       0.357 |                   0.373 |                    0.31  |                                     0.381 | 0.429      |
| EleutherAI-gpt-neo-125M                    |   0.175 |                       0.294 |                   0.286 |                    0.294 |                                     0.341 | 0.389      |
| bert-base-multilingual-cased               |   0.349 |                       0.333 |                   0.397 |                    0.46  |                                     0.484 | 0.540      |
| distilbert-base-multilingual-cased         |   0.302 |                       0.421 |                   0.413 |                    0.476 |                                     0.484 | **0.563**  |
| facebook-mbart-large-50                    |   0.381 |                       0.413 |                   0.452 |                    0.452 |                                     0.5   | 0.532      |
| gpt2                                       |   0.286 |                       0.341 |                   0.341 |                    0.405 |                                     0.349 | 0.444      |
| xlm-roberta-large                          |   0.381 |                       0.429 |                   0.444 |                    0.46  |                                     0.476 | 0.532      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.455 |                       0.52  | 0.562                   |                    0.505 |                                     0.584 |      0.552 |
| EleutherAI-gpt-neo-1.3B                    |   0.536 |                       0.6   | 0.540                   |                    0.542 |                                     0.578 |      0.581 |
| EleutherAI-gpt-neo-125M                    |   0.324 |                       0.463 | 0.480                   |                    0.44  |                                     0.524 |      0.57  |
| bert-base-multilingual-cased               |   0.524 |                       0.538 | 0.495                   |                    0.532 |                                     0.545 |      0.571 |
| distilbert-base-multilingual-cased         |   0.481 |                       0.51  | 0.520                   |                    0.545 |                                     0.61  |      0.53  |
| facebook-mbart-large-50                    |   0.558 |                       0.559 | **0.640**               |                    0.57  |                                     0.589 |      0.609 |
| gpt2                                       |   0.371 |                       0.443 | 0.483                   |                    0.505 |                                     0.512 |      0.549 |
| xlm-roberta-large                          |   0.505 |                       0.545 | 0.577                   |                    0.547 |                                     0.594 |      0.558 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 | 0.071                       |                   0.071 |                    0.071 | **0.119**                                 |      0.071 |
| EleutherAI-gpt-neo-1.3B                    |   0.024 | **0.119**                   |                   0.024 |                    0.071 | 0.024                                     |      0.071 |
| EleutherAI-gpt-neo-125M                    |   0.024 | 0.024                       |                   0.024 |                    0.024 | 0.024                                     |      0.071 |
| bert-base-multilingual-cased               |   0.048 | 0.071                       |                   0.048 |                    0.071 | 0.071                                     |      0.095 |
| distilbert-base-multilingual-cased         |   0.071 | 0.071                       |                   0.095 |                    0.048 | 0.095                                     |      0.048 |
| facebook-mbart-large-50                    |   0.024 | 0.095                       |                   0.071 |                    0.048 | 0.071                                     |      0.071 |
| gpt2                                       |   0     | 0.024                       |                   0.048 |                    0     | 0.048                                     |      0.048 |
| xlm-roberta-large                          |   0     | **0.119**                   |                   0.071 |                    0.095 | 0.095                                     |      0.071 |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.559 |                       0.592 |                   0.567 |                    0.603 |                                     0.601 | 0.646      |
| EleutherAI-gpt-neo-1.3B                    |   0.466 |                       0.572 |                   0.585 |                    0.593 |                                     0.603 | 0.647      |
| EleutherAI-gpt-neo-125M                    |   0.409 |                       0.48  |                   0.509 |                    0.496 |                                     0.531 | 0.605      |
| bert-base-multilingual-cased               |   0.5   |                       0.568 |                   0.591 |                    0.616 |                                     0.598 | 0.659      |
| distilbert-base-multilingual-cased         |   0.516 |                       0.571 |                   0.542 |                    0.583 |                                     0.589 | 0.649      |
| facebook-mbart-large-50                    |   0.585 |                       0.601 |                   0.566 |                    0.628 |                                     0.619 | 0.646      |
| gpt2                                       |   0.475 |                       0.525 |                   0.485 |                    0.581 |                                     0.526 | 0.616      |
| xlm-roberta-large                          |   0.565 |                       0.63  |                   0.587 |                    0.638 |                                     0.652 | **0.660**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.465 |                       0.535 |                   0.483 |                    0.494 |                                     0.529 | 0.599      |
| EleutherAI-gpt-neo-1.3B                    |   0.355 |                       0.483 |                   0.5   |                    0.471 |                                     0.512 | 0.587      |
| EleutherAI-gpt-neo-125M                    |   0.331 |                       0.384 |                   0.407 |                    0.401 |                                     0.442 | 0.535      |
| bert-base-multilingual-cased               |   0.424 |                       0.488 |                   0.512 |                    0.547 |                                     0.57  | **0.645**  |
| distilbert-base-multilingual-cased         |   0.424 |                       0.517 |                   0.471 |                    0.5   |                                     0.517 | **0.645**  |
| facebook-mbart-large-50                    |   0.483 |                       0.547 |                   0.488 |                    0.535 |                                     0.552 | 0.599      |
| gpt2                                       |   0.413 |                       0.453 |                   0.413 |                    0.523 |                                     0.442 | 0.587      |
| xlm-roberta-large                          |   0.465 |                       0.57  |                   0.5   |                    0.564 |                                     0.593 | 0.610      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.702 |                       0.662 |                   0.686 | 0.773                    |                                     0.695 |      0.701 |
| EleutherAI-gpt-neo-1.3B                    |   0.678 |                       0.703 |                   0.705 | **0.802**                |                                     0.733 |      0.721 |
| EleutherAI-gpt-neo-125M                    |   0.533 |                       0.641 |                   0.68  | 0.651                    |                                     0.667 |      0.697 |
| bert-base-multilingual-cased               |   0.608 |                       0.677 |                   0.698 | 0.707                    |                                     0.628 |      0.673 |
| distilbert-base-multilingual-cased         |   0.658 |                       0.636 |                   0.638 | 0.699                    |                                     0.685 |      0.653 |
| facebook-mbart-large-50                    |   0.741 |                       0.667 |                   0.672 | 0.760                    |                                     0.704 |      0.701 |
| gpt2                                       |   0.559 |                       0.624 |                   0.587 | 0.652                    |                                     0.65  |      0.647 |
| xlm-roberta-large                          |   0.721 |                       0.705 |                   0.711 | 0.735                    |                                     0.723 |      0.719 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.029 |                       0.029 |                   0.029 | 0.057                    |                                     0.057 |      0.029 |
| EleutherAI-gpt-neo-1.3B                    |   0     |                       0.029 |                   0     | 0.057                    |                                     0.029 |      0.029 |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     |                   0.029 | 0.000                    |                                     0.029 |      0.029 |
| bert-base-multilingual-cased               |   0     |                       0.029 |                   0.029 | 0.057                    |                                     0     |      0.029 |
| distilbert-base-multilingual-cased         |   0.029 |                       0.029 |                   0     | 0.000                    |                                     0.029 |      0     |
| facebook-mbart-large-50                    |   0.057 |                       0.029 |                   0.029 | 0.029                    |                                     0.057 |      0     |
| gpt2                                       |   0     |                       0     |                   0.029 | **0.086**                |                                     0     |      0     |
| xlm-roberta-large                          |   0.029 |                       0.057 |                   0.029 | 0.000                    |                                     0.057 |      0.029 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.57  |                       0.551 |                   0.552 |                    0.573 |                                     0.558 | 0.603      |
| EleutherAI-gpt-neo-1.3B                    |   0.437 |                       0.538 |                   0.527 |                    0.537 |                                     0.547 | 0.607      |
| EleutherAI-gpt-neo-125M                    |   0.3   |                       0.324 |                   0.442 |                    0.475 |                                     0.464 | 0.556      |
| bert-base-multilingual-cased               |   0.479 |                       0.572 |                   0.566 |                    0.592 |                                     0.576 | 0.610      |
| distilbert-base-multilingual-cased         |   0.476 |                       0.545 |                   0.564 |                    0.589 |                                     0.541 | 0.627      |
| facebook-mbart-large-50                    |   0.533 |                       0.585 |                   0.59  |                    0.593 |                                     0.613 | **0.639**  |
| gpt2                                       |   0.397 |                       0.485 |                   0.513 |                    0.543 |                                     0.477 | 0.535      |
| xlm-roberta-large                          |   0.532 |                       0.587 |                   0.602 |                    0.598 |                                     0.587 | 0.633      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.478 |                       0.474 |                   0.474 |                    0.47  |                                     0.452 | 0.526      |
| EleutherAI-gpt-neo-1.3B                    |   0.33  |                       0.426 |                   0.404 |                    0.426 |                                     0.443 | 0.526      |
| EleutherAI-gpt-neo-125M                    |   0.243 |                       0.243 |                   0.348 |                    0.4   |                                     0.378 | 0.465      |
| bert-base-multilingual-cased               |   0.413 |                       0.483 |                   0.491 |                    0.517 |                                     0.504 | 0.548      |
| distilbert-base-multilingual-cased         |   0.409 |                       0.47  |                   0.487 |                    0.526 |                                     0.47  | **0.578**  |
| facebook-mbart-large-50                    |   0.439 |                       0.487 |                   0.483 |                    0.478 |                                     0.526 | 0.565      |
| gpt2                                       |   0.335 |                       0.417 |                   0.457 |                    0.491 |                                     0.387 | 0.496      |
| xlm-roberta-large                          |   0.439 |                       0.491 |                   0.496 |                    0.491 |                                     0.491 | 0.543      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.705 |                       0.657 |                   0.661 | 0.735                    |                                     0.727 |      0.708 |
| EleutherAI-gpt-neo-1.3B                    |   0.644 |                       0.731 |                   0.756 | 0.726                    |                                     0.713 |      0.716 |
| EleutherAI-gpt-neo-125M                    |   0.392 |                       0.483 |                   0.606 | 0.586                    |                                     0.6   |      0.69  |
| bert-base-multilingual-cased               |   0.569 |                       0.703 |                   0.669 | 0.692                    |                                     0.671 |      0.689 |
| distilbert-base-multilingual-cased         |   0.57  |                       0.651 |                   0.671 | 0.669                    |                                     0.639 |      0.686 |
| facebook-mbart-large-50                    |   0.678 |                       0.732 |                   0.76  | **0.780**                |                                     0.733 |      0.734 |
| gpt2                                       |   0.487 |                       0.578 |                   0.587 | 0.608                    |                                     0.622 |      0.582 |
| xlm-roberta-large                          |   0.673 |                       0.729 |                   0.765 | 0.764                    |                                     0.729 |      0.758 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   | title and 10 sentences   |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|:-------------------------|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.1   |                       0.15  | 0.167                   | 0.133                    |                                     0.1   | 0.117      |
| EleutherAI-gpt-neo-1.3B                    |   0.05  |                       0.117 | **0.183**               | 0.150                    |                                     0.133 | 0.133      |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     | 0.033                   | 0.067                    |                                     0.033 | 0.133      |
| bert-base-multilingual-cased               |   0.017 |                       0.117 | 0.067                   | **0.183**                |                                     0.117 | 0.133      |
| distilbert-base-multilingual-cased         |   0.017 |                       0.117 | 0.117                   | 0.150                    |                                     0.083 | 0.133      |
| facebook-mbart-large-50                    |   0.1   |                       0.167 | 0.150                   | 0.167                    |                                     0.167 | **0.183**  |
| gpt2                                       |   0.033 |                       0.033 | 0.033                   | 0.067                    |                                     0.083 | 0.050      |
| xlm-roberta-large                          |   0.1   |                       0.167 | **0.183**               | 0.133                    |                                     0.15  | **0.183**  |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.507 |                       0.599 |                   0.617 |                    0.607 |                                     0.635 | 0.634      |
| EleutherAI-gpt-neo-1.3B                    |   0.464 |                       0.515 |                   0.618 |                    0.586 |                                     0.618 | 0.623      |
| EleutherAI-gpt-neo-125M                    |   0.453 |                       0.496 |                   0.516 |                    0.566 |                                     0.545 | 0.551      |
| bert-base-multilingual-cased               |   0.571 |                       0.62  |                   0.652 |                    0.654 |                                     0.667 | 0.663      |
| distilbert-base-multilingual-cased         |   0.558 |                       0.542 |                   0.592 |                    0.639 |                                     0.631 | 0.638      |
| facebook-mbart-large-50                    |   0.565 |                       0.604 |                   0.676 |                    0.665 |                                     0.685 | 0.672      |
| gpt2                                       |   0.517 |                       0.538 |                   0.591 |                    0.621 |                                     0.575 | 0.653      |
| xlm-roberta-large                          |   0.569 |                       0.64  |                   0.634 |                    0.641 |                                     0.695 | **0.698**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.437 |                       0.549 |                   0.563 |                    0.51  |                                     0.544 | 0.573      |
| EleutherAI-gpt-neo-1.3B                    |   0.393 |                       0.422 |                   0.539 |                    0.481 |                                     0.539 | 0.553      |
| EleutherAI-gpt-neo-125M                    |   0.388 |                       0.427 |                   0.461 |                    0.519 |                                     0.471 | 0.471      |
| bert-base-multilingual-cased               |   0.519 |                       0.563 |                   0.587 |                    0.597 |                                     0.597 | 0.636      |
| distilbert-base-multilingual-cased         |   0.49  |                       0.5   |                   0.524 |                    0.558 |                                     0.568 | 0.621      |
| facebook-mbart-large-50                    |   0.524 |                       0.544 |                   0.602 |                    0.573 |                                     0.607 | 0.617      |
| gpt2                                       |   0.481 |                       0.481 |                   0.597 |                    0.568 |                                     0.51  | 0.617      |
| xlm-roberta-large                          |   0.51  |                       0.587 |                   0.573 |                    0.563 |                                     0.626 | **0.650**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.604 |                       0.661 |                   0.682 | 0.750                    |                                     0.762 |      0.711 |
| EleutherAI-gpt-neo-1.3B                    |   0.566 |                       0.659 |                   0.725 | 0.750                    |                                     0.725 |      0.713 |
| EleutherAI-gpt-neo-125M                    |   0.544 |                       0.591 |                   0.586 | 0.622                    |                                     0.647 |      0.664 |
| bert-base-multilingual-cased               |   0.633 |                       0.69  |                   0.733 | 0.724                    |                                     0.755 |      0.693 |
| distilbert-base-multilingual-cased         |   0.647 |                       0.592 |                   0.679 | 0.747                    |                                     0.709 |      0.656 |
| facebook-mbart-large-50                    |   0.614 |                       0.679 |                   0.77  | **0.792**                |                                     0.786 |      0.738 |
| gpt2                                       |   0.559 |                       0.611 |                   0.586 | 0.684                    |                                     0.66  |      0.694 |
| xlm-roberta-large                          |   0.644 |                       0.703 |                   0.711 | 0.744                    |                                     0.782 |      0.753 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 |                       0.026 |                   0.026 | 0.051                    |                                     0.051 | 0.051      |
| EleutherAI-gpt-neo-1.3B                    |   0.026 |                       0     |                   0.051 | 0.051                    |                                     0.026 | 0.051      |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     |                   0.051 | 0.026                    |                                     0.026 | 0.026      |
| bert-base-multilingual-cased               |   0.026 |                       0.051 |                   0.051 | 0.051                    |                                     0.051 | 0.051      |
| distilbert-base-multilingual-cased         |   0.051 |                       0     |                   0.026 | 0.051                    |                                     0.051 | 0.026      |
| facebook-mbart-large-50                    |   0.026 |                       0     |                   0.026 | **0.077**                |                                     0.026 | 0.051      |
| gpt2                                       |   0     |                       0     |                   0     | 0.026                    |                                     0.026 | 0.051      |
| xlm-roberta-large                          |   0.051 |                       0.026 |                   0.026 | 0.051                    |                                     0.026 | **0.077**  |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.419 |                       0.465 |                   0.474 |                    0.487 |                                     0.521 | **0.532**  |
| EleutherAI-gpt-neo-1.3B                    |   0.319 |                       0.286 |                   0.377 |                    0.387 |                                     0.422 | 0.375      |
| EleutherAI-gpt-neo-125M                    |   0.162 |                       0.22  |                   0.24  |                    0.217 |                                     0.137 | 0.150      |
| bert-base-multilingual-cased               |   0.366 |                       0.436 |                   0.516 |                    0.471 |                                     0.468 | 0.478      |
| distilbert-base-multilingual-cased         |   0.318 |                       0.407 |                   0.47  |                    0.5   |                                     0.468 | 0.503      |
| facebook-mbart-large-50                    |   0.426 |                       0.435 |                   0.511 |                    0.49  |                                     0.526 | 0.519      |
| gpt2                                       |   0.159 |                       0.107 |                   0.075 |                    0.095 |                                     0.143 | 0.217      |
| xlm-roberta-large                          |   0.403 |                       0.446 |                   0.479 |                    0.455 |                                     0.507 | 0.443      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.36  |                       0.419 |                   0.419 |                    0.43  |                                     0.442 | 0.488      |
| EleutherAI-gpt-neo-1.3B                    |   0.221 |                       0.198 |                   0.267 |                    0.279 |                                     0.314 | 0.279      |
| EleutherAI-gpt-neo-125M                    |   0.105 |                       0.163 |                   0.174 |                    0.174 |                                     0.093 | 0.105      |
| bert-base-multilingual-cased               |   0.302 |                       0.395 |                   0.465 |                    0.419 |                                     0.419 | 0.442      |
| distilbert-base-multilingual-cased         |   0.244 |                       0.384 |                   0.407 |                    0.465 |                                     0.419 | **0.523**  |
| facebook-mbart-large-50                    |   0.337 |                       0.349 |                   0.419 |                    0.407 |                                     0.419 | 0.465      |
| gpt2                                       |   0.105 |                       0.07  |                   0.047 |                    0.058 |                                     0.093 | 0.163      |
| xlm-roberta-large                          |   0.314 |                       0.36  |                   0.395 |                    0.384 |                                     0.442 | 0.384      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.5   |                       0.522 |                   0.545 |                    0.561 | 0.633                                     |      0.583 |
| EleutherAI-gpt-neo-1.3B                    |   0.576 |                       0.515 |                   0.639 |                    0.632 | 0.643                                     |      0.571 |
| EleutherAI-gpt-neo-125M                    |   0.36  |                       0.341 |                   0.385 |                    0.288 | 0.258                                     |      0.265 |
| bert-base-multilingual-cased               |   0.464 |                       0.486 |                   0.58  |                    0.537 | 0.529                                     |      0.521 |
| distilbert-base-multilingual-cased         |   0.457 |                       0.434 |                   0.556 |                    0.541 | 0.529                                     |      0.484 |
| facebook-mbart-large-50                    |   0.58  |                       0.577 |                   0.655 |                    0.614 | **0.706**                                 |      0.588 |
| gpt2                                       |   0.333 |                       0.231 |                   0.19  |                    0.263 | 0.308                                     |      0.326 |
| xlm-roberta-large                          |   0.562 |                       0.585 |                   0.607 |                    0.559 | 0.594                                     |      0.524 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.158 |                       0.158 |                   0.132 |                    0.079 | **0.263**                                 |      0.237 |
| EleutherAI-gpt-neo-1.3B                    |   0.053 |                       0.105 |                   0.079 |                    0.158 | 0.105                                     |      0.105 |
| EleutherAI-gpt-neo-125M                    |   0.026 |                       0     |                   0.053 |                    0     | 0.026                                     |      0.026 |
| bert-base-multilingual-cased               |   0.079 |                       0.105 |                   0.237 |                    0.158 | 0.184                                     |      0.237 |
| distilbert-base-multilingual-cased         |   0.105 |                       0.132 |                   0.158 |                    0.158 | 0.211                                     |      0.158 |
| facebook-mbart-large-50                    |   0.158 |                       0.211 |                   0.211 |                    0.184 | 0.211                                     |      0.184 |
| gpt2                                       |   0     |                       0     |                   0     |                    0.026 | 0.053                                     |      0.026 |
| xlm-roberta-large                          |   0.158 |                       0.184 |                   0.211 |                    0.105 | 0.211                                     |      0.158 |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1_micro

| language   | model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.64  |                       0.7   |                   0.684 | 0.699                    | 0.704                                     | 0.691      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.63  |                       0.703 |                   0.707 | 0.693                    | 0.695                                     | 0.698      |
| en         | EleutherAI-gpt-neo-125M                    |   0.53  |                       0.623 |                   0.637 | 0.634                    | 0.663                                     | 0.671      |
| en         | bert-base-multilingual-cased               |   0.61  |                       0.677 |                   0.7   | 0.707                    | 0.684                                     | 0.693      |
| en         | distilbert-base-multilingual-cased         |   0.596 |                       0.665 |                   0.661 | 0.672                    | 0.672                                     | 0.690      |
| en         | facebook-mbart-large-50                    |   0.677 |                       0.717 |                   0.719 | **0.721**                | **0.721**                                 | 0.707      |
| en         | gpt2                                       |   0.618 |                       0.693 |                   0.69  | 0.673                    | 0.692                                     | 0.696      |
| en         | xlm-roberta-large                          |   0.66  |                       0.694 |                   0.718 | 0.715                    | 0.717                                     | 0.704      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.4   |                       0.46  |                   0.486 | 0.449                    | 0.520                                     | 0.502      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.379 |                       0.448 |                   0.441 | 0.394                    | 0.459                                     | 0.493      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.227 |                       0.359 |                   0.358 | 0.352                    | 0.413                                     | 0.462      |
| fr         | bert-base-multilingual-cased               |   0.419 |                       0.412 |                   0.441 | 0.494                    | 0.513                                     | 0.555      |
| fr         | distilbert-base-multilingual-cased         |   0.371 |                       0.461 |                   0.46  | 0.508                    | 0.540                                     | 0.546      |
| fr         | facebook-mbart-large-50                    |   0.453 |                       0.475 |                   0.53  | 0.504                    | 0.541                                     | **0.568**  |
| fr         | gpt2                                       |   0.323 |                       0.386 |                   0.4   | 0.449                    | 0.415                                     | 0.491      |
| fr         | xlm-roberta-large                          |   0.434 |                       0.48  |                   0.502 | 0.500                    | 0.529                                     | 0.545      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.559 |                       0.592 |                   0.567 | 0.603                    | 0.601                                     | 0.646      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.466 |                       0.572 |                   0.585 | 0.593                    | 0.603                                     | 0.647      |
| ge         | EleutherAI-gpt-neo-125M                    |   0.409 |                       0.48  |                   0.509 | 0.496                    | 0.531                                     | 0.605      |
| ge         | bert-base-multilingual-cased               |   0.5   |                       0.568 |                   0.591 | 0.616                    | 0.598                                     | 0.659      |
| ge         | distilbert-base-multilingual-cased         |   0.516 |                       0.571 |                   0.542 | 0.583                    | 0.589                                     | 0.649      |
| ge         | facebook-mbart-large-50                    |   0.585 |                       0.601 |                   0.566 | 0.628                    | 0.619                                     | 0.646      |
| ge         | gpt2                                       |   0.475 |                       0.525 |                   0.485 | 0.581                    | 0.526                                     | 0.616      |
| ge         | xlm-roberta-large                          |   0.565 |                       0.63  |                   0.587 | 0.638                    | 0.652                                     | **0.660**  |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.57  |                       0.551 |                   0.552 | 0.573                    | 0.558                                     | 0.603      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.437 |                       0.538 |                   0.527 | 0.537                    | 0.547                                     | 0.607      |
| it         | EleutherAI-gpt-neo-125M                    |   0.3   |                       0.324 |                   0.442 | 0.475                    | 0.464                                     | 0.556      |
| it         | bert-base-multilingual-cased               |   0.479 |                       0.572 |                   0.566 | 0.592                    | 0.576                                     | 0.610      |
| it         | distilbert-base-multilingual-cased         |   0.476 |                       0.545 |                   0.564 | 0.589                    | 0.541                                     | 0.627      |
| it         | facebook-mbart-large-50                    |   0.533 |                       0.585 |                   0.59  | 0.593                    | 0.613                                     | **0.639**  |
| it         | gpt2                                       |   0.397 |                       0.485 |                   0.513 | 0.543                    | 0.477                                     | 0.535      |
| it         | xlm-roberta-large                          |   0.532 |                       0.587 |                   0.602 | 0.598                    | 0.587                                     | 0.633      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.507 |                       0.599 |                   0.617 | 0.607                    | 0.635                                     | 0.634      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.464 |                       0.515 |                   0.618 | 0.586                    | 0.618                                     | 0.623      |
| po         | EleutherAI-gpt-neo-125M                    |   0.453 |                       0.496 |                   0.516 | 0.566                    | 0.545                                     | 0.551      |
| po         | bert-base-multilingual-cased               |   0.571 |                       0.62  |                   0.652 | 0.654                    | 0.667                                     | 0.663      |
| po         | distilbert-base-multilingual-cased         |   0.558 |                       0.542 |                   0.592 | 0.639                    | 0.631                                     | 0.638      |
| po         | facebook-mbart-large-50                    |   0.565 |                       0.604 |                   0.676 | 0.665                    | 0.685                                     | 0.672      |
| po         | gpt2                                       |   0.517 |                       0.538 |                   0.591 | 0.621                    | 0.575                                     | 0.653      |
| po         | xlm-roberta-large                          |   0.569 |                       0.64  |                   0.634 | 0.641                    | 0.695                                     | **0.698**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.419 |                       0.465 |                   0.474 | 0.487                    | 0.521                                     | **0.532**  |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.319 |                       0.286 |                   0.377 | 0.387                    | 0.422                                     | 0.375      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.162 |                       0.22  |                   0.24  | 0.217                    | 0.137                                     | 0.150      |
| ru         | bert-base-multilingual-cased               |   0.366 |                       0.436 |                   0.516 | 0.471                    | 0.468                                     | 0.478      |
| ru         | distilbert-base-multilingual-cased         |   0.318 |                       0.407 |                   0.47  | 0.500                    | 0.468                                     | 0.503      |
| ru         | facebook-mbart-large-50                    |   0.426 |                       0.435 |                   0.511 | 0.490                    | 0.526                                     | 0.519      |
| ru         | gpt2                                       |   0.159 |                       0.107 |                   0.075 | 0.095                    | 0.143                                     | 0.217      |
| ru         | xlm-roberta-large                          |   0.403 |                       0.446 |                   0.479 | 0.455                    | 0.507                                     | 0.443      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall_micro

| language   | model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.577 |                       0.638 |                   0.611 |                    0.643 | **0.667**                                 | 0.653      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.538 |                       0.631 |                   0.619 |                    0.604 | 0.619                                     | 0.633      |
| en         | EleutherAI-gpt-neo-125M                    |   0.45  |                       0.543 |                   0.562 |                    0.548 | 0.582                                     | 0.599      |
| en         | bert-base-multilingual-cased               |   0.543 |                       0.606 |                   0.631 |                    0.65  | 0.628                                     | 0.655      |
| en         | distilbert-base-multilingual-cased         |   0.528 |                       0.592 |                   0.579 |                    0.619 | 0.609                                     | 0.655      |
| en         | facebook-mbart-large-50                    |   0.599 |                       0.653 |                   0.645 |                    0.653 | **0.667**                                 | **0.667**  |
| en         | gpt2                                       |   0.548 |                       0.643 |                   0.658 |                    0.638 | 0.653                                     | 0.653      |
| en         | xlm-roberta-large                          |   0.584 |                       0.636 |                   0.636 |                    0.643 | 0.663                                     | 0.653      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.413 |                   0.429 |                    0.405 | 0.468                                     | 0.460      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.294 |                       0.357 |                   0.373 |                    0.31  | 0.381                                     | 0.429      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.175 |                       0.294 |                   0.286 |                    0.294 | 0.341                                     | 0.389      |
| fr         | bert-base-multilingual-cased               |   0.349 |                       0.333 |                   0.397 |                    0.46  | 0.484                                     | 0.540      |
| fr         | distilbert-base-multilingual-cased         |   0.302 |                       0.421 |                   0.413 |                    0.476 | 0.484                                     | **0.563**  |
| fr         | facebook-mbart-large-50                    |   0.381 |                       0.413 |                   0.452 |                    0.452 | 0.500                                     | 0.532      |
| fr         | gpt2                                       |   0.286 |                       0.341 |                   0.341 |                    0.405 | 0.349                                     | 0.444      |
| fr         | xlm-roberta-large                          |   0.381 |                       0.429 |                   0.444 |                    0.46  | 0.476                                     | 0.532      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.465 |                       0.535 |                   0.483 |                    0.494 | 0.529                                     | 0.599      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.355 |                       0.483 |                   0.5   |                    0.471 | 0.512                                     | 0.587      |
| ge         | EleutherAI-gpt-neo-125M                    |   0.331 |                       0.384 |                   0.407 |                    0.401 | 0.442                                     | 0.535      |
| ge         | bert-base-multilingual-cased               |   0.424 |                       0.488 |                   0.512 |                    0.547 | 0.570                                     | **0.645**  |
| ge         | distilbert-base-multilingual-cased         |   0.424 |                       0.517 |                   0.471 |                    0.5   | 0.517                                     | **0.645**  |
| ge         | facebook-mbart-large-50                    |   0.483 |                       0.547 |                   0.488 |                    0.535 | 0.552                                     | 0.599      |
| ge         | gpt2                                       |   0.413 |                       0.453 |                   0.413 |                    0.523 | 0.442                                     | 0.587      |
| ge         | xlm-roberta-large                          |   0.465 |                       0.57  |                   0.5   |                    0.564 | 0.593                                     | 0.610      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.478 |                       0.474 |                   0.474 |                    0.47  | 0.452                                     | 0.526      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.33  |                       0.426 |                   0.404 |                    0.426 | 0.443                                     | 0.526      |
| it         | EleutherAI-gpt-neo-125M                    |   0.243 |                       0.243 |                   0.348 |                    0.4   | 0.378                                     | 0.465      |
| it         | bert-base-multilingual-cased               |   0.413 |                       0.483 |                   0.491 |                    0.517 | 0.504                                     | 0.548      |
| it         | distilbert-base-multilingual-cased         |   0.409 |                       0.47  |                   0.487 |                    0.526 | 0.470                                     | **0.578**  |
| it         | facebook-mbart-large-50                    |   0.439 |                       0.487 |                   0.483 |                    0.478 | 0.526                                     | 0.565      |
| it         | gpt2                                       |   0.335 |                       0.417 |                   0.457 |                    0.491 | 0.387                                     | 0.496      |
| it         | xlm-roberta-large                          |   0.439 |                       0.491 |                   0.496 |                    0.491 | 0.491                                     | 0.543      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.437 |                       0.549 |                   0.563 |                    0.51  | 0.544                                     | 0.573      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.393 |                       0.422 |                   0.539 |                    0.481 | 0.539                                     | 0.553      |
| po         | EleutherAI-gpt-neo-125M                    |   0.388 |                       0.427 |                   0.461 |                    0.519 | 0.471                                     | 0.471      |
| po         | bert-base-multilingual-cased               |   0.519 |                       0.563 |                   0.587 |                    0.597 | 0.597                                     | 0.636      |
| po         | distilbert-base-multilingual-cased         |   0.49  |                       0.5   |                   0.524 |                    0.558 | 0.568                                     | 0.621      |
| po         | facebook-mbart-large-50                    |   0.524 |                       0.544 |                   0.602 |                    0.573 | 0.607                                     | 0.617      |
| po         | gpt2                                       |   0.481 |                       0.481 |                   0.597 |                    0.568 | 0.510                                     | 0.617      |
| po         | xlm-roberta-large                          |   0.51  |                       0.587 |                   0.573 |                    0.563 | 0.626                                     | **0.650**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.36  |                       0.419 |                   0.419 |                    0.43  | 0.442                                     | 0.488      |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.221 |                       0.198 |                   0.267 |                    0.279 | 0.314                                     | 0.279      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.105 |                       0.163 |                   0.174 |                    0.174 | 0.093                                     | 0.105      |
| ru         | bert-base-multilingual-cased               |   0.302 |                       0.395 |                   0.465 |                    0.419 | 0.419                                     | 0.442      |
| ru         | distilbert-base-multilingual-cased         |   0.244 |                       0.384 |                   0.407 |                    0.465 | 0.419                                     | **0.523**  |
| ru         | facebook-mbart-large-50                    |   0.337 |                       0.349 |                   0.419 |                    0.407 | 0.419                                     | 0.465      |
| ru         | gpt2                                       |   0.105 |                       0.07  |                   0.047 |                    0.058 | 0.093                                     | 0.163      |
| ru         | xlm-roberta-large                          |   0.314 |                       0.36  |                   0.395 |                    0.384 | 0.442                                     | 0.384      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision_micro

| language   | model_name                                 |   title |   title and first paragraph | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   |   raw text |
|:-----------|:-------------------------------------------|--------:|----------------------------:|:------------------------|:-------------------------|:------------------------------------------|-----------:|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.717 |                       0.774 | 0.776                   | 0.765                    | 0.744                                     |      0.734 |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.761 |                       0.794 | 0.824                   | 0.812                    | 0.793                                     |      0.778 |
| en         | EleutherAI-gpt-neo-125M                    |   0.646 |                       0.73  | 0.735                   | 0.752                    | 0.770                                     |      0.763 |
| en         | bert-base-multilingual-cased               |   0.696 |                       0.765 | 0.787                   | 0.776                    | 0.751                                     |      0.736 |
| en         | distilbert-base-multilingual-cased         |   0.684 |                       0.759 | 0.769                   | 0.735                    | 0.750                                     |      0.728 |
| en         | facebook-mbart-large-50                    |   0.778 |                       0.795 | 0.812                   | 0.804                    | 0.784                                     |      0.752 |
| en         | gpt2                                       |   0.709 |                       0.751 | 0.725                   | 0.711                    | 0.736                                     |      0.746 |
| en         | xlm-roberta-large                          |   0.759 |                       0.765 | **0.825**               | 0.804                    | 0.781                                     |      0.765 |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.455 |                       0.52  | 0.562                   | 0.505                    | 0.584                                     |      0.552 |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.536 |                       0.6   | 0.540                   | 0.542                    | 0.578                                     |      0.581 |
| fr         | EleutherAI-gpt-neo-125M                    |   0.324 |                       0.463 | 0.480                   | 0.440                    | 0.524                                     |      0.57  |
| fr         | bert-base-multilingual-cased               |   0.524 |                       0.538 | 0.495                   | 0.532                    | 0.545                                     |      0.571 |
| fr         | distilbert-base-multilingual-cased         |   0.481 |                       0.51  | 0.520                   | 0.545                    | 0.610                                     |      0.53  |
| fr         | facebook-mbart-large-50                    |   0.558 |                       0.559 | **0.640**               | 0.570                    | 0.589                                     |      0.609 |
| fr         | gpt2                                       |   0.371 |                       0.443 | 0.483                   | 0.505                    | 0.512                                     |      0.549 |
| fr         | xlm-roberta-large                          |   0.505 |                       0.545 | 0.577                   | 0.547                    | 0.594                                     |      0.558 |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.702 |                       0.662 | 0.686                   | 0.773                    | 0.695                                     |      0.701 |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.678 |                       0.703 | 0.705                   | **0.802**                | 0.733                                     |      0.721 |
| ge         | EleutherAI-gpt-neo-125M                    |   0.533 |                       0.641 | 0.680                   | 0.651                    | 0.667                                     |      0.697 |
| ge         | bert-base-multilingual-cased               |   0.608 |                       0.677 | 0.698                   | 0.707                    | 0.628                                     |      0.673 |
| ge         | distilbert-base-multilingual-cased         |   0.658 |                       0.636 | 0.638                   | 0.699                    | 0.685                                     |      0.653 |
| ge         | facebook-mbart-large-50                    |   0.741 |                       0.667 | 0.672                   | 0.760                    | 0.704                                     |      0.701 |
| ge         | gpt2                                       |   0.559 |                       0.624 | 0.587                   | 0.652                    | 0.650                                     |      0.647 |
| ge         | xlm-roberta-large                          |   0.721 |                       0.705 | 0.711                   | 0.735                    | 0.723                                     |      0.719 |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.705 |                       0.657 | 0.661                   | 0.735                    | 0.727                                     |      0.708 |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.644 |                       0.731 | 0.756                   | 0.726                    | 0.713                                     |      0.716 |
| it         | EleutherAI-gpt-neo-125M                    |   0.392 |                       0.483 | 0.606                   | 0.586                    | 0.600                                     |      0.69  |
| it         | bert-base-multilingual-cased               |   0.569 |                       0.703 | 0.669                   | 0.692                    | 0.671                                     |      0.689 |
| it         | distilbert-base-multilingual-cased         |   0.57  |                       0.651 | 0.671                   | 0.669                    | 0.639                                     |      0.686 |
| it         | facebook-mbart-large-50                    |   0.678 |                       0.732 | 0.760                   | **0.780**                | 0.733                                     |      0.734 |
| it         | gpt2                                       |   0.487 |                       0.578 | 0.587                   | 0.608                    | 0.622                                     |      0.582 |
| it         | xlm-roberta-large                          |   0.673 |                       0.729 | 0.765                   | 0.764                    | 0.729                                     |      0.758 |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.604 |                       0.661 | 0.682                   | 0.750                    | 0.762                                     |      0.711 |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.566 |                       0.659 | 0.725                   | 0.750                    | 0.725                                     |      0.713 |
| po         | EleutherAI-gpt-neo-125M                    |   0.544 |                       0.591 | 0.586                   | 0.622                    | 0.647                                     |      0.664 |
| po         | bert-base-multilingual-cased               |   0.633 |                       0.69  | 0.733                   | 0.724                    | 0.755                                     |      0.693 |
| po         | distilbert-base-multilingual-cased         |   0.647 |                       0.592 | 0.679                   | 0.747                    | 0.709                                     |      0.656 |
| po         | facebook-mbart-large-50                    |   0.614 |                       0.679 | 0.770                   | **0.792**                | 0.786                                     |      0.738 |
| po         | gpt2                                       |   0.559 |                       0.611 | 0.586                   | 0.684                    | 0.660                                     |      0.694 |
| po         | xlm-roberta-large                          |   0.644 |                       0.703 | 0.711                   | 0.744                    | 0.782                                     |      0.753 |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.5   |                       0.522 | 0.545                   | 0.561                    | 0.633                                     |      0.583 |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.576 |                       0.515 | 0.639                   | 0.632                    | 0.643                                     |      0.571 |
| ru         | EleutherAI-gpt-neo-125M                    |   0.36  |                       0.341 | 0.385                   | 0.288                    | 0.258                                     |      0.265 |
| ru         | bert-base-multilingual-cased               |   0.464 |                       0.486 | 0.580                   | 0.537                    | 0.529                                     |      0.521 |
| ru         | distilbert-base-multilingual-cased         |   0.457 |                       0.434 | 0.556                   | 0.541                    | 0.529                                     |      0.484 |
| ru         | facebook-mbart-large-50                    |   0.58  |                       0.577 | 0.655                   | 0.614                    | **0.706**                                 |      0.588 |
| ru         | gpt2                                       |   0.333 |                       0.231 | 0.190                   | 0.263                    | 0.308                                     |      0.326 |
| ru         | xlm-roberta-large                          |   0.562 |                       0.585 | 0.607                   | 0.559                    | 0.594                                     |      0.524 |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                                 |   title | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.087 | 0.136                       | 0.126                   | 0.068                    | 0.078                                     | 0.058      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.087 | 0.087                       | **0.155**               | 0.097                    | 0.117                                     | 0.087      |
| en         | EleutherAI-gpt-neo-125M                    |   0.019 | 0.087                       | 0.058                   | 0.039                    | 0.087                                     | 0.078      |
| en         | bert-base-multilingual-cased               |   0.078 | 0.087                       | 0.117                   | 0.078                    | 0.068                                     | 0.087      |
| en         | distilbert-base-multilingual-cased         |   0.097 | 0.078                       | 0.126                   | 0.078                    | 0.117                                     | 0.097      |
| en         | facebook-mbart-large-50                    |   0.117 | 0.126                       | 0.146                   | 0.107                    | 0.146                                     | 0.097      |
| en         | gpt2                                       |   0.097 | 0.097                       | 0.117                   | 0.087                    | 0.078                                     | 0.117      |
| en         | xlm-roberta-large                          |   0.058 | 0.087                       | 0.146                   | 0.126                    | 0.117                                     | 0.117      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 | 0.071                       | 0.071                   | 0.071                    | **0.119**                                 | 0.071      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.024 | **0.119**                   | 0.024                   | 0.071                    | 0.024                                     | 0.071      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.024 | 0.024                       | 0.024                   | 0.024                    | 0.024                                     | 0.071      |
| fr         | bert-base-multilingual-cased               |   0.048 | 0.071                       | 0.048                   | 0.071                    | 0.071                                     | 0.095      |
| fr         | distilbert-base-multilingual-cased         |   0.071 | 0.071                       | 0.095                   | 0.048                    | 0.095                                     | 0.048      |
| fr         | facebook-mbart-large-50                    |   0.024 | 0.095                       | 0.071                   | 0.048                    | 0.071                                     | 0.071      |
| fr         | gpt2                                       |   0     | 0.024                       | 0.048                   | 0.000                    | 0.048                                     | 0.048      |
| fr         | xlm-roberta-large                          |   0     | **0.119**                   | 0.071                   | 0.095                    | 0.095                                     | 0.071      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.029 | 0.029                       | 0.029                   | 0.057                    | 0.057                                     | 0.029      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0     | 0.029                       | 0.000                   | 0.057                    | 0.029                                     | 0.029      |
| ge         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.029                   | 0.000                    | 0.029                                     | 0.029      |
| ge         | bert-base-multilingual-cased               |   0     | 0.029                       | 0.029                   | 0.057                    | 0.000                                     | 0.029      |
| ge         | distilbert-base-multilingual-cased         |   0.029 | 0.029                       | 0.000                   | 0.000                    | 0.029                                     | 0.000      |
| ge         | facebook-mbart-large-50                    |   0.057 | 0.029                       | 0.029                   | 0.029                    | 0.057                                     | 0.000      |
| ge         | gpt2                                       |   0     | 0.000                       | 0.029                   | **0.086**                | 0.000                                     | 0.000      |
| ge         | xlm-roberta-large                          |   0.029 | 0.057                       | 0.029                   | 0.000                    | 0.057                                     | 0.029      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.1   | 0.150                       | 0.167                   | 0.133                    | 0.100                                     | 0.117      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.05  | 0.117                       | **0.183**               | 0.150                    | 0.133                                     | 0.133      |
| it         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.033                   | 0.067                    | 0.033                                     | 0.133      |
| it         | bert-base-multilingual-cased               |   0.017 | 0.117                       | 0.067                   | **0.183**                | 0.117                                     | 0.133      |
| it         | distilbert-base-multilingual-cased         |   0.017 | 0.117                       | 0.117                   | 0.150                    | 0.083                                     | 0.133      |
| it         | facebook-mbart-large-50                    |   0.1   | 0.167                       | 0.150                   | 0.167                    | 0.167                                     | **0.183**  |
| it         | gpt2                                       |   0.033 | 0.033                       | 0.033                   | 0.067                    | 0.083                                     | 0.050      |
| it         | xlm-roberta-large                          |   0.1   | 0.167                       | **0.183**               | 0.133                    | 0.150                                     | **0.183**  |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 | 0.026                       | 0.026                   | 0.051                    | 0.051                                     | 0.051      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.026 | 0.000                       | 0.051                   | 0.051                    | 0.026                                     | 0.051      |
| po         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.051                   | 0.026                    | 0.026                                     | 0.026      |
| po         | bert-base-multilingual-cased               |   0.026 | 0.051                       | 0.051                   | 0.051                    | 0.051                                     | 0.051      |
| po         | distilbert-base-multilingual-cased         |   0.051 | 0.000                       | 0.026                   | 0.051                    | 0.051                                     | 0.026      |
| po         | facebook-mbart-large-50                    |   0.026 | 0.000                       | 0.026                   | **0.077**                | 0.026                                     | 0.051      |
| po         | gpt2                                       |   0     | 0.000                       | 0.000                   | 0.026                    | 0.026                                     | 0.051      |
| po         | xlm-roberta-large                          |   0.051 | 0.026                       | 0.026                   | 0.051                    | 0.026                                     | **0.077**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.158 | 0.158                       | 0.132                   | 0.079                    | **0.263**                                 | 0.237      |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.053 | 0.105                       | 0.079                   | 0.158                    | 0.105                                     | 0.105      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.026 | 0.000                       | 0.053                   | 0.000                    | 0.026                                     | 0.026      |
| ru         | bert-base-multilingual-cased               |   0.079 | 0.105                       | 0.237                   | 0.158                    | 0.184                                     | 0.237      |
| ru         | distilbert-base-multilingual-cased         |   0.105 | 0.132                       | 0.158                   | 0.158                    | 0.211                                     | 0.158      |
| ru         | facebook-mbart-large-50                    |   0.158 | 0.211                       | 0.211                   | 0.184                    | 0.211                                     | 0.184      |
| ru         | gpt2                                       |   0     | 0.000                       | 0.000                   | 0.026                    | 0.053                                     | 0.026      |
| ru         | xlm-roberta-large                          |   0.158 | 0.184                       | 0.211                   | 0.105                    | 0.211                                     | 0.158      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


In [41]:
# Report the majority-vote predictions, one row per model name.
# `output_dir` is passed through to the helper, which (per its name) also
# writes the tables to file -- presumably under this directory; confirm
# against the helper's definition.
display_metrics_and_write_to_file(
    df=results_majority_vote_pred_df,
    grouping_criterion=['model_name'],
    output_dir='per_model_name_tables_majority_voting',
)

# English

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.64  |                       0.7   |                   0.684 |                    0.699 | 0.696                                     |      0.679 |
| EleutherAI-gpt-neo-1.3B                    |   0.63  |                       0.703 |                   0.707 |                    0.693 | 0.692                                     |      0.692 |
| EleutherAI-gpt-neo-125M                    |   0.53  |                       0.623 |                   0.637 |                    0.634 | 0.649                                     |      0.654 |
| bert-base-multilingual-cased               |   0.61  |                       0.677 |                   0.7   |                    0.707 | 0.682                                     |      0.688 |
| distilbert-base-multilingual-cased         |   0.596 |                       0.665 |                   0.661 |                    0.672 | 0.662                                     |      0.683 |
| facebook-mbart-large-50                    |   0.677 |                       0.717 |                   0.719 |                    0.721 | **0.723**                                 |      0.701 |
| gpt2                                       |   0.618 |                       0.693 |                   0.69  |                    0.673 | 0.696                                     |      0.691 |
| xlm-roberta-large                          |   0.66  |                       0.694 |                   0.718 |                    0.715 | 0.710                                     |      0.687 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.577 |                       0.638 |                   0.611 |                    0.643 | 0.643                                     |      0.621 |
| EleutherAI-gpt-neo-1.3B                    |   0.538 |                       0.631 |                   0.619 |                    0.604 | 0.604                                     |      0.611 |
| EleutherAI-gpt-neo-125M                    |   0.45  |                       0.543 |                   0.562 |                    0.548 | 0.555                                     |      0.562 |
| bert-base-multilingual-cased               |   0.543 |                       0.606 |                   0.631 |                    0.65  | 0.619                                     |      0.621 |
| distilbert-base-multilingual-cased         |   0.528 |                       0.592 |                   0.579 |                    0.619 | 0.589                                     |      0.623 |
| facebook-mbart-large-50                    |   0.599 |                       0.653 |                   0.645 |                    0.653 | **0.660**                                 |      0.643 |
| gpt2                                       |   0.548 |                       0.643 |                   0.658 |                    0.638 | 0.645                                     |      0.631 |
| xlm-roberta-large                          |   0.584 |                       0.636 |                   0.636 |                    0.643 | 0.648                                     |      0.621 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.717 |                       0.774 | 0.776                   |                    0.765 |                                     0.758 |      0.749 |
| EleutherAI-gpt-neo-1.3B                    |   0.761 |                       0.794 | 0.824                   |                    0.812 |                                     0.81  |      0.796 |
| EleutherAI-gpt-neo-125M                    |   0.646 |                       0.73  | 0.735                   |                    0.752 |                                     0.78  |      0.782 |
| bert-base-multilingual-cased               |   0.696 |                       0.765 | 0.787                   |                    0.776 |                                     0.76  |      0.772 |
| distilbert-base-multilingual-cased         |   0.684 |                       0.759 | 0.769                   |                    0.735 |                                     0.755 |      0.754 |
| facebook-mbart-large-50                    |   0.778 |                       0.795 | 0.812                   |                    0.804 |                                     0.799 |      0.771 |
| gpt2                                       |   0.709 |                       0.751 | 0.725                   |                    0.711 |                                     0.754 |      0.763 |
| xlm-roberta-large                          |   0.759 |                       0.765 | **0.825**               |                    0.804 |                                     0.784 |      0.77  |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.087 |                       0.136 | 0.126                   |                    0.068 |                                     0.097 |      0.058 |
| EleutherAI-gpt-neo-1.3B                    |   0.087 |                       0.087 | **0.155**               |                    0.097 |                                     0.107 |      0.087 |
| EleutherAI-gpt-neo-125M                    |   0.019 |                       0.087 | 0.058                   |                    0.039 |                                     0.078 |      0.107 |
| bert-base-multilingual-cased               |   0.078 |                       0.087 | 0.117                   |                    0.078 |                                     0.078 |      0.117 |
| distilbert-base-multilingual-cased         |   0.097 |                       0.078 | 0.126                   |                    0.078 |                                     0.117 |      0.117 |
| facebook-mbart-large-50                    |   0.117 |                       0.126 | 0.146                   |                    0.107 |                                     0.126 |      0.126 |
| gpt2                                       |   0.097 |                       0.097 | 0.117                   |                    0.087 |                                     0.087 |      0.117 |
| xlm-roberta-large                          |   0.058 |                       0.087 | 0.146                   |                    0.126 |                                     0.107 |      0.107 |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.4   |                       0.46  |                   0.486 |                    0.449 |                                     0.513 | 0.491      |
| EleutherAI-gpt-neo-1.3B                    |   0.379 |                       0.448 |                   0.441 |                    0.394 |                                     0.433 | 0.483      |
| EleutherAI-gpt-neo-125M                    |   0.227 |                       0.359 |                   0.358 |                    0.352 |                                     0.414 | 0.459      |
| bert-base-multilingual-cased               |   0.419 |                       0.412 |                   0.441 |                    0.494 |                                     0.517 | 0.498      |
| distilbert-base-multilingual-cased         |   0.371 |                       0.461 |                   0.46  |                    0.508 |                                     0.532 | **0.544**  |
| facebook-mbart-large-50                    |   0.453 |                       0.475 |                   0.53  |                    0.504 |                                     0.534 | 0.522      |
| gpt2                                       |   0.323 |                       0.386 |                   0.4   |                    0.449 |                                     0.4   | 0.495      |
| xlm-roberta-large                          |   0.434 |                       0.48  |                   0.502 |                    0.5   |                                     0.529 | 0.513      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.413 |                   0.429 |                    0.405 |                                     0.46  | 0.429      |
| EleutherAI-gpt-neo-1.3B                    |   0.294 |                       0.357 |                   0.373 |                    0.31  |                                     0.349 | 0.405      |
| EleutherAI-gpt-neo-125M                    |   0.175 |                       0.294 |                   0.286 |                    0.294 |                                     0.333 | 0.357      |
| bert-base-multilingual-cased               |   0.349 |                       0.333 |                   0.397 |                    0.46  |                                     0.476 | 0.437      |
| distilbert-base-multilingual-cased         |   0.302 |                       0.421 |                   0.413 |                    0.476 |                                     0.468 | **0.516**  |
| facebook-mbart-large-50                    |   0.381 |                       0.413 |                   0.452 |                    0.452 |                                     0.492 | 0.468      |
| gpt2                                       |   0.286 |                       0.341 |                   0.341 |                    0.405 |                                     0.333 | 0.421      |
| xlm-roberta-large                          |   0.381 |                       0.429 |                   0.444 |                    0.46  |                                     0.476 | 0.460      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.455 |                       0.52  |                   0.562 |                    0.505 |                                     0.58  | 0.574      |
| EleutherAI-gpt-neo-1.3B                    |   0.536 |                       0.6   |                   0.54  |                    0.542 |                                     0.571 | 0.600      |
| EleutherAI-gpt-neo-125M                    |   0.324 |                       0.463 |                   0.48  |                    0.44  |                                     0.545 | **0.643**  |
| bert-base-multilingual-cased               |   0.524 |                       0.538 |                   0.495 |                    0.532 |                                     0.566 | 0.579      |
| distilbert-base-multilingual-cased         |   0.481 |                       0.51  |                   0.52  |                    0.545 |                                     0.615 | 0.575      |
| facebook-mbart-large-50                    |   0.558 |                       0.559 |                   0.64  |                    0.57  |                                     0.585 | 0.590      |
| gpt2                                       |   0.371 |                       0.443 |                   0.483 |                    0.505 |                                     0.5   | 0.602      |
| xlm-roberta-large                          |   0.505 |                       0.545 |                   0.577 |                    0.547 |                                     0.594 | 0.580      |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 | 0.071                       |                   0.071 |                    0.071 | **0.119**                                 |      0.095 |
| EleutherAI-gpt-neo-1.3B                    |   0.024 | **0.119**                   |                   0.024 |                    0.071 | 0.024                                     |      0.071 |
| EleutherAI-gpt-neo-125M                    |   0.024 | 0.024                       |                   0.024 |                    0.024 | 0.024                                     |      0.071 |
| bert-base-multilingual-cased               |   0.048 | 0.071                       |                   0.048 |                    0.071 | 0.071                                     |      0.095 |
| distilbert-base-multilingual-cased         |   0.071 | 0.071                       |                   0.095 |                    0.048 | 0.095                                     |      0.095 |
| facebook-mbart-large-50                    |   0.024 | 0.095                       |                   0.071 |                    0.048 | 0.071                                     |      0.071 |
| gpt2                                       |   0     | 0.024                       |                   0.048 |                    0     | 0.048                                     |      0.048 |
| xlm-roberta-large                          |   0     | **0.119**                   |                   0.071 |                    0.095 | 0.095                                     |      0.048 |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.559 |                       0.592 |                   0.567 |                    0.603 |                                     0.581 | 0.636      |
| EleutherAI-gpt-neo-1.3B                    |   0.466 |                       0.572 |                   0.585 |                    0.593 |                                     0.582 | 0.636      |
| EleutherAI-gpt-neo-125M                    |   0.409 |                       0.48  |                   0.509 |                    0.496 |                                     0.518 | 0.592      |
| bert-base-multilingual-cased               |   0.5   |                       0.568 |                   0.591 |                    0.616 |                                     0.583 | **0.654**  |
| distilbert-base-multilingual-cased         |   0.516 |                       0.571 |                   0.542 |                    0.583 |                                     0.576 | 0.638      |
| facebook-mbart-large-50                    |   0.585 |                       0.601 |                   0.566 |                    0.628 |                                     0.616 | 0.645      |
| gpt2                                       |   0.475 |                       0.525 |                   0.485 |                    0.581 |                                     0.512 | 0.618      |
| xlm-roberta-large                          |   0.565 |                       0.63  |                   0.587 |                    0.638 |                                     0.623 | 0.636      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.465 |                       0.535 |                   0.483 |                    0.494 |                                     0.5   | 0.570      |
| EleutherAI-gpt-neo-1.3B                    |   0.355 |                       0.483 |                   0.5   |                    0.471 |                                     0.483 | 0.558      |
| EleutherAI-gpt-neo-125M                    |   0.331 |                       0.384 |                   0.407 |                    0.401 |                                     0.419 | 0.494      |
| bert-base-multilingual-cased               |   0.424 |                       0.488 |                   0.512 |                    0.547 |                                     0.541 | **0.616**  |
| distilbert-base-multilingual-cased         |   0.424 |                       0.517 |                   0.471 |                    0.5   |                                     0.494 | 0.599      |
| facebook-mbart-large-50                    |   0.483 |                       0.547 |                   0.488 |                    0.535 |                                     0.541 | 0.564      |
| gpt2                                       |   0.413 |                       0.453 |                   0.413 |                    0.523 |                                     0.419 | 0.570      |
| xlm-roberta-large                          |   0.465 |                       0.57  |                   0.5   |                    0.564 |                                     0.552 | 0.564      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.702 |                       0.662 |                   0.686 | 0.773                    |                                     0.694 |      0.721 |
| EleutherAI-gpt-neo-1.3B                    |   0.678 |                       0.703 |                   0.705 | **0.802**                |                                     0.735 |      0.738 |
| EleutherAI-gpt-neo-125M                    |   0.533 |                       0.641 |                   0.68  | 0.651                    |                                     0.679 |      0.739 |
| bert-base-multilingual-cased               |   0.608 |                       0.677 |                   0.698 | 0.707                    |                                     0.633 |      0.697 |
| distilbert-base-multilingual-cased         |   0.658 |                       0.636 |                   0.638 | 0.699                    |                                     0.691 |      0.682 |
| facebook-mbart-large-50                    |   0.741 |                       0.667 |                   0.672 | 0.760                    |                                     0.715 |      0.752 |
| gpt2                                       |   0.559 |                       0.624 |                   0.587 | 0.652                    |                                     0.661 |      0.676 |
| xlm-roberta-large                          |   0.721 |                       0.705 |                   0.711 | 0.735                    |                                     0.714 |      0.729 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.029 |                       0.029 |                   0.029 | 0.057                    |                                     0.057 |      0.029 |
| EleutherAI-gpt-neo-1.3B                    |   0     |                       0.029 |                   0     | 0.057                    |                                     0.029 |      0.029 |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     |                   0.029 | 0.000                    |                                     0.029 |      0.029 |
| bert-base-multilingual-cased               |   0     |                       0.029 |                   0.029 | 0.057                    |                                     0     |      0.029 |
| distilbert-base-multilingual-cased         |   0.029 |                       0.029 |                   0     | 0.000                    |                                     0.029 |      0     |
| facebook-mbart-large-50                    |   0.057 |                       0.029 |                   0.029 | 0.029                    |                                     0.057 |      0     |
| gpt2                                       |   0     |                       0     |                   0.029 | **0.086**                |                                     0     |      0     |
| xlm-roberta-large                          |   0.029 |                       0.057 |                   0.029 | 0.000                    |                                     0.057 |      0.029 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.57  |                       0.551 |                   0.552 |                    0.573 |                                     0.542 | 0.573      |
| EleutherAI-gpt-neo-1.3B                    |   0.437 |                       0.538 |                   0.527 |                    0.537 |                                     0.541 | 0.575      |
| EleutherAI-gpt-neo-125M                    |   0.3   |                       0.324 |                   0.442 |                    0.475 |                                     0.459 | 0.509      |
| bert-base-multilingual-cased               |   0.479 |                       0.572 |                   0.566 |                    0.592 |                                     0.574 | 0.585      |
| distilbert-base-multilingual-cased         |   0.476 |                       0.545 |                   0.564 |                    0.589 |                                     0.535 | 0.589      |
| facebook-mbart-large-50                    |   0.533 |                       0.585 |                   0.59  |                    0.593 |                                     0.593 | **0.627**  |
| gpt2                                       |   0.397 |                       0.485 |                   0.513 |                    0.543 |                                     0.469 | 0.525      |
| xlm-roberta-large                          |   0.532 |                       0.587 |                   0.602 |                    0.598 |                                     0.57  | 0.573      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.478 |                       0.474 |                   0.474 | 0.470                    |                                     0.435 |      0.478 |
| EleutherAI-gpt-neo-1.3B                    |   0.33  |                       0.426 |                   0.404 | 0.426                    |                                     0.435 |      0.474 |
| EleutherAI-gpt-neo-125M                    |   0.243 |                       0.243 |                   0.348 | 0.400                    |                                     0.37  |      0.409 |
| bert-base-multilingual-cased               |   0.413 |                       0.483 |                   0.491 | 0.517                    |                                     0.496 |      0.496 |
| distilbert-base-multilingual-cased         |   0.409 |                       0.47  |                   0.487 | **0.526**                |                                     0.461 |      0.517 |
| facebook-mbart-large-50                    |   0.439 |                       0.487 |                   0.483 | 0.478                    |                                     0.5   |      0.522 |
| gpt2                                       |   0.335 |                       0.417 |                   0.457 | 0.491                    |                                     0.374 |      0.461 |
| xlm-roberta-large                          |   0.439 |                       0.491 |                   0.496 | 0.491                    |                                     0.47  |      0.461 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.705 |                       0.657 |                   0.661 |                    0.735 |                                     0.719 | 0.714      |
| EleutherAI-gpt-neo-1.3B                    |   0.644 |                       0.731 |                   0.756 |                    0.726 |                                     0.714 | 0.732      |
| EleutherAI-gpt-neo-125M                    |   0.392 |                       0.483 |                   0.606 |                    0.586 |                                     0.607 | 0.676      |
| bert-base-multilingual-cased               |   0.569 |                       0.703 |                   0.669 |                    0.692 |                                     0.683 | 0.713      |
| distilbert-base-multilingual-cased         |   0.57  |                       0.651 |                   0.671 |                    0.669 |                                     0.639 | 0.684      |
| facebook-mbart-large-50                    |   0.678 |                       0.732 |                   0.76  |                    0.78  |                                     0.728 | **0.784**  |
| gpt2                                       |   0.487 |                       0.578 |                   0.587 |                    0.608 |                                     0.628 | 0.609      |
| xlm-roberta-large                          |   0.673 |                       0.729 |                   0.765 |                    0.764 |                                     0.725 | 0.757      |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.1   |                       0.15  |                   0.167 |                    0.133 |                                     0.1   | 0.117      |
| EleutherAI-gpt-neo-1.3B                    |   0.05  |                       0.117 |                   0.183 |                    0.15  |                                     0.133 | 0.167      |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     |                   0.033 |                    0.067 |                                     0.033 | 0.117      |
| bert-base-multilingual-cased               |   0.017 |                       0.117 |                   0.067 |                    0.183 |                                     0.117 | 0.133      |
| distilbert-base-multilingual-cased         |   0.017 |                       0.117 |                   0.117 |                    0.15  |                                     0.083 | 0.100      |
| facebook-mbart-large-50                    |   0.1   |                       0.167 |                   0.15  |                    0.167 |                                     0.167 | **0.217**  |
| gpt2                                       |   0.033 |                       0.033 |                   0.033 |                    0.067 |                                     0.083 | 0.067      |
| xlm-roberta-large                          |   0.1   |                       0.167 |                   0.183 |                    0.133 |                                     0.15  | 0.183      |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.507 |                       0.599 |                   0.617 |                    0.607 | 0.623                                     |      0.607 |
| EleutherAI-gpt-neo-1.3B                    |   0.464 |                       0.515 |                   0.618 |                    0.586 | 0.618                                     |      0.604 |
| EleutherAI-gpt-neo-125M                    |   0.453 |                       0.496 |                   0.516 |                    0.566 | 0.522                                     |      0.511 |
| bert-base-multilingual-cased               |   0.571 |                       0.62  |                   0.652 |                    0.654 | 0.646                                     |      0.624 |
| distilbert-base-multilingual-cased         |   0.558 |                       0.542 |                   0.592 |                    0.639 | 0.623                                     |      0.604 |
| facebook-mbart-large-50                    |   0.565 |                       0.604 |                   0.676 |                    0.665 | **0.680**                                 |      0.652 |
| gpt2                                       |   0.517 |                       0.538 |                   0.591 |                    0.621 | 0.568                                     |      0.59  |
| xlm-roberta-large                          |   0.569 |                       0.64  |                   0.634 |                    0.641 | 0.667                                     |      0.665 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.437 |                       0.549 | 0.563                   |                    0.51  |                                     0.529 |      0.524 |
| EleutherAI-gpt-neo-1.3B                    |   0.393 |                       0.422 | 0.539                   |                    0.481 |                                     0.529 |      0.515 |
| EleutherAI-gpt-neo-125M                    |   0.388 |                       0.427 | 0.461                   |                    0.519 |                                     0.432 |      0.408 |
| bert-base-multilingual-cased               |   0.519 |                       0.563 | 0.587                   |                    0.597 |                                     0.568 |      0.563 |
| distilbert-base-multilingual-cased         |   0.49  |                       0.5   | 0.524                   |                    0.558 |                                     0.553 |      0.549 |
| facebook-mbart-large-50                    |   0.524 |                       0.544 | **0.602**               |                    0.573 |                                     0.597 |      0.573 |
| gpt2                                       |   0.481 |                       0.481 | 0.597                   |                    0.568 |                                     0.495 |      0.51  |
| xlm-roberta-large                          |   0.51  |                       0.587 | 0.573                   |                    0.563 |                                     0.587 |      0.592 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.604 |                       0.661 |                   0.682 | 0.750                    |                                     0.757 |      0.72  |
| EleutherAI-gpt-neo-1.3B                    |   0.566 |                       0.659 |                   0.725 | 0.750                    |                                     0.741 |      0.731 |
| EleutherAI-gpt-neo-125M                    |   0.544 |                       0.591 |                   0.586 | 0.622                    |                                     0.659 |      0.683 |
| bert-base-multilingual-cased               |   0.633 |                       0.69  |                   0.733 | 0.724                    |                                     0.75  |      0.699 |
| distilbert-base-multilingual-cased         |   0.647 |                       0.592 |                   0.679 | 0.747                    |                                     0.713 |      0.673 |
| facebook-mbart-large-50                    |   0.614 |                       0.679 |                   0.77  | **0.792**                |                                     0.788 |      0.756 |
| gpt2                                       |   0.559 |                       0.611 |                   0.586 | 0.684                    |                                     0.667 |      0.7   |
| xlm-roberta-large                          |   0.644 |                       0.703 |                   0.711 | 0.744                    |                                     0.771 |      0.758 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 |                       0.026 |                   0.026 | 0.051                    |                                     0.051 |      0.051 |
| EleutherAI-gpt-neo-1.3B                    |   0.026 |                       0     |                   0.051 | 0.051                    |                                     0.026 |      0.051 |
| EleutherAI-gpt-neo-125M                    |   0     |                       0     |                   0.051 | 0.026                    |                                     0.026 |      0.026 |
| bert-base-multilingual-cased               |   0.026 |                       0.051 |                   0.051 | 0.051                    |                                     0.051 |      0.051 |
| distilbert-base-multilingual-cased         |   0.051 |                       0     |                   0.026 | 0.051                    |                                     0.051 |      0.026 |
| facebook-mbart-large-50                    |   0.026 |                       0     |                   0.026 | **0.077**                |                                     0.026 |      0.026 |
| gpt2                                       |   0     |                       0     |                   0     | 0.026                    |                                     0.026 |      0.026 |
| xlm-roberta-large                          |   0.051 |                       0.026 |                   0.026 | 0.051                    |                                     0.026 |      0.026 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.419 |                       0.465 |                   0.474 |                    0.487 |                                     0.521 | **0.530**  |
| EleutherAI-gpt-neo-1.3B                    |   0.319 |                       0.286 |                   0.377 |                    0.387 |                                     0.409 | 0.374      |
| EleutherAI-gpt-neo-125M                    |   0.162 |                       0.22  |                   0.24  |                    0.217 |                                     0.122 | 0.151      |
| bert-base-multilingual-cased               |   0.366 |                       0.436 |                   0.516 |                    0.471 |                                     0.447 | 0.497      |
| distilbert-base-multilingual-cased         |   0.318 |                       0.407 |                   0.47  |                    0.5   |                                     0.461 | 0.488      |
| facebook-mbart-large-50                    |   0.426 |                       0.435 |                   0.511 |                    0.49  |                                     0.519 | 0.507      |
| gpt2                                       |   0.159 |                       0.107 |                   0.075 |                    0.095 |                                     0.126 | 0.215      |
| xlm-roberta-large                          |   0.403 |                       0.446 |                   0.479 |                    0.455 |                                     0.486 | 0.455      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.36  |                       0.419 |                   0.419 |                    0.43  |                                     0.442 | 0.465      |
| EleutherAI-gpt-neo-1.3B                    |   0.221 |                       0.198 |                   0.267 |                    0.279 |                                     0.302 | 0.267      |
| EleutherAI-gpt-neo-125M                    |   0.105 |                       0.163 |                   0.174 |                    0.174 |                                     0.081 | 0.105      |
| bert-base-multilingual-cased               |   0.302 |                       0.395 |                   0.465 |                    0.419 |                                     0.395 | 0.442      |
| distilbert-base-multilingual-cased         |   0.244 |                       0.384 |                   0.407 |                    0.465 |                                     0.407 | **0.477**  |
| facebook-mbart-large-50                    |   0.337 |                       0.349 |                   0.419 |                    0.407 |                                     0.407 | 0.430      |
| gpt2                                       |   0.105 |                       0.07  |                   0.047 |                    0.058 |                                     0.081 | 0.151      |
| xlm-roberta-large                          |   0.314 |                       0.36  |                   0.395 |                    0.384 |                                     0.419 | 0.384      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.5   |                       0.522 |                   0.545 |                    0.561 | 0.633                                     |      0.615 |
| EleutherAI-gpt-neo-1.3B                    |   0.576 |                       0.515 |                   0.639 |                    0.632 | 0.634                                     |      0.622 |
| EleutherAI-gpt-neo-125M                    |   0.36  |                       0.341 |                   0.385 |                    0.288 | 0.241                                     |      0.273 |
| bert-base-multilingual-cased               |   0.464 |                       0.486 |                   0.58  |                    0.537 | 0.515                                     |      0.567 |
| distilbert-base-multilingual-cased         |   0.457 |                       0.434 |                   0.556 |                    0.541 | 0.530                                     |      0.5   |
| facebook-mbart-large-50                    |   0.58  |                       0.577 |                   0.655 |                    0.614 | **0.714**                                 |      0.617 |
| gpt2                                       |   0.333 |                       0.231 |                   0.19  |                    0.263 | 0.280                                     |      0.371 |
| xlm-roberta-large                          |   0.562 |                       0.585 |                   0.607 |                    0.559 | 0.581                                     |      0.559 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.158 |                       0.158 |                   0.132 |                    0.079 | **0.263**                                 |      0.211 |
| EleutherAI-gpt-neo-1.3B                    |   0.053 |                       0.105 |                   0.079 |                    0.158 | 0.105                                     |      0.132 |
| EleutherAI-gpt-neo-125M                    |   0.026 |                       0     |                   0.053 |                    0     | 0.026                                     |      0.026 |
| bert-base-multilingual-cased               |   0.079 |                       0.105 |                   0.237 |                    0.158 | 0.158                                     |      0.237 |
| distilbert-base-multilingual-cased         |   0.105 |                       0.132 |                   0.158 |                    0.158 | 0.184                                     |      0.184 |
| facebook-mbart-large-50                    |   0.158 |                       0.211 |                   0.211 |                    0.184 | 0.184                                     |      0.184 |
| gpt2                                       |   0     |                       0     |                   0     |                    0.026 | 0.053                                     |      0.026 |
| xlm-roberta-large                          |   0.158 |                       0.184 |                   0.211 |                    0.105 | 0.184                                     |      0.158 |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1_micro

| language   | model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.64  |                       0.7   |                   0.684 |                    0.699 | 0.696                                     | 0.679      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.63  |                       0.703 |                   0.707 |                    0.693 | 0.692                                     | 0.692      |
| en         | EleutherAI-gpt-neo-125M                    |   0.53  |                       0.623 |                   0.637 |                    0.634 | 0.649                                     | 0.654      |
| en         | bert-base-multilingual-cased               |   0.61  |                       0.677 |                   0.7   |                    0.707 | 0.682                                     | 0.688      |
| en         | distilbert-base-multilingual-cased         |   0.596 |                       0.665 |                   0.661 |                    0.672 | 0.662                                     | 0.683      |
| en         | facebook-mbart-large-50                    |   0.677 |                       0.717 |                   0.719 |                    0.721 | **0.723**                                 | 0.701      |
| en         | gpt2                                       |   0.618 |                       0.693 |                   0.69  |                    0.673 | 0.696                                     | 0.691      |
| en         | xlm-roberta-large                          |   0.66  |                       0.694 |                   0.718 |                    0.715 | 0.710                                     | 0.687      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.4   |                       0.46  |                   0.486 |                    0.449 | 0.513                                     | 0.491      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.379 |                       0.448 |                   0.441 |                    0.394 | 0.433                                     | 0.483      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.227 |                       0.359 |                   0.358 |                    0.352 | 0.414                                     | 0.459      |
| fr         | bert-base-multilingual-cased               |   0.419 |                       0.412 |                   0.441 |                    0.494 | 0.517                                     | 0.498      |
| fr         | distilbert-base-multilingual-cased         |   0.371 |                       0.461 |                   0.46  |                    0.508 | 0.532                                     | **0.544**  |
| fr         | facebook-mbart-large-50                    |   0.453 |                       0.475 |                   0.53  |                    0.504 | 0.534                                     | 0.522      |
| fr         | gpt2                                       |   0.323 |                       0.386 |                   0.4   |                    0.449 | 0.400                                     | 0.495      |
| fr         | xlm-roberta-large                          |   0.434 |                       0.48  |                   0.502 |                    0.5   | 0.529                                     | 0.513      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.559 |                       0.592 |                   0.567 |                    0.603 | 0.581                                     | 0.636      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.466 |                       0.572 |                   0.585 |                    0.593 | 0.582                                     | 0.636      |
| ge         | EleutherAI-gpt-neo-125M                    |   0.409 |                       0.48  |                   0.509 |                    0.496 | 0.518                                     | 0.592      |
| ge         | bert-base-multilingual-cased               |   0.5   |                       0.568 |                   0.591 |                    0.616 | 0.583                                     | **0.654**  |
| ge         | distilbert-base-multilingual-cased         |   0.516 |                       0.571 |                   0.542 |                    0.583 | 0.576                                     | 0.638      |
| ge         | facebook-mbart-large-50                    |   0.585 |                       0.601 |                   0.566 |                    0.628 | 0.616                                     | 0.645      |
| ge         | gpt2                                       |   0.475 |                       0.525 |                   0.485 |                    0.581 | 0.512                                     | 0.618      |
| ge         | xlm-roberta-large                          |   0.565 |                       0.63  |                   0.587 |                    0.638 | 0.623                                     | 0.636      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.57  |                       0.551 |                   0.552 |                    0.573 | 0.542                                     | 0.573      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.437 |                       0.538 |                   0.527 |                    0.537 | 0.541                                     | 0.575      |
| it         | EleutherAI-gpt-neo-125M                    |   0.3   |                       0.324 |                   0.442 |                    0.475 | 0.459                                     | 0.509      |
| it         | bert-base-multilingual-cased               |   0.479 |                       0.572 |                   0.566 |                    0.592 | 0.574                                     | 0.585      |
| it         | distilbert-base-multilingual-cased         |   0.476 |                       0.545 |                   0.564 |                    0.589 | 0.535                                     | 0.589      |
| it         | facebook-mbart-large-50                    |   0.533 |                       0.585 |                   0.59  |                    0.593 | 0.593                                     | **0.627**  |
| it         | gpt2                                       |   0.397 |                       0.485 |                   0.513 |                    0.543 | 0.469                                     | 0.525      |
| it         | xlm-roberta-large                          |   0.532 |                       0.587 |                   0.602 |                    0.598 | 0.570                                     | 0.573      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.507 |                       0.599 |                   0.617 |                    0.607 | 0.623                                     | 0.607      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.464 |                       0.515 |                   0.618 |                    0.586 | 0.618                                     | 0.604      |
| po         | EleutherAI-gpt-neo-125M                    |   0.453 |                       0.496 |                   0.516 |                    0.566 | 0.522                                     | 0.511      |
| po         | bert-base-multilingual-cased               |   0.571 |                       0.62  |                   0.652 |                    0.654 | 0.646                                     | 0.624      |
| po         | distilbert-base-multilingual-cased         |   0.558 |                       0.542 |                   0.592 |                    0.639 | 0.623                                     | 0.604      |
| po         | facebook-mbart-large-50                    |   0.565 |                       0.604 |                   0.676 |                    0.665 | **0.680**                                 | 0.652      |
| po         | gpt2                                       |   0.517 |                       0.538 |                   0.591 |                    0.621 | 0.568                                     | 0.590      |
| po         | xlm-roberta-large                          |   0.569 |                       0.64  |                   0.634 |                    0.641 | 0.667                                     | 0.665      |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.419 |                       0.465 |                   0.474 |                    0.487 | 0.521                                     | **0.530**  |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.319 |                       0.286 |                   0.377 |                    0.387 | 0.409                                     | 0.374      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.162 |                       0.22  |                   0.24  |                    0.217 | 0.122                                     | 0.151      |
| ru         | bert-base-multilingual-cased               |   0.366 |                       0.436 |                   0.516 |                    0.471 | 0.447                                     | 0.497      |
| ru         | distilbert-base-multilingual-cased         |   0.318 |                       0.407 |                   0.47  |                    0.5   | 0.461                                     | 0.488      |
| ru         | facebook-mbart-large-50                    |   0.426 |                       0.435 |                   0.511 |                    0.49  | 0.519                                     | 0.507      |
| ru         | gpt2                                       |   0.159 |                       0.107 |                   0.075 |                    0.095 | 0.126                                     | 0.215      |
| ru         | xlm-roberta-large                          |   0.403 |                       0.446 |                   0.479 |                    0.455 | 0.486                                     | 0.455      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall_micro

| language   | model_name                                 |   title |   title and first paragraph | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|:------------------------|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.577 |                       0.638 | 0.611                   | 0.643                    | 0.643                                     | 0.621      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.538 |                       0.631 | 0.619                   | 0.604                    | 0.604                                     | 0.611      |
| en         | EleutherAI-gpt-neo-125M                    |   0.45  |                       0.543 | 0.562                   | 0.548                    | 0.555                                     | 0.562      |
| en         | bert-base-multilingual-cased               |   0.543 |                       0.606 | 0.631                   | 0.650                    | 0.619                                     | 0.621      |
| en         | distilbert-base-multilingual-cased         |   0.528 |                       0.592 | 0.579                   | 0.619                    | 0.589                                     | 0.623      |
| en         | facebook-mbart-large-50                    |   0.599 |                       0.653 | 0.645                   | 0.653                    | **0.660**                                 | 0.643      |
| en         | gpt2                                       |   0.548 |                       0.643 | 0.658                   | 0.638                    | 0.645                                     | 0.631      |
| en         | xlm-roberta-large                          |   0.584 |                       0.636 | 0.636                   | 0.643                    | 0.648                                     | 0.621      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.413 | 0.429                   | 0.405                    | 0.460                                     | 0.429      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.294 |                       0.357 | 0.373                   | 0.310                    | 0.349                                     | 0.405      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.175 |                       0.294 | 0.286                   | 0.294                    | 0.333                                     | 0.357      |
| fr         | bert-base-multilingual-cased               |   0.349 |                       0.333 | 0.397                   | 0.460                    | 0.476                                     | 0.437      |
| fr         | distilbert-base-multilingual-cased         |   0.302 |                       0.421 | 0.413                   | 0.476                    | 0.468                                     | **0.516**  |
| fr         | facebook-mbart-large-50                    |   0.381 |                       0.413 | 0.452                   | 0.452                    | 0.492                                     | 0.468      |
| fr         | gpt2                                       |   0.286 |                       0.341 | 0.341                   | 0.405                    | 0.333                                     | 0.421      |
| fr         | xlm-roberta-large                          |   0.381 |                       0.429 | 0.444                   | 0.460                    | 0.476                                     | 0.460      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.465 |                       0.535 | 0.483                   | 0.494                    | 0.500                                     | 0.570      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.355 |                       0.483 | 0.500                   | 0.471                    | 0.483                                     | 0.558      |
| ge         | EleutherAI-gpt-neo-125M                    |   0.331 |                       0.384 | 0.407                   | 0.401                    | 0.419                                     | 0.494      |
| ge         | bert-base-multilingual-cased               |   0.424 |                       0.488 | 0.512                   | 0.547                    | 0.541                                     | **0.616**  |
| ge         | distilbert-base-multilingual-cased         |   0.424 |                       0.517 | 0.471                   | 0.500                    | 0.494                                     | 0.599      |
| ge         | facebook-mbart-large-50                    |   0.483 |                       0.547 | 0.488                   | 0.535                    | 0.541                                     | 0.564      |
| ge         | gpt2                                       |   0.413 |                       0.453 | 0.413                   | 0.523                    | 0.419                                     | 0.570      |
| ge         | xlm-roberta-large                          |   0.465 |                       0.57  | 0.500                   | 0.564                    | 0.552                                     | 0.564      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.478 |                       0.474 | 0.474                   | 0.470                    | 0.435                                     | 0.478      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.33  |                       0.426 | 0.404                   | 0.426                    | 0.435                                     | 0.474      |
| it         | EleutherAI-gpt-neo-125M                    |   0.243 |                       0.243 | 0.348                   | 0.400                    | 0.370                                     | 0.409      |
| it         | bert-base-multilingual-cased               |   0.413 |                       0.483 | 0.491                   | 0.517                    | 0.496                                     | 0.496      |
| it         | distilbert-base-multilingual-cased         |   0.409 |                       0.47  | 0.487                   | **0.526**                | 0.461                                     | 0.517      |
| it         | facebook-mbart-large-50                    |   0.439 |                       0.487 | 0.483                   | 0.478                    | 0.500                                     | 0.522      |
| it         | gpt2                                       |   0.335 |                       0.417 | 0.457                   | 0.491                    | 0.374                                     | 0.461      |
| it         | xlm-roberta-large                          |   0.439 |                       0.491 | 0.496                   | 0.491                    | 0.470                                     | 0.461      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.437 |                       0.549 | 0.563                   | 0.510                    | 0.529                                     | 0.524      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.393 |                       0.422 | 0.539                   | 0.481                    | 0.529                                     | 0.515      |
| po         | EleutherAI-gpt-neo-125M                    |   0.388 |                       0.427 | 0.461                   | 0.519                    | 0.432                                     | 0.408      |
| po         | bert-base-multilingual-cased               |   0.519 |                       0.563 | 0.587                   | 0.597                    | 0.568                                     | 0.563      |
| po         | distilbert-base-multilingual-cased         |   0.49  |                       0.5   | 0.524                   | 0.558                    | 0.553                                     | 0.549      |
| po         | facebook-mbart-large-50                    |   0.524 |                       0.544 | **0.602**               | 0.573                    | 0.597                                     | 0.573      |
| po         | gpt2                                       |   0.481 |                       0.481 | 0.597                   | 0.568                    | 0.495                                     | 0.510      |
| po         | xlm-roberta-large                          |   0.51  |                       0.587 | 0.573                   | 0.563                    | 0.587                                     | 0.592      |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.36  |                       0.419 | 0.419                   | 0.430                    | 0.442                                     | 0.465      |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.221 |                       0.198 | 0.267                   | 0.279                    | 0.302                                     | 0.267      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.105 |                       0.163 | 0.174                   | 0.174                    | 0.081                                     | 0.105      |
| ru         | bert-base-multilingual-cased               |   0.302 |                       0.395 | 0.465                   | 0.419                    | 0.395                                     | 0.442      |
| ru         | distilbert-base-multilingual-cased         |   0.244 |                       0.384 | 0.407                   | 0.465                    | 0.407                                     | **0.477**  |
| ru         | facebook-mbart-large-50                    |   0.337 |                       0.349 | 0.419                   | 0.407                    | 0.407                                     | 0.430      |
| ru         | gpt2                                       |   0.105 |                       0.07  | 0.047                   | 0.058                    | 0.081                                     | 0.151      |
| ru         | xlm-roberta-large                          |   0.314 |                       0.36  | 0.395                   | 0.384                    | 0.419                                     | 0.384      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision_micro

| language   | model_name                                 |   title |   title and first paragraph | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|:------------------------|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.717 |                       0.774 | 0.776                   | 0.765                    | 0.758                                     | 0.749      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.761 |                       0.794 | 0.824                   | 0.812                    | 0.810                                     | 0.796      |
| en         | EleutherAI-gpt-neo-125M                    |   0.646 |                       0.73  | 0.735                   | 0.752                    | 0.780                                     | 0.782      |
| en         | bert-base-multilingual-cased               |   0.696 |                       0.765 | 0.787                   | 0.776                    | 0.760                                     | 0.772      |
| en         | distilbert-base-multilingual-cased         |   0.684 |                       0.759 | 0.769                   | 0.735                    | 0.755                                     | 0.754      |
| en         | facebook-mbart-large-50                    |   0.778 |                       0.795 | 0.812                   | 0.804                    | 0.799                                     | 0.771      |
| en         | gpt2                                       |   0.709 |                       0.751 | 0.725                   | 0.711                    | 0.754                                     | 0.763      |
| en         | xlm-roberta-large                          |   0.759 |                       0.765 | **0.825**               | 0.804                    | 0.784                                     | 0.770      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.455 |                       0.52  | 0.562                   | 0.505                    | 0.580                                     | 0.574      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.536 |                       0.6   | 0.540                   | 0.542                    | 0.571                                     | 0.600      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.324 |                       0.463 | 0.480                   | 0.440                    | 0.545                                     | **0.643**  |
| fr         | bert-base-multilingual-cased               |   0.524 |                       0.538 | 0.495                   | 0.532                    | 0.566                                     | 0.579      |
| fr         | distilbert-base-multilingual-cased         |   0.481 |                       0.51  | 0.520                   | 0.545                    | 0.615                                     | 0.575      |
| fr         | facebook-mbart-large-50                    |   0.558 |                       0.559 | 0.640                   | 0.570                    | 0.585                                     | 0.590      |
| fr         | gpt2                                       |   0.371 |                       0.443 | 0.483                   | 0.505                    | 0.500                                     | 0.602      |
| fr         | xlm-roberta-large                          |   0.505 |                       0.545 | 0.577                   | 0.547                    | 0.594                                     | 0.580      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.702 |                       0.662 | 0.686                   | 0.773                    | 0.694                                     | 0.721      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0.678 |                       0.703 | 0.705                   | **0.802**                | 0.735                                     | 0.738      |
| ge         | EleutherAI-gpt-neo-125M                    |   0.533 |                       0.641 | 0.680                   | 0.651                    | 0.679                                     | 0.739      |
| ge         | bert-base-multilingual-cased               |   0.608 |                       0.677 | 0.698                   | 0.707                    | 0.633                                     | 0.697      |
| ge         | distilbert-base-multilingual-cased         |   0.658 |                       0.636 | 0.638                   | 0.699                    | 0.691                                     | 0.682      |
| ge         | facebook-mbart-large-50                    |   0.741 |                       0.667 | 0.672                   | 0.760                    | 0.715                                     | 0.752      |
| ge         | gpt2                                       |   0.559 |                       0.624 | 0.587                   | 0.652                    | 0.661                                     | 0.676      |
| ge         | xlm-roberta-large                          |   0.721 |                       0.705 | 0.711                   | 0.735                    | 0.714                                     | 0.729      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.705 |                       0.657 | 0.661                   | 0.735                    | 0.719                                     | 0.714      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.644 |                       0.731 | 0.756                   | 0.726                    | 0.714                                     | 0.732      |
| it         | EleutherAI-gpt-neo-125M                    |   0.392 |                       0.483 | 0.606                   | 0.586                    | 0.607                                     | 0.676      |
| it         | bert-base-multilingual-cased               |   0.569 |                       0.703 | 0.669                   | 0.692                    | 0.683                                     | 0.713      |
| it         | distilbert-base-multilingual-cased         |   0.57  |                       0.651 | 0.671                   | 0.669                    | 0.639                                     | 0.684      |
| it         | facebook-mbart-large-50                    |   0.678 |                       0.732 | 0.760                   | 0.780                    | 0.728                                     | **0.784**  |
| it         | gpt2                                       |   0.487 |                       0.578 | 0.587                   | 0.608                    | 0.628                                     | 0.609      |
| it         | xlm-roberta-large                          |   0.673 |                       0.729 | 0.765                   | 0.764                    | 0.725                                     | 0.757      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.604 |                       0.661 | 0.682                   | 0.750                    | 0.757                                     | 0.720      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.566 |                       0.659 | 0.725                   | 0.750                    | 0.741                                     | 0.731      |
| po         | EleutherAI-gpt-neo-125M                    |   0.544 |                       0.591 | 0.586                   | 0.622                    | 0.659                                     | 0.683      |
| po         | bert-base-multilingual-cased               |   0.633 |                       0.69  | 0.733                   | 0.724                    | 0.750                                     | 0.699      |
| po         | distilbert-base-multilingual-cased         |   0.647 |                       0.592 | 0.679                   | 0.747                    | 0.713                                     | 0.673      |
| po         | facebook-mbart-large-50                    |   0.614 |                       0.679 | 0.770                   | **0.792**                | 0.788                                     | 0.756      |
| po         | gpt2                                       |   0.559 |                       0.611 | 0.586                   | 0.684                    | 0.667                                     | 0.700      |
| po         | xlm-roberta-large                          |   0.644 |                       0.703 | 0.711                   | 0.744                    | 0.771                                     | 0.758      |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.5   |                       0.522 | 0.545                   | 0.561                    | 0.633                                     | 0.615      |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.576 |                       0.515 | 0.639                   | 0.632                    | 0.634                                     | 0.622      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.36  |                       0.341 | 0.385                   | 0.288                    | 0.241                                     | 0.273      |
| ru         | bert-base-multilingual-cased               |   0.464 |                       0.486 | 0.580                   | 0.537                    | 0.515                                     | 0.567      |
| ru         | distilbert-base-multilingual-cased         |   0.457 |                       0.434 | 0.556                   | 0.541                    | 0.530                                     | 0.500      |
| ru         | facebook-mbart-large-50                    |   0.58  |                       0.577 | 0.655                   | 0.614                    | **0.714**                                 | 0.617      |
| ru         | gpt2                                       |   0.333 |                       0.231 | 0.190                   | 0.263                    | 0.280                                     | 0.371      |
| ru         | xlm-roberta-large                          |   0.562 |                       0.585 | 0.607                   | 0.559                    | 0.581                                     | 0.559      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                                 |   title | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.087 | 0.136                       | 0.126                   | 0.068                    | 0.097                                     | 0.058      |
| en         | EleutherAI-gpt-neo-1.3B                    |   0.087 | 0.087                       | **0.155**               | 0.097                    | 0.107                                     | 0.087      |
| en         | EleutherAI-gpt-neo-125M                    |   0.019 | 0.087                       | 0.058                   | 0.039                    | 0.078                                     | 0.107      |
| en         | bert-base-multilingual-cased               |   0.078 | 0.087                       | 0.117                   | 0.078                    | 0.078                                     | 0.117      |
| en         | distilbert-base-multilingual-cased         |   0.097 | 0.078                       | 0.126                   | 0.078                    | 0.117                                     | 0.117      |
| en         | facebook-mbart-large-50                    |   0.117 | 0.126                       | 0.146                   | 0.107                    | 0.126                                     | 0.126      |
| en         | gpt2                                       |   0.097 | 0.097                       | 0.117                   | 0.087                    | 0.087                                     | 0.117      |
| en         | xlm-roberta-large                          |   0.058 | 0.087                       | 0.146                   | 0.126                    | 0.107                                     | 0.107      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 | 0.071                       | 0.071                   | 0.071                    | **0.119**                                 | 0.095      |
| fr         | EleutherAI-gpt-neo-1.3B                    |   0.024 | **0.119**                   | 0.024                   | 0.071                    | 0.024                                     | 0.071      |
| fr         | EleutherAI-gpt-neo-125M                    |   0.024 | 0.024                       | 0.024                   | 0.024                    | 0.024                                     | 0.071      |
| fr         | bert-base-multilingual-cased               |   0.048 | 0.071                       | 0.048                   | 0.071                    | 0.071                                     | 0.095      |
| fr         | distilbert-base-multilingual-cased         |   0.071 | 0.071                       | 0.095                   | 0.048                    | 0.095                                     | 0.095      |
| fr         | facebook-mbart-large-50                    |   0.024 | 0.095                       | 0.071                   | 0.048                    | 0.071                                     | 0.071      |
| fr         | gpt2                                       |   0     | 0.024                       | 0.048                   | 0.000                    | 0.048                                     | 0.048      |
| fr         | xlm-roberta-large                          |   0     | **0.119**                   | 0.071                   | 0.095                    | 0.095                                     | 0.048      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.029 | 0.029                       | 0.029                   | 0.057                    | 0.057                                     | 0.029      |
| ge         | EleutherAI-gpt-neo-1.3B                    |   0     | 0.029                       | 0.000                   | 0.057                    | 0.029                                     | 0.029      |
| ge         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.029                   | 0.000                    | 0.029                                     | 0.029      |
| ge         | bert-base-multilingual-cased               |   0     | 0.029                       | 0.029                   | 0.057                    | 0.000                                     | 0.029      |
| ge         | distilbert-base-multilingual-cased         |   0.029 | 0.029                       | 0.000                   | 0.000                    | 0.029                                     | 0.000      |
| ge         | facebook-mbart-large-50                    |   0.057 | 0.029                       | 0.029                   | 0.029                    | 0.057                                     | 0.000      |
| ge         | gpt2                                       |   0     | 0.000                       | 0.029                   | **0.086**                | 0.000                                     | 0.000      |
| ge         | xlm-roberta-large                          |   0.029 | 0.057                       | 0.029                   | 0.000                    | 0.057                                     | 0.029      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.1   | 0.150                       | 0.167                   | 0.133                    | 0.100                                     | 0.117      |
| it         | EleutherAI-gpt-neo-1.3B                    |   0.05  | 0.117                       | 0.183                   | 0.150                    | 0.133                                     | 0.167      |
| it         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.033                   | 0.067                    | 0.033                                     | 0.117      |
| it         | bert-base-multilingual-cased               |   0.017 | 0.117                       | 0.067                   | 0.183                    | 0.117                                     | 0.133      |
| it         | distilbert-base-multilingual-cased         |   0.017 | 0.117                       | 0.117                   | 0.150                    | 0.083                                     | 0.100      |
| it         | facebook-mbart-large-50                    |   0.1   | 0.167                       | 0.150                   | 0.167                    | 0.167                                     | **0.217**  |
| it         | gpt2                                       |   0.033 | 0.033                       | 0.033                   | 0.067                    | 0.083                                     | 0.067      |
| it         | xlm-roberta-large                          |   0.1   | 0.167                       | 0.183                   | 0.133                    | 0.150                                     | 0.183      |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 | 0.026                       | 0.026                   | 0.051                    | 0.051                                     | 0.051      |
| po         | EleutherAI-gpt-neo-1.3B                    |   0.026 | 0.000                       | 0.051                   | 0.051                    | 0.026                                     | 0.051      |
| po         | EleutherAI-gpt-neo-125M                    |   0     | 0.000                       | 0.051                   | 0.026                    | 0.026                                     | 0.026      |
| po         | bert-base-multilingual-cased               |   0.026 | 0.051                       | 0.051                   | 0.051                    | 0.051                                     | 0.051      |
| po         | distilbert-base-multilingual-cased         |   0.051 | 0.000                       | 0.026                   | 0.051                    | 0.051                                     | 0.026      |
| po         | facebook-mbart-large-50                    |   0.026 | 0.000                       | 0.026                   | **0.077**                | 0.026                                     | 0.026      |
| po         | gpt2                                       |   0     | 0.000                       | 0.000                   | 0.026                    | 0.026                                     | 0.026      |
| po         | xlm-roberta-large                          |   0.051 | 0.026                       | 0.026                   | 0.051                    | 0.026                                     | 0.026      |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.158 | 0.158                       | 0.132                   | 0.079                    | **0.263**                                 | 0.211      |
| ru         | EleutherAI-gpt-neo-1.3B                    |   0.053 | 0.105                       | 0.079                   | 0.158                    | 0.105                                     | 0.132      |
| ru         | EleutherAI-gpt-neo-125M                    |   0.026 | 0.000                       | 0.053                   | 0.000                    | 0.026                                     | 0.026      |
| ru         | bert-base-multilingual-cased               |   0.079 | 0.105                       | 0.237                   | 0.158                    | 0.158                                     | 0.237      |
| ru         | distilbert-base-multilingual-cased         |   0.105 | 0.132                       | 0.158                   | 0.158                    | 0.184                                     | 0.184      |
| ru         | facebook-mbart-large-50                    |   0.158 | 0.211                       | 0.211                   | 0.184                    | 0.184                                     | 0.184      |
| ru         | gpt2                                       |   0     | 0.000                       | 0.000                   | 0.026                    | 0.053                                     | 0.026      |
| ru         | xlm-roberta-large                          |   0.158 | 0.184                       | 0.211                   | 0.105                    | 0.184                                     | 0.158      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)
