In [1]:
import os
import glob

from IPython.display import display, Markdown
import pandas as pd

# Group types of models (experiment type and model type) and pick best performing in terms of f1-score per unit of analysis and report them in a table


In [2]:
results_filepaths = glob.glob('./logged_performance_per_model/*/*raw*.csv')

In [3]:
results_filepaths

['./logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_distilbert-base-multilingual-cased-title_and_first_sentence_each_paragraph_metrics.csv',
 './logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_distilbert-base-multilingual-cased-raw_text_metrics.csv',
 './logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_distilbert-base-multilingual-cased-title_and_5_sentences_metrics.csv',
 './logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_distilbert-base-multilingual-cased-title_metrics.csv',
 './logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_distilbert-base-multilingual-cased-title_and_first_paragraph_metrics.csv',
 './logged_performance_per_model/distilbert-base-multilingual-cased/truncated_raw_truncated_single_instance_dist

In [4]:
dfs_list = []
for results_filepath in results_filepaths:
    model_name = results_filepath.split('/')[-2]
    results_df_i = pd.read_csv(results_filepath)
    results_df_i['model_name'] = model_name
    dfs_list.append(results_df_i)

results_df = pd.concat(dfs_list).set_index(['language', 'model_name', 'unit_of_analysis']).sort_index()
results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_micro,precision_micro,recall_micro,f1_macro,precision_macro,recall_macro,accuracy
language,model_name,unit_of_analysis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
en,EleutherAI-gpt-neo-1.3B,raw_text,0.693042,0.783951,0.621027,0.507779,0.652437,0.445927,0.097087
en,EleutherAI-gpt-neo-1.3B,title,0.646067,0.759076,0.562347,0.443437,0.575110,0.388387,0.058252
en,EleutherAI-gpt-neo-1.3B,title_and_10_sentences,0.682451,0.792880,0.599022,0.498518,0.665394,0.425943,0.077670
en,EleutherAI-gpt-neo-1.3B,title_and_5_sentences,0.668524,0.776699,0.586797,0.451412,0.578119,0.391092,0.116505
en,EleutherAI-gpt-neo-1.3B,title_and_first_paragraph,0.686981,0.792332,0.606357,0.493932,0.662291,0.425312,0.135922
...,...,...,...,...,...,...,...,...,...
ru,xlm-roberta-large,title,0.463768,0.615385,0.372093,0.304746,0.346726,0.293027,0.236842
ru,xlm-roberta-large,title_and_10_sentences,0.533333,0.625000,0.465116,0.436164,0.511738,0.409099,0.210526
ru,xlm-roberta-large,title_and_5_sentences,0.463768,0.615385,0.372093,0.311980,0.356132,0.300425,0.210526
ru,xlm-roberta-large,title_and_first_paragraph,0.488889,0.673469,0.383721,0.383344,0.525935,0.341752,0.210526


In [5]:
results_df.to_csv('performance_of_models.csv')

### Generate the tables to report

In [6]:
def display_performance_table(df, metric, index_cols=['model_name'], display_=True):
    report_table = df.reset_index().copy()
    report_table['result'] = report_table[f'{metric}'].map(lambda x: f'{x:.3f}')
    report_table['col_title'] = report_table.unit_of_analysis.str.split('_').str.join(' ') 
    report_table['col_title'] = pd.Categorical(
        report_table.col_title,
        categories=['title', 'title and first paragraph', 'title and 5 sentences', 'title and 10 sentences',
                    'title and first sentence each paragraph', 'raw text'],
        ordered=True)
    report_table = report_table[index_cols + ['col_title', 'result']]\
        .pivot_table(index=index_cols, columns=['col_title'], values=['result'], aggfunc='first', fill_value=0)\
        .droplevel(0, axis=1)

    report_table.columns.names = [None]

    # Highlight best scoring models according to their average
    mean_perf_arr = report_table.applymap(lambda x: float(str(x).split(' ')[0])).to_numpy()
    highlight_mask = mean_perf_arr == mean_perf_arr.max()
    report_table_arr = report_table.to_numpy()  # Note it passes the array by reference
    report_table_arr[highlight_mask] = '**' + report_table_arr[highlight_mask] + '**'

    if display_:
        display(Markdown(report_table.to_markdown()))
    
    return report_table

In [7]:
display_performance_table(df=results_df.loc['en'], metric='f1_micro', index_cols=['model_name'], display_=True)

| model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.646 | 0.687                       |                   0.669 |                    0.682 |                                     0.709 |      0.693 |
| EleutherAI-gpt-neo-125M            |   0.573 | 0.647                       |                   0.642 |                    0.636 |                                     0.649 |      0.631 |
| bert-base-multilingual-cased       |   0.619 | 0.690                       |                   0.676 |                    0.689 |                                     0.688 |      0.711 |
| distilbert-base-multilingual-cased |   0.592 | 0.662                       |                   0.685 |                    0.686 |                                     0.684 |      0.684 |
| facebook-mbart-large-50            |   0.666 | **0.734**                   |                   0.731 |                    0.718 |                                     0.708 |      0.711 |
| gpt2                               |   0.625 | 0.664                       |                   0.678 |                    0.66  |                                     0.68  |      0.654 |
| xlm-roberta-large                  |   0.659 | 0.710                       |                   0.721 |                    0.71  |                                     0.709 |      0.7   |

Unnamed: 0_level_0,title,title and first paragraph,title and 5 sentences,title and 10 sentences,title and first sentence each paragraph,raw text
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
EleutherAI-gpt-neo-1.3B,0.646,0.687,0.669,0.682,0.709,0.693
EleutherAI-gpt-neo-125M,0.573,0.647,0.642,0.636,0.649,0.631
bert-base-multilingual-cased,0.619,0.690,0.676,0.689,0.688,0.711
distilbert-base-multilingual-cased,0.592,0.662,0.685,0.686,0.684,0.684
facebook-mbart-large-50,0.666,**0.734**,0.731,0.718,0.708,0.711
gpt2,0.625,0.664,0.678,0.66,0.68,0.654
xlm-roberta-large,0.659,0.710,0.721,0.71,0.709,0.7


### Generate tables for all languages

In [8]:
metrics_to_report = ['f1_micro', 'recall_micro', 'precision_micro', 'accuracy']

In [9]:
language_dict = {'en': 'English', 'it': 'Italian', 'fr': 'French', 'po': 'Polish', 'ru': 'Russian', 'ge': 'German'}

In [10]:
results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_micro,precision_micro,recall_micro,f1_macro,precision_macro,recall_macro,accuracy
language,model_name,unit_of_analysis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
en,EleutherAI-gpt-neo-1.3B,raw_text,0.693042,0.783951,0.621027,0.507779,0.652437,0.445927,0.097087
en,EleutherAI-gpt-neo-1.3B,title,0.646067,0.759076,0.562347,0.443437,0.575110,0.388387,0.058252
en,EleutherAI-gpt-neo-1.3B,title_and_10_sentences,0.682451,0.792880,0.599022,0.498518,0.665394,0.425943,0.077670
en,EleutherAI-gpt-neo-1.3B,title_and_5_sentences,0.668524,0.776699,0.586797,0.451412,0.578119,0.391092,0.116505
en,EleutherAI-gpt-neo-1.3B,title_and_first_paragraph,0.686981,0.792332,0.606357,0.493932,0.662291,0.425312,0.135922
...,...,...,...,...,...,...,...,...,...
ru,xlm-roberta-large,title,0.463768,0.615385,0.372093,0.304746,0.346726,0.293027,0.236842
ru,xlm-roberta-large,title_and_10_sentences,0.533333,0.625000,0.465116,0.436164,0.511738,0.409099,0.210526
ru,xlm-roberta-large,title_and_5_sentences,0.463768,0.615385,0.372093,0.311980,0.356132,0.300425,0.210526
ru,xlm-roberta-large,title_and_first_paragraph,0.488889,0.673469,0.383721,0.383344,0.525935,0.341752,0.210526


In [11]:
def display_metrics_and_write_to_file(df, grouping_criterion, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    report_tables_dfs_dict = {metric: [] for metric in metrics_to_report}

    for language, results_df in df.groupby(level=0):
        display(Markdown(f'# {language_dict[language]}'))
        
        for metric in metrics_to_report:
            os.makedirs(os.path.join(output_dir, metric), exist_ok=True)

            output_dir_markdown = os.path.join(output_dir, metric, 'markdown')
            output_dir_latex = os.path.join(output_dir, metric, 'latex')
            output_dir_csv = os.path.join(output_dir, metric, 'csv')

            os.makedirs(output_dir_markdown, exist_ok=True)
            os.makedirs(output_dir_latex, exist_ok=True)
            os.makedirs(output_dir_csv, exist_ok=True)

            display(Markdown(f'## {metric}'))

            report_table = display_performance_table(df=results_df, index_cols=grouping_criterion, metric=metric, display_=True)

            # Export as markdown
            markdown_file = open(os.path.join(output_dir_markdown, f"{language_dict[language]}_{metric}.md"), "w")
            report_table.reset_index().to_markdown(markdown_file, index=False)
            markdown_file.close()

            # Export as latex table
            latex_file = open(os.path.join(output_dir_latex, f"{language_dict[language]}_{metric}.tex"), "w")
            report_table.reset_index().to_latex(latex_file, index=False)
            latex_file.close()

            # Export as csv
            report_table.to_csv(os.path.join(output_dir_csv, f"{language_dict[language]}_{metric}.csv"))

            # Stack all languages into single table
            report_table['language'] = language
            report_table = report_table.reset_index().set_index(['language'] + grouping_criterion)

            report_tables_dfs_dict[metric].append(report_table)

    # Report or store unified table
    display(Markdown(f'# All 6 Languages'))
    for metric in metrics_to_report:
        display(Markdown(f'## {metric}'))
        multi_language_report_table_metric = pd.concat(report_tables_dfs_dict[metric])
        display(Markdown(multi_language_report_table_metric.reset_index().to_markdown(index=False)))

        output_dir_markdown = os.path.join(output_dir, metric, 'markdown')
        output_dir_latex = os.path.join(output_dir, metric, 'latex')
        output_dir_csv = os.path.join(output_dir, metric, 'csv')

        # Export as markdown
        markdown_file = open(os.path.join(output_dir_markdown, f"all_6_languages_{metric}.md"), "w")
        multi_language_report_table_metric.reset_index().to_markdown(markdown_file, index=False)
        markdown_file.close()

        # Export as latex table
        latex_file = open(os.path.join(output_dir_latex, f"all_6_languages_{metric}.tex"), "w")
        multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)
        latex_file.close()

        # Export as csv
        multi_language_report_table_metric.to_csv(os.path.join(output_dir_csv, f"all_6_languages_{metric}.csv"))

# Per model type

In [12]:
display_metrics_and_write_to_file(df=results_df, grouping_criterion=['model_name'], output_dir='per_model_name_tables')

# English

## f1_micro

| model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.646 | 0.687                       |                   0.669 |                    0.682 |                                     0.709 |      0.693 |
| EleutherAI-gpt-neo-125M            |   0.573 | 0.647                       |                   0.642 |                    0.636 |                                     0.649 |      0.631 |
| bert-base-multilingual-cased       |   0.619 | 0.690                       |                   0.676 |                    0.689 |                                     0.688 |      0.711 |
| distilbert-base-multilingual-cased |   0.592 | 0.662                       |                   0.685 |                    0.686 |                                     0.684 |      0.684 |
| facebook-mbart-large-50            |   0.666 | **0.734**                   |                   0.731 |                    0.718 |                                     0.708 |      0.711 |
| gpt2                               |   0.625 | 0.664                       |                   0.678 |                    0.66  |                                     0.68  |      0.654 |
| xlm-roberta-large                  |   0.659 | 0.710                       |                   0.721 |                    0.71  |                                     0.709 |      0.7   |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.562 | 0.606                       |                   0.587 |                    0.599 |                                     0.645 |      0.621 |
| EleutherAI-gpt-neo-125M            |   0.491 | 0.572                       |                   0.567 |                    0.553 |                                     0.577 |      0.57  |
| bert-base-multilingual-cased       |   0.545 | 0.626                       |                   0.599 |                    0.638 |                                     0.66  |      0.65  |
| distilbert-base-multilingual-cased |   0.513 | 0.592                       |                   0.611 |                    0.621 |                                     0.623 |      0.614 |
| facebook-mbart-large-50            |   0.587 | **0.680**                   |                   0.665 |                    0.643 |                                     0.65  |      0.655 |
| gpt2                               |   0.565 | 0.621                       |                   0.655 |                    0.601 |                                     0.645 |      0.592 |
| xlm-roberta-large                  |   0.579 | 0.636                       |                   0.645 |                    0.641 |                                     0.653 |      0.626 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.759 |                       0.792 | 0.777                   |                    0.793 |                                     0.786 |      0.784 |
| EleutherAI-gpt-neo-125M            |   0.686 |                       0.745 | 0.739                   |                    0.748 |                                     0.742 |      0.706 |
| bert-base-multilingual-cased       |   0.715 |                       0.769 | 0.775                   |                    0.748 |                                     0.718 |      0.785 |
| distilbert-base-multilingual-cased |   0.7   |                       0.752 | 0.779                   |                    0.765 |                                     0.757 |      0.772 |
| facebook-mbart-large-50            |   0.769 |                       0.797 | 0.812                   |                    0.812 |                                     0.778 |      0.777 |
| gpt2                               |   0.7   |                       0.713 | 0.702                   |                    0.732 |                                     0.719 |      0.731 |
| xlm-roberta-large                  |   0.765 |                       0.805 | **0.817**               |                    0.796 |                                     0.776 |      0.795 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.058 |                       0.136 | 0.117                   |                    0.078 |                                     0.146 |      0.097 |
| EleutherAI-gpt-neo-125M            |   0.068 |                       0.087 | 0.107                   |                    0.097 |                                     0.078 |      0.019 |
| bert-base-multilingual-cased       |   0.097 |                       0.136 | 0.126                   |                    0.097 |                                     0.126 |      0.117 |
| distilbert-base-multilingual-cased |   0.087 |                       0.097 | 0.117                   |                    0.058 |                                     0.068 |      0.087 |
| facebook-mbart-large-50            |   0.097 |                       0.126 | **0.155**               |                    0.126 |                                     0.136 |      0.117 |
| gpt2                               |   0.049 |                       0.078 | 0.087                   |                    0.068 |                                     0.058 |      0.039 |
| xlm-roberta-large                  |   0.049 |                       0.117 | **0.155**               |                    0.126 |                                     0.068 |      0.097 |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.368 |                       0.454 |                   0.452 |                    0.429 |                                     0.486 | 0.500      |
| EleutherAI-gpt-neo-125M            |   0.317 |                       0.314 |                   0.378 |                    0.396 |                                     0.439 | 0.338      |
| bert-base-multilingual-cased       |   0.429 |                       0.421 |                   0.475 |                    0.492 |                                     0.545 | **0.549**  |
| distilbert-base-multilingual-cased |   0.377 |                       0.426 |                   0.459 |                    0.538 |                                     0.538 | 0.496      |
| facebook-mbart-large-50            |   0.429 |                       0.498 |                   0.489 |                    0.498 |                                     0.513 | 0.509      |
| gpt2                               |   0.356 |                       0.387 |                   0.41  |                    0.369 |                                     0.471 | 0.517      |
| xlm-roberta-large                  |   0.475 |                       0.484 |                   0.489 |                    0.533 |                                     0.526 | 0.498      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.278 |                       0.373 |                   0.389 | 0.357                    |                                     0.429 | 0.444      |
| EleutherAI-gpt-neo-125M            |   0.262 |                       0.238 |                   0.333 | 0.357                    |                                     0.357 | 0.270      |
| bert-base-multilingual-cased       |   0.357 |                       0.357 |                   0.413 | 0.460                    |                                     0.5   | **0.508**  |
| distilbert-base-multilingual-cased |   0.317 |                       0.389 |                   0.421 | 0.500                    |                                     0.476 | 0.452      |
| facebook-mbart-large-50            |   0.357 |                       0.437 |                   0.429 | 0.452                    |                                     0.46  | 0.444      |
| gpt2                               |   0.31  |                       0.325 |                   0.341 | 0.302                    |                                     0.413 | 0.476      |
| xlm-roberta-large                  |   0.413 |                       0.429 |                   0.444 | **0.508**                |                                     0.484 | 0.452      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.547 |                       0.58  |                   0.538 |                    0.536 | 0.562                                     |      0.571 |
| EleutherAI-gpt-neo-125M            |   0.402 |                       0.462 |                   0.438 |                    0.446 | 0.570                                     |      0.453 |
| bert-base-multilingual-cased       |   0.536 |                       0.511 |                   0.559 |                    0.527 | 0.600                                     |      0.598 |
| distilbert-base-multilingual-cased |   0.465 |                       0.471 |                   0.505 |                    0.583 | **0.619**                                 |      0.548 |
| facebook-mbart-large-50            |   0.536 |                       0.579 |                   0.568 |                    0.553 | 0.580                                     |      0.596 |
| gpt2                               |   0.419 |                       0.477 |                   0.512 |                    0.475 | 0.547                                     |      0.566 |
| xlm-roberta-large                  |   0.559 |                       0.557 |                   0.544 |                    0.561 | 0.575                                     |      0.553 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.024 |                       0.095 |                   0.071 |                    0.048 | 0.071                                     |      0.071 |
| EleutherAI-gpt-neo-125M            |   0.071 |                       0.024 |                   0.048 |                    0.071 | 0.048                                     |      0     |
| bert-base-multilingual-cased       |   0.048 |                       0.095 |                   0.071 |                    0.071 | **0.167**                                 |      0.048 |
| distilbert-base-multilingual-cased |   0.048 |                       0.048 |                   0.048 |                    0.048 | 0.048                                     |      0.071 |
| facebook-mbart-large-50            |   0     |                       0.095 |                   0.024 |                    0.024 | 0.095                                     |      0.071 |
| gpt2                               |   0     |                       0.071 |                   0.024 |                    0.024 | 0.071                                     |      0.071 |
| xlm-roberta-large                  |   0.095 |                       0.071 |                   0.071 |                    0.071 | 0.095                                     |      0.095 |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.502 |                       0.546 |                   0.567 |                    0.578 |                                     0.573 | 0.568      |
| EleutherAI-gpt-neo-125M            |   0.395 |                       0.462 |                   0.468 |                    0.486 |                                     0.507 | 0.452      |
| bert-base-multilingual-cased       |   0.488 |                       0.599 |                   0.587 |                    0.602 |                                     0.587 | 0.617      |
| distilbert-base-multilingual-cased |   0.483 |                       0.551 |                   0.561 |                    0.578 |                                     0.632 | 0.587      |
| facebook-mbart-large-50            |   0.602 |                       0.625 |                   0.598 |                    0.647 |                                     0.604 | **0.693**  |
| gpt2                               |   0.462 |                       0.474 |                   0.469 |                    0.554 |                                     0.583 | 0.563      |
| xlm-roberta-large                  |   0.566 |                       0.595 |                   0.609 |                    0.634 |                                     0.622 | 0.645      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.39  |                       0.448 |                   0.477 |                    0.453 |                                     0.477 | 0.471      |
| EleutherAI-gpt-neo-125M            |   0.297 |                       0.384 |                   0.366 |                    0.401 |                                     0.413 | 0.355      |
| bert-base-multilingual-cased       |   0.413 |                       0.535 |                   0.488 |                    0.523 |                                     0.541 | 0.535      |
| distilbert-base-multilingual-cased |   0.401 |                       0.471 |                   0.494 |                    0.517 |                                     0.558 | 0.512      |
| facebook-mbart-large-50            |   0.523 |                       0.581 |                   0.541 |                    0.576 |                                     0.541 | **0.616**  |
| gpt2                               |   0.401 |                       0.395 |                   0.424 |                    0.459 |                                     0.5   | 0.483      |
| xlm-roberta-large                  |   0.471 |                       0.535 |                   0.529 |                    0.558 |                                     0.541 | 0.576      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.705 |                       0.7   |                   0.701 | **0.796**                |                                     0.719 |      0.717 |
| EleutherAI-gpt-neo-125M            |   0.593 |                       0.579 |                   0.649 | 0.616                    |                                     0.657 |      0.622 |
| bert-base-multilingual-cased       |   0.597 |                       0.681 |                   0.737 | 0.709                    |                                     0.641 |      0.73  |
| distilbert-base-multilingual-cased |   0.605 |                       0.664 |                   0.649 | 0.654                    |                                     0.727 |      0.688 |
| facebook-mbart-large-50            |   0.709 |                       0.676 |                   0.669 | 0.739                    |                                     0.684 |      0.791 |
| gpt2                               |   0.543 |                       0.591 |                   0.525 | 0.699                    |                                     0.699 |      0.675 |
| xlm-roberta-large                  |   0.711 |                       0.672 |                   0.717 | 0.733                    |                                     0.732 |      0.733 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0     | 0.029                       |                   0     |                    0.057 |                                     0.029 |      0.057 |
| EleutherAI-gpt-neo-125M            |   0     | 0.000                       |                   0     |                    0     |                                     0     |      0     |
| bert-base-multilingual-cased       |   0.029 | 0.029                       |                   0.086 |                    0     |                                     0     |      0     |
| distilbert-base-multilingual-cased |   0     | 0.029                       |                   0     |                    0     |                                     0.029 |      0.029 |
| facebook-mbart-large-50            |   0.057 | **0.114**                   |                   0.029 |                    0.029 |                                     0.029 |      0.086 |
| gpt2                               |   0.029 | 0.000                       |                   0     |                    0     |                                     0.029 |      0     |
| xlm-roberta-large                  |   0.057 | 0.086                       |                   0.029 |                    0.029 |                                     0.057 |      0.029 |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.492 |                       0.522 |                   0.555 |                    0.54  |                                     0.538 | 0.603      |
| EleutherAI-gpt-neo-125M            |   0.353 |                       0.471 |                   0.45  |                    0.481 |                                     0.524 | 0.450      |
| bert-base-multilingual-cased       |   0.492 |                       0.562 |                   0.56  |                    0.61  |                                     0.601 | 0.607      |
| distilbert-base-multilingual-cased |   0.458 |                       0.495 |                   0.54  |                    0.585 |                                     0.527 | 0.602      |
| facebook-mbart-large-50            |   0.545 |                       0.571 |                   0.596 |                    0.599 |                                     0.621 | **0.655**  |
| gpt2                               |   0.409 |                       0.47  |                   0.491 |                    0.523 |                                     0.533 | 0.545      |
| xlm-roberta-large                  |   0.565 |                       0.604 |                   0.608 |                    0.603 |                                     0.586 | **0.655**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.396 |                       0.413 |                   0.452 |                    0.426 |                                     0.426 | 0.496      |
| EleutherAI-gpt-neo-125M            |   0.265 |                       0.391 |                   0.37  |                    0.387 |                                     0.426 | 0.352      |
| bert-base-multilingual-cased       |   0.426 |                       0.474 |                   0.474 |                    0.53  |                                     0.548 | 0.513      |
| distilbert-base-multilingual-cased |   0.383 |                       0.413 |                   0.47  |                    0.496 |                                     0.465 | 0.500      |
| facebook-mbart-large-50            |   0.448 |                       0.474 |                   0.504 |                    0.513 |                                     0.53  | **0.565**  |
| gpt2                               |   0.322 |                       0.391 |                   0.43  |                    0.443 |                                     0.457 | 0.483      |
| xlm-roberta-large                  |   0.47  |                       0.491 |                   0.509 |                    0.496 |                                     0.504 | **0.565**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.65  | 0.709                       |                   0.717 |                    0.737 |                                     0.731 |      0.77  |
| EleutherAI-gpt-neo-125M            |   0.526 | 0.592                       |                   0.574 |                    0.636 |                                     0.681 |      0.623 |
| bert-base-multilingual-cased       |   0.583 | 0.690                       |                   0.686 |                    0.718 |                                     0.667 |      0.742 |
| distilbert-base-multilingual-cased |   0.571 | 0.617                       |                   0.635 |                    0.713 |                                     0.608 |      0.757 |
| facebook-mbart-large-50            |   0.696 | 0.717                       |                   0.73  |                    0.72  |                                     0.748 |      0.778 |
| gpt2                               |   0.561 | 0.588                       |                   0.572 |                    0.637 |                                     0.64  |      0.627 |
| xlm-roberta-large                  |   0.711 | **0.785**                   |                   0.755 |                    0.77  |                                     0.699 |      0.778 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.05  |                       0.133 |                   0.117 |                    0.183 |                                     0.133 | 0.200      |
| EleutherAI-gpt-neo-125M            |   0.017 |                       0.067 |                   0.033 |                    0.05  |                                     0.067 | 0.067      |
| bert-base-multilingual-cased       |   0.083 |                       0.117 |                   0.1   |                    0.167 |                                     0.1   | **0.267**  |
| distilbert-base-multilingual-cased |   0.017 |                       0.1   |                   0.117 |                    0.1   |                                     0.033 | 0.117      |
| facebook-mbart-large-50            |   0.117 |                       0.117 |                   0.133 |                    0.083 |                                     0.133 | 0.183      |
| gpt2                               |   0.05  |                       0.083 |                   0.05  |                    0.083 |                                     0.1   | 0.050      |
| xlm-roberta-large                  |   0.117 |                       0.2   |                   0.167 |                    0.15  |                                     0.117 | 0.200      |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.463 |                       0.521 |                   0.585 |                    0.603 |                                     0.622 | 0.599      |
| EleutherAI-gpt-neo-125M            |   0.448 |                       0.475 |                   0.503 |                    0.499 |                                     0.553 | 0.488      |
| bert-base-multilingual-cased       |   0.578 |                       0.59  |                   0.636 |                    0.64  |                                     0.656 | 0.625      |
| distilbert-base-multilingual-cased |   0.5   |                       0.6   |                   0.617 |                    0.647 |                                     0.593 | 0.620      |
| facebook-mbart-large-50            |   0.572 |                       0.597 |                   0.652 |                    0.657 |                                     0.701 | **0.727**  |
| gpt2                               |   0.522 |                       0.548 |                   0.579 |                    0.558 |                                     0.575 | 0.634      |
| xlm-roberta-large                  |   0.591 |                       0.63  |                   0.658 |                    0.667 |                                     0.622 | 0.667      |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.379 |                       0.442 |                   0.5   |                    0.51  |                                     0.524 | 0.500      |
| EleutherAI-gpt-neo-125M            |   0.374 |                       0.398 |                   0.437 |                    0.413 |                                     0.466 | 0.408      |
| bert-base-multilingual-cased       |   0.539 |                       0.534 |                   0.573 |                    0.587 |                                     0.583 | 0.539      |
| distilbert-base-multilingual-cased |   0.442 |                       0.553 |                   0.539 |                    0.583 |                                     0.524 | 0.539      |
| facebook-mbart-large-50            |   0.51  |                       0.524 |                   0.587 |                    0.578 |                                     0.626 | **0.665**  |
| gpt2                               |   0.456 |                       0.51  |                   0.558 |                    0.5   |                                     0.5   | 0.587      |
| xlm-roberta-large                  |   0.544 |                       0.578 |                   0.607 |                    0.602 |                                     0.519 | 0.583      |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.595 |                       0.636 |                   0.705 |                    0.739 |                                     0.766 | 0.746      |
| EleutherAI-gpt-neo-125M            |   0.558 |                       0.59  |                   0.592 |                    0.63  |                                     0.681 | 0.609      |
| bert-base-multilingual-cased       |   0.624 |                       0.659 |                   0.715 |                    0.703 |                                     0.75  | 0.745      |
| distilbert-base-multilingual-cased |   0.576 |                       0.655 |                   0.721 |                    0.727 |                                     0.684 | 0.730      |
| facebook-mbart-large-50            |   0.652 |                       0.692 |                   0.733 |                    0.763 |                                     0.796 | **0.801**  |
| gpt2                               |   0.61  |                       0.593 |                   0.602 |                    0.632 |                                     0.678 | 0.688      |
| xlm-roberta-large                  |   0.647 |                       0.692 |                   0.718 |                    0.747 |                                     0.775 | 0.779      |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| EleutherAI-gpt-neo-1.3B            |   0.026 |                       0     |                   0.026 |                    0.026 |                                     0.051 | 0.051      |
| EleutherAI-gpt-neo-125M            |   0     |                       0     |                   0     |                    0.051 |                                     0     | 0.026      |
| bert-base-multilingual-cased       |   0     |                       0.026 |                   0.051 |                    0.026 |                                     0.051 | **0.103**  |
| distilbert-base-multilingual-cased |   0     |                       0     |                   0.051 |                    0.026 |                                     0.051 | 0.026      |
| facebook-mbart-large-50            |   0     |                       0.026 |                   0.051 |                    0.077 |                                     0.051 | 0.051      |
| gpt2                               |   0     |                       0.026 |                   0     |                    0.026 |                                     0.051 | 0.026      |
| xlm-roberta-large                  |   0     |                       0.051 |                   0.026 |                    0.026 |                                     0.077 | 0.077      |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.296 |                       0.308 |                   0.397 |                    0.381 | 0.371                                     |      0.298 |
| EleutherAI-gpt-neo-125M            |   0.172 |                       0.169 |                   0.203 |                    0.252 | 0.192                                     |      0.125 |
| bert-base-multilingual-cased       |   0.386 |                       0.426 |                   0.487 |                    0.447 | 0.515                                     |      0.464 |
| distilbert-base-multilingual-cased |   0.34  |                       0.372 |                   0.497 |                    0.497 | 0.455                                     |      0.517 |
| facebook-mbart-large-50            |   0.424 |                       0.441 |                   0.446 |                    0.521 | **0.573**                                 |      0.529 |
| gpt2                               |   0.061 |                       0.073 |                   0.062 |                    0.039 | 0.191                                     |      0.171 |
| xlm-roberta-large                  |   0.464 |                       0.489 |                   0.464 |                    0.533 | 0.521                                     |      0.553 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                         |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-----------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| EleutherAI-gpt-neo-1.3B            |   0.186 |                       0.209 |                   0.291 |                    0.279 | 0.267                                     |      0.209 |
| EleutherAI-gpt-neo-125M            |   0.116 |                       0.128 |                   0.14  |                    0.198 | 0.140                                     |      0.093 |
| bert-base-multilingual-cased       |   0.314 |                       0.384 |                   0.43  |                    0.419 | 0.488                                     |      0.407 |
| distilbert-base-multilingual-cased |   0.291 |                       0.337 |                   0.442 |                    0.442 | 0.407                                     |      0.453 |
| facebook-mbart-large-50            |   0.326 |                       0.349 |                   0.36  |                    0.442 | **0.500**                                 |      0.43  |
| gpt2                               |   0.035 |                       0.047 |                   0.035 |                    0.023 | 0.128                                     |      0.105 |
| xlm-roberta-large                  |   0.372 |                       0.384 |                   0.372 |                    0.465 | 0.442                                     |      0.488 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                         | title     |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-----------------------------------|:----------|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|-----------:|
| EleutherAI-gpt-neo-1.3B            | **0.727** |                       0.581 |                   0.625 |                    0.6   |                                     0.605 |      0.514 |
| EleutherAI-gpt-neo-125M            | 0.333     |                       0.25  |                   0.375 |                    0.347 |                                     0.308 |      0.19  |
| bert-base-multilingual-cased       | 0.500     |                       0.478 |                   0.561 |                    0.48  |                                     0.545 |      0.538 |
| distilbert-base-multilingual-cased | 0.410     |                       0.414 |                   0.567 |                    0.567 |                                     0.515 |      0.6   |
| facebook-mbart-large-50            | 0.609     |                       0.6   |                   0.585 |                    0.633 |                                     0.672 |      0.685 |
| gpt2                               | 0.231     |                       0.174 |                   0.3   |                    0.118 |                                     0.379 |      0.474 |
| xlm-roberta-large                  | 0.615     |                       0.673 |                   0.615 |                    0.625 |                                     0.633 |      0.636 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                         | title     |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-----------------------------------|:----------|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| EleutherAI-gpt-neo-1.3B            | 0.053     |                       0.079 |                   0.079 |                    0.158 | 0.158                                     |      0.053 |
| EleutherAI-gpt-neo-125M            | 0.026     |                       0.026 |                   0.053 |                    0.053 | 0.105                                     |      0.026 |
| bert-base-multilingual-cased       | 0.105     |                       0.132 |                   0.211 |                    0.158 | **0.237**                                 |      0.211 |
| distilbert-base-multilingual-cased | 0.105     |                       0.158 |                   0.132 |                    0.211 | 0.132                                     |      0.184 |
| facebook-mbart-large-50            | 0.158     |                       0.211 |                   0.132 |                    0.211 | 0.158                                     |      0.184 |
| gpt2                               | 0.000     |                       0     |                   0.026 |                    0     | 0.079                                     |      0.026 |
| xlm-roberta-large                  | **0.237** |                       0.211 |                   0.211 |                    0.211 | 0.211                                     |      0.211 |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1_micro

| language   | model_name                         |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-----------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|:------------------------------------------|:-----------|
| en         | EleutherAI-gpt-neo-1.3B            |   0.646 | 0.687                       |                   0.669 |                    0.682 | 0.709                                     | 0.693      |
| en         | EleutherAI-gpt-neo-125M            |   0.573 | 0.647                       |                   0.642 |                    0.636 | 0.649                                     | 0.631      |
| en         | bert-base-multilingual-cased       |   0.619 | 0.690                       |                   0.676 |                    0.689 | 0.688                                     | 0.711      |
| en         | distilbert-base-multilingual-cased |   0.592 | 0.662                       |                   0.685 |                    0.686 | 0.684                                     | 0.684      |
| en         | facebook-mbart-large-50            |   0.666 | **0.734**                   |                   0.731 |                    0.718 | 0.708                                     | 0.711      |
| en         | gpt2                               |   0.625 | 0.664                       |                   0.678 |                    0.66  | 0.680                                     | 0.654      |
| en         | xlm-roberta-large                  |   0.659 | 0.710                       |                   0.721 |                    0.71  | 0.709                                     | 0.700      |
| fr         | EleutherAI-gpt-neo-1.3B            |   0.368 | 0.454                       |                   0.452 |                    0.429 | 0.486                                     | 0.500      |
| fr         | EleutherAI-gpt-neo-125M            |   0.317 | 0.314                       |                   0.378 |                    0.396 | 0.439                                     | 0.338      |
| fr         | bert-base-multilingual-cased       |   0.429 | 0.421                       |                   0.475 |                    0.492 | 0.545                                     | **0.549**  |
| fr         | distilbert-base-multilingual-cased |   0.377 | 0.426                       |                   0.459 |                    0.538 | 0.538                                     | 0.496      |
| fr         | facebook-mbart-large-50            |   0.429 | 0.498                       |                   0.489 |                    0.498 | 0.513                                     | 0.509      |
| fr         | gpt2                               |   0.356 | 0.387                       |                   0.41  |                    0.369 | 0.471                                     | 0.517      |
| fr         | xlm-roberta-large                  |   0.475 | 0.484                       |                   0.489 |                    0.533 | 0.526                                     | 0.498      |
| ge         | EleutherAI-gpt-neo-1.3B            |   0.502 | 0.546                       |                   0.567 |                    0.578 | 0.573                                     | 0.568      |
| ge         | EleutherAI-gpt-neo-125M            |   0.395 | 0.462                       |                   0.468 |                    0.486 | 0.507                                     | 0.452      |
| ge         | bert-base-multilingual-cased       |   0.488 | 0.599                       |                   0.587 |                    0.602 | 0.587                                     | 0.617      |
| ge         | distilbert-base-multilingual-cased |   0.483 | 0.551                       |                   0.561 |                    0.578 | 0.632                                     | 0.587      |
| ge         | facebook-mbart-large-50            |   0.602 | 0.625                       |                   0.598 |                    0.647 | 0.604                                     | **0.693**  |
| ge         | gpt2                               |   0.462 | 0.474                       |                   0.469 |                    0.554 | 0.583                                     | 0.563      |
| ge         | xlm-roberta-large                  |   0.566 | 0.595                       |                   0.609 |                    0.634 | 0.622                                     | 0.645      |
| it         | EleutherAI-gpt-neo-1.3B            |   0.492 | 0.522                       |                   0.555 |                    0.54  | 0.538                                     | 0.603      |
| it         | EleutherAI-gpt-neo-125M            |   0.353 | 0.471                       |                   0.45  |                    0.481 | 0.524                                     | 0.450      |
| it         | bert-base-multilingual-cased       |   0.492 | 0.562                       |                   0.56  |                    0.61  | 0.601                                     | 0.607      |
| it         | distilbert-base-multilingual-cased |   0.458 | 0.495                       |                   0.54  |                    0.585 | 0.527                                     | 0.602      |
| it         | facebook-mbart-large-50            |   0.545 | 0.571                       |                   0.596 |                    0.599 | 0.621                                     | **0.655**  |
| it         | gpt2                               |   0.409 | 0.470                       |                   0.491 |                    0.523 | 0.533                                     | 0.545      |
| it         | xlm-roberta-large                  |   0.565 | 0.604                       |                   0.608 |                    0.603 | 0.586                                     | **0.655**  |
| po         | EleutherAI-gpt-neo-1.3B            |   0.463 | 0.521                       |                   0.585 |                    0.603 | 0.622                                     | 0.599      |
| po         | EleutherAI-gpt-neo-125M            |   0.448 | 0.475                       |                   0.503 |                    0.499 | 0.553                                     | 0.488      |
| po         | bert-base-multilingual-cased       |   0.578 | 0.590                       |                   0.636 |                    0.64  | 0.656                                     | 0.625      |
| po         | distilbert-base-multilingual-cased |   0.5   | 0.600                       |                   0.617 |                    0.647 | 0.593                                     | 0.620      |
| po         | facebook-mbart-large-50            |   0.572 | 0.597                       |                   0.652 |                    0.657 | 0.701                                     | **0.727**  |
| po         | gpt2                               |   0.522 | 0.548                       |                   0.579 |                    0.558 | 0.575                                     | 0.634      |
| po         | xlm-roberta-large                  |   0.591 | 0.630                       |                   0.658 |                    0.667 | 0.622                                     | 0.667      |
| ru         | EleutherAI-gpt-neo-1.3B            |   0.296 | 0.308                       |                   0.397 |                    0.381 | 0.371                                     | 0.298      |
| ru         | EleutherAI-gpt-neo-125M            |   0.172 | 0.169                       |                   0.203 |                    0.252 | 0.192                                     | 0.125      |
| ru         | bert-base-multilingual-cased       |   0.386 | 0.426                       |                   0.487 |                    0.447 | 0.515                                     | 0.464      |
| ru         | distilbert-base-multilingual-cased |   0.34  | 0.372                       |                   0.497 |                    0.497 | 0.455                                     | 0.517      |
| ru         | facebook-mbart-large-50            |   0.424 | 0.441                       |                   0.446 |                    0.521 | **0.573**                                 | 0.529      |
| ru         | gpt2                               |   0.061 | 0.073                       |                   0.062 |                    0.039 | 0.191                                     | 0.171      |
| ru         | xlm-roberta-large                  |   0.464 | 0.489                       |                   0.464 |                    0.533 | 0.521                                     | 0.553      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall_micro

| language   | model_name                         |   title | title and first paragraph   |   title and 5 sentences | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-----------------------------------|--------:|:----------------------------|------------------------:|:-------------------------|:------------------------------------------|:-----------|
| en         | EleutherAI-gpt-neo-1.3B            |   0.562 | 0.606                       |                   0.587 | 0.599                    | 0.645                                     | 0.621      |
| en         | EleutherAI-gpt-neo-125M            |   0.491 | 0.572                       |                   0.567 | 0.553                    | 0.577                                     | 0.570      |
| en         | bert-base-multilingual-cased       |   0.545 | 0.626                       |                   0.599 | 0.638                    | 0.660                                     | 0.650      |
| en         | distilbert-base-multilingual-cased |   0.513 | 0.592                       |                   0.611 | 0.621                    | 0.623                                     | 0.614      |
| en         | facebook-mbart-large-50            |   0.587 | **0.680**                   |                   0.665 | 0.643                    | 0.650                                     | 0.655      |
| en         | gpt2                               |   0.565 | 0.621                       |                   0.655 | 0.601                    | 0.645                                     | 0.592      |
| en         | xlm-roberta-large                  |   0.579 | 0.636                       |                   0.645 | 0.641                    | 0.653                                     | 0.626      |
| fr         | EleutherAI-gpt-neo-1.3B            |   0.278 | 0.373                       |                   0.389 | 0.357                    | 0.429                                     | 0.444      |
| fr         | EleutherAI-gpt-neo-125M            |   0.262 | 0.238                       |                   0.333 | 0.357                    | 0.357                                     | 0.270      |
| fr         | bert-base-multilingual-cased       |   0.357 | 0.357                       |                   0.413 | 0.460                    | 0.500                                     | **0.508**  |
| fr         | distilbert-base-multilingual-cased |   0.317 | 0.389                       |                   0.421 | 0.500                    | 0.476                                     | 0.452      |
| fr         | facebook-mbart-large-50            |   0.357 | 0.437                       |                   0.429 | 0.452                    | 0.460                                     | 0.444      |
| fr         | gpt2                               |   0.31  | 0.325                       |                   0.341 | 0.302                    | 0.413                                     | 0.476      |
| fr         | xlm-roberta-large                  |   0.413 | 0.429                       |                   0.444 | **0.508**                | 0.484                                     | 0.452      |
| ge         | EleutherAI-gpt-neo-1.3B            |   0.39  | 0.448                       |                   0.477 | 0.453                    | 0.477                                     | 0.471      |
| ge         | EleutherAI-gpt-neo-125M            |   0.297 | 0.384                       |                   0.366 | 0.401                    | 0.413                                     | 0.355      |
| ge         | bert-base-multilingual-cased       |   0.413 | 0.535                       |                   0.488 | 0.523                    | 0.541                                     | 0.535      |
| ge         | distilbert-base-multilingual-cased |   0.401 | 0.471                       |                   0.494 | 0.517                    | 0.558                                     | 0.512      |
| ge         | facebook-mbart-large-50            |   0.523 | 0.581                       |                   0.541 | 0.576                    | 0.541                                     | **0.616**  |
| ge         | gpt2                               |   0.401 | 0.395                       |                   0.424 | 0.459                    | 0.500                                     | 0.483      |
| ge         | xlm-roberta-large                  |   0.471 | 0.535                       |                   0.529 | 0.558                    | 0.541                                     | 0.576      |
| it         | EleutherAI-gpt-neo-1.3B            |   0.396 | 0.413                       |                   0.452 | 0.426                    | 0.426                                     | 0.496      |
| it         | EleutherAI-gpt-neo-125M            |   0.265 | 0.391                       |                   0.37  | 0.387                    | 0.426                                     | 0.352      |
| it         | bert-base-multilingual-cased       |   0.426 | 0.474                       |                   0.474 | 0.530                    | 0.548                                     | 0.513      |
| it         | distilbert-base-multilingual-cased |   0.383 | 0.413                       |                   0.47  | 0.496                    | 0.465                                     | 0.500      |
| it         | facebook-mbart-large-50            |   0.448 | 0.474                       |                   0.504 | 0.513                    | 0.530                                     | **0.565**  |
| it         | gpt2                               |   0.322 | 0.391                       |                   0.43  | 0.443                    | 0.457                                     | 0.483      |
| it         | xlm-roberta-large                  |   0.47  | 0.491                       |                   0.509 | 0.496                    | 0.504                                     | **0.565**  |
| po         | EleutherAI-gpt-neo-1.3B            |   0.379 | 0.442                       |                   0.5   | 0.510                    | 0.524                                     | 0.500      |
| po         | EleutherAI-gpt-neo-125M            |   0.374 | 0.398                       |                   0.437 | 0.413                    | 0.466                                     | 0.408      |
| po         | bert-base-multilingual-cased       |   0.539 | 0.534                       |                   0.573 | 0.587                    | 0.583                                     | 0.539      |
| po         | distilbert-base-multilingual-cased |   0.442 | 0.553                       |                   0.539 | 0.583                    | 0.524                                     | 0.539      |
| po         | facebook-mbart-large-50            |   0.51  | 0.524                       |                   0.587 | 0.578                    | 0.626                                     | **0.665**  |
| po         | gpt2                               |   0.456 | 0.510                       |                   0.558 | 0.500                    | 0.500                                     | 0.587      |
| po         | xlm-roberta-large                  |   0.544 | 0.578                       |                   0.607 | 0.602                    | 0.519                                     | 0.583      |
| ru         | EleutherAI-gpt-neo-1.3B            |   0.186 | 0.209                       |                   0.291 | 0.279                    | 0.267                                     | 0.209      |
| ru         | EleutherAI-gpt-neo-125M            |   0.116 | 0.128                       |                   0.14  | 0.198                    | 0.140                                     | 0.093      |
| ru         | bert-base-multilingual-cased       |   0.314 | 0.384                       |                   0.43  | 0.419                    | 0.488                                     | 0.407      |
| ru         | distilbert-base-multilingual-cased |   0.291 | 0.337                       |                   0.442 | 0.442                    | 0.407                                     | 0.453      |
| ru         | facebook-mbart-large-50            |   0.326 | 0.349                       |                   0.36  | 0.442                    | **0.500**                                 | 0.430      |
| ru         | gpt2                               |   0.035 | 0.047                       |                   0.035 | 0.023                    | 0.128                                     | 0.105      |
| ru         | xlm-roberta-large                  |   0.372 | 0.384                       |                   0.372 | 0.465                    | 0.442                                     | 0.488      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision_micro

| language   | model_name                         | title     | title and first paragraph   | title and 5 sentences   | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-----------------------------------|:----------|:----------------------------|:------------------------|:-------------------------|:------------------------------------------|:-----------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.759     | 0.792                       | 0.777                   | 0.793                    | 0.786                                     | 0.784      |
| en         | EleutherAI-gpt-neo-125M            | 0.686     | 0.745                       | 0.739                   | 0.748                    | 0.742                                     | 0.706      |
| en         | bert-base-multilingual-cased       | 0.715     | 0.769                       | 0.775                   | 0.748                    | 0.718                                     | 0.785      |
| en         | distilbert-base-multilingual-cased | 0.700     | 0.752                       | 0.779                   | 0.765                    | 0.757                                     | 0.772      |
| en         | facebook-mbart-large-50            | 0.769     | 0.797                       | 0.812                   | 0.812                    | 0.778                                     | 0.777      |
| en         | gpt2                               | 0.700     | 0.713                       | 0.702                   | 0.732                    | 0.719                                     | 0.731      |
| en         | xlm-roberta-large                  | 0.765     | 0.805                       | **0.817**               | 0.796                    | 0.776                                     | 0.795      |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.547     | 0.580                       | 0.538                   | 0.536                    | 0.562                                     | 0.571      |
| fr         | EleutherAI-gpt-neo-125M            | 0.402     | 0.462                       | 0.438                   | 0.446                    | 0.570                                     | 0.453      |
| fr         | bert-base-multilingual-cased       | 0.536     | 0.511                       | 0.559                   | 0.527                    | 0.600                                     | 0.598      |
| fr         | distilbert-base-multilingual-cased | 0.465     | 0.471                       | 0.505                   | 0.583                    | **0.619**                                 | 0.548      |
| fr         | facebook-mbart-large-50            | 0.536     | 0.579                       | 0.568                   | 0.553                    | 0.580                                     | 0.596      |
| fr         | gpt2                               | 0.419     | 0.477                       | 0.512                   | 0.475                    | 0.547                                     | 0.566      |
| fr         | xlm-roberta-large                  | 0.559     | 0.557                       | 0.544                   | 0.561                    | 0.575                                     | 0.553      |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.705     | 0.700                       | 0.701                   | **0.796**                | 0.719                                     | 0.717      |
| ge         | EleutherAI-gpt-neo-125M            | 0.593     | 0.579                       | 0.649                   | 0.616                    | 0.657                                     | 0.622      |
| ge         | bert-base-multilingual-cased       | 0.597     | 0.681                       | 0.737                   | 0.709                    | 0.641                                     | 0.730      |
| ge         | distilbert-base-multilingual-cased | 0.605     | 0.664                       | 0.649                   | 0.654                    | 0.727                                     | 0.688      |
| ge         | facebook-mbart-large-50            | 0.709     | 0.676                       | 0.669                   | 0.739                    | 0.684                                     | 0.791      |
| ge         | gpt2                               | 0.543     | 0.591                       | 0.525                   | 0.699                    | 0.699                                     | 0.675      |
| ge         | xlm-roberta-large                  | 0.711     | 0.672                       | 0.717                   | 0.733                    | 0.732                                     | 0.733      |
| it         | EleutherAI-gpt-neo-1.3B            | 0.650     | 0.709                       | 0.717                   | 0.737                    | 0.731                                     | 0.770      |
| it         | EleutherAI-gpt-neo-125M            | 0.526     | 0.592                       | 0.574                   | 0.636                    | 0.681                                     | 0.623      |
| it         | bert-base-multilingual-cased       | 0.583     | 0.690                       | 0.686                   | 0.718                    | 0.667                                     | 0.742      |
| it         | distilbert-base-multilingual-cased | 0.571     | 0.617                       | 0.635                   | 0.713                    | 0.608                                     | 0.757      |
| it         | facebook-mbart-large-50            | 0.696     | 0.717                       | 0.730                   | 0.720                    | 0.748                                     | 0.778      |
| it         | gpt2                               | 0.561     | 0.588                       | 0.572                   | 0.637                    | 0.640                                     | 0.627      |
| it         | xlm-roberta-large                  | 0.711     | **0.785**                   | 0.755                   | 0.770                    | 0.699                                     | 0.778      |
| po         | EleutherAI-gpt-neo-1.3B            | 0.595     | 0.636                       | 0.705                   | 0.739                    | 0.766                                     | 0.746      |
| po         | EleutherAI-gpt-neo-125M            | 0.558     | 0.590                       | 0.592                   | 0.630                    | 0.681                                     | 0.609      |
| po         | bert-base-multilingual-cased       | 0.624     | 0.659                       | 0.715                   | 0.703                    | 0.750                                     | 0.745      |
| po         | distilbert-base-multilingual-cased | 0.576     | 0.655                       | 0.721                   | 0.727                    | 0.684                                     | 0.730      |
| po         | facebook-mbart-large-50            | 0.652     | 0.692                       | 0.733                   | 0.763                    | 0.796                                     | **0.801**  |
| po         | gpt2                               | 0.610     | 0.593                       | 0.602                   | 0.632                    | 0.678                                     | 0.688      |
| po         | xlm-roberta-large                  | 0.647     | 0.692                       | 0.718                   | 0.747                    | 0.775                                     | 0.779      |
| ru         | EleutherAI-gpt-neo-1.3B            | **0.727** | 0.581                       | 0.625                   | 0.600                    | 0.605                                     | 0.514      |
| ru         | EleutherAI-gpt-neo-125M            | 0.333     | 0.250                       | 0.375                   | 0.347                    | 0.308                                     | 0.190      |
| ru         | bert-base-multilingual-cased       | 0.500     | 0.478                       | 0.561                   | 0.480                    | 0.545                                     | 0.538      |
| ru         | distilbert-base-multilingual-cased | 0.410     | 0.414                       | 0.567                   | 0.567                    | 0.515                                     | 0.600      |
| ru         | facebook-mbart-large-50            | 0.609     | 0.600                       | 0.585                   | 0.633                    | 0.672                                     | 0.685      |
| ru         | gpt2                               | 0.231     | 0.174                       | 0.300                   | 0.118                    | 0.379                                     | 0.474      |
| ru         | xlm-roberta-large                  | 0.615     | 0.673                       | 0.615                   | 0.625                    | 0.633                                     | 0.636      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                         | title     | title and first paragraph   | title and 5 sentences   |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-----------------------------------|:----------|:----------------------------|:------------------------|-------------------------:|:------------------------------------------|:-----------|
| en         | EleutherAI-gpt-neo-1.3B            | 0.058     | 0.136                       | 0.117                   |                    0.078 | 0.146                                     | 0.097      |
| en         | EleutherAI-gpt-neo-125M            | 0.068     | 0.087                       | 0.107                   |                    0.097 | 0.078                                     | 0.019      |
| en         | bert-base-multilingual-cased       | 0.097     | 0.136                       | 0.126                   |                    0.097 | 0.126                                     | 0.117      |
| en         | distilbert-base-multilingual-cased | 0.087     | 0.097                       | 0.117                   |                    0.058 | 0.068                                     | 0.087      |
| en         | facebook-mbart-large-50            | 0.097     | 0.126                       | **0.155**               |                    0.126 | 0.136                                     | 0.117      |
| en         | gpt2                               | 0.049     | 0.078                       | 0.087                   |                    0.068 | 0.058                                     | 0.039      |
| en         | xlm-roberta-large                  | 0.049     | 0.117                       | **0.155**               |                    0.126 | 0.068                                     | 0.097      |
| fr         | EleutherAI-gpt-neo-1.3B            | 0.024     | 0.095                       | 0.071                   |                    0.048 | 0.071                                     | 0.071      |
| fr         | EleutherAI-gpt-neo-125M            | 0.071     | 0.024                       | 0.048                   |                    0.071 | 0.048                                     | 0.000      |
| fr         | bert-base-multilingual-cased       | 0.048     | 0.095                       | 0.071                   |                    0.071 | **0.167**                                 | 0.048      |
| fr         | distilbert-base-multilingual-cased | 0.048     | 0.048                       | 0.048                   |                    0.048 | 0.048                                     | 0.071      |
| fr         | facebook-mbart-large-50            | 0.000     | 0.095                       | 0.024                   |                    0.024 | 0.095                                     | 0.071      |
| fr         | gpt2                               | 0.000     | 0.071                       | 0.024                   |                    0.024 | 0.071                                     | 0.071      |
| fr         | xlm-roberta-large                  | 0.095     | 0.071                       | 0.071                   |                    0.071 | 0.095                                     | 0.095      |
| ge         | EleutherAI-gpt-neo-1.3B            | 0.000     | 0.029                       | 0.000                   |                    0.057 | 0.029                                     | 0.057      |
| ge         | EleutherAI-gpt-neo-125M            | 0.000     | 0.000                       | 0.000                   |                    0     | 0.000                                     | 0.000      |
| ge         | bert-base-multilingual-cased       | 0.029     | 0.029                       | 0.086                   |                    0     | 0.000                                     | 0.000      |
| ge         | distilbert-base-multilingual-cased | 0.000     | 0.029                       | 0.000                   |                    0     | 0.029                                     | 0.029      |
| ge         | facebook-mbart-large-50            | 0.057     | **0.114**                   | 0.029                   |                    0.029 | 0.029                                     | 0.086      |
| ge         | gpt2                               | 0.029     | 0.000                       | 0.000                   |                    0     | 0.029                                     | 0.000      |
| ge         | xlm-roberta-large                  | 0.057     | 0.086                       | 0.029                   |                    0.029 | 0.057                                     | 0.029      |
| it         | EleutherAI-gpt-neo-1.3B            | 0.050     | 0.133                       | 0.117                   |                    0.183 | 0.133                                     | 0.200      |
| it         | EleutherAI-gpt-neo-125M            | 0.017     | 0.067                       | 0.033                   |                    0.05  | 0.067                                     | 0.067      |
| it         | bert-base-multilingual-cased       | 0.083     | 0.117                       | 0.100                   |                    0.167 | 0.100                                     | **0.267**  |
| it         | distilbert-base-multilingual-cased | 0.017     | 0.100                       | 0.117                   |                    0.1   | 0.033                                     | 0.117      |
| it         | facebook-mbart-large-50            | 0.117     | 0.117                       | 0.133                   |                    0.083 | 0.133                                     | 0.183      |
| it         | gpt2                               | 0.050     | 0.083                       | 0.050                   |                    0.083 | 0.100                                     | 0.050      |
| it         | xlm-roberta-large                  | 0.117     | 0.200                       | 0.167                   |                    0.15  | 0.117                                     | 0.200      |
| po         | EleutherAI-gpt-neo-1.3B            | 0.026     | 0.000                       | 0.026                   |                    0.026 | 0.051                                     | 0.051      |
| po         | EleutherAI-gpt-neo-125M            | 0.000     | 0.000                       | 0.000                   |                    0.051 | 0.000                                     | 0.026      |
| po         | bert-base-multilingual-cased       | 0.000     | 0.026                       | 0.051                   |                    0.026 | 0.051                                     | **0.103**  |
| po         | distilbert-base-multilingual-cased | 0.000     | 0.000                       | 0.051                   |                    0.026 | 0.051                                     | 0.026      |
| po         | facebook-mbart-large-50            | 0.000     | 0.026                       | 0.051                   |                    0.077 | 0.051                                     | 0.051      |
| po         | gpt2                               | 0.000     | 0.026                       | 0.000                   |                    0.026 | 0.051                                     | 0.026      |
| po         | xlm-roberta-large                  | 0.000     | 0.051                       | 0.026                   |                    0.026 | 0.077                                     | 0.077      |
| ru         | EleutherAI-gpt-neo-1.3B            | 0.053     | 0.079                       | 0.079                   |                    0.158 | 0.158                                     | 0.053      |
| ru         | EleutherAI-gpt-neo-125M            | 0.026     | 0.026                       | 0.053                   |                    0.053 | 0.105                                     | 0.026      |
| ru         | bert-base-multilingual-cased       | 0.105     | 0.132                       | 0.211                   |                    0.158 | **0.237**                                 | 0.211      |
| ru         | distilbert-base-multilingual-cased | 0.105     | 0.158                       | 0.132                   |                    0.211 | 0.132                                     | 0.184      |
| ru         | facebook-mbart-large-50            | 0.158     | 0.211                       | 0.132                   |                    0.211 | 0.158                                     | 0.184      |
| ru         | gpt2                               | 0.000     | 0.000                       | 0.026                   |                    0     | 0.079                                     | 0.026      |
| ru         | xlm-roberta-large                  | **0.237** | 0.211                       | 0.211                   |                    0.211 | 0.211                                     | 0.211      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)
