In [1]:
import os
import glob

from IPython.display import display, Markdown
import pandas as pd

# Group the results by model (experiment type and model type), find the best-performing
# configuration per unit of analysis in terms of F1-score, and report the results in tables.


In [2]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps the file list
# (and the order in which the files are concatenated later) reproducible across runs.
results_filepaths = sorted(glob.glob('./logged_performance_per_model/*/*raw*.csv'))

In [3]:
# Show the matched metrics files
results_filepaths

['./logged_performance_per_model/AshtonIsNotHere-xlm-roberta-long-base-4096/truncated_raw_truncated_single_instance_AshtonIsNotHere_xlm-roberta-long-base-4096-raw_text_metrics.csv',
 './logged_performance_per_model/AshtonIsNotHere-xlm-roberta-long-base-4096/truncated_raw_truncated_single_instance_AshtonIsNotHere_xlm-roberta-long-base-4096-title_metrics.csv',
 './logged_performance_per_model/AshtonIsNotHere-xlm-roberta-long-base-4096/truncated_raw_truncated_single_instance_AshtonIsNotHere_xlm-roberta-long-base-4096-title_and_first_sentence_each_paragraph_metrics.csv',
 './logged_performance_per_model/AshtonIsNotHere-xlm-roberta-long-base-4096/truncated_raw_truncated_single_instance_AshtonIsNotHere_xlm-roberta-long-base-4096-title_and_10_sentences_metrics.csv',
 './logged_performance_per_model/AshtonIsNotHere-xlm-roberta-long-base-4096/truncated_raw_truncated_single_instance_AshtonIsNotHere_xlm-roberta-long-base-4096-title_and_5_sentences_metrics.csv',
 './logged_performance_per_model/As

In [4]:
# Read every metrics CSV and tag its rows with the model that produced them.
# The model name is the directory that directly contains the file; os.path is used
# rather than splitting on '/' so that Windows path separators are handled as well.
dfs_list = [
    pd.read_csv(results_filepath).assign(
        model_name=os.path.basename(os.path.dirname(results_filepath)))
    for results_filepath in results_filepaths
]

# Index by (language, model, unit of analysis) so a single language can be selected with .loc
results_df = pd.concat(dfs_list).set_index(['language', 'model_name', 'unit_of_analysis']).sort_index()
results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_micro,precision_micro,recall_micro,f1_macro,precision_macro,recall_macro,accuracy
language,model_name,unit_of_analysis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
en,AshtonIsNotHere-xlm-roberta-long-base-4096,raw_text,0.684142,0.737288,0.638142,0.522957,0.574319,0.493717,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title,0.627717,0.706422,0.564792,0.462368,0.571385,0.414542,0.067961
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_10_sentences,0.688396,0.73743,0.645477,0.521331,0.605306,0.478738,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_5_sentences,0.685083,0.787302,0.606357,0.51798,0.631508,0.456047,0.116505
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_first_paragraph,0.679195,0.752976,0.618582,0.481295,0.608876,0.430143,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_first_sentence_each_paragraph,0.70415,0.778107,0.643032,0.583106,0.715612,0.52812,0.126214
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,raw_text,0.53719,0.560345,0.515873,0.444194,0.56233,0.420883,0.071429
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title,0.416667,0.5,0.357143,0.22635,0.268776,0.228918,0.071429
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_10_sentences,0.502128,0.541284,0.468254,0.400744,0.541047,0.377638,0.02381
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_5_sentences,0.481481,0.577778,0.412698,0.387088,0.539113,0.342154,0.095238


In [5]:
# Save the combined metrics table, including its three index levels, to a CSV file
# in the current working directory
results_df.to_csv('performance_of_models.csv')

### Generate the tables to report

In [6]:
def display_performance_table(df, metric, index_cols=['model_name'], display_=True):
    """Build (and optionally render) a table of one metric per unit of analysis.

    Parameters
    ----------
    df : pandas.DataFrame
        Results whose index levels and/or columns provide ``unit_of_analysis``,
        every name in ``index_cols`` and the ``metric`` column. It is not modified.
    metric : str
        Name of the column holding the score to report.
    index_cols : list of str
        Columns identifying one row of the report. The default list is never mutated.
    display_ : bool
        When True, the table is rendered in the notebook as Markdown.

    Returns
    -------
    pandas.DataFrame
        One row per ``index_cols`` combination and one column per unit of analysis
        (underscores replaced by spaces, in the fixed order listed below). Cells hold
        the score formatted with three decimals; every cell equal to the highest score
        of the whole table is wrapped in ``**`` (Markdown bold). Combinations without
        a result are filled with ``0``.
    """
    index_cols = list(index_cols)

    # reset_index already returns a new frame, so the caller's data is left untouched
    report_table = df.reset_index()
    report_table['result'] = report_table[metric].map(lambda x: f'{x:.3f}')

    # An ordered categorical fixes the left-to-right column order of the report.
    # Units of analysis that are not listed here become missing values in the categorical.
    report_table['col_title'] = pd.Categorical(
        report_table['unit_of_analysis'].str.replace('_', ' ', regex=False),
        categories=['title', 'title and first paragraph', 'title and 5 sentences', 'title and 10 sentences',
                    'title and first sentence each paragraph', 'raw text'],
        ordered=True)

    # observed=False spells out the historical pandas default for categorical keys,
    # which avoids the FutureWarning about that default changing.
    report_table = (
        report_table[index_cols + ['col_title', 'result']]
        .pivot_table(index=index_cols, columns=['col_title'], values=['result'],
                     aggfunc='first', fill_value=0, observed=False)
        .droplevel(0, axis=1)
    )
    report_table.columns.names = [None]

    # Highlight the best score(s) of the table. The comparison uses the formatted
    # (rounded) values, so scores that look identical in the report are all highlighted.
    # A new frame is built with `mask` instead of writing through `to_numpy()`: that
    # array is not guaranteed to be a writable view of the frame (it can be a copy, and
    # it is read-only when pandas Copy-on-Write is active).
    if report_table.size:  # max() of an empty table would raise
        scores = report_table.astype(float)
        is_best = scores == scores.to_numpy().max()
        report_table = report_table.mask(is_best, '**' + report_table.astype(str) + '**')

    if display_:
        display(Markdown(report_table.to_markdown()))

    return report_table

In [7]:
# Try the table builder on the English results only, reporting micro-averaged F1 per model
display_performance_table(df=results_df.loc['en'], metric='f1_micro', index_cols=['model_name'], display_=True)

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.628 |                       0.679 |                   0.685 |                    0.688 | **0.704**                                 |      0.684 |

Unnamed: 0_level_0,title,title and first paragraph,title and 5 sentences,title and 10 sentences,title and first sentence each paragraph,raw text
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AshtonIsNotHere-xlm-roberta-long-base-4096,0.628,0.679,0.685,0.688,**0.704**,0.684


### Generate tables for all languages

In [8]:
# Metric columns of results_df for which report tables are generated and exported
metrics_to_report = ['f1_micro', 'recall_micro', 'precision_micro', 'accuracy']

In [9]:
# Maps the language codes used in the `language` index level of results_df to the
# names used in report headings and exported file names.
# NOTE(review): 'po' and 'ge' are not the ISO 639-1 codes for Polish/German ('pl'/'de');
# they appear to follow the codes used in the metrics files — confirm before changing.
language_dict = {'en': 'English', 'it': 'Italian', 'fr': 'French', 'po': 'Polish', 'ru': 'Russian', 'ge': 'German'}

In [10]:
# Show the combined results table again before building the per-language reports
results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1_micro,precision_micro,recall_micro,f1_macro,precision_macro,recall_macro,accuracy
language,model_name,unit_of_analysis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
en,AshtonIsNotHere-xlm-roberta-long-base-4096,raw_text,0.684142,0.737288,0.638142,0.522957,0.574319,0.493717,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title,0.627717,0.706422,0.564792,0.462368,0.571385,0.414542,0.067961
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_10_sentences,0.688396,0.73743,0.645477,0.521331,0.605306,0.478738,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_5_sentences,0.685083,0.787302,0.606357,0.51798,0.631508,0.456047,0.116505
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_first_paragraph,0.679195,0.752976,0.618582,0.481295,0.608876,0.430143,0.097087
en,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_first_sentence_each_paragraph,0.70415,0.778107,0.643032,0.583106,0.715612,0.52812,0.126214
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,raw_text,0.53719,0.560345,0.515873,0.444194,0.56233,0.420883,0.071429
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title,0.416667,0.5,0.357143,0.22635,0.268776,0.228918,0.071429
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_10_sentences,0.502128,0.541284,0.468254,0.400744,0.541047,0.377638,0.02381
fr,AshtonIsNotHere-xlm-roberta-long-base-4096,title_and_5_sentences,0.481481,0.577778,0.412698,0.387088,0.539113,0.342154,0.095238


In [11]:
def _export_report_table(report_table, output_dir, metric, file_stem):
    """Write one report table as Markdown, LaTeX and CSV below ``output_dir/metric``."""
    flat_table = report_table.reset_index()

    # Context managers guarantee the handles are closed even if an export raises
    with open(os.path.join(output_dir, metric, 'markdown', f"{file_stem}.md"), "w") as markdown_file:
        flat_table.to_markdown(markdown_file, index=False)

    with open(os.path.join(output_dir, metric, 'latex', f"{file_stem}.tex"), "w") as latex_file:
        flat_table.to_latex(latex_file, index=False)

    # The CSV keeps the index, which holds the grouping columns
    report_table.to_csv(os.path.join(output_dir, metric, 'csv', f"{file_stem}.csv"))


def display_metrics_and_write_to_file(df, grouping_criterion, output_dir):
    """Display and export one report table per language and metric, plus a combined table.

    For every group of the first index level of ``df`` (the language) and every metric
    in the module-level ``metrics_to_report``, a table is built and displayed with
    ``display_performance_table`` and written to
    ``<output_dir>/<metric>/{markdown,latex,csv}/<language name>_<metric>.<ext>``.
    Afterwards the per-language tables of each metric are stacked and exported as
    ``all_6_languages_<metric>.<ext>`` in the same directories.

    Parameters
    ----------
    df : pandas.DataFrame
        Results indexed by language (first level); passed per language to
        ``display_performance_table``.
    grouping_criterion : list of str
        Columns identifying one row of each report table.
    output_dir : str
        Root directory of the exported files; created if missing.

    Notes
    -----
    Language names are looked up in the module-level ``language_dict``; a code that is
    missing from it is used as is instead of raising ``KeyError``.
    """
    grouping_criterion = list(grouping_criterion)

    # Create the whole output tree once instead of on every loop iteration
    os.makedirs(output_dir, exist_ok=True)
    for metric in metrics_to_report:
        for file_format in ('markdown', 'latex', 'csv'):
            os.makedirs(os.path.join(output_dir, metric, file_format), exist_ok=True)

    report_tables_dfs_dict = {metric: [] for metric in metrics_to_report}

    # `language_df` avoids shadowing the notebook-level `results_df`
    for language, language_df in df.groupby(level=0):
        language_name = language_dict.get(language, language)
        display(Markdown(f'# {language_name}'))

        for metric in metrics_to_report:
            display(Markdown(f'## {metric}'))

            report_table = display_performance_table(
                df=language_df, index_cols=grouping_criterion, metric=metric, display_=True)

            _export_report_table(report_table, output_dir, metric, f"{language_name}_{metric}")

            # Stack all languages into single table
            report_table['language'] = language
            report_table = report_table.reset_index().set_index(['language'] + grouping_criterion)

            report_tables_dfs_dict[metric].append(report_table)

    # Report and store the unified tables
    display(Markdown('# All 6 Languages'))
    for metric in metrics_to_report:
        display(Markdown(f'## {metric}'))
        multi_language_report_table_metric = pd.concat(report_tables_dfs_dict[metric])
        display(Markdown(multi_language_report_table_metric.reset_index().to_markdown(index=False)))

        _export_report_table(multi_language_report_table_metric, output_dir, metric, f"all_6_languages_{metric}")

# Per model type

In [12]:
# Build, display and export the report tables with one row per model;
# the files are written below ./per_model_name_tables
display_metrics_and_write_to_file(df=results_df, grouping_criterion=['model_name'], output_dir='per_model_name_tables')

# English

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.628 |                       0.679 |                   0.685 |                    0.688 | **0.704**                                 |      0.684 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.565 |                       0.619 |                   0.606 | **0.645**                |                                     0.643 |      0.638 |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences |   title and first sentence each paragraph |   raw text |
|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|------------------------------------------:|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.706 |                       0.753 | **0.787**               |                    0.737 |                                     0.778 |      0.737 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.068 |                       0.097 |                   0.117 |                    0.097 | **0.126**                                 |      0.097 |

  report_table.reset_index().to_latex(latex_file, index=False)


# French

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.417 |                       0.461 |                   0.481 |                    0.502 |                                     0.526 | **0.537**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.397 |                   0.413 |                    0.468 |                                     0.476 | **0.516**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |     0.5 |                       0.549 |                   0.578 |                    0.541 | **0.588**                                 |       0.56 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 |                       0.095 |                   0.095 |                    0.024 | **0.119**                                 |      0.071 |

  report_table.reset_index().to_latex(latex_file, index=False)


# German

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.554 |                       0.589 |                   0.612 |                    0.619 |                                     0.618 | **0.669**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.494 |                         0.5 |                   0.547 |                    0.552 |                                     0.512 | **0.651**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |    0.63 |                       0.717 |                   0.696 |                    0.704 | **0.779**                                 |      0.687 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title | title and first paragraph   |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|:----------------------------|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |       0 | **0.029**                   |                       0 |                        0 |                                         0 | **0.029**  |

  report_table.reset_index().to_latex(latex_file, index=False)


# Italian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.542 |                       0.555 |                   0.548 |                    0.585 |                                     0.532 | **0.650**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.461 |                        0.47 |                   0.461 |                    0.487 |                                     0.435 | **0.570**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.658 |                       0.679 |                   0.675 |                    0.732 |                                     0.685 | **0.757**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.083 |                       0.117 |                   0.133 |                    0.133 |                                     0.117 | **0.167**  |

  report_table.reset_index().to_latex(latex_file, index=False)


# Polish

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.538 |                       0.591 |                   0.591 |                    0.628 |                                     0.621 | **0.711**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.466 |                       0.529 |                   0.534 |                    0.529 |                                     0.524 | **0.626**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.636 |                       0.669 |                   0.663 |                    0.773 |                                     0.761 | **0.822**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 |                       0.026 |                   0.026 | **0.051**                |                                     0.026 | **0.051**  |

  report_table.reset_index().to_latex(latex_file, index=False)


# Russian

## f1_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.392 |                       0.471 |                   0.469 |                    0.503 | **0.534**                                 |      0.519 |

  report_table.reset_index().to_latex(latex_file, index=False)


## recall_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences |   title and first sentence each paragraph | raw text   |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|------------------------------------------:|:-----------|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.326 |                       0.419 |                   0.395 |                    0.442 |                                     0.453 | **0.477**  |

  report_table.reset_index().to_latex(latex_file, index=False)


## precision_micro

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.491 |                       0.537 |                   0.576 |                    0.585 | **0.650**                                 |      0.569 |

  report_table.reset_index().to_latex(latex_file, index=False)


## accuracy

| model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   |   raw text |
|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|-----------:|
| AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.079 |                       0.211 |                   0.158 |                    0.211 | **0.237**                                 |      0.184 |

  report_table.reset_index().to_latex(latex_file, index=False)


# All 6 Languages

## f1_micro

| language   | model_name                                 |   title |   title and first paragraph |   title and 5 sentences |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|------------------------:|-------------------------:|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.628 |                       0.679 |                   0.685 |                    0.688 | **0.704**                                 | 0.684      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.417 |                       0.461 |                   0.481 |                    0.502 | 0.526                                     | **0.537**  |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.554 |                       0.589 |                   0.612 |                    0.619 | 0.618                                     | **0.669**  |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.542 |                       0.555 |                   0.548 |                    0.585 | 0.532                                     | **0.650**  |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.538 |                       0.591 |                   0.591 |                    0.628 | 0.621                                     | **0.711**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.392 |                       0.471 |                   0.469 |                    0.503 | **0.534**                                 | 0.519      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## recall_micro

| language   | model_name                                 |   title |   title and first paragraph |   title and 5 sentences | title and 10 sentences   |   title and first sentence each paragraph | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|------------------------:|:-------------------------|------------------------------------------:|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.565 |                       0.619 |                   0.606 | **0.645**                |                                     0.643 | 0.638      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.357 |                       0.397 |                   0.413 | 0.468                    |                                     0.476 | **0.516**  |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.494 |                       0.5   |                   0.547 | 0.552                    |                                     0.512 | **0.651**  |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.461 |                       0.47  |                   0.461 | 0.487                    |                                     0.435 | **0.570**  |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.466 |                       0.529 |                   0.534 | 0.529                    |                                     0.524 | **0.626**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.326 |                       0.419 |                   0.395 | 0.442                    |                                     0.453 | **0.477**  |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## precision_micro

| language   | model_name                                 |   title |   title and first paragraph | title and 5 sentences   |   title and 10 sentences | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|----------------------------:|:------------------------|-------------------------:|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.706 |                       0.753 | **0.787**               |                    0.737 | 0.778                                     | 0.737      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.5   |                       0.549 | 0.578                   |                    0.541 | **0.588**                                 | 0.560      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.63  |                       0.717 | 0.696                   |                    0.704 | **0.779**                                 | 0.687      |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.658 |                       0.679 | 0.675                   |                    0.732 | 0.685                                     | **0.757**  |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.636 |                       0.669 | 0.663                   |                    0.773 | 0.761                                     | **0.822**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.491 |                       0.537 | 0.576                   |                    0.585 | **0.650**                                 | 0.569      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)


## accuracy

| language   | model_name                                 |   title | title and first paragraph   |   title and 5 sentences | title and 10 sentences   | title and first sentence each paragraph   | raw text   |
|:-----------|:-------------------------------------------|--------:|:----------------------------|------------------------:|:-------------------------|:------------------------------------------|:-----------|
| en         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.068 | 0.097                       |                   0.117 | 0.097                    | **0.126**                                 | 0.097      |
| fr         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.071 | 0.095                       |                   0.095 | 0.024                    | **0.119**                                 | 0.071      |
| ge         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0     | **0.029**                   |                   0     | 0.000                    | 0.000                                     | **0.029**  |
| it         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.083 | 0.117                       |                   0.133 | 0.133                    | 0.117                                     | **0.167**  |
| po         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.026 | 0.026                       |                   0.026 | **0.051**                | 0.026                                     | **0.051**  |
| ru         | AshtonIsNotHere-xlm-roberta-long-base-4096 |   0.079 | 0.211                       |                   0.158 | 0.211                    | **0.237**                                 | 0.184      |

  multi_language_report_table_metric.reset_index().to_latex(latex_file, index=False)
