# Group evaluations

Once we have produced several `xx_metrics.json` files, this notebook puts them all together in a single CSV file.

In [1]:
import glob
import json
import os
import pandas as pd
import re

In [2]:
def group_metrics(ckpt_names, base_dir='../artifacts/predictions_normal'):
    """Collect the evaluation scores of several checkpoints into one DataFrame.

    For every checkpoint name, each ``<dataset>_metrics.json`` file found in
    ``<base_dir>/<ckpt_name>/`` is loaded and its ``'exact'`` and ``'f1'``
    entries become one row of the result.

    Args:
        ckpt_names: iterable of checkpoint directory names.
        base_dir: directory that holds one sub-directory per checkpoint.
            Defaults to the location the notebook has always read from.

    Returns:
        A ``pandas.DataFrame`` indexed by ``ckpt_names`` with the columns
        ``dataset_names``, ``exact_scores`` and ``f1_scores``. Rows follow the
        order of ``ckpt_names`` and, within a checkpoint, the sorted order of
        the metrics file paths. A checkpoint without any metrics file simply
        contributes no rows.

    Raises:
        KeyError: if a metrics file has no ``'exact'`` or ``'f1'`` entry.
    """
    suffix = '_metrics.json'
    records = {
        'ckpt_names': [],
        'dataset_names': [],
        'exact_scores': [],
        'f1_scores': [],
    }

    for ckpt_name in ckpt_names:
        # Escape the directory part so that glob metacharacters in a
        # checkpoint name are matched literally.
        ckpt_dir = glob.escape(os.path.join(base_dir, ckpt_name))
        pattern = os.path.join(ckpt_dir, '*' + suffix)

        # glob returns paths in arbitrary order; sort them so the resulting
        # table is reproducible across runs and machines.
        for path in sorted(glob.glob(pattern)):
            # Strip only the trailing suffix (the glob guarantees it is there)
            # instead of regex-replacing every loose match in the name.
            dataset_name = os.path.basename(path)[:-len(suffix)]
            with open(path, 'r') as fp:
                scores = json.load(fp)

            records['ckpt_names'].append(ckpt_name)
            records['dataset_names'].append(dataset_name)
            records['exact_scores'].append(scores['exact'])
            records['f1_scores'].append(scores['f1'])

    return pd.DataFrame.from_dict(records).set_index('ckpt_names')

In [3]:
# mBERT checkpoints 'squad' and 'squad5k'.
ckpt_names = ['qa_mbert_' + train_set for train_set in ('squad', 'squad5k')]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_squad,dev-v1.1-es,49.995226,68.698912
qa_mbert_squad,dev-v1.1-ja,61.514196,63.412198
qa_mbert_squad,dev-v1.1,80.982296,88.106862
qa_mbert_squad,mlqa-v1-test-es-es,42.36832,62.849735
qa_mbert_squad,mlqa-v1-test-hi-hi,31.982314,45.998602
qa_mbert_squad,mlqa-v1-test-vi-vi,39.633639,58.526424
qa_mbert_squad,tydiqa-gold-v1.1-dev-russian,47.044025,64.788938
qa_mbert_squad,tydiqa-goldp-v1.1-dev-russian,47.044025,64.788938
qa_mbert_squad,xquad.es,55.08982,73.298553
qa_mbert_squad,xquad.hi,41.261261,54.933071


In [4]:
# XLM checkpoints 'squad' and 'squad5k'.
ckpt_names = ['qa_xlm_' + train_set for train_set in ('squad', 'squad5k')]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_xlm_squad,dev-v1.1-es,45.292037,61.447115
qa_xlm_squad,dev-v1.1-ja,53.82263,55.142096
qa_xlm_squad,dev-v1.1,69.661421,75.559731
qa_xlm_squad,mlqa-v1-test-es-es,36.115385,53.578392
qa_xlm_squad,mlqa-v1-test-hi-hi,33.799955,47.808495
qa_xlm_squad,mlqa-v1-test-vi-vi,38.632585,55.23567
qa_xlm_squad,tydiqa-gold-v1.1-dev-russian,31.203008,46.337288
qa_xlm_squad,tydiqa-goldp-v1.1-dev-russian,31.203008,46.337288
qa_xlm_squad,xquad.es,49.786142,64.441313
qa_xlm_squad,xquad.hi,43.793103,56.515629


In [5]:
# Spanish ('es') checkpoints: every training variant combined with every
# top-k value, variants in the outer loop so the order is
# synthetic top1..top5, extsquad top1..top5, extsquad5k top1..top5.
ckpt_names = [
    'qa_mbert_%s_es_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_synthetic_es_top1,dev-v1.1-es,14.742672,21.796018
qa_mbert_synthetic_es_top1,mlqa-v1-test-es-es,12.360631,20.613115
qa_mbert_synthetic_es_top1,xquad.es,14.371257,22.040813
qa_mbert_synthetic_es_top2,dev-v1.1-es,16.490022,23.99373
qa_mbert_synthetic_es_top2,mlqa-v1-test-es-es,13.264129,21.811303
qa_mbert_synthetic_es_top2,xquad.es,15.911035,23.092772
qa_mbert_synthetic_es_top3,dev-v1.1-es,18.886661,27.726908
qa_mbert_synthetic_es_top3,mlqa-v1-test-es-es,15.186467,25.984014
qa_mbert_synthetic_es_top3,xquad.es,17.365269,27.308467
qa_mbert_synthetic_es_top5,dev-v1.1-es,17.693116,25.406085


In [6]:
# Russian ('ru') checkpoints: every training variant combined with every
# top-k value, variants in the outer loop so the order is
# synthetic top1..top5, extsquad top1..top5, extsquad5k top1..top5.
ckpt_names = [
    'qa_mbert_%s_ru_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_synthetic_ru_top1,tydiqa-goldp-v1.1-dev-russian,15.72327,25.6751
qa_mbert_synthetic_ru_top1,xquad.ru,12.619669,18.252138
qa_mbert_synthetic_ru_top2,tydiqa-goldp-v1.1-dev-russian,16.981132,29.901063
qa_mbert_synthetic_ru_top2,xquad.ru,14.708442,20.470549
qa_mbert_synthetic_ru_top3,tydiqa-goldp-v1.1-dev-russian,16.855346,29.134025
qa_mbert_synthetic_ru_top3,xquad.ru,15.143603,22.054233
qa_mbert_synthetic_ru_top5,tydiqa-goldp-v1.1-dev-russian,18.113208,27.896569
qa_mbert_synthetic_ru_top5,xquad.ru,14.447346,20.895918
qa_mbert_extsquad_ru_top1,tydiqa-goldp-v1.1-dev-russian,47.169811,64.69058
qa_mbert_extsquad_ru_top1,xquad.ru,49.260226,65.566237


In [7]:
# Hindi ('hi') checkpoints: every training variant combined with every
# top-k value, variants in the outer loop.
# NOTE(review): the saved output of this cell contains no rows, so presumably
# no metrics files exist for these checkpoints - confirm.
ckpt_names = [
    'qa_mbert_%s_hi_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [8]:
# Vietnamese ('vi') checkpoints: every training variant combined with every
# top-k value, variants in the outer loop so the order is
# synthetic top1..top5, extsquad top1..top5, extsquad5k top1..top5.
ckpt_names = [
    'qa_mbert_%s_vi_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_synthetic_vi_top1,mlqa-v1-test-vi-vi,7.389675,13.234309
qa_mbert_synthetic_vi_top1,xquad.vi,10.445205,16.669212
qa_mbert_synthetic_vi_top2,mlqa-v1-test-vi-vi,9.17985,15.879419
qa_mbert_synthetic_vi_top2,xquad.vi,12.585616,18.839686
qa_mbert_synthetic_vi_top3,mlqa-v1-test-vi-vi,11.136553,18.756102
qa_mbert_synthetic_vi_top3,xquad.vi,13.527397,21.251181
qa_mbert_synthetic_vi_top5,mlqa-v1-test-vi-vi,10.366361,17.323405
qa_mbert_synthetic_vi_top5,xquad.vi,12.414384,20.054625
qa_mbert_extsquad_vi_top1,mlqa-v1-test-vi-vi,37.281432,54.398071
qa_mbert_extsquad_vi_top1,xquad.vi,46.40411,63.988515


In [9]:
# Japanese ('ja') checkpoints: every training variant combined with every
# top-k value, variants in the outer loop so the order is
# synthetic top1..top5, extsquad top1..top5, extsquad5k top1..top5.
ckpt_names = [
    'qa_mbert_%s_ja_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_synthetic_ja_top1,dev-v1.1-ja,14.195584,15.614288
qa_mbert_synthetic_ja_top2,dev-v1.1-ja,15.141956,16.048432
qa_mbert_synthetic_ja_top3,dev-v1.1-ja,14.511041,15.53278
qa_mbert_synthetic_ja_top5,dev-v1.1-ja,14.195584,16.577492
qa_mbert_extsquad_ja_top1,dev-v1.1-ja,51.419558,52.23474
qa_mbert_extsquad_ja_top2,dev-v1.1-ja,48.264984,49.746132
qa_mbert_extsquad_ja_top3,dev-v1.1-ja,47.003155,48.080967
qa_mbert_extsquad_ja_top5,dev-v1.1-ja,48.580442,49.600421
qa_mbert_extsquad5k_ja_top1,dev-v1.1-ja,46.37224,47.862003
qa_mbert_extsquad5k_ja_top2,dev-v1.1-ja,36.908517,38.035504


In [10]:
# 'all' checkpoints: every training variant combined with every top-k value,
# variants in the outer loop so the order is
# synthetic top1..top5, extsquad top1..top5, extsquad5k top1..top5.
ckpt_names = [
    'qa_mbert_%s_all_top%d' % (variant, k_top)
    for variant in ('synthetic', 'extsquad', 'extsquad5k')
    for k_top in (1, 2, 3, 5)
]
group_metrics(ckpt_names)

Unnamed: 0_level_0,dataset_names,exact_scores,f1_scores
ckpt_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
qa_mbert_synthetic_all_top1,dev-v1.1-es,20.748592,30.297740
qa_mbert_synthetic_all_top1,dev-v1.1-ja,18.296530,18.804178
qa_mbert_synthetic_all_top1,dev-v1.1,26.280221,34.164148
qa_mbert_synthetic_all_top1,mlqa-v1-test-es-es,16.512880,28.633313
qa_mbert_synthetic_all_top1,mlqa-v1-test-hi-hi,12.601326,22.409264
...,...,...,...
qa_mbert_extsquad5k_all_top5,tydiqa-goldp-v1.1-dev-russian,33.081761,52.812081
qa_mbert_extsquad5k_all_top5,xquad.es,39.863131,54.956471
qa_mbert_extsquad5k_all_top5,xquad.hi,29.819820,40.777961
qa_mbert_extsquad5k_all_top5,xquad.ru,35.596171,49.524947


In [11]:
def generate_table(columns, ckpts, df_key, metric_label, metric_name, table_part):
    text =  "\\begin{table}[ht]\n"
    text += "\\small\n"
    text += "\\centering\n"
    text += "\\addtolength{\\leftskip} {-2cm}"
    text += "\\addtolength{\\rightskip}{-2cm}"
    text += "\\renewcommand{\\arraystretch}{1.2}\n"
    
    n_columns = '|'.join([''.join(['l'] * len(x)) for _, x in columns.items()])
    text += "\\begin{tabular}{|l|%s|}\n" % n_columns
    
    text += "\\hline\n"
    text += "\\textbf{Trained on} $\\rightarrow$ &\n"
    
    column_headers = []
    for column_name, column_data in columns.items():
        column_headers += ["  \\multicolumn{%d}{c|}{\\textbf{%s}}" % (len(column_data), column_name)]
    text += " &\n".join(column_headers)
    text += "\\\\\n"
    text += "\\hline\n"
    text += "\\hline\n"
    
    text += "\\textbf{Tested on} $\\downarrow$ &\n"
    
    column_headers = []
    for _, column_data in columns.items():
        for _, subcol_data in column_data.items():
            column_headers += ["  \\multicolumn{1}{c|}{\\textbf{\\begin{tabular}[c]{@{}c@{}}%s\\end{tabular}}}" % (subcol_data['col_name'])]
    text += " &\n".join(column_headers)
    text += "\\\\\n"
    text += "\\hline\n"
    text += "\\hline\n"
    
    ## ROWS ##
    
    for ckpt_data in ckpts:
        text += "  \\textbf{\\begin{tabular}[c]{@{}l@{}}%s\\end{tabular}} &\n" % ckpt_data['row_name']
        
        if ckpt_data['for_all'] and not ckpt_data['multi_model_version']:
            df = group_metrics([ckpt_data['ckpts']['all']])

        row_scores = []
        for column_name, column_data in columns.items():
            for dataset_name, subcol_data in column_data.items():
                if ckpt_data['for_all']:
                    if ckpt_data['multi_model_version']:
                        multi_score = []
                        for k_top in [1, 2, 3, 5]:
                            if 'all' in ckpt_data['ckpts']:
                                k_ckpt_name = '%s%d' % (ckpt_data['ckpts']['all'], k_top)
                                df = group_metrics([k_ckpt_name])
                                item_score = df.loc[df['dataset_names'] == dataset_name][df_key].to_numpy()
                                item_score = ('%.4f' % item_score[0]) if len(item_score) > 0 else '-'
                            else:
                                item_score = '-'
                            multi_score += ["%s" % item_score]
                        multi_score = " \\\\".join(multi_score)
                        row_scores += ["\\begin{tabular}[c]{@{}l@{}}%s\\end{tabular}" % multi_score]
                    else:
                        item_score = df.loc[df['dataset_names'] == dataset_name][df_key].to_numpy()
                        item_score = ('%.4f' % item_score[0]) if len(item_score) > 0 else '-'
                        row_scores += ["%s" % item_score]
                else:
                    multi_score = []
                    for k_top in [1, 2, 3, 5]:
                        if column_name in ckpt_data['ckpts']:
                            k_ckpt_name = '%s%d' % (ckpt_data['ckpts'][column_name], k_top)
                            df = group_metrics([k_ckpt_name])
                            item_score = df.loc[df['dataset_names'] == dataset_name][df_key].to_numpy()
                            if len(item_score) == 0:
                                print(df.loc[df['dataset_names'] == dataset_name])
                                raise Exception()
                            item_score = ('%.4f' % item_score[0]) if len(item_score) > 0 else '-'
                        else:
                            item_score = '-'
                        multi_score += ["%s" % item_score]
                    multi_score = " \\\\".join(multi_score)
                    row_scores += ["\\begin{tabular}[c]{@{}l@{}}%s\\end{tabular}" % multi_score]
        text += " &\n".join(row_scores)
        text += "\\\\\n"
        text += "\\hline\n"
        if ckpt_data['insert_hline']:
            text += "\\hline \n"
    
    ## END TABLE ##
    
    text += "\\end{tabular}\n"
    text += "\\label{table:%sScoresModels%s}\n" % (metric_label, table_part)
    text += "\\caption{(Part %s of 2) %s of each model on each non-filtered synthetic dataset.}\n" % (table_part, metric_name)
    text += "\\end{table}\n"
    
    return text

In [12]:
# Row descriptions for generate_table, built from compact tuples:
#   1. single checkpoints evaluated on every column,
#   2. per-language checkpoint prefixes (one 'top<k>' model family per language),
#   3. 'all' checkpoint prefixes evaluated on every column.
# 'insert_hline' is True on the last row of each of the three groups.
ckpts = [
    dict(
        ckpts={'all': ckpt_name},
        row_name=row_name,
        insert_hline=insert_hline,
        multi_model_version=False,
        for_all=True,
    )
    for ckpt_name, row_name, insert_hline in (
        ('qa_mbert_squad', 'mBERT + SQuAD', False),
        ('qa_mbert_squad5k', 'mBERT + SQuAD-5k', False),
        ('qa_xlm_squad', 'XLM-R + SQuAD', False),
        ('qa_xlm_squad5k', 'XLM-R + SQuAD-5k', True),
    )
] + [
    dict(
        ckpts={
            language: 'qa_mbert_%s_%s_top' % (variant, lang_code)
            for language, lang_code in (
                ('Spanish', 'es'),
                ('Russian', 'ru'),
                ('Vietnamese', 'vi'),
                ('Japanese', 'ja'),
            )
        },
        row_name=row_name,
        insert_hline=insert_hline,
        multi_model_version=True,
        for_all=False,
    )
    for variant, row_name, insert_hline in (
        ('synthetic', 'mBERT +\\\\Synth\\_*', False),
        ('extsquad', 'mBERT + SQuAD +\\\\Synth\\_*', False),
        ('extsquad5k', 'mBERT + SQuAD-5k +\\\\Synth\\_*', True),
    )
] + [
    dict(
        ckpts={'all': 'qa_mbert_%s_all_top' % variant},
        row_name=row_name,
        insert_hline=insert_hline,
        multi_model_version=True,
        for_all=True,
    )
    for variant, row_name, insert_hline in (
        ('synthetic', 'mBERT +\\\\all Synth\\_*', False),
        ('extsquad', 'mBERT + SQuAD +\\\\all Synth\\_*', False),
        ('extsquad5k', 'mBERT + SQuAD-5k +\\\\all Synth\\_*', True),
    )
]

In [13]:
# Part 1 of the F1 table: Spanish and Russian test sets.
# Keys of the inner dicts are the dataset names produced by group_metrics;
# 'col_name' is the LaTeX header of the column ('\\\\' is a line break).
columns = {
    'Spanish': {
        'mlqa-v1-test-es-es': {
            'col_name': 'MLQA\\\\(es-es)',
        },
        'xquad.es': {
            'col_name': 'XQuAD\\\\(es)',
        },
        'dev-v1.1-es': {
            'col_name': 'Carrino et al.\\\\(SQuAD-es)',
        },
    },
    'Russian': {
        'tydiqa-goldp-v1.1-dev-russian': {
            'col_name': 'TyDiQA\\\\(ru)',
        },
        'xquad.ru': {
            # Was '(tu)': the dataset is the Russian ('ru') split of XQuAD.
            'col_name': 'XQuAD\\\\(ru)',
        },
    },
}

metric_label = 'F1Scores'
metric_name = 'F1-Scores'
df_key = 'f1_scores'
table_part = '1'

table_1 = generate_table(columns, ckpts, df_key, metric_label, metric_name, table_part)

In [14]:
# Part 2 of the F1 table: Hindi, Vietnamese and Japanese test sets.
# Inner keys are dataset names; 'col_name' is the LaTeX column header.
columns = {
    'Hindi': {
        'mlqa-v1-test-hi-hi': dict(col_name='MLQA\\\\(hi-hi)'),
        'xquad.hi': dict(col_name='XQuAD\\\\(hi)'),
    },
    'Vietnamese': {
        'mlqa-v1-test-vi-vi': dict(col_name='MLQA\\\\(vi-vi)'),
        'xquad.vi': dict(col_name='XQuAD\\\\(vi)'),
    },
    'Japanese': {
        'dev-v1.1-ja': dict(col_name='Akari Asai et al.\\\\(SQuAD-ja)'),
    },
}

metric_label, metric_name = 'F1Scores', 'F1-Scores'
df_key, table_part = 'f1_scores', '2'

table_2 = generate_table(
    columns=columns,
    ckpts=ckpts,
    df_key=df_key,
    metric_label=metric_label,
    metric_name=metric_name,
    table_part=table_part,
)

In [15]:
# Part 1 of the Exact-Match table: Spanish and Russian test sets.
# Keys of the inner dicts are the dataset names produced by group_metrics;
# 'col_name' is the LaTeX header of the column ('\\\\' is a line break).
columns = {
    'Spanish': {
        'mlqa-v1-test-es-es': {
            'col_name': 'MLQA\\\\(es-es)',
        },
        'xquad.es': {
            'col_name': 'XQuAD\\\\(es)',
        },
        'dev-v1.1-es': {
            'col_name': 'Carrino et al.\\\\(SQuAD-es)',
        },
    },
    'Russian': {
        'tydiqa-goldp-v1.1-dev-russian': {
            'col_name': 'TyDiQA\\\\(ru)',
        },
        'xquad.ru': {
            # Was '(tu)': the dataset is the Russian ('ru') split of XQuAD.
            'col_name': 'XQuAD\\\\(ru)',
        },
    },
}

metric_label = 'ExactMatch'
metric_name = 'Exact-Match scores'
df_key = 'exact_scores'
table_part = '1'

table_3 = generate_table(columns, ckpts, df_key, metric_label, metric_name, table_part)

In [16]:
# Part 2 of the Exact-Match table: Hindi, Vietnamese and Japanese test sets.
# Inner keys are dataset names; 'col_name' is the LaTeX column header.
columns = {
    'Hindi': {
        'mlqa-v1-test-hi-hi': dict(col_name='MLQA\\\\(hi-hi)'),
        'xquad.hi': dict(col_name='XQuAD\\\\(hi)'),
    },
    'Vietnamese': {
        'mlqa-v1-test-vi-vi': dict(col_name='MLQA\\\\(vi-vi)'),
        'xquad.vi': dict(col_name='XQuAD\\\\(vi)'),
    },
    'Japanese': {
        'dev-v1.1-ja': dict(col_name='Akari Asai et al.\\\\(SQuAD-ja)'),
    },
}

metric_label, metric_name = 'ExactMatch', 'Exact-Match scores'
df_key, table_part = 'exact_scores', '2'

table_4 = generate_table(
    columns=columns,
    ckpts=ckpts,
    df_key=df_key,
    metric_label=metric_label,
    metric_name=metric_name,
    table_part=table_part,
)

In [17]:
# Dump the LaTeX source of the four tables, one after the other.
print(table_1, table_2, table_3, table_4, sep='\n')

\begin{table}[ht]
\small
\centering
\addtolength{\leftskip} {-2cm}\addtolength{\rightskip}{-2cm}\renewcommand{\arraystretch}{1.2}
\begin{tabular}{|l|lll|ll|}
\hline
\textbf{Trained on} $\rightarrow$ &
  \multicolumn{3}{c|}{\textbf{Spanish}} &
  \multicolumn{2}{c|}{\textbf{Russian}}\\
\hline
\hline
\textbf{Tested on} $\downarrow$ &
  \multicolumn{1}{c|}{\textbf{\begin{tabular}[c]{@{}c@{}}MLQA\\(es-es)\end{tabular}}} &
  \multicolumn{1}{c|}{\textbf{\begin{tabular}[c]{@{}c@{}}XQuAD\\(es)\end{tabular}}} &
  \multicolumn{1}{c|}{\textbf{\begin{tabular}[c]{@{}c@{}}Carrino et al.\\(SQuAD-es)\end{tabular}}} &
  \multicolumn{1}{c|}{\textbf{\begin{tabular}[c]{@{}c@{}}TyDiQA\\(ru)\end{tabular}}} &
  \multicolumn{1}{c|}{\textbf{\begin{tabular}[c]{@{}c@{}}XQuAD\\(tu)\end{tabular}}}\\
\hline
\hline
  \textbf{\begin{tabular}[c]{@{}l@{}}mBERT + SQuAD\end{tabular}} &
62.8497 &
73.2986 &
68.6989 &
64.7889 &
68.7534\\
\hline
  \textbf{\begin{tabular}[c]{@{}l@{}}mBERT + SQuAD-5k\end{tabular}} &
50.5748 &
5