In [93]:
import os
import glob
import math
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from run_ner_no_trainer import get_meta_splits, ner_target_langs

# Task: NER

In [94]:
# Root output directory of the NER experiment. The trailing slash is kept because
# the same value is handed to get_meta_splits unchanged.
OUTPUT_DIR = '/mnt/xtb/knarik/outputs/ner_3e-5_lr/'

# Directory with the per-language LMS score files, and the JSON file holding the
# evaluation results of every model.
LMS_models_dir = os.path.join(OUTPUT_DIR, 'LMS_models')
path = os.path.join(OUTPUT_DIR, 'all_models_evals.json')

In [95]:
# Language codes of the NER targets, taken from the keys of the imported
# ner_target_langs dict. Materialised as a list so the notebook holds a stable,
# indexable snapshot instead of a live view onto that dict.
TARGET_LANGS = list(ner_target_langs.keys())
TARGET_LANGS

dict_keys(['de', 'es', 'nl', 'zh'])

In [96]:
# Load the evaluation results of every model, keyed by model name.
# Read-only mode is sufficient: the file is only parsed, and 'r+' would
# needlessly require write permission on it.
with open(path, 'r', encoding='utf-8') as f:
    evals_dict = json.load(f)

In [97]:
# Preview the first two records only; rendering every model's scores floods the
# notebook output without adding information.
dict(list(evals_dict.items())[:2])

{'model-seed_12128-lr_7e-05-ep_4': {'En_Dev': 0.9533557046979867,
  'All-Target_de': 0.5334703997097417,
  '100-Target_de': 0.7865707434052759,
  'Pivot_de': 0.7987908558473454,
  'Test_de': 0.6949045391326336,
  'All-Target_es': 0.6949045391326336,
  '100-Target_es': 0.7761194029850746,
  'Pivot_es': 0.7987908558473454,
  'Test_es': 0.7545072522705708,
  'All-Target_nl': 0.7545072522705708,
  '100-Target_nl': 0.8229166666666666,
  'Pivot_nl': 0.7587653581060833,
  'Test_nl': 0.7659247889485803,
  'All-Target_zh': 0.7659247889485803,
  '100-Target_zh': 0.578125,
  'Pivot_zh': 0.7587653581060833,
  'Test_zh': 0.5618483609975811},
 'model-seed_12762-lr_7e-05-ep_4': {'En_Dev': 0.9534435030618237,
  'All-Target_de': 0.517085119412125,
  '100-Target_de': 0.784431137724551,
  'Pivot_de': 0.809813617344998,
  'Test_de': 0.6953900263785957,
  'All-Target_es': 0.6953900263785957,
  '100-Target_es': 0.7576374745417516,
  'Pivot_es': 0.809813617344998,
  'Test_es': 0.7435512488057867,
  'All-Targ

In [98]:
# Number of models that have evaluation results.
len(evals_dict)

237

In [99]:
# Collect the names of the models that belong to the meta-test split.
meta_split_seed = 8

meta_dict = get_meta_splits(OUTPUT_DIR, seed=meta_split_seed)
meta_test = meta_dict['test']

# The last '/'-separated component of each path is the model name, which is
# the key used to look the model up in evals_dict.
meta_test_names = [model_path.rsplit('/', 1)[-1] for model_path in meta_test]

len(meta_test_names)

57

In [100]:
# Restrict the evaluation records to the meta-test models.
# A model name without an entry in evals_dict raises KeyError.
evals_dict_test = {}
for name in meta_test_names:
    evals_dict_test[name] = evals_dict[name]

len(evals_dict_test)

57

In [101]:
# Table 3: one row per target language; the model-selection cells below each add a score column.
df = pd.DataFrame({'Lang': list(TARGET_LANGS)})
df

Unnamed: 0,Lang
0,de
1,es
2,nl
3,zh


# En-Dev

In [102]:
def _en_dev_score(entry):
    """Return the En_Dev score of a (model_name, scores) pair."""
    return entry[1]['En_Dev']


# Ascending sort, so the final element is the (model_name, scores) pair with
# the highest En_Dev score.
ranked_by_en_dev = sorted(evals_dict_test.items(), key=_en_dev_score)
model_by_en_dev = ranked_by_en_dev[-1]

model_by_en_dev

('model-seed_21400-lr_3e-05-ep_7',
 {'En_Dev': 0.9602348993288591,
  'All-Target_de': 0.5583277140930546,
  '100-Target_de': 0.7784090909090909,
  'Pivot_de': 0.7953348382242288,
  'Test_de': 0.707449560269012,
  'All-Target_es': 0.707449560269012,
  '100-Target_es': 0.7736263736263738,
  'Pivot_es': 0.7953348382242288,
  'Test_es': 0.7391895567038346,
  'All-Target_nl': 0.7391895567038346,
  '100-Target_nl': 0.8268156424581005,
  'Pivot_nl': 0.7773951792586592,
  'Test_nl': 0.7773372554035043,
  'All-Target_zh': 0.7773372554035043,
  '100-Target_zh': 0.5623003194888179,
  'Pivot_zh': 0.7773951792586592,
  'Test_zh': 0.5942923898531376})

In [103]:
# For every language, record the Test score (as a percentage, two decimals) of
# the single model selected by its En_Dev score.
en_dev_column = f"En_Dev_{meta_split_seed}"
en_dev_scores = model_by_en_dev[1]

for lang in TARGET_LANGS:
    test_pct = round(en_dev_scores[f'Test_{lang}'] * 100, 2)
    df.loc[df.Lang == lang, en_dev_column] = test_pct

print("En_Dev")
df

En_Dev


Unnamed: 0,Lang,En_Dev_8
0,de,70.74
1,es,73.92
2,nl,77.73
3,zh,59.43


# Pivot-Dev

In [104]:
# For each target language, keep the (model_name, scores) pair with the highest
# Pivot_<lang> score: the last element of an ascending sort.
pivot_lang_models = {
    target_lang: sorted(
        evals_dict_test.items(),
        key=lambda item: item[1][f'Pivot_{target_lang}'],
    )[-1]
    for target_lang in TARGET_LANGS
}

In [105]:
# Inspect the (model_name, scores) pair selected for each target language by its Pivot score.
pivot_lang_models

{'de': ('model-seed_10640-lr_3e-05-ep_6',
  {'En_Dev': 0.9591340102374759,
   'All-Target_de': 0.5644625509244751,
   '100-Target_de': 0.7949999999999999,
   'Pivot_de': 0.8228090317642557,
   'Test_de': 0.7051993291188233,
   'All-Target_es': 0.7051993291188233,
   '100-Target_es': 0.6652806652806654,
   'Pivot_es': 0.8228090317642557,
   'Test_es': 0.7335751713018944,
   'All-Target_nl': 0.7335751713018944,
   '100-Target_nl': 0.8,
   'Pivot_nl': 0.7745217745217745,
   'Test_nl': 0.7936054068105018,
   'All-Target_zh': 0.7936054068105018,
   '100-Target_zh': 0.6503067484662577,
   'Pivot_zh': 0.7745217745217745,
   'Test_zh': 0.5987608426270136}),
 'es': ('model-seed_10640-lr_3e-05-ep_6',
  {'En_Dev': 0.9591340102374759,
   'All-Target_de': 0.5644625509244751,
   '100-Target_de': 0.7949999999999999,
   'Pivot_de': 0.8228090317642557,
   'Test_de': 0.7051993291188233,
   'All-Target_es': 0.7051993291188233,
   '100-Target_es': 0.6652806652806654,
   'Pivot_es': 0.8228090317642557,
   

In [106]:
# For every language, record the Test score (as a percentage, two decimals) of
# the model selected by its Pivot score.
# NOTE(review): the double underscore in the column name looks unintentional
# next to the other columns - confirm before renaming, it is kept as-is here.
pivot_column = f"Pivot__{meta_split_seed}"

for target_lang in TARGET_LANGS:
    _, pivot_scores = pivot_lang_models[target_lang]
    test_pct = round(pivot_scores[f'Test_{target_lang}'] * 100, 2)
    df.loc[df.Lang == target_lang, pivot_column] = test_pct

print("Pivot_Dev")
df

Pivot_Dev


Unnamed: 0,Lang,En_Dev_8,Pivot__8
0,de,70.74,70.52
1,es,73.92,73.36
2,nl,77.73,78.7
3,zh,59.43,60.62


# 100-Target

In [107]:
# For each target language, keep the (model_name, scores) pair with the highest
# 100-Target_<lang> score: the last element of an ascending sort.
target_100_lang_models = {
    target_lang: sorted(
        evals_dict_test.items(),
        key=lambda item: item[1][f'100-Target_{target_lang}'],
    )[-1]
    for target_lang in TARGET_LANGS
}

In [108]:
# For every language, record the Test score (as a percentage, two decimals) of
# the model selected by its 100-Target score.
target_100_column = f"100-Target_{meta_split_seed}"

for target_lang in TARGET_LANGS:
    _, target_100_scores = target_100_lang_models[target_lang]
    test_pct = round(target_100_scores[f'Test_{target_lang}'] * 100, 2)
    df.loc[df.Lang == target_lang, target_100_column] = test_pct

print("100-Target")
df

100-Target


Unnamed: 0,Lang,En_Dev_8,Pivot__8,100-Target_8
0,de,70.74,70.52,70.49
1,es,73.92,73.36,76.0
2,nl,77.73,78.7,78.88
3,zh,59.43,60.62,59.88


# All Target

In [109]:
# For each target language, keep the (model_name, scores) pair with the highest
# All-Target_<lang> score: the last element of an ascending sort.
target_all_lang_models = {
    target_lang: sorted(
        evals_dict_test.items(),
        key=lambda item: item[1][f'All-Target_{target_lang}'],
    )[-1]
    for target_lang in TARGET_LANGS
}


In [110]:
# For every language, record the Test score (as a percentage, two decimals) of
# the model selected by its All-Target score.
all_target_column = f"All-Target_{meta_split_seed}"

for target_lang in TARGET_LANGS:
    _, all_target_scores = target_all_lang_models[target_lang]
    test_pct = round(all_target_scores[f'Test_{target_lang}'] * 100, 2)
    df.loc[df.Lang == target_lang, all_target_column] = test_pct

print("All-Target")
df

All-Target


Unnamed: 0,Lang,En_Dev_8,Pivot__8,100-Target_8,All-Target_8
0,de,70.74,70.52,70.49,70.95
1,es,73.92,73.36,76.0,74.47
2,nl,77.73,78.7,78.88,80.07
3,zh,59.43,60.62,59.88,54.67


# LMS

In [111]:
# For each target language, find the model with the highest LMS score in that
# language's meta-test score file.
LMS_lang_models = {}

for target_lang in TARGET_LANGS:
    scores_csv = os.path.join(LMS_models_dir, f'meta_test_scores_{target_lang}.csv')
    df_lms = pd.read_csv(scores_csv)

    # idxmax yields the label of the first row holding the maximum, so several
    # models tying for the top score no longer make the lookup raise.
    best_row = df_lms['score'].idxmax()
    LMS_lang_models[target_lang] = df_lms.loc[best_row, 'model_name']

LMS_lang_models

{'de': 'model-seed_27477-lr_5e-05-ep_4',
 'es': 'model-seed_27477-lr_5e-05-ep_4',
 'nl': 'model-seed_27477-lr_5e-05-ep_4',
 'zh': 'model-seed_14770-lr_7e-05-ep_4'}

In [112]:
# For every language, record the Test score (as a percentage, two decimals) of
# the model selected by LMS.
# The scores are read from evals_dict, the dict loaded from all_models_evals.json;
# the previously referenced all_models_evals_dict is not defined anywhere in this
# notebook and raises NameError on a fresh kernel.
for target_lang in TARGET_LANGS:
    max_model_name = LMS_lang_models[target_lang]
    lms_scores = evals_dict[max_model_name]
    df.loc[df.Lang == target_lang, "LMS"] = round(lms_scores[f'Test_{target_lang}'] * 100, 2)
df

Unnamed: 0,Lang,En_Dev_8,Pivot__8,100-Target_8,All-Target_8,LMS
0,de,70.74,70.52,70.49,70.95,70.17
1,es,73.92,73.36,76.0,74.47,75.14
2,nl,77.73,78.7,78.88,80.07,77.88
3,zh,59.43,60.62,59.88,54.67,56.22
