In [3]:
## In this notebook, we evaluate the curriculum learning
## models using predicted tags.
import os
import glob
from pathlib import Path
from mtrfg.utils import load_json, dict_as_readable_string

In [4]:
## get all dir paths for curriculum learning models.

curriculum_models = glob.glob("../saved_models/curriculum_learning_models/train_bin_by_bin/*/model.pth")
curriculum_models_dirs = [Path(model_path).parent.absolute() for model_path in curriculum_models]
curriculum_models_dirs.sort()

In [5]:
## test file path
test_file = '/data/Ahmad/Silver/Parser/test.conllu'
test_file_out = 'silver_test_out.json'

In [None]:
## let's evaluate! 
for dir_path in curriculum_models_dirs:
    cmd = f'CUDA_VISIBLE_DEVICE=2 python3 ../tools/evaluate.py --opts --dir_name "{dir_path}" --test_file "{test_file}" --batch_size "16" --save_file_name "{test_file_out}" --use_pred_tags "True"'
    if os.system(cmd) != 0:
        print("Evaluation failed.")

In [7]:
## time to gather the results

result_files_list = [os.path.join(model_dir, test_file_out) for model_dir in curriculum_models_dirs]
all_results = {f'bin_{i}': load_json(file_path) for i, file_path in enumerate(result_files_list)}

## let's get the results
parser_labeled_results = {key: value['parser_labeled_results'] for key, value in all_results.items()}
tagger_results = {key: value['tagger_results'] for key, value in all_results.items()}
parser_unlabeled_results = {key: value['parser_unlabeled_results'] for key, value in all_results.items()}

In [8]:
print(f'Parser labeled results:\n{dict_as_readable_string(parser_labeled_results)}')
print(f'Parser unlabeled results:\n{dict_as_readable_string(parser_unlabeled_results)}')
print(f'Tagger results:\n{dict_as_readable_string(tagger_results)}')

Parser labeled results:
bin_0 : OrderedDict([('P', 0.3821), ('R', 0.4161), ('F1', 0.3984)])
bin_1 : OrderedDict([('P', 0.5244), ('R', 0.5759), ('F1', 0.5489)])
bin_2 : OrderedDict([('P', 0.463), ('R', 0.6766), ('F1', 0.5498)])
bin_3 : OrderedDict([('P', 0.4728), ('R', 0.7122), ('F1', 0.5683)])
bin_4 : OrderedDict([('P', 0.3803), ('R', 0.7305), ('F1', 0.5002)])
bin_5 : OrderedDict([('P', 0.3331), ('R', 0.7491), ('F1', 0.4612)])
bin_6 : OrderedDict([('P', 0.3803), ('R', 0.7704), ('F1', 0.5093)])
bin_7 : OrderedDict([('P', 0.3138), ('R', 0.7854), ('F1', 0.4484)])
bin_8 : OrderedDict([('P', 0.3206), ('R', 0.7894), ('F1', 0.456)])
bin_9 : OrderedDict([('P', 0.3243), ('R', 0.7952), ('F1', 0.4607)])
Parser unlabeled results:
bin_0 : OrderedDict([('P', 0.4196), ('R', 0.4569), ('F1', 0.4374)])
bin_1 : OrderedDict([('P', 0.5627), ('R', 0.6179), ('F1', 0.589)])
bin_2 : OrderedDict([('P', 0.4941), ('R', 0.722), ('F1', 0.5867)])
bin_3 : OrderedDict([('P', 0.5038), ('R', 0.759), ('F1', 0.6056)])
bin